Changes

Jump to: navigation, search

Team Lion

7,312 bytes added, 11:09, 5 April 2016
Source Code
{{GPU621/DPS921 Index | 20161}}
= Image Processing Performance Using Parallel Programming =
This assignment introduces simple image processing using OpenMP parallel programming. It also compares the performance obtained with 1 to 16 cores.
== Team Lion Member ==
Byungho Kim
 
== Basic Concept of Image Processing ==
[[Image:GPU621-LION-2.png|640px]]
 
[[Image:GPU621-LION-3.png|640px]]
[[Image:GPU621-LION-4.png|640px|Edge Handling(Extend)]]
 
 
== False Sharing Consideration ==
*Threading each pixel – The worst
*Threading each row – Good
*Threading multiple rows band – The best
 
 
== Test Environment Consideration ==
*How to test performance on a computer with more than 4 cores.
 
 
== Azure VM Environment ==
*Cloud service from Microsoft
*You can rent many kinds of Virtual Machines.
*Access remotely using Windows Remote Desktop Connection.
 
[[Image:GPU621-LION-6-2.png|640px|Edge Handling(Extend)]]
 
[[Image:GPU621-LION-6.png|640px|Edge Handling(Extend)]]
 
 
== Test Result ==
[[Image:GPU621-LION-5.png|640px|Edge Handling(Extend)]]
 
[[Image:GPU621-LION-7.png|640px|Edge Handling(Extend)]]
 
[[Image:GPU621-LION-8.png|640px|Edge Handling(Extend)]]
 
 
== Conclusion ==
*Performance depends on number of cores.
*OpenMP is easy to use; it is much easier than the normal (POSIX) thread method.
 
 
== Source Code ==
 
<syntaxhighlight lang="cpp" line="1" >
#include "corona.h"
#include <omp.h>
#include <iostream>
#include <cstdlib>
 
 
/**
 * Single-threaded 2-D convolution over a 4-bytes-per-pixel image.
 *
 * The output image is obtained from corona::CloneImage(image); only the
 * first three bytes of every pixel are overwritten, the fourth byte is
 * left exactly as CloneImage produced it.
 *
 * Sample coordinates that fall outside the image are clamped to the
 * nearest edge row/column. Each channel sum is clamped to [0, 255]
 * before being stored.
 *
 * @param image             source image (indexed with a stride of 4 bytes per pixel)
 * @param kernal            kernel weights, row-major, kernalSizeWidth * kernalSizeHeight entries
 * @param kernalSizeWidth   kernel width in samples
 * @param kernalSizeHeight  kernel height in samples
 * @return the newly created, filtered image
 */
corona::Image* calcConvolutitonSerial(corona::Image* image, const float* kernal, const int kernalSizeWidth, const int kernalSizeHeight) {
    const int cols = image->getWidth();
    const int rows = image->getHeight();
    const unsigned char* src = static_cast<const unsigned char*>(image->getPixels());

    corona::Image* result = corona::CloneImage(image);
    unsigned char* dst = static_cast<unsigned char*>(result->getPixels());

    // Offset from the kernel's top-left sample to its centre.
    const int halfW = kernalSizeWidth / 2;
    const int halfH = kernalSizeHeight / 2;

    for (int row = 0; row < rows; ++row) {
        for (int col = 0; col < cols; ++col) {

            // One running sum per colour channel (bytes 0, 1, 2 of the pixel).
            float sum[3] = { 0, 0, 0 };

            for (int ky = 0; ky < kernalSizeHeight; ++ky) {
                // Clamp the sampled row to the image.
                int sy = row - halfH + ky;
                if (sy < 0) sy = 0;
                else if (sy >= rows) sy = rows - 1;

                for (int kx = 0; kx < kernalSizeWidth; ++kx) {
                    // Clamp the sampled column to the image.
                    int sx = col - halfW + kx;
                    if (sx < 0) sx = 0;
                    else if (sx >= cols) sx = cols - 1;

                    const float weight = kernal[ky * kernalSizeWidth + kx];
                    const unsigned char* sample = src + (sy * cols + sx) * 4;

                    for (int c = 0; c < 3; ++c) {
                        sum[c] += sample[c] * weight;
                    }
                }
            }

            unsigned char* out = dst + (row * cols + col) * 4;
            for (int c = 0; c < 3; ++c) {
                float value = sum[c];
                if (value > 255) value = 255;
                else if (value < 0) value = 0;
                out[c] = static_cast<unsigned char>(value);
            }
        }
    }

    return result;
}
 
 
/**
 * OpenMP-parallel 2-D convolution over a 4-bytes-per-pixel image.
 *
 * Rows are distributed across threads with a static schedule, so each
 * thread works on its own band of rows. Every row in [0, height) is
 * processed, including when the height is not a multiple of the thread
 * count or is smaller than the thread count. (Splitting by
 * height / numThreads by hand would leave the trailing
 * height % numThreads rows unfiltered.)
 *
 * The output image is obtained from corona::CloneImage(image); only the
 * first three bytes of every pixel are overwritten. Sample coordinates
 * outside the image are clamped to the nearest edge, and each channel
 * sum is clamped to [0, 255] before being stored.
 *
 * Note: this function calls omp_set_dynamic(0) and omp_set_num_threads(),
 * which change the OpenMP settings for subsequent parallel regions too.
 *
 * @param image             source image (indexed with a stride of 4 bytes per pixel)
 * @param kernal            kernel weights, row-major, kernalSizeWidth * kernalSizeHeight entries
 * @param kernalSizeWidth   kernel width in samples
 * @param kernalSizeHeight  kernel height in samples
 * @param requestedCore     number of threads to request; values below 1 are treated as 1
 * @return the newly created, filtered image
 */
corona::Image* calcConvolutitonParallel(corona::Image* image, const float* kernal, const int kernalSizeWidth, const int kernalSizeHeight, int requestedCore) {
    const int width = image->getWidth();
    const int height = image->getHeight();
    const unsigned char* pixels = static_cast<const unsigned char*>(image->getPixels());

    corona::Image* result = corona::CloneImage(image);
    unsigned char* resultPixels = static_cast<unsigned char*>(result->getPixels());

    // omp_set_num_threads requires a positive value.
    if (requestedCore < 1) requestedCore = 1;

    omp_set_dynamic(0);
    omp_set_num_threads(requestedCore);

    // Offset from the kernel's top-left sample to its centre.
    const int halfWidth = kernalSizeWidth / 2;
    const int halfHeight = kernalSizeHeight / 2;

    // Each iteration writes only to its own output row, so no
    // synchronisation is needed between threads. Everything declared
    // inside the loop body is private to the executing thread.
#pragma omp parallel for schedule(static)
    for (int py = 0; py < height; py++) {
        for (int px = 0; px < width; px++) {

            float accuR = 0;
            float accuG = 0;
            float accuB = 0;

            for (int kernalY = 0; kernalY < kernalSizeHeight; kernalY++) {
                // Clamp the sampled row to the image.
                int targetPixelY = py - halfHeight + kernalY;
                if (targetPixelY < 0) targetPixelY = 0;
                else if (targetPixelY >= height) targetPixelY = height - 1;

                for (int kernalX = 0; kernalX < kernalSizeWidth; kernalX++) {
                    // Clamp the sampled column to the image.
                    int targetPixelX = px - halfWidth + kernalX;
                    if (targetPixelX < 0) targetPixelX = 0;
                    else if (targetPixelX >= width) targetPixelX = width - 1;

                    const float weight = kernal[kernalY * kernalSizeWidth + kernalX];
                    const int sourceIndex = (targetPixelY * width + targetPixelX) * 4;

                    accuR += pixels[sourceIndex] * weight;
                    accuG += pixels[sourceIndex + 1] * weight;
                    accuB += pixels[sourceIndex + 2] * weight;
                }
            }

            const int destIndex = (py * width + px) * 4;

            if (accuR > 255) accuR = 255;
            else if (accuR < 0) accuR = 0;
            resultPixels[destIndex] = static_cast<unsigned char>(accuR);

            if (accuG > 255) accuG = 255;
            else if (accuG < 0) accuG = 0;
            resultPixels[destIndex + 1] = static_cast<unsigned char>(accuG);

            if (accuB > 255) accuB = 255;
            else if (accuB < 0) accuB = 0;
            resultPixels[destIndex + 2] = static_cast<unsigned char>(accuB);
        }
    }

    return result;
}
 
 
int main() {
 
corona::Image* image = corona::OpenImage("sample.jpg", corona::PF_R8G8B8A8);
if (!image) {
return 1;
// error!
}
 
int width = image->getWidth();
int height = image->getHeight();
void* pixels = image->getPixels();
 
 
float Laplacian9x9Kernal[] = {
0, -1, -1, -2, -2, -2, -1, -1, 0,
-1, -2, -4, -5, -5, -5, -4, -2, -1,
-1, -4, -5, -3, -0, -3, -5, -4, -1,
-2, -5, -3, 12, 24, 12, -3, -5, -2,
-2, -5, -0, 24, 40, 24, -0, -5, -2,
-2, -5, -3, 12, 24, 12, -3, -5, -2,
-1, -4, -5, -3, -0, -3, -5, -4, -1,
-1, -2, -4, -5, -5, -5, -4, -2, -1,
0, -1, -1, -2, -2, -2, -1, -1, 0 };
 
 
double start;
double end;
 
 
start = omp_get_wtime();
corona::Image* resultImage = calcConvolutitonSerial(image, Laplacian9x9Kernal, 9, 9);
end = omp_get_wtime();
std::cout << "Serial processing time = " << end - start << std::endl;
 
//corona::SaveImage("resultSerial.png", corona::FF_AUTODETECT, resultImage);
 
for (int coreNumber = 1; coreNumber <= 16; coreNumber++) {
start = omp_get_wtime();
resultImage = calcConvolutitonParallel(image, Laplacian9x9Kernal, 9, 9, coreNumber);
end = omp_get_wtime();
std::cout << "Parllel processing time (" << coreNumber << " core(s)) = " << end - start << std::endl;
}
//corona::SaveImage("resultParallel.png", corona::FF_AUTODETECT, resultImage);
}
 
</syntaxhighlight>

Navigation menu