Changes

Jump to: navigation, search

Team Lion

7,551 bytes added, 11:09, 5 April 2016
Source Code
{{GPU621/DPS921 Index | 20161}}
= Image Processing Performance Using Parallel Programming =
This assignment introduces simple image processing using OpenMP parallel programming. It also compares the performance of the same convolution when run on 1 to 16 cores.
== Team Lion Member ==
Byungho Kim
 
== Basic Concept of Image Processing ==
 
* Convolution between image and filter(kernel)
* Each pixel is processed independently of the other pixels
* Can be used for edge finding, blurring and image enhancement.
[[Image:GPU621-LION-1.png|640px]]

== Convolution ==
[[Image:GPU621-LION-2.png|640px]]
[[Image:GPU621-LION-3.png|640px]]

== Edge Handling ==
All pixels on the edges of the image need special treatment.
* Extend
* Tile
* Crop
[[Image:GPU621-LION-4.png|640px|Edge Handling(Extend)]]

== False Sharing Consideration ==
* Threading each pixel – The worst
* Threading each row – Good
* Threading a band of multiple rows – The best

== Test Environment Consideration ==
* How to test performance on a computer with more than 4 cores.

== Azure VM Environment ==
* Cloud service from Microsoft
* You can rent many kinds of Virtual Machines.
* Access remotely using Windows Remote Desktop Connection.
[[Image:GPU621-LION-6-2.png|640px|Edge Handling(Extend)]]
[[Image:GPU621-LION-6.png|640px|Edge Handling(Extend)]]

== Test Result ==
[[Image:GPU621-LION-5.png|640px|Edge Handling(Extend)]]
[[Image:GPU621-LION-7.png|640px|Edge Handling(Extend)]]
[[Image:GPU621-LION-8.png|640px|Edge Handling(Extend)]]

== Conclusion ==
* Performance depends on the number of cores.
* OpenMP is easy to use. It is much easier than the normal (POSIX) thread method.

== Source Code == <syntaxhighlight lang="c" line="1" >#include "corona.h"#include <omp.h>#include <iostream>#include <cstdlib>  corona::Image* calcConvolutitonSerial(corona::Image* image, const float* kernal, const int kernalSizeWidth, const int kernalSizeHeight) {  int width = image->getWidth(); int height = image->getHeight(); unsigned char* pixels = (unsigned char*)(image->getPixels());  corona::Image* result = corona::CloneImage(image); unsigned char* resultPixels = (unsigned char*)(result->getPixels());   for (int py = 0; py < height; py++) {  int pixelIndex = 0; int targetPixelX = 0; int targetPixelY = 0;   for (int px = 0; px < width; px++) {  float accuR = 0; float accuG = 0; float accuB = 0; float accuA = 0;  for (int kernalY = 0; kernalY < kernalSizeHeight; kernalY++) { for (int kernalX = 0; kernalX < kernalSizeWidth; kernalX++) {  targetPixelX = px - kernalSizeWidth / 2 + kernalX; if (targetPixelX < 0) targetPixelX = 0; else if (targetPixelX >= width) targetPixelX = width - 1;  targetPixelY = py - kernalSizeHeight / 2 + kernalY; if (targetPixelY < 0) targetPixelY = 0; else if (targetPixelY >= height) targetPixelY = height - 1;  pixelIndex = (targetPixelY * width + targetPixelX) * 4;  accuR += pixels[pixelIndex++] * kernal[kernalY * kernalSizeWidth + kernalX]; accuG += pixels[pixelIndex++] * kernal[kernalY * kernalSizeWidth + kernalX]; accuB += pixels[pixelIndex++] * kernal[kernalY * kernalSizeWidth + kernalX]; } }  pixelIndex = (py * width + px) * 4;  if (accuR > 255) accuR = 255; else if (accuR < 0) accuR = 0; resultPixels[pixelIndex++] = accuR;  if (accuG > 255) accuG = 255; else if (accuG < 0) accuG = 0; resultPixels[pixelIndex++] = accuG;  if (accuB > 255) accuB = 255; else if (accuB < 0) accuB = 0; resultPixels[pixelIndex++] = accuB; } }  return result;}  corona::Image* calcConvolutitonParallel(corona::Image* image, const float* kernal, const int kernalSizeWidth, const int kernalSizeHeight, int requestedCore) { int width = image->getWidth(); int 
height = image->getHeight(); unsigned char* pixels = (unsigned char*)(image->getPixels());  corona::Image* result = corona::CloneImage(image); unsigned char* resultPixels = (unsigned char*)(result->getPixels());  omp_set_dynamic(0); omp_set_num_threads(requestedCore); #pragma omp parallel { int tid = omp_get_thread_num(); int nt = omp_get_num_threads();  int jumpRange = height / nt;  for (int py = jumpRange * tid; py < jumpRange * (tid + 1) && py < height; py++) {  int pixelIndex = 0; int targetPixelX = 0; int targetPixelY = 0;   for (int px = 0; px < width; px++) {  float accuR = 0; float accuG = 0; float accuB = 0; float accuA = 0;  for (int kernalY = 0; kernalY < kernalSizeHeight; kernalY++) { for (int kernalX = 0; kernalX < kernalSizeWidth; kernalX++) {  targetPixelX = px - kernalSizeWidth / 2 + kernalX; if (targetPixelX < 0) targetPixelX = 0; else if (targetPixelX >= width) targetPixelX = width - 1;  targetPixelY = py - kernalSizeHeight / 2 + kernalY; if (targetPixelY < 0) targetPixelY = 0; else if (targetPixelY >= height) targetPixelY = height - 1;  pixelIndex = (targetPixelY * width + targetPixelX) * 4;  accuR += pixels[pixelIndex++] * kernal[kernalY * kernalSizeWidth + kernalX]; accuG += pixels[pixelIndex++] * kernal[kernalY * kernalSizeWidth + kernalX]; accuB += pixels[pixelIndex++] * kernal[kernalY * kernalSizeWidth + kernalX]; } }  pixelIndex = (py * width + px) * 4;  if (accuR > 255) accuR = 255; else if (accuR < 0) accuR = 0; resultPixels[pixelIndex++] = accuR;  if (accuG > 255) accuG = 255; else if (accuG < 0) accuG = 0; resultPixels[pixelIndex++] = accuG;  if (accuB > 255) accuB = 255; else if (accuB < 0) accuB = 0; resultPixels[pixelIndex++]= accuB; } } }  return result;}  int main() {  corona::Image* image = corona::OpenImage("sample.jpg", corona::PF_R8G8B8A8); if (!image) { return 1; // error! 
}  int width = image->getWidth(); int height = image->getHeight(); void* pixels = image->getPixels();   float Laplacian9x9Kernal[]= { 0, -1, -1, -2, -2, -2, -1, -1, 0, -1, -2, -4, -5, -5, -5, -4, -2, -1, -1, -4, -5, -3, -0, -3, -5, -4, -1, -2, -5, -3, 12, 24, 12, -3, -5, -2, -2, -5, -0, 24, 40, 24, -0, -5, -2, -2, -5, -3, 12, 24, 12, -3, -5, -2, -1, -4, -5, -3, -0, -3, -5, -4, -1, -1, -2, -4, -5, -5, -5, -4, -2, -1, 0, -1, -1, -2, -2, -2, -1, -1, 0 };   double start; double end;   start = omp_get_wtime(); corona::Image* resultImage = calcConvolutitonSerial(image, Laplacian9x9Kernal, 9, 9); end = omp_get_wtime(); std::cout << "Serial processing time = " << end - start << std::endl;  //corona::SaveImage("resultSerial.png", corona::FF_AUTODETECT, resultImage);  for (int coreNumber = 1; coreNumber <= 16; coreNumber++) { start = omp_get_wtime(); resultImage = calcConvolutitonParallel(image, Laplacian9x9Kernal, 9, 9, coreNumber); end = omp_get_wtime(); std::cout << "Parllel processing time (" << coreNumber << " core(s)) = " << end - start << std::endl; } //corona::SaveImage("resultParallel.png", corona::FF_AUTODETECT, resultImage);} </syntaxhighlight>

Navigation menu