Changes

← Older edit

Team Lion

7,312 bytes added, 11:09, 5 April 2016

→‎Source Code

= Image Processing Performance Using Parallel Programming =

This assignment introduces simple image processing using OpenMP parallel programming. It also compares the performance obtained when running on 1 to 16 cores.

== Team Lion Member ==

Byungho Kim

== Basic Concept of Image Processing ==

[[Image:GPU621-LION-2.png|640px]]

[[Image:GPU621-LION-3.png|640px]]

[[Image:GPU621-LION-4.png|640px|Edge Handling(Extend)]]

== False Sharing Consideration ==

*Threading each pixel – The worst

*Threading each row – Good

*Threading multiple rows band – The best

== Test Environment Consideration ==

*How to test performance on a computer with more than 4 cores.

== Azure VM Environment ==

*Cloud service from Microsoft

*You can rent many kinds of virtual machines.

*Access remotely using Windows Remote Desktop Connection.

[[Image:GPU621-LION-6-2.png|640px|Edge Handling(Extend)]]

[[Image:GPU621-LION-6.png|640px|Edge Handling(Extend)]]

== Test Result ==

[[Image:GPU621-LION-5.png|640px|Edge Handling(Extend)]]

[[Image:GPU621-LION-7.png|640px|Edge Handling(Extend)]]

[[Image:GPU621-LION-8.png|640px|Edge Handling(Extend)]]

== Conclusion ==

*Performance depends on the number of cores.

*OpenMP is easy to use; it is much easier than the normal (POSIX) thread approach.

== Source Code ==

<syntaxhighlight lang="cpp">

#include "corona.h"

#include <omp.h>

#include <iostream>

#include <cstdlib>

/**
 * Convolves an image with a 2-D kernel on a single thread.
 *
 * Every output pixel is the weighted sum of the source pixels under the
 * kernel, with the kernel centred on that pixel. Source coordinates that fall
 * outside the image are clamped to the nearest edge pixel ("extend" edge
 * handling). Only the first three bytes of each pixel (R, G, B) are filtered;
 * the fourth byte keeps whatever value CloneImage copied from the source.
 *
 * @param image            Source image, read as 4 bytes per pixel (main opens
 *                         it as PF_R8G8B8A8). Its pixels are not modified.
 * @param kernal           Row-major array of
 *                         kernalSizeWidth * kernalSizeHeight weights.
 * @param kernalSizeWidth  Number of kernel columns.
 * @param kernalSizeHeight Number of kernel rows.
 * @return A clone of the image holding the filtered pixels. This function
 *         never releases it, so the caller must. If CloneImage returns null,
 *         that null is returned unchanged.
 */
corona::Image* calcConvolutitonSerial(corona::Image* image, const float* kernal, const int kernalSizeWidth, const int kernalSizeHeight) {
    const int width = image->getWidth();
    const int height = image->getHeight();
    const unsigned char* pixels = static_cast<const unsigned char*>(image->getPixels());

    // The result is a separate buffer, so already-filtered pixels never feed
    // back into the neighbourhoods of pixels that are still to be computed.
    corona::Image* result = corona::CloneImage(image);
    if (!result) {
        return result;  // nothing to write into; hand the failure to the caller
    }
    unsigned char* resultPixels = static_cast<unsigned char*>(result->getPixels());

    const int bytesPerPixel = 4;
    const int filteredChannels = 3;  // R, G, B; the 4th byte is left as cloned
    const int halfKernelWidth = kernalSizeWidth / 2;
    const int halfKernelHeight = kernalSizeHeight / 2;

    for (int py = 0; py < height; py++) {
        for (int px = 0; px < width; px++) {
            float accu[filteredChannels] = { 0.0f, 0.0f, 0.0f };

            for (int kernalY = 0; kernalY < kernalSizeHeight; kernalY++) {
                // Clamp the sampled row to the image (extend edge handling).
                int targetPixelY = py - halfKernelHeight + kernalY;
                if (targetPixelY < 0) targetPixelY = 0;
                else if (targetPixelY >= height) targetPixelY = height - 1;

                for (int kernalX = 0; kernalX < kernalSizeWidth; kernalX++) {
                    // Clamp the sampled column the same way.
                    int targetPixelX = px - halfKernelWidth + kernalX;
                    if (targetPixelX < 0) targetPixelX = 0;
                    else if (targetPixelX >= width) targetPixelX = width - 1;

                    const float weight = kernal[kernalY * kernalSizeWidth + kernalX];
                    const unsigned char* source = pixels + (targetPixelY * width + targetPixelX) * bytesPerPixel;
                    for (int channel = 0; channel < filteredChannels; channel++) {
                        accu[channel] += source[channel] * weight;
                    }
                }
            }

            // Saturate each sum to the 0..255 byte range before storing it.
            unsigned char* target = resultPixels + (py * width + px) * bytesPerPixel;
            for (int channel = 0; channel < filteredChannels; channel++) {
                float value = accu[channel];
                if (value > 255) value = 255;
                else if (value < 0) value = 0;
                target[channel] = static_cast<unsigned char>(value);
            }
        }
    }

    return result;
}

/**
 * Convolves an image with a 2-D kernel, splitting the rows across OpenMP
 * threads.
 *
 * The per-pixel computation matches the serial version: a weighted sum of the
 * source pixels under the kernel, with out-of-image coordinates clamped to the
 * nearest edge pixel ("extend" edge handling). Only the first three bytes of
 * each pixel (R, G, B) are filtered; the fourth byte keeps whatever value
 * CloneImage copied from the source.
 *
 * Rows are distributed with a static schedule, so each thread gets one
 * contiguous band of rows and every row is covered. Computing the band as
 * height / threads by hand would leave the trailing height % threads rows
 * unprocessed, and all rows unprocessed when there are more threads than rows.
 *
 * Side effect: calls omp_set_dynamic(0) and omp_set_num_threads(), which
 * change the OpenMP settings used by later parallel regions too.
 *
 * @param image            Source image, read as 4 bytes per pixel (main opens
 *                         it as PF_R8G8B8A8). Its pixels are not modified.
 * @param kernal           Row-major array of
 *                         kernalSizeWidth * kernalSizeHeight weights.
 * @param kernalSizeWidth  Number of kernel columns.
 * @param kernalSizeHeight Number of kernel rows.
 * @param requestedCore    Number of threads to request; values below 1 are
 *                         treated as 1.
 * @return A clone of the image holding the filtered pixels. This function
 *         never releases it, so the caller must. If CloneImage returns null,
 *         that null is returned unchanged.
 */
corona::Image* calcConvolutitonParallel(corona::Image* image, const float* kernal, const int kernalSizeWidth, const int kernalSizeHeight, int requestedCore) {
    const int width = image->getWidth();
    const int height = image->getHeight();
    const unsigned char* pixels = static_cast<const unsigned char*>(image->getPixels());

    // Threads read only from `pixels` and write only to their own rows of the
    // separate result buffer, so the loop needs no synchronisation.
    corona::Image* result = corona::CloneImage(image);
    if (!result) {
        return result;  // nothing to write into; hand the failure to the caller
    }
    unsigned char* resultPixels = static_cast<unsigned char*>(result->getPixels());

    const int bytesPerPixel = 4;
    const int filteredChannels = 3;  // R, G, B; the 4th byte is left as cloned
    const int halfKernelWidth = kernalSizeWidth / 2;
    const int halfKernelHeight = kernalSizeHeight / 2;

    // Disable dynamic adjustment so the requested thread count is what the
    // benchmark actually measures.
    omp_set_dynamic(0);
    omp_set_num_threads(requestedCore > 0 ? requestedCore : 1);

    // Without a chunk size, schedule(static) gives each thread one contiguous
    // block of rows, and the runtime takes care of the remainder rows.
#pragma omp parallel for schedule(static)
    for (int py = 0; py < height; py++) {
        for (int px = 0; px < width; px++) {
            float accu[filteredChannels] = { 0.0f, 0.0f, 0.0f };

            for (int kernalY = 0; kernalY < kernalSizeHeight; kernalY++) {
                // Clamp the sampled row to the image (extend edge handling).
                int targetPixelY = py - halfKernelHeight + kernalY;
                if (targetPixelY < 0) targetPixelY = 0;
                else if (targetPixelY >= height) targetPixelY = height - 1;

                for (int kernalX = 0; kernalX < kernalSizeWidth; kernalX++) {
                    // Clamp the sampled column the same way.
                    int targetPixelX = px - halfKernelWidth + kernalX;
                    if (targetPixelX < 0) targetPixelX = 0;
                    else if (targetPixelX >= width) targetPixelX = width - 1;

                    const float weight = kernal[kernalY * kernalSizeWidth + kernalX];
                    const unsigned char* source = pixels + (targetPixelY * width + targetPixelX) * bytesPerPixel;
                    for (int channel = 0; channel < filteredChannels; channel++) {
                        accu[channel] += source[channel] * weight;
                    }
                }
            }

            // Saturate each sum to the 0..255 byte range before storing it.
            unsigned char* target = resultPixels + (py * width + px) * bytesPerPixel;
            for (int channel = 0; channel < filteredChannels; channel++) {
                float value = accu[channel];
                if (value > 255) value = 255;
                else if (value < 0) value = 0;
                target[channel] = static_cast<unsigned char>(value);
            }
        }
    }

    return result;
}

int main() {

corona::Image* image = corona::OpenImage("sample.jpg", corona::PF_R8G8B8A8);

if (!image) {

return 1;

// error!

}

int width = image->getWidth();

int height = image->getHeight();

void* pixels = image->getPixels();

float Laplacian9x9Kernal[] = {

0, -1, -1, -2, -2, -2, -1, -1, 0,

-1, -2, -4, -5, -5, -5, -4, -2, -1,

-1, -4, -5, -3, -0, -3, -5, -4, -1,

-2, -5, -3, 12, 24, 12, -3, -5, -2,

-2, -5, -0, 24, 40, 24, -0, -5, -2,

-2, -5, -3, 12, 24, 12, -3, -5, -2,

-1, -4, -5, -3, -0, -3, -5, -4, -1,

-1, -2, -4, -5, -5, -5, -4, -2, -1,

0, -1, -1, -2, -2, -2, -1, -1, 0 };

double start;

double end;

start = omp_get_wtime();

corona::Image* resultImage = calcConvolutitonSerial(image, Laplacian9x9Kernal, 9, 9);

end = omp_get_wtime();

std::cout << "Serial processing time = " << end - start << std::endl;

//corona::SaveImage("resultSerial.png", corona::FF_AUTODETECT, resultImage);

for (int coreNumber = 1; coreNumber <= 16; coreNumber++) {

start = omp_get_wtime();

resultImage = calcConvolutitonParallel(image, Laplacian9x9Kernal, 9, 9, coreNumber);

end = omp_get_wtime();

std::cout << "Parllel processing time (" << coreNumber << " core(s)) = " << end - start << std::endl;

}

//corona::SaveImage("resultParallel.png", corona::FF_AUTODETECT, resultImage);

}

</syntaxhighlight>

Byungho Kim

1

edit

Changes

Team Lion

Navigation menu

Personal tools

Namespaces

Variants

Views

More

Search

Navigation

get involved with CDOT

courses

course projects

links

Tools