Changes

The parallelizing Express

1,904 bytes added, 18:14, 21 February 2017

→‎Analysis

==== ''' Analysis ''' ====

// Excerpt: converts every pixel of target in place and accumulates running
// totals over the converted pixels. Only one closing brace appears below, so
// the end of the outer loop is not part of this snippet.
// v is a scratch pixel; mt and st are the totals, both started at zero.
Color3d v;

Color3d mt = Color3d(0.0, 0.0, 0.0);

Color3d st = Color3d(0.0, 0.0, 0.0);

// Visit every pixel of target, row by row.
for(int y=0; y<target.rows; y++) {

for(int x=0; x<target.cols; x++) {

v = target.at<Color3d>(y, x);

// mRGB2LMS is defined outside this snippet; presumably the RGB -> LMS
// conversion matrix (inferred from the name) -- confirm.
v = mRGB2LMS * v;

// Take log10 of each of the 3 channels; a channel that is not greater than
// eps yields log10(eps) instead, presumably so log10 never receives zero or
// a negative value.
for(int c=0; c<3; c++) v(c) = v(c) > eps ? log10(v(c)) : log10(eps);

// Write the converted pixel back in place (mLMS2lab: presumably LMS -> lab).
target.at<Color3d>(y, x) = mLMS2lab * v;

// mt accumulates the converted pixel values.
mt = mt + target.at<Color3d>(y, x);

// st accumulates pixel * pixel through Color3d's operator*; presumably a
// per-channel square, which depends on how that operator is defined.
st = st + target.at<Color3d>(y, x) * target.at<Color3d>(y, x);

}

// Excerpt: applies to refer the same in-place conversion and accumulation
// that the target image receives. Only one closing brace appears below, so
// the end of the outer loop is not part of this snippet.
// mr and sr are the running totals for refer, both started at zero.
Color3d mr = Color3d(0.0, 0.0, 0.0);

Color3d sr = Color3d(0.0, 0.0, 0.0);

// Visit every pixel of refer, row by row.
for(int y=0; y<refer.rows; y++) {

for(int x=0; x<refer.cols; x++) {

// v is the scratch pixel declared before this snippet.
v = refer.at<Color3d>(y, x);

// mRGB2LMS is defined outside this snippet; presumably the RGB -> LMS
// conversion matrix (inferred from the name) -- confirm.
v = mRGB2LMS * v;

// Take log10 of each of the 3 channels; a channel that is not greater than
// eps yields log10(eps) instead, presumably so log10 never receives zero or
// a negative value.
for(int c=0; c<3; c++) v(c) = v(c) > eps ? log10(v(c)) : log10(eps);

// Write the converted pixel back in place (mLMS2lab: presumably LMS -> lab).
refer.at<Color3d>(y, x) = mLMS2lab * v;

// mr accumulates the converted pixel values.
mr = mr + refer.at<Color3d>(y, x);

// sr accumulates pixel * pixel through Color3d's operator*; presumably a
// per-channel square, which depends on how that operator is defined.
sr = sr + refer.at<Color3d>(y, x) * refer.at<Color3d>(y, x);

}

// Excerpt: rewrites every channel of every target pixel using the target
// totals (mt, st) and the reference totals (mr, sr). The single closing brace
// below ends the channel loop; the ends of the x and y loops are not shown.
// NOTE(review): in the snippets shown, mt/st/mr/sr are only ever plain sums,
// while this formula uses them like a mean and a standard deviation; they are
// presumably normalised in code that is not part of these excerpts -- confirm.
for(int y=0; y<target.rows; y++) {

for(int x=0; x<target.cols; x++) {

for(int c=0; c<3; c++) {

// Channel c of pixel x is read as the double at column x*3+c, i.e. the row is
// addressed as three consecutive doubles per pixel.
double val = target.at<double>(y, x*3+c);

// Subtract mt(c), divide by st(c), scale by sr(c), then add mr(c).
// NOTE(review): nothing visible here guards against st(c) being zero.
target.at<double>(y, x*3+c) = (val - mt(c)) / st(c) * sr(c) + mr(c);

}

// Transform back from lab to RGB
// Excerpt: only one closing brace appears below, so the end of the outer loop
// is not part of this snippet. v is the scratch pixel declared earlier.

for(int y=0; y<target.rows; y++) {

for(int x=0; x<target.cols; x++) {

v = target.at<Color3d>(y, x);

// mlab2LMS is defined outside this snippet; presumably the lab -> LMS
// conversion matrix (inferred from the name) -- confirm.
v = mlab2LMS * v;

// Raise 10 to each of the 3 channels, the inverse of a log10 step; a channel
// that is not greater than -5.0 yields eps instead of pow(10.0, v(c)).
for(int c=0; c<3; c++) v(c) = v(c) > -5.0 ? pow(10.0, v(c)) : eps;

// Write the pixel back in place (mLMS2RGB: presumably LMS -> RGB).
target.at<Color3d>(y, x) = mLMS2RGB * v;

}

As the call graph of the profile data shows, 98% of the program's run time is spent in the main file. Within main, multiple calls are made to the overloaded Color3d operators (defined in the Color3d file). This makes the program ideal for parallelization, because many of the steps used to process the pixel colours can be delegated to an Nvidia GPU. The code snippets above, taken from main, illustrate this: the Color3d operators are called repeatedly inside triple-nested for loops.

----

Jjsunga

49

edits

CDOT Wiki β

Changes

The parallelizing Express

CDOT Wiki ^β