1
edit
Changes
TudyBert
,→Assignment 3
Here's the flat profile for 50 runs of enlarging a 512px x 512px image 4 times:
% cumulative self self total
19.64 2.75 0.55 Image::enlargeImage(int, Image&)
1.07 2.78 0.03 4194304 0.00 0.00 Image::getPixelVal(int, int)
The code for enlargeImage():
int rows, cols, gray;
}
oldImage = tempImage;
The four for loops look like they could be parallelized, since they only serve as counters. From the flat profile, it seems that the majority of the time is spent in the overloaded operator=() method. The code for this is:
<div>
N <span style='color:#808030; '>=</span> oldImage<span style='color:#808030; '>.</span>N<span style='color:#800080; '>;</span>
M <span style='color:#808030; '>=</span> oldImage<span style='color:#808030; '>.</span>M<span style='color:#800080; '>;</span>
Q <span style='color:#808030; '>=</span> oldImage<span style='color:#808030; '>.</span>Q<span style='color:#800080; '>;</span>
<span style='color:#800000; font-weight:bold; '>if</span><span style='color:#808030; '>(</span>dim1 <span style='color:#808030; '>!</span><span style='color:#808030; '>=</span> <span style='color:#7d0045; '>NULL</span><span style='color:#808030; '>)</span>
<span style='color:#800080; '>{</span>
<span style='color:#800000; font-weight:bold; '>delete</span><span style='color:#808030; '>[</span><span style='color:#808030; '>]</span> dim1<span style='color:#800080; '>;</span>
<span style='color:#800080; '>}</span>
pixelVal <span style='color:#808030; '>=</span> <span style='color:#800000; font-weight:bold; '>new</span> <span style='color:#800000; font-weight:bold; '>int</span><span style='color:#808030; '>*</span> <span style='color:#808030; '>[</span>N<span style='color:#808030; '>]</span><span style='color:#800080; '>;</span>
dim1 <span style='color:#808030; '>=</span> <span style='color:#800000; font-weight:bold; '>new</span> <span style='color:#800000; font-weight:bold; '>int</span><span style='color:#808030; '>[</span>N<span style='color:#808030; '>*</span>M<span style='color:#808030; '>]</span><span style='color:#800080; '>;</span>
<span style='color:#800000; font-weight:bold; '>for</span><span style='color:#808030; '>(</span><span style='color:#800000; font-weight:bold; '>int</span> i <span style='color:#808030; '>=</span> <span style='color:#008c00; '>0</span><span style='color:#800080; '>;</span> i <span style='color:#808030; '><</span> N<span style='color:#800080; '>;</span> i<span style='color:#808030; '>+</span><span style='color:#808030; '>+</span><span style='color:#808030; '>)</span>
<span style='color:#800080; '>{</span>
pixelVal<span style='color:#808030; '>[</span>i<span style='color:#808030; '>]</span> <span style='color:#808030; '>=</span> <span style='color:#800000; font-weight:bold; '>new</span> <span style='color:#800000; font-weight:bold; '>int</span> <span style='color:#808030; '>[</span>M<span style='color:#808030; '>]</span><span style='color:#800080; '>;</span>
<span style='color:#800000; font-weight:bold; '>for</span><span style='color:#808030; '>(</span><span style='color:#800000; font-weight:bold; '>int</span> j <span style='color:#808030; '>=</span> <span style='color:#008c00; '>0</span><span style='color:#800080; '>;</span> j <span style='color:#808030; '><</span> M<span style='color:#800080; '>;</span> j<span style='color:#808030; '>+</span><span style='color:#808030; '>+</span><span style='color:#808030; '>)</span>
<span style='color:#800080; '>{</span>
pixelVal<span style='color:#808030; '>[</span>i<span style='color:#808030; '>]</span><span style='color:#808030; '>[</span>j<span style='color:#808030; '>]</span> <span style='color:#808030; '>=</span> oldImage<span style='color:#808030; '>.</span>pixelVal<span style='color:#808030; '>[</span>i<span style='color:#808030; '>]</span><span style='color:#808030; '>[</span>j<span style='color:#808030; '>]</span><span style='color:#800080; '>;</span>
dim1<span style='color:#808030; '>[</span>i<span style='color:#808030; '>*</span>N <span style='color:#808030; '>+</span> j<span style='color:#808030; '>]</span> <span style='color:#808030; '>=</span> oldImage<span style='color:#808030; '>.</span>dim1<span style='color:#808030; '>[</span>i<span style='color:#808030; '>*</span>N <span style='color:#808030; '>+</span> j<span style='color:#808030; '>]</span><span style='color:#800080; '>;</span>
<span style='color:#800080; '>}</span>
<span style='color:#800080; '>}</span>
</div>
// Excerpt of a CUDA device routine (its signature is not part of this excerpt).
// Each thread handles the source row selected by its global x index and copies every
// pixel of that row into a factor-by-factor square of the enlarged `result` array.
// Global thread index along x; used below as the source row number.
int idx = blockIdx.x * blockDim.x + threadIdx.x;
int enlargeRow, enlargeCol;
// NOTE(review): `pixel` is declared __shared__, yet every thread assigns its own value to it
// below and no __syncthreads() is visible in this excerpt — confirm whether a per-thread
// local variable was intended.
__shared__ int pixel;
// Walk the nj entries of source row idx.
for(int j = 0; j < nj; j++)
{
// Source pixel at (row idx, column j); `work` is indexed with a row length of nj.
pixel = work[idx * nj + j];
// Top-left corner of the destination square for this source pixel.
enlargeRow = idx * factor;
enlargeCol = j * factor;
// Fill the factor rows by factor columns that start at that corner.
for(int c = enlargeRow; c < (enlargeRow + factor); c++)
{
for(int d = enlargeCol; d < (enlargeCol + factor); d++)
{
// Destination row stride is (total threads along x) * factor.
// NOTE(review): this presumably relies on nj equalling blockDim.x * gridDim.x
// (a square image with one thread per row) — confirm against the launch configuration.
result[d + c * blockDim.x * gridDim.x * factor] = pixel;
}
}
}
<span style='color:#7f0055; font-weight:bold; '>int</span> k = idx + jdx * blockDim.x * gridDim.x;
<span style='color:#7f0055; font-weight:bold; '>int</span> enlargeRow, enlargeCol;
enlargeRow = idx * factor;
enlargeCol = jdx * factor;
{
}