Open main menu

CDOT Wiki β

Changes

GPU610/TeamKappa

4,317 bytes added, 16:00, 7 November 2015
Assignment 2
== Assignment 2 ==
 
=== Image Manipulation: Rotation ===
 
==== Benchmark ====
 
{| class="wikitable" border="1"
! image size !! algorithm time - gpu !! full time - gpu !! algorithm time - scalar !! full time - scalar
|-
| 500 x 600 || 109ms || 117ms || 119ms || 127ms
|-
| 800 x 800 || 113ms || 127ms || 254ms || 256ms
|-
| 1600 x 900 || 122ms || 168ms || 567ms || 592ms
|-
| 1920 x 1080 || 130ms || 203ms || 930ms || 972ms
|-
| 2747 x 1545 || 153ms || 293ms || 1704ms || 1783ms
|}
 
[[Image:Gpu610_rotateprofile.jpg|481px| ]]
 
==== Scalar Code ====
 
// Rotates `source` by `theta` degrees about its centre (scalar/CPU version)
// and writes the rotated picture back into `source`.
//
// theta  - rotation angle in degrees.
// source - image to rotate; read for its dimensions (N rows, M columns),
//          its Q value and its pixels, then overwritten with the result.
//
// Two timings are reported through profile(): the forward-mapping loop alone
// and the whole call.
void Image::rotateImage(int theta, Image& source) {
    steady_clock::time_point first_start = steady_clock::now();

    int rows = source.N;
    int cols = source.M;
    // NOTE(review): the hole-filling pass below treats 0 as "never written",
    // so this presumably relies on Image's constructor zeroing pixelVal - confirm.
    Image temp(rows, cols, source.Q);

    steady_clock::time_point second_start = steady_clock::now();
    float rads = (theta * 3.14159265) / 180.0;

    // The angle is the same for every pixel, so evaluate the trig functions
    // once instead of four times per pixel. `auto` keeps whatever type the
    // in-scope cos/sin overload returns, so the arithmetic is unchanged.
    const auto cos_t = cos(rads);
    const auto sin_t = sin(rads);
    const int half_rows = rows / 2;
    const int half_cols = cols / 2;

    // Forward mapping: each source pixel is pushed to its rotated position.
    for (int r = 0; r < rows; r++) {
        for (int c = 0; c < cols; c++) {
            int new_row = (int)(half_rows + ((r - half_rows) * cos_t) - ((c - half_cols) * sin_t));
            int new_col = (int)(half_cols + ((r - half_rows) * sin_t) + ((c - half_cols) * cos_t));
            // NOTE(review): inBounds() is a member of *this, not of temp;
            // presumably both have the same dimensions - confirm.
            if (inBounds(new_row, new_col)) {
                temp.pixelVal[new_row][new_col] = source.pixelVal[r][c];
            }
        }
    }
    profile("rotate - cuda", steady_clock::now() - second_start);

    // Hole filling: truncation to int leaves some destination pixels unwritten;
    // patch each with its right-hand neighbour. The last column has no
    // right-hand neighbour, so it is left as is instead of reading past the row.
    for (int r = 0; r < rows; r++) {
        for (int c = 0; c + 1 < cols; c++) {
            if (temp.pixelVal[r][c] == 0) {
                temp.pixelVal[r][c] = temp.pixelVal[r][c + 1];
            }
        }
    }

    source = temp;
    profile("rotate - full", steady_clock::now() - first_start);
}
 
==== GPU Code ====
 
/**
 * Rotates an image by `rads` radians about its centre, one thread per source pixel.
 *
 * Launch layout: 1-D grid of 1-D blocks with at least rows * cols threads in
 * total; surplus threads exit immediately. No shared memory is used.
 *
 * Both buffers hold rows * cols ints laid out as `rows * col + row`.
 *
 * old_image  - source pixels (read only).
 * temp_image - destination pixels. Only positions that some source pixel maps
 *              to are written; every other element keeps the value it had
 *              before the launch, so the caller must initialise this buffer.
 * rads       - rotation angle in radians.
 * rows, cols - image dimensions.
 *
 * Several source pixels may map to the same destination after truncation to
 * int; which of them ends up stored is not defined by this kernel.
 */
__global__ void kernel_rotate(int * old_image, int * temp_image, float rads, int rows, int cols) {
    int index = blockIdx.x * blockDim.x + threadIdx.x;
    // Valid pixel indices are 0 .. rows * cols - 1. The comparison must be >=,
    // otherwise the thread with index == rows * cols reads one element past
    // the end of old_image.
    if (index >= rows * cols) {
        return;
    }

    // Recover the 2-D position from the flat `rows * col + row` index.
    int row = index % rows;
    int col = index / rows;

    int new_row = (int)(rows / 2 + ((row - rows / 2) * cos(rads)) - ((col - cols / 2) * sin(rads)));
    int new_col = (int)(cols / 2 + ((row - rows / 2) * sin(rads)) + ((col - cols / 2) * cos(rads)));

    // Pixels rotated outside the frame are dropped.
    if (new_row >= 0 && new_row < rows && new_col >= 0 && new_col < cols) {
        temp_image[rows * new_col + new_row] = old_image[index];
    }
}
 
// Runs kernel_rotate on the device for one image.
//
// h_old  - host buffer with rows * cols source pixels, laid out `rows * c + r`.
// h_temp - host buffer that receives the rows * cols rotated pixels.
// rads   - rotation angle in radians.
//
// Returns true when the rotated pixels were copied into h_temp. On any CUDA
// failure a message is printed, h_temp must be considered invalid, and false
// is returned. Device memory is always released before returning.
static bool rotateOnDevice(const int * h_old, int * h_temp, float rads, int rows, int cols) {
    const size_t bytes = (size_t)rows * (size_t)cols * sizeof(int);
    const int nb = (rows * cols + ntpb - 1) / ntpb;  // ceil(pixels / threads per block)
    int * d_old_image = nullptr;
    int * d_temp_image = nullptr;
    bool ok = false;

    // Allocation success is judged by the return code; the pointer value is
    // not a reliable indicator after a failed cudaMalloc.
    if (cudaMalloc((void**)&d_old_image, bytes) != cudaSuccess) {
        cout << "CUDA: out of memory (d_old_image)" << endl;
    } else if (cudaMalloc((void**)&d_temp_image, bytes) != cudaSuccess) {
        cout << "CUDA: out of memory (d_temp_image)" << endl;
    } else {
        // The kernel only writes destinations that some source pixel maps to,
        // and the caller treats 0 as "hole", so the output must start zeroed.
        cudaError_t status = cudaMemset(d_temp_image, 0, bytes);
        if (status == cudaSuccess) {
            status = cudaMemcpy(d_old_image, h_old, bytes, cudaMemcpyHostToDevice);
        }
        if (status == cudaSuccess) {
            dim3 dGrid(nb);
            dim3 dBlock(ntpb);
            kernel_rotate <<<dGrid, dBlock>>>(d_old_image, d_temp_image, rads, rows, cols);
            status = cudaGetLastError();  // launch-configuration errors
        }
        if (status == cudaSuccess) {
            status = cudaDeviceSynchronize();  // errors raised while the kernel ran
        }
        if (status == cudaSuccess) {
            status = cudaMemcpy(h_temp, d_temp_image, bytes, cudaMemcpyDeviceToHost);
        }
        if (status != cudaSuccess) {
            cout << "CUDA: " << cudaGetErrorString(status) << endl;
        }
        ok = (status == cudaSuccess);
    }

    // cudaFree accepts a null pointer, so this is safe on every path.
    cudaFree(d_temp_image);
    cudaFree(d_old_image);
    return ok;
}

// Rotates `source` by `theta` degrees about its centre on the GPU and writes
// the rotated picture back into `source`.
//
// theta  - rotation angle in degrees.
// source - image to rotate; read for its dimensions (N rows, M columns) and
//          pixels, then overwritten in place with the result.
//
// If any CUDA call fails, a message is printed and `source` is left untouched.
// Two timings are reported through profile(): the device section (allocation,
// copies and kernel) and the whole call.
void Image::rotateImage(int theta, Image & source) {
    steady_clock::time_point first_start = steady_clock::now();

    int rows = source.N;
    int cols = source.M;
    if (rows <= 0 || cols <= 0) {
        return;  // nothing to rotate; also avoids launching an empty grid
    }

    const size_t count = (size_t)rows * (size_t)cols;
    int * h_temp_image = new int[count];
    int * h_old_image = new int[count];

    // Flatten the 2-D pixel array into the `rows * c + r` layout the kernel expects.
    for (int r = 0; r < rows; r++) {
        for (int c = 0; c < cols; c++) {
            h_old_image[rows * c + r] = source.pixelVal[r][c];
        }
    }

    steady_clock::time_point second_start = steady_clock::now();
    const bool rotated = rotateOnDevice(h_old_image, h_temp_image,
                                        (theta * 3.14159265) / 180.0, rows, cols);
    if (rotated) {
        profile("rotate - cuda", steady_clock::now() - second_start);

        // Copy back while filling holes: a 0 pixel (never written by the kernel)
        // takes its right-hand neighbour's value, except in the last column,
        // which has no such neighbour.
        for (int r = 0; r < rows; r++) {
            for (int c = 0; c < cols; c++) {
                if (h_temp_image[rows * c + r] == 0 && c + 1 < cols)
                    source.pixelVal[r][c] = h_temp_image[rows * (c + 1) + r];
                else
                    source.pixelVal[r][c] = h_temp_image[rows * c + r];
            }
        }
        profile("rotate - full", steady_clock::now() - first_start);
    }

    delete[] h_old_image;
    delete[] h_temp_image;
}
 
== Assignment 3 ==
1
edit