Changes

← Older edit

GPU610/gpuchill

2,777 bytes added, 22:29, 4 April 2019

→‎Beginning Information

===== Results =====

You need many billions, perhaps even trillions, of points to reach high precision in the final result, but using just 2 billion points already causes the program to take over 30 seconds to run. The most intensive part of the program is the loop, which executed 2 billion times in my profiling run and can be fully parallelized. The profile reports that 100% of the execution time is spent in the loop; since that is not actually possible, we will use 99.9% instead. Taking a GTX 1080 as an example GPU, which has 20 multiprocessors each supporting 2048 threads (N = 20 × 2048 = 40,960 threads), Amdahl's Law, S = 1 / ((1 − P) + P / N) with P = 0.999, predicts a speedup of 976.191 times.

=== Assignment 2 ===

==== Beginning Information ====

Image used for all of the testing

[[File:Duck.JPG|400px]]

==== Enlarge Image====

<pre>

// Enlarges an image by an integer factor using nearest-neighbour replication.
//
// Launch layout: 1-D grid of 1-D blocks; only the x dimension is used. The
// launch must supply at least matrixSize threads in total; surplus threads
// are masked off by the bounds guard.
//
//   a            - output buffer; element idx is written for idx < matrixSize
//   b            - input buffer, indexed with a row stride of imgCols
//   matrixSize   - number of output elements to produce
//   growthVal    - integer scale factor applied to both axes
//   imgCols      - row stride of the input buffer
//   enlargedCols - row stride of the output buffer
//                  (presumably imgCols * growthVal; verify against caller)
__global__ void enlargeImg(int* a, int* b, int matrixSize, int growthVal, int imgCols, int enlargedCols) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;

    if (idx < matrixSize) {
        // Split the flat output index into (row, column) of the enlarged image.
        int x = idx / enlargedCols;
        int y = idx % enlargedCols;

        // Integer division maps growthVal consecutive output rows/columns
        // back onto the same source pixel.
        a[idx] = b[(x / growthVal) * imgCols + (y / growthVal)];
    }
}

</pre>

==== Shrink Image ====

<pre>

// Shrinks an image by an integer factor by keeping every shrinkVal-th pixel
// along both axes (subsampling).
//
// Launch layout: 1-D grid of 1-D blocks; only the x dimension is used. The
// launch must supply at least matrixSize threads in total; surplus threads
// are masked off by the bounds guard.
//
//   a          - output buffer; element idx is written for idx < matrixSize
//   b          - input buffer, indexed with a row stride of imgCols
//   matrixSize - number of output elements to produce
//   shrinkVal  - integer reduction factor applied to both axes
//   imgCols    - row stride of the input buffer
//   shrinkCols - row stride of the output buffer
//                (presumably imgCols / shrinkVal; verify against caller)
__global__ void shrinkImg(int* a, int* b, int matrixSize, int shrinkVal, int imgCols, int shrinkCols) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;

    if (idx < matrixSize) {
        // Split the flat output index into (row, column) of the shrunk image.
        int x = idx / shrinkCols;
        int y = idx % shrinkCols;

        // Output pixel (x, y) samples source pixel (x * shrinkVal, y * shrinkVal).
        // The previous revision divided here, which replicated the top-left
        // corner of the source instead of subsampling the whole image.
        a[idx] = b[(x * shrinkVal) * imgCols + (y * shrinkVal)];
    }
}

</pre>

==== Reflect Image====

<pre>

// Reflect Image Horizontally
//
// Copies b into a with the i coordinate mirrored: element (i, j) of b lands
// at (rows - 1 - i, j) of a, using the flat offset j * cols + i.
//
// Launch layout: 2-D grid of 2-D blocks; x supplies i and y supplies j.
// Threads outside i < rows, j < cols are masked off, so the grid may be
// rounded up to whole blocks.
//
//   a    - output buffer
//   b    - input buffer
//   rows - extent of the mirrored coordinate i
//   cols - extent of j, and the stride multiplied by j in the flat offset
//
// NOTE(review): the flat offset uses cols as the stride for j while i is
// bounded by rows; the two agree for square images — confirm the intended
// layout for non-square inputs.
__global__ void reflectImgH(int* a, int* b, int rows, int cols) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    int j = blockIdx.y * blockDim.y + threadIdx.y;

    // Without this guard a thread with i >= rows would compute a negative
    // mirrored coordinate and write outside the intended line.
    if (i < rows && j < cols) {
        //tempImage.pixelVal[rows - (i + 1)][j] = oldImage.pixelVal[i][j];
        a[j * cols + (rows - (i + 1))] = b[j * cols + i];
    }
}

//Reflect Image Vertically
//
// Copies b into a with the j coordinate mirrored: element (i, j) of b lands
// at (i, cols - 1 - j) of a, using the flat offset j * cols + i.
//
// Launch layout: 2-D grid of 2-D blocks; x supplies i and y supplies j.
// Threads outside i < rows, j < cols are masked off, so the grid may be
// rounded up to whole blocks.
//
//   a    - output buffer
//   b    - input buffer
//   rows - extent of i
//   cols - extent of the mirrored coordinate j, and the stride multiplied
//          by j in the flat offset
//
// NOTE(review): the flat offset uses cols as the stride for j while i is
// bounded by rows; the two agree for square images — confirm the intended
// layout for non-square inputs.
__global__ void reflectImgV(int* a, int* b, int rows, int cols) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    int j = blockIdx.y * blockDim.y + threadIdx.y;

    if (i < rows && j < cols) {
        //tempImage.pixelVal[i][cols - (j + 1)] = oldImage.pixelVal[i][j];
        // The mirrored coordinate must be computed before scaling by the
        // stride. The previous form, (cols - (j + 1) * cols) + i, reduced to
        // i - j * cols, which is negative for every j > 0.
        a[(cols - (j + 1)) * cols + i] = b[j * cols + i];
    }
}

</pre>

==== Translate Image====

<pre>

// Translates an image: the element at flat offset j * cols + i of b is
// copied to flat offset (j - value) * cols + (i + value) of a.
//
// Launch layout: 2-D grid of 2-D blocks; x supplies i and y supplies j.
// The x extent may be rounded up (it is guarded against cols). No row count
// is passed in, so the y extent of the launch must not exceed the number of
// lines actually present in a and b.
//
//   a     - output buffer; elements that receive no source pixel are left
//           untouched
//   b     - input buffer
//   cols  - row stride of both buffers
//   value - shift distance: added to i, subtracted from j
//
// NOTE(review): the reference comment below adds value to both coordinates,
// whereas the kernel subtracts it from j — confirm which direction is
// intended.
__global__ void translateImg(int* a, int* b, int cols, int value) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    int j = blockIdx.y * blockDim.y + threadIdx.y;

    int dstCol = i + value;
    int dstRow = j - value;

    // Drop pixels that would leave the image: an out-of-range column would
    // wrap into a neighbouring line and a negative line would write before
    // the start of the buffer.
    if (i < cols && dstCol >= 0 && dstCol < cols && dstRow >= 0) {
        //tempImage.pixelVal[i + value][j + value] = oldImage.pixelVal[i][j];
        a[dstRow * cols + dstCol] = b[j * cols + i];
    }
}

</pre>

==== Rotate Image====

<pre>

// Rotates an image by rads radians about the pivot (r0, c0). Each thread
// takes one source pixel, computes its rotated position, and writes it to
// the output if that position lies inside the image.
//
// Launch layout: 1-D grid of 1-D blocks; only the x dimension is used. The
// launch must supply at least matrixSize threads in total; surplus threads
// are masked off by the bounds guard.
//
//   a          - output buffer; positions no source pixel maps to are left
//                untouched
//   b          - input buffer
//   matrixSize - number of source elements to process
//   imgCols    - divisor used to split idx into (r, c), and the stride of
//                the flat offsets
//   imgRows    - upper bound for the rotated r coordinate
//   r0, c0     - pivot of the rotation
//   rads       - rotation angle in radians
//
// NOTE(review): both buffers are addressed as c * imgCols + r, i.e. with the
// column scaled by the stride. That is self-consistent for square images;
// confirm the intended layout before using non-square inputs.
__global__ void rotateImg(int* a, int* b, int matrixSize, int imgCols, int imgRows, int r0, int c0, float rads) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;

    if (idx < matrixSize) {
        int r = idx / imgCols;
        int c = idx % imgCols;

        // Standard 2-D rotation about (r0, c0); the cast truncates toward zero.
        int r1 = (int)(r0 + ((r - r0) * cos(rads)) - ((c - c0) * sin(rads)));
        int c1 = (int)(c0 + ((r - r0) * sin(rads)) + ((c - c0) * cos(rads)));

        // Pixels rotated outside the image are discarded.
        if (r1 >= 0 && r1 < imgRows && c1 >= 0 && c1 < imgCols) {
            a[c1 * imgCols + r1] = b[c * imgCols + r];
        }
    }
}

// Fills zero-valued elements of a with the value one line further on
// (offset + imgCols), patching the holes a rotation leaves behind.
//
// Launch layout: 1-D grid of 1-D blocks; only the x dimension is used. No
// element count is passed in, so the launch must not supply more threads
// than a has elements.
//
//   a       - image buffer, patched in place
//   imgCols - divisor used to split idx into (r, c), and the stride of the
//             flat offsets
//
// NOTE(review): a thread may read a neighbour that another thread is
// patching at the same time, so the value picked up for adjacent holes
// depends on scheduling.
__global__ void rotateImgBlackFix(int* a, int imgCols) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;

    int r = idx / imgCols;
    int c = idx % imgCols;

    // The last line (c == imgCols - 1) has no successor within the imgCols
    // lines this indexing addresses; reading (c + 1) * imgCols + r there
    // would step past them.
    if (c + 1 < imgCols && a[c * imgCols + r] == 0) {
        a[c * imgCols + r] = a[(c + 1) * imgCols + r];
    }
}

</pre>

==== Negate Image====

<pre>

// Negate kernel. Each thread derives a flat 1-D global index from the x
// dimension of the launch and compares it against matrixSize.
// NOTE(review): this listing is cut off after the bounds check — the guarded
// statement and the kernel's closing brace are missing from this revision,
// so what is done with a and b cannot be determined from here.
__global__ void negateImg(int* a, int* b, int matrixSize) {

int matrixCol = blockIdx.x * blockDim.x + threadIdx.x;

if(matrixCol < matrixSize)

</pre>

====Results====

[[File:CHART2GOOD.png]]

=== Assignment 3 ===

Jtardif1

46

edits

CDOT Wiki β

Changes

GPU610/gpuchill

CDOT Wiki ^β