116
edits
Changes
TeamDS
,→GPU Optimization Phase 2
=== GPU Optimization Phase 2 ===
For n pixels, we are currently calculating the XY coordinates a total of n^2 times. Since the XY coordinates of each pixel are fixed, we can pre-generate them once into lookup arrays. However, this will increase the number of global memory accesses on the GPU, so we will need to benchmark to see whether it actually gives better times.

=== GenerateXYCoord Kernel ===
<syntaxhighlight lang="cpp">
// Pre-computes the (x, y) coordinate of every pixel of a width x height image.
// One thread handles one linear pixel index i and writes xCoord[i] / yCoord[i].
//
//   xCoord, yCoord : output arrays, each written at indices 0 .. width*height-1
//   width, height  : image dimensions in pixels
__global__ void GenerateXYCoord(int xCoord[], int yCoord[], int width, int height)
{
    int size = width * height;
    int i = blockIdx.x * blockDim.x + threadIdx.x;

    // Threads past the last pixel have nothing to do.
    if (i >= size)
        return;

    // Split the linear index into row (y) and column (x).
    int y = (i / width);
    xCoord[i] = i - (y * width);
    yCoord[i] = y;
}
</syntaxhighlight>

=== SDFGenerateCuda Kernel Optimized Phase 2 ===
<syntaxhighlight lang="cpp">
// Computes one output pixel of a signed distance field by brute force.
// One thread handles one linear pixel index i and scans all `size` pixels
// for the nearest pixel of the opposite kind.
//
//   src            : input image; a value > 0 marks an inside pixel,
//                    a value == 0 marks an outside pixel
//   dst            : output image, written at index i
//   xCoord, yCoord : per-pixel coordinate lookup tables (see GenerateXYCoord)
//   size           : number of pixels
//   spread         : distance (in pixels) that maps to the ends of the output range
//
// Inside pixels are written as  0.5 + 0.5 * d, outside pixels as 0.5 * (1 - d),
// where d = shortestDistance / spread, clamped to at most 1.
// MAX_FLOAT_VALUE is defined elsewhere; it is used as the initial "no match yet" distance.
__global__ void SDFGenerateCuda(const float src[], float dst[], const int xCoord[], const int yCoord[], int size, int spread)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i >= size)
        return;

    // This thread's own pixel never changes during the scan, so read its
    // state and coordinates once instead of on every loop iteration.
    const bool inside = (src[i] > 0);
    const int xi = xCoord[i];
    const int yi = yCoord[i];

    // Used for avoiding unnecessary sqrt calc.
    // Just compare the squared distances and
    // only use sqrt on the shortest one.
    float shortestDistSquared = MAX_FLOAT_VALUE;

    for (int j = 0; j < size; j++)
    {
        float pixelVal2 = src[j];

        // An inside pixel looks for the closest outside pixel (== 0);
        // an outside pixel looks for the closest inside pixel (> 0).
        bool isOpposite = inside ? (pixelVal2 == 0) : (pixelVal2 > 0);
        if (isOpposite)
        {
            // Calculate squared distance
            float dx = xCoord[j] - xi;
            float dy = yCoord[j] - yi;
            float distSquared = dx * dx + dy * dy;
            if (distSquared < shortestDistSquared)
                shortestDistSquared = distSquared;
        }
    }

    float shortestDist = sqrtf(shortestDistSquared);
    float spread01 = (shortestDist / spread);
    if (spread01 > 1)
        spread01 = 1; // clamp it

    if (inside)
        dst[i] = (spread01 * .5f) + 0.5f;
    else
        dst[i] = (1 - spread01) * .5f;
}
</syntaxhighlight>