116
edits
Changes
TeamDS
,→Signed Distance Field Generator
This code is a direct conversion to CUDA, without any special consideration for GPU-related optimizations.
=== Launch Config ===
<syntaxhighlight lang="cpp">
// Program entry point: loads a bitmap from the path given as the single
// command-line argument, uploads its pixels to the GPU, runs the
// SDFGenerateCuda kernel over them and copies the result back to the host.
//
// Returns 1 when the whole pipeline succeeded and 0 on any failure (bad
// argument count, bitmap could not be loaded, empty bitmap, CUDA error).
// NOTE(review): these exit codes are inverted relative to the usual
// "0 == success" convention; they are kept as-is because external callers
// may already depend on them.
int main(int argc, char **argv)
{
    if (argc != 2)
    {
        cout << "Incorrect number of arg";
        return 0;
    }

    char* path = argv[1];
    BinaryBitmap* bitmap = LoadBitmap(path);
    if (bitmap == NULL)
        return 0;

    int size = bitmap->GetSize();
    int width = bitmap->_width;
    int height = bitmap->_height;
    if (size <= 0)
    {
        // Nothing to process; a zero-block grid would be an invalid launch.
        cout << "Bitmap is empty";
        delete bitmap;
        return 0;
    }

    // Every resource released in the cleanup section starts out null so the
    // cleanup is safe no matter how far the pipeline got.
    float* d_src = NULL;
    float* d_dst = NULL;
    float* dst = NULL;
    int result = 0;
    const size_t bytes = static_cast<size_t>(size) * sizeof(float);

    // Query the current device for its launch limits.
    int d = 0;
    cudaDeviceProp prop;
    cudaError_t err = cudaGetDevice(&d);
    if (err == cudaSuccess)
        err = cudaGetDeviceProperties(&prop, d);

    // Threads per block: the per-block limit (maxThreadsPerBlock), not the
    // per-dimension extent limit (maxThreadsDim[0]).
    unsigned ntpb = 0;
    if (err == cudaSuccess)
        ntpb = static_cast<unsigned>(prop.maxThreadsPerBlock);

    // Allocate device memory for the source and destination images.
    if (err == cudaSuccess)
        err = cudaMalloc((void**)&d_src, bytes);
    if (err == cudaSuccess)
        err = cudaMalloc((void**)&d_dst, bytes);

    // Copy the source pixels to the device.
    // NOTE(review): this assumes bitmap->_pixels holds `size` floats - confirm
    // against the BinaryBitmap definition.
    if (err == cudaSuccess)
        err = cudaMemcpy(d_src, bitmap->_pixels, bytes, cudaMemcpyHostToDevice);

    if (err == cudaSuccess)
    {
        // Ceil-division: exactly enough blocks to cover `size` elements,
        // without a surplus block when size is a multiple of ntpb.
        int numOfBlocks = static_cast<int>((static_cast<unsigned>(size) + ntpb - 1) / ntpb);

        // NOTE(review): the trailing 64 is passed through unchanged; its
        // meaning is defined by SDFGenerateCuda, which is not visible here.
        SDFGenerateCuda<<<numOfBlocks, ntpb>>>(d_src, d_dst, width, height, 64);

        // A kernel launch returns nothing; configuration errors are only
        // visible through cudaGetLastError().
        err = cudaGetLastError();
    }

    // Wait for the kernel to finish; execution faults surface here.
    if (err == cudaSuccess)
        err = cudaDeviceSynchronize();

    if (err == cudaSuccess)
    {
        // Host buffer receiving the generated SDF pixels.
        dst = new float[size];
        err = cudaMemcpy(dst, d_dst, bytes, cudaMemcpyDeviceToHost);
    }

    if (err == cudaSuccess)
    {
        Pause();
        result = 1;
    }
    else
    {
        cout << "CUDA error: " << cudaGetErrorString(err) << "\n";
    }

    // Release everything (cudaFree and delete[] both accept null).
    cudaFree(d_src);
    cudaFree(d_dst);
    delete bitmap;
    delete[] dst;   // allocated with new[], so it must be freed with delete[]
    return result;
}
</syntaxhighlight>
=== Assignment 3 ===