Changes

Jump to: navigation, search

TeamDS

1,643 bytes added, 23:22, 11 April 2017
SDFGenerateCuda Kernel Optimized Phase 2
}
</syntaxhighlight >
 
 
=== Launch Config GPU Optimized Phase 2 ===
 
<syntaxhighlight lang="cpp">
// Reports a failed CUDA runtime call on stdout.
// Returns true when err is cudaSuccess, false otherwise.
static bool CudaSucceeded(cudaError_t err, const char* what)
{
    if (err == cudaSuccess)
        return true;

    cout << "CUDA error (" << what << "): " << cudaGetErrorString(err) << "\n";
    return false;
}

// Entry point: loads the bitmap named by argv[1], runs the GenerateXYCoord and
// SDFGenerateCuda kernels over it on the GPU, and passes the result to
// SaveBitmap.
//
// Returns 0 on success, 1 on a bad argument count, a failed bitmap load, or
// any CUDA runtime / kernel launch failure.
int main(int argc, char **argv)
{
    if (argc != 2)
    {
        cout << "Incorrect number of arg";
        return 1;
    }

    char* path = argv[1];
    BinaryBitmap* bitmap = LoadBitmap(path);
    if (bitmap == NULL)
        return 1;

    int size = bitmap->GetSize();
    int width = bitmap->_width;
    int height = bitmap->_height;

    // Every buffer starts as NULL so the single cleanup section at the bottom
    // is valid no matter where a failure occurs (cudaFree(NULL) and
    // delete[] NULL are both no-ops).
    float* d_src = NULL;
    float* d_dst = NULL;
    int* d_xCoord = NULL;
    int* d_yCoord = NULL;
    float* dst = NULL;

    // Query the current device for its threads-per-block limit.
    int d = 0;
    cudaDeviceProp prop;
    bool ok = CudaSucceeded(cudaGetDevice(&d), "cudaGetDevice");
    ok = ok && CudaSucceeded(cudaGetDeviceProperties(&prop, d), "cudaGetDeviceProperties");

    // Allocate GPU memory
    ok = ok && CudaSucceeded(cudaMalloc((void**)&d_src, size * sizeof(float)), "cudaMalloc d_src");
    ok = ok && CudaSucceeded(cudaMalloc((void**)&d_dst, size * sizeof(float)), "cudaMalloc d_dst");
    ok = ok && CudaSucceeded(cudaMalloc((void**)&d_xCoord, size * sizeof(int)), "cudaMalloc d_xCoord");
    ok = ok && CudaSucceeded(cudaMalloc((void**)&d_yCoord, size * sizeof(int)), "cudaMalloc d_yCoord");

    // Copy src to device src
    ok = ok && CudaSucceeded(
        cudaMemcpy(d_src, bitmap->_pixels, size * sizeof(float), cudaMemcpyHostToDevice),
        "cudaMemcpy host to device");

    if (ok)
    {
        // Prepare kernel launch.
        // maxThreadsPerBlock is the limit on the total threads in one block;
        // maxThreadsDim[0] only bounds the x extent of a block.
        unsigned ntpb = prop.maxThreadsPerBlock;

        // Calc how many blocks to launch (ceiling division over the pixel count)
        int numOfBlocks = ((size + ntpb - 1) / ntpb);

        // Launch grid for pre-calculating XYCoords
        GenerateXYCoord<<<numOfBlocks, ntpb>>>(d_xCoord, d_yCoord, width, height);
        ok = CudaSucceeded(cudaGetLastError(), "GenerateXYCoord launch");

        if (ok)
        {
            // Launch grid for converting
            SDFGenerateCuda<<<numOfBlocks, ntpb>>>(d_src, d_dst, d_xCoord, d_yCoord, size, 64);
            ok = CudaSucceeded(cudaGetLastError(), "SDFGenerateCuda launch");
        }

        // Wait for the kernels to finish before copying; errors raised while
        // the kernels execute are reported by this call.
        ok = ok && CudaSucceeded(cudaDeviceSynchronize(), "cudaDeviceSynchronize");
    }

    if (ok)
    {
        // buffer array for SDF pixels
        dst = new float[size];
        ok = CudaSucceeded(
            cudaMemcpy(dst, d_dst, size * sizeof(float), cudaMemcpyDeviceToHost),
            "cudaMemcpy device to host");
    }

    // Only write the output when the whole GPU pipeline succeeded, so a
    // failure never saves an uninitialised buffer.
    if (ok)
        SaveBitmap(path, dst);

    // Kept at its original position, before the buffers are released.
    Pause();

    // Free memory back
    cudaFree(d_src);
    cudaFree(d_dst);
    cudaFree(d_xCoord);
    cudaFree(d_yCoord);
    delete bitmap;
    delete[] dst;   // allocated with new[], so it must be released with delete[]

    return ok ? 0 : 1;
}
</syntaxhighlight >
116
edits

Navigation menu