Changes

TeamDS

1,643 bytes added, 23:22, 11 April 2017

→‎SDFGenerateCuda Kernel Optimized Phase 2

}

</syntaxhighlight >

=== Launch Config GPU Optimized Phase 2 ===

// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool CudaSucceeded(cudaError_t status, const char* what)
{
    if (status == cudaSuccess)
        return true;

    std::cerr << what << " failed: " << cudaGetErrorString(status) << std::endl;
    return false;
}

// Program entry point.
//
// Expects exactly one argument: the path handed to LoadBitmap(). The loaded
// pixels are copied to the device, GenerateXYCoord and SDFGenerateCuda are
// launched over a 1D grid covering `size` elements, and the result is copied
// back and passed to SaveBitmap().
//
// Returns 0 on success, 1 on bad arguments, a failed load, an empty bitmap,
// or any CUDA runtime error (in which case nothing is saved).
int main(int argc, char **argv)
{
    if (argc != 2)
    {
        std::cout << "Incorrect number of arg";
        return 1;
    }

    char* path = argv[1];
    BinaryBitmap* bitmap = LoadBitmap(path);
    if (bitmap == NULL)
        return 1;

    int size = bitmap->GetSize();
    int width = bitmap->_width;
    int height = bitmap->_height;

    // A non-positive size would yield a 0-block grid, which is an invalid
    // launch configuration; bail out before touching the GPU.
    if (size <= 0)
    {
        std::cerr << "Bitmap has no pixels" << std::endl;
        delete bitmap;
        return 1;
    }

    // Everything that needs releasing starts out NULL so the single cleanup
    // path below is safe no matter where a failure happened
    // (cudaFree(NULL) and delete[] NULL are both no-ops).
    float* d_src = NULL;
    float* d_dst = NULL;
    int* d_xCoord = NULL;
    int* d_yCoord = NULL;
    float* dst = NULL;
    int exitCode = 1;

    // Single-pass loop: `break` jumps straight to the cleanup section.
    do
    {
        int d;
        cudaDeviceProp prop;
        if (!CudaSucceeded(cudaGetDevice(&d), "cudaGetDevice"))
            break;
        if (!CudaSucceeded(cudaGetDeviceProperties(&prop, d), "cudaGetDeviceProperties"))
            break;

        // maxThreadsPerBlock is the limit on total threads in a block;
        // maxThreadsDim[0] only bounds the x extent of a block.
        const unsigned ntpb = prop.maxThreadsPerBlock;

        const size_t floatBytes = static_cast<size_t>(size) * sizeof(float);
        const size_t intBytes = static_cast<size_t>(size) * sizeof(int);

        // Allocate GPU memory
        if (!CudaSucceeded(cudaMalloc((void**)&d_src, floatBytes), "cudaMalloc(d_src)"))
            break;
        if (!CudaSucceeded(cudaMalloc((void**)&d_dst, floatBytes), "cudaMalloc(d_dst)"))
            break;
        if (!CudaSucceeded(cudaMalloc((void**)&d_xCoord, intBytes), "cudaMalloc(d_xCoord)"))
            break;
        if (!CudaSucceeded(cudaMalloc((void**)&d_yCoord, intBytes), "cudaMalloc(d_yCoord)"))
            break;

        // Copy src to device src
        // NOTE(review): assumes bitmap->_pixels holds `size` floats - confirm
        // against BinaryBitmap's declaration.
        if (!CudaSucceeded(cudaMemcpy(d_src, bitmap->_pixels, floatBytes, cudaMemcpyHostToDevice),
                           "cudaMemcpy(host to device)"))
            break;

        // Prepare kernel launch: ceil(size / ntpb) blocks so every element
        // is covered by one thread.
        const int numOfBlocks = ((size + ntpb - 1) / ntpb);

        // Launch grid for pre-calculating XYCoords
        GenerateXYCoord<<<numOfBlocks, ntpb>>>(d_xCoord, d_yCoord, width, height);
        if (!CudaSucceeded(cudaGetLastError(), "GenerateXYCoord launch"))
            break;

        // Launch grid for converting
        // NOTE(review): the meaning of the trailing literal 64 is defined by
        // SDFGenerateCuda, which is not visible here.
        SDFGenerateCuda<<<numOfBlocks, ntpb>>>(d_src, d_dst, d_xCoord, d_yCoord, size, 64);
        if (!CudaSucceeded(cudaGetLastError(), "SDFGenerateCuda launch"))
            break;

        // Wait for the kernels to finish; this is also where errors raised
        // during kernel execution are reported.
        if (!CudaSucceeded(cudaDeviceSynchronize(), "cudaDeviceSynchronize"))
            break;

        // Host buffer for the SDF pixels
        dst = new float[size];
        if (!CudaSucceeded(cudaMemcpy(dst, d_dst, floatBytes, cudaMemcpyDeviceToHost),
                           "cudaMemcpy(device to host)"))
            break;

        SaveBitmap(path, dst);
        Pause();

        exitCode = 0;
    } while (false);

    // Free memory back
    cudaFree(d_src);
    cudaFree(d_dst);
    cudaFree(d_xCoord);
    cudaFree(d_yCoord);
    delete bitmap;
    delete[] dst; // allocated with new[], so it must be released with delete[]

    return exitCode;
}

</syntaxhighlight >

Dshirzad

116

edits

CDOT Wiki β

Changes

TeamDS

CDOT Wiki ^β