96
edits
Changes
→Assignment 2
'''Main function'''
<syntaxhighlight lang="cpp">
// Main routine that executes on the hostint main(voidint argc, char** argv) { // interpret command-line argument if (argc != 2) { std::cerr << argv[0] << ": invalid number of arguments\n"; return 1; } float n = std::atoi(argv[1]); int nblocks = 30;
steady_clock::time_point ts, te;
dim3 dimGrid(NUM_BLOCKnblocks, 1, 1); // Grid dimensions dim3 dimBlock(NUM_THREADntpb, 1, 1); // Block dimensions
float *sumHost, *sumDev; // Pointer to host & device arrays
float step = 1.0 / NBINn; // Step size size_t size = NUM_BLOCKnblocks*NUM_THREAD ntpb * sizeof(float); //Array memory size
sumHost = (float *)malloc(size); // Allocate array on host
cudaMalloc((void **)&sumDev, size); // Allocate array on device
// Initialize array in device to 0
cudaMemset(sumDev, 0, size);
// initialization
std::srand(std::time(nullptr));
ts = steady_clock::now();
// Do calculation on device
te = steady_clock::now();
cudaMemcpy(sumHost, sumDev, size, cudaMemcpyDeviceToHost);
for (tid = 0; tid<NUM_THREADntpb*NUM_BLOCKnblocks; tid++)
pi += sumHost[tid];
pi *= step;
// Print results
printf("PI Number of iterations= %f\nPI = %f\n", n, pi); reportTime("Pi calculation took ", te - ts);
// Cleanup