Changes

Kernal Blas

1,374 bytes removed, 21:57, 2 April 2018

→‎Assignment 2

</syntaxhighlight>

<br>

~~'''Main function'''~~

~~<syntaxhighlight lang="cpp">~~

~~// Main routine that executes on the host~~

~~int main(int argc, char** argv) {~~

~~// interpret command-line argument~~

~~if (argc != 2) {~~

~~std::cerr << argv[0] << ": invalid number of arguments\n";~~

~~return 1;~~

}

~~float n = std::atoi(argv[1]);~~

~~int nblocks = 30;~~

~~steady_clock::time_point ts, te;~~

~~dim3 dimGrid(nblocks, 1, 1); // Grid dimensions~~

~~dim3 dimBlock(ntpb, 1, 1); // Block dimensions~~

~~float *sumHost, *sumDev; // Pointer to host & device arrays~~

~~float step = 1.0 / n; // Step size~~

~~size_t size = nblocks*ntpb * sizeof(float); //Array memory size~~

~~sumHost = (float *)malloc(size); // Allocate array on host~~

~~cudaMalloc((void **)&sumDev, size); // Allocate array on device~~

~~// Initialize array in device to 0~~

~~cudaMemset(sumDev, 0, size);~~

~~// initialization~~

~~std::srand(std::time(nullptr));~~

~~ts = steady_clock::now();~~

~~// Do calculation on device~~

~~calculate << <dimGrid, dimBlock >> > (sumDev, n, step, ntpb, nblocks); // call CUDA kernel~~

~~te = steady_clock::now();~~

~~cudaMemcpy(sumHost, sumDev, size, cudaMemcpyDeviceToHost);~~

~~for (tid = 0; tid<ntpb*nblocks; tid++)~~

~~pi += sumHost[tid];~~

~~pi *= step;~~

~~// Print results~~

~~printf("Number of iterations= %f\nPI = %f\n", n,pi);~~

~~reportTime("Pi calculation took ", te - ts);~~

~~// Cleanup~~

~~free(sumHost);~~

~~cudaFree(sumDev);~~

~~return 0;~~

}

~~</syntaxhighlight>~~

'''Results CPU vs GPU'''

<br>

Jpham14

96

edits

Changes

Kernal Blas

Navigation menu

Personal tools

Namespaces

Variants

Views

More

Search

Navigation

get involved with CDOT

courses

course projects

links

Tools