93
edits
Changes
Savy Cat
,→Profiling With Nsight
// Allocate device memory for src and dst
std::cout << "Allocating device memory ..." << std::endl;
cudaMalloc((void**)&d_src, w * h * sizeof(PX_TYPE) * 3);
cudaMalloc((void**)&d_dst, w * h * sizeof(PX_TYPE) * 3);
// Copy h_src to d_src
std::cout << "Copying source image to device ..." << std::endl;
cudaMemcpy(d_src, h_src, w * h * sizeof(PX_TYPE) * 3, cudaMemcpyHostToDevice);
// Rotate image 6 x 2 times, copying result back to host each time
// Copy d_dst to h_dst
std::cout << "Copying result to host ..." << std::endl;
cudaMemcpy(h_dst, d_dst, w * h * sizeof(PX_TYPE) * 3, cudaMemcpyDeviceToHost);
// Rotate again
// Copy d_src to h_src
cudaMemcpy(h_src, d_src, w * h * sizeof(PX_TYPE) * 3, cudaMemcpyDeviceToHost);
std::cout << "Copying result to host ..." << std::endl;
}