93
edits
Changes
Savy Cat
,→Initial CUDA Code
// Dump the image as text: one output line per row j, with every pixel cast to
// int and printed in a 4-character-wide field.
// NOTE(review): the loop bounds use height/width while idx() is passed w/h;
// presumably both pairs hold the same values — confirm. `i` is not defined in
// this fragment (presumably a channel/plane index from an enclosing scope).
for (int j = 0; j < height; j++) {
for (int k = 0; k < width; k++) {
std::cout << std::setw(4) << (int)img[idx(k, j, w, h, i)];
}
std::cout << std::endl;
}
std::cout << "Trying to read " << filename << std::endl;
h = cimg.height();
int size = w * h * cimg.spectrum();
for (int i = 0; i < size; i++) {
img[i] = cimg[i];
__global__ void rot90(floatPX_TYPE* src, floatPX_TYPE* dst, int src_w, int src_h, int z) {
int k = blockIdx.x * blockDim.x + threadIdx.x;
// Allocate device memory for src and dst
// Both buffers get the same byte count, w * h * sizeof(floatPX_TYPE) * 3; the
// factor 3 presumably corresponds to the three colour channels mentioned
// below — confirm.
// NOTE(review): the cudaError_t results of cudaMalloc/cudaMemcpy are discarded
// in this fragment, so a failed allocation or copy would go unnoticed here.
std::cout << "Allocating device memory ..." << std::endl;
cudaMalloc((void**)&d_src, w * h * sizeof(floatPX_TYPE) * 3); cudaMalloc((void**)&d_dst, w * h * sizeof(floatPX_TYPE) * 3);
// Copy h_src to d_src
std::cout << "Copying source image to device ..." << std::endl;
cudaMemcpy(d_src, h_src, w * h * sizeof(floatPX_TYPE) * 3, cudaMemcpyHostToDevice);
// Launch grid 3 times (one grid per colour channel)
// NOTE(review): no kernel launch is visible at this point in the fragment.
// Copy d_dst to h_dst
// Copies back the same number of bytes that was uploaded above.
std::cout << "Copying rotated image to host ..." << std::endl;
cudaMemcpy(h_dst, d_dst, w * h * sizeof(floatPX_TYPE) * 3, cudaMemcpyDeviceToHost);
// Deallocate memory