49
edits
Changes
→Assignment 2 - Parallelize
{| class="wikitable mw-collapsible mw-collapsed"
! Culprit Unoptimized - BlurImage( ... )
|-
|
#include <windows.h> // for bitmap headers.
#include <algorithm>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <cuda_runtime.h>
#include <device_functions.h>
//#if defined(__NVCC__) && __CUDACC_VER_MAJOR__ != 1const int ntpb = 1024;ifdef __CUDACC__//#elif defined(__NVCC__) &&if __CUDACC_VER_MAJOR__ == 1
//const int ntpb = 512;
//#else
//const int ntpb = 1024;
//#endif
//#endifconst float c_pi int ntpb = 3.14159265359f1024;const int STREAMS = 32;
// Stops the program with a diagnostic when a CUDA runtime call has failed.
//
//   error - status code returned by a CUDA runtime API call; the visible
//           callers wrap cudaHostAlloc, cudaMalloc and cudaStreamCreate.
//
// When error is cudaSuccess the function returns without doing anything.
// For any other value it writes the runtime's description of the error to
// stderr and terminates the process with EXIT_FAILURE, so a failed
// allocation or stream creation is reported at the call that caused it
// instead of being silently discarded.
void check(cudaError_t error) {
    if (error != cudaSuccess) {
        std::fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(error));
        std::exit(EXIT_FAILURE);
    }
}
// NOTE(review): the signature below appears to fuse two revisions of the
// kernel token by token (`horizontal_blur_kernel` / `blur_kernel`,
// `float` / `BGRPixel`, `pixels` / `imageIn`, ...), so it does not name valid
// types or parameters as written — TODO restore it from the original file.
// NOTE(review): the first `//` after the opening brace comments out the rest
// of that line, including the idy/idx index computations, so the body
// contains no statements as written.
__global__ void horizontal_blur_kernelblur_kernel(floatBGRPixel* pixelsimageIn, floatBGRPixel* outputimageOut, float* intergralsblur, int nIntegralsn_blur, int widthx, int heightstart, int pitchjump) { //int p idx = pitch; //int x = width; //int y = height; //int n = nIntegrals; int idy = blockIdxblockDim.x*blockDimblockIdx.x + threadIdx.x; int idx = blockIdx.y*blockDim.y + threadIdx.y; //int startOffset = -1 * int(nIntegrals / 2);Location on the row
}
// Takes a source image, a destination image, and separate sigma and size
// values for the x and y directions.
// NOTE(review): presumably blurs srcImage into destImage; the visible body
// does not show the blur itself — confirm against the original file.
void BlurImage(const SImageData& srcImage, SImageData &destImage, float xblursigma, float yblursigma, unsigned int xblursize, unsigned int yblursize)
{
// NOTE(review): `int n xPadded` puts two identifiers in one declaration and
// `m_pitchxImage` looks like two names fused together. Everything after the
// first `//` on the line is a trailing comment, so yPadded and paddedSize
// are not declared here as written.
int n xPadded = srcImage.m_height*srcImage.m_pitchxImage + (xblursize - 1); // Width including padding int nblks yPadded = yImage + (n + ntpb yblursize - 1) ; // ntpbHeight including padding int paddedSize = xPadded*yPadded;
// NOTE(review): the next line is a single comment; the cudaHostAlloc and
// cudaMalloc calls embedded in it are not compiled as written.
// horizontal blur from d_ipixels to d_opixelsAllocate memory for host and device { check(cudaHostAlloc((void**)&pinnedImage, 3 * imageSize * sizeof(float), 0)); int nIntegrals check(cudaMalloc((void**)&d_padded1, paddedSize * sizeof(BGRPixel))); check(cudaMalloc((void**)&d_padded2, paddedSize * sizeof(BGRPixel)));
}
// Creates one CUDA stream for each index 0..STREAMS-1 of `stream`, passing
// every cudaStreamCreate result through check().
// NOTE(review): `stream` is not declared in the visible text, and this loop
// follows the brace that closes the function's opening `{` — confirm its
// placement against the original file.
for (int i = 0; i < STREAMS; ++i) {
check(cudaStreamCreate(&stream[i]));
}
// NOTE(review): no matching opening brace is visible for the brace below.
}
printf("%s loaded\n", srcFileName);
SImageData destImage;
auto t1 = std::chrono::high_resolution_clock::now();
BlurImage(srcImage, destImage, xblursigma, yblursigma, xblursize, yblursize);
auto t2 = std::chrono::high_resolution_clock::now();
std::cout << "BlurImage time: " << std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count() << "us" << std::endl;
if (SaveImage(destFileName, destImage))
printf("Blurred image saved as %s\n", destFileName);