Changes

BETTERRED

11,368 bytes added, 19:40, 23 March 2017

no edit summary

= Assignment 2 - Parallelize =

{| class="wikitable mw-collapsible mw-collapsed"

! Culprit - BlurImage( ... )

|-

|

<syntaxhighlight lang="cpp">

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <array>
#include <functional>
#include <iostream>
#include <stdexcept>
#include <vector>

#include <windows.h> // for bitmap headers.

#include <cuda_runtime.h>
// to remove intellisense highlighting
#include <device_launch_parameters.h>
#include <device_functions.h>

// Threads per block used when sizing kernel launches.
// The preprocessor branches below would have selected 512 instead of 1024
// when __CUDACC_VER_MAJOR__ == 1; they are commented out, so 1024 is always
// used.
//#if defined(__NVCC__) && __CUDACC_VER_MAJOR__ != 1

const int ntpb = 1024;

//#elif defined(__NVCC__) &&__CUDACC_VER_MAJOR__ == 1

//const int ntpb = 512;

//#endif

// Single-precision value of pi.
// NOTE(review): not referenced anywhere in the visible part of this file.
const float c_pi = 3.14159265359f;

// Converts a failed CUDA runtime status into a C++ exception.
//
// error: status code returned by a CUDA runtime API call.
//
// Throws std::runtime_error (which derives from std::exception, so existing
// catch(std::exception&) handlers still work) carrying the text returned by
// cudaGetErrorString(error) whenever error is not cudaSuccess.
void check(cudaError_t error) {
    if (error != cudaSuccess) {
        // std::exception has no portable (const char*) constructor; that is
        // an MSVC extension. std::runtime_error is the standard equivalent.
        throw std::runtime_error(cudaGetErrorString(error));
    }
}

// In-memory 24-bit bitmap.
//
// m_width / m_height: image dimensions in pixels.
// m_pitch:            number of bytes from the start of one row to the start
//                     of the next (row bytes including any padding).
// m_pixels:           raw pixel bytes, m_pitch bytes per row, three bytes per
//                     pixel.
struct SImageData
{
    // Starts as an empty image. Every scalar member is zeroed so that no
    // field is ever read uninitialised (m_pitch used to be left indeterminate).
    SImageData()
        : m_width(0)
        , m_height(0)
        , m_pitch(0)
    { }

    long m_width;
    long m_height;
    long m_pitch;
    std::vector<uint8_t> m_pixels;
};

// Prints a prompt and blocks until one character has been read from standard
// input, so a console window stays open long enough to read the output.
void WaitForEnter()
{
    std::cout << "Press Enter key to exit ... ";

    char discarded;
    std::cin.get(discarded);
}

// Loads an uncompressed 24-bit BMP file into imageData.
//
// fileName:  path of the bitmap file to read.
// imageData: on success receives the width, height, pitch (bytes per row,
//            rounded up to a multiple of 4) and the pixel bytes exactly as
//            stored in the file (BGR order).
//
// Returns true on success. Returns false when the file cannot be opened, is
// not an uncompressed 24-bit "BM" bitmap, has non-positive dimensions, or
// does not contain at least pitch * height bytes of pixel data.
bool LoadImage(const char *fileName, SImageData& imageData)
{
    // open the file if we can
    FILE *file = fopen(fileName, "rb");
    if (!file)
        return false;

    // read and validate the headers
    BITMAPFILEHEADER header;
    BITMAPINFOHEADER infoHeader;
    if (fread(&header, sizeof(header), 1, file) != 1 ||
        fread(&infoHeader, sizeof(infoHeader), 1, file) != 1 ||
        header.bfType != 0x4D42 || infoHeader.biBitCount != 24 ||
        infoHeader.biCompression != BI_RGB ||
        infoHeader.biWidth <= 0 || infoHeader.biHeight <= 0)
    {
        fclose(file);
        return false;
    }

    // Each row holds width * 3 bytes, padded up to the next multiple of 4.
    // Computed in 64 bits so a hostile header cannot overflow the arithmetic.
    const unsigned long long pitch64 =
        (static_cast<unsigned long long>(infoHeader.biWidth) * 3ULL + 3ULL) & ~3ULL;
    const unsigned long long required =
        pitch64 * static_cast<unsigned long long>(infoHeader.biHeight);

    // biSizeImage is allowed to be 0 for BI_RGB bitmaps; derive the size from
    // the dimensions in that case instead of reading zero bytes.
    const unsigned long long dataSize =
        infoHeader.biSizeImage != 0 ? infoHeader.biSizeImage : required;

    const long pitch = static_cast<long>(pitch64);
    if (pitch <= 0 ||
        static_cast<unsigned long long>(pitch) != pitch64 ||      // pitch must fit in m_pitch
        static_cast<size_t>(dataSize) != dataSize ||              // size must fit in size_t
        dataSize < required)                                      // every row must be present
    {
        fclose(file);
        return false;
    }

    // read in our pixel data if we can. Note that it's in BGR order.
    imageData.m_pixels.resize(static_cast<size_t>(dataSize));
    if (fseek(file, header.bfOffBits, SEEK_SET) != 0 ||
        fread(imageData.m_pixels.data(), imageData.m_pixels.size(), 1, file) != 1)
    {
        fclose(file);
        return false;
    }

    imageData.m_width = infoHeader.biWidth;
    imageData.m_height = infoHeader.biHeight;
    imageData.m_pitch = pitch;

    fclose(file);
    return true;
}

// Writes image to fileName as an uncompressed 24-bit BMP.
//
// fileName: path of the file to create or overwrite.
// image:    width, height and pixel bytes to store; the pixel bytes are
//           written verbatim after the two headers.
//
// Returns true only if the file was opened and every write, including the
// final flush performed by fclose, succeeded. Returns false otherwise.
bool SaveImage(const char *fileName, const SImageData &image)
{
    // open the file if we can
    FILE *file = fopen(fileName, "wb");
    if (!file)
        return false;

    // make the header info
    BITMAPFILEHEADER header;
    BITMAPINFOHEADER infoHeader;

    header.bfType = 0x4D42;
    header.bfReserved1 = 0;
    header.bfReserved2 = 0;
    header.bfOffBits = 54;

    infoHeader.biSize = 40;
    infoHeader.biWidth = image.m_width;
    infoHeader.biHeight = image.m_height;
    infoHeader.biPlanes = 1;
    infoHeader.biBitCount = 24;
    infoHeader.biCompression = 0;
    infoHeader.biSizeImage = static_cast<DWORD>(image.m_pixels.size());
    infoHeader.biXPelsPerMeter = 0;
    infoHeader.biYPelsPerMeter = 0;
    infoHeader.biClrUsed = 0;
    infoHeader.biClrImportant = 0;

    header.bfSize = infoHeader.biSizeImage + header.bfOffBits;

    // write the data, remembering whether anything failed
    bool ok = fwrite(&header, sizeof(header), 1, file) == 1 &&
              fwrite(&infoHeader, sizeof(infoHeader), 1, file) == 1;

    // An empty image has no pixel bytes to write (and data() of an empty
    // vector must not be dereferenced).
    if (ok && !image.m_pixels.empty())
        ok = fwrite(image.m_pixels.data(), image.m_pixels.size(), 1, file) == 1;

    // fclose flushes buffered data, so its failure is a write failure too.
    if (fclose(file) != 0)
        ok = false;

    return ok;
}

// Number of taps a Gaussian kernel of the given sigma needs so that every
// contribution down to the 0.5% threshold is covered. A Gaussian is non-zero
// everywhere, so the threshold decides where the tail is cut off because the
// remaining contributions are too small to be noticeable.
//
// sigma: standard deviation of the Gaussian, in pixels.
// Returns the tap count (the caller is responsible for making it odd).
int PixelsNeededForSigma(float sigma)
{
    const float c_threshold = 0.005f; // 0.5%

    const auto radicand = -2.0f * sigma * sigma * log(c_threshold);
    const auto extent = 1.0f + 2.0f * sqrtf(radicand);

    return int(floor(extent)) + 1;
}

// Unnormalised Gaussian: exp(-x^2 / (2 * sigma^2)).
//
// sigma: standard deviation.
// x:     distance from the centre of the bell curve.
float Gaussian(float sigma, float x)
{
    const float negatedSquare = -(x*x);
    const float twoVariance = 2.0f * sigma*sigma;

    return expf(negatedSquare / twoVariance);
}

// Approximates the integral of Gaussian(sigma, .) over [a, b] with a single
// application of Simpson's rule: (b - a) / 6 * (f(a) + 4 f(mid) + f(b)).
//
// sigma: standard deviation passed through to Gaussian().
// a, b:  lower and upper integration bounds.
float GaussianSimpsonIntegration(float sigma, float a, float b)
{
    const float scale = (b - a) / 6.0f;
    const float midpoint = (a + b) / 2.0f;

    const float weightedSamples =
        Gaussian(sigma, a) + 4.0f * Gaussian(sigma, midpoint) + Gaussian(sigma, b);

    return scale * weightedSamples;
}

// Builds a normalised 1-D Gaussian filter.
//
// sigma: standard deviation of the Gaussian.
// taps:  number of filter weights to produce.
//
// Weight i is the Simpson-rule integral of the Gaussian over the unit-wide
// interval centred on (i - taps / 2); all weights are then divided by their
// sum so the filter adds up to 1. Returns the weights in tap order.
std::vector<float> GaussianKernelIntegrals(float sigma, int taps)
{
    std::vector<float> weights;
    float sum = 0.0f;

    const float centre = float(taps / 2);

    int tap = 0;
    while (tap < taps)
    {
        const float position = float(tap) - centre;
        const float area = GaussianSimpsonIntegration(sigma, position - 0.5f, position + 0.5f);

        weights.push_back(area);
        sum += area;
        ++tap;
    }

    // normalize it
    for (float& weight : weights)
    {
        weight /= sum;
    }

    return weights;
}

// Returns a pointer to the three bytes of the pixel at (x, y), or a pointer
// to a shared all-zero pixel when (x, y) lies outside the image.
//
// image: bitmap to sample; rows are m_pitch bytes apart, 3 bytes per pixel.
// x, y:  column and row of the requested pixel; may be out of range.
const uint8_t* GetPixelOrBlack(const SImageData& image, int x, int y)
{
    static const uint8_t black[3] = { 0, 0, 0 };

    const bool insideColumns = (x >= 0) && (x < image.m_width);
    const bool insideRows = (y >= 0) && (y < image.m_height);

    if (insideColumns && insideRows)
    {
        return &image.m_pixels[(y * image.m_pitch) + x * 3];
    }

    return black;
}

// Per-thread worker shared by both blur passes.
//
// Every thread walks the image's pixels with a 1-D grid-stride loop, so the
// result is correct for any 1-D grid/block size. For each pixel it sums
// nWeights neighbours along the axis selected by (stepX, stepY), centred on
// the pixel; neighbours outside the image contribute black (zero).
//
// pixels / output: device buffers of height * pitch floats, three floats per
//                  pixel, rows `pitch` floats apart. They must not alias.
//                  Padding floats at the end of each row are neither read
//                  nor written.
// weights:         nWeights filter weights in device memory.
// stepX, stepY:    (1, 0) for a horizontal pass, (0, 1) for a vertical pass.
__device__ __forceinline__ void blur_along_axis(const float* pixels, float* output, const float* weights, int nWeights, int width, int height, int pitch, int stepX, int stepY)
{
    if (width <= 0 || height <= 0)
        return;

    const size_t nPixels = size_t(width) * size_t(height);
    const size_t stride = size_t(gridDim.x) * blockDim.x;
    const int startOffset = -(nWeights / 2);

    for (size_t p = size_t(blockIdx.x) * blockDim.x + threadIdx.x; p < nPixels; p += stride)
    {
        const int x = int(p % size_t(width));
        const int y = int(p / size_t(width));

        float blurred0 = 0.0f;
        float blurred1 = 0.0f;
        float blurred2 = 0.0f;

        for (int i = 0; i < nWeights; ++i)
        {
            const int sx = x + stepX * (startOffset + i);
            const int sy = y + stepY * (startOffset + i);

            // Taps that fall off the image are treated as black pixels.
            if (sx < 0 || sx >= width || sy < 0 || sy >= height)
                continue;

            const float* src = pixels + size_t(sy) * size_t(pitch) + size_t(sx) * 3;
            const float weight = weights[i];
            blurred0 += src[0] * weight;
            blurred1 += src[1] * weight;
            blurred2 += src[2] * weight;
        }

        float* dst = output + size_t(y) * size_t(pitch) + size_t(x) * 3;
        dst[0] = blurred0;
        dst[1] = blurred1;
        dst[2] = blurred2;
    }
}

// Horizontal Gaussian pass: output(x, y) = sum_i pixels(x - n/2 + i, y) * intergrals[i].
//
// Launch layout: 1-D grid of 1-D blocks with at most ntpb threads per block;
// any grid size is valid because the work is distributed by a grid-stride
// loop. Uses no shared memory. pixels and output must be distinct buffers.
__global__ void __launch_bounds__(ntpb) horizontal_blur_kernel(float* pixels, float* output, float* intergrals, int nIntegrals, int width, int height, int pitch) {
    blur_along_axis(pixels, output, intergrals, nIntegrals, width, height, pitch, 1, 0);
}

// Vertical Gaussian pass: output(x, y) = sum_i pixels(x, y - n/2 + i) * intergrals[i].
//
// Same launch layout and buffer requirements as horizontal_blur_kernel.
__global__ void __launch_bounds__(ntpb) vertical_blur_kernel(float* pixels, float* output, float* intergrals, int nIntegrals, int width, int height, int pitch) {
    blur_along_axis(pixels, output, intergrals, nIntegrals, width, height, pitch, 0, 1);
}

namespace {

// Owns a device allocation of floats and releases it on scope exit, so an
// exception thrown by check() part-way through BlurImage cannot leak it.
struct DeviceFloats
{
    float* ptr;

    explicit DeviceFloats(size_t count)
        : ptr(nullptr)
    {
        // Always request at least one element so ptr is a real allocation
        // even for an empty weight list.
        check(cudaMalloc((void**)&ptr, (count ? count : 1) * sizeof(float)));
    }

    // Errors from cudaFree are deliberately ignored: destructors must not throw.
    ~DeviceFloats() { cudaFree(ptr); }

    DeviceFloats(const DeviceFloats&) = delete;
    DeviceFloats& operator=(const DeviceFloats&) = delete;
};

// Uploads one set of filter weights, runs a single blur pass from d_src into
// d_dst and waits for it, surfacing both launch and execution errors.
void RunBlurPass(bool horizontal, float* d_src, float* d_dst, const std::vector<float>& weights, int nblks, const SImageData& image)
{
    DeviceFloats d_integrals(weights.size());
    if (!weights.empty())
    {
        check(cudaMemcpy(d_integrals.ptr, weights.data(), weights.size() * sizeof(float), cudaMemcpyHostToDevice));
    }

    const int nIntegrals = int(weights.size());
    const int width = int(image.m_width);
    const int height = int(image.m_height);
    const int pitch = int(image.m_pitch);

    if (horizontal)
    {
        horizontal_blur_kernel<<<nblks, ntpb>>>(d_src, d_dst, d_integrals.ptr, nIntegrals, width, height, pitch);
    }
    else
    {
        vertical_blur_kernel<<<nblks, ntpb>>>(d_src, d_dst, d_integrals.ptr, nIntegrals, width, height, pitch);
    }

    check(cudaGetLastError());        // bad launch configuration
    check(cudaDeviceSynchronize());   // faults raised while the kernel ran
}

} // namespace

// Applies a separable Gaussian blur to srcImage on the GPU and stores the
// result in destImage.
//
// srcImage:               image to blur (3 bytes per pixel, m_pitch bytes per row).
// destImage:              receives the blurred image with the same width,
//                         height and pitch as srcImage.
// xblursigma, yblursigma: Gaussian sigma for the horizontal / vertical pass.
// xblursize, yblursize:   number of filter taps for the horizontal / vertical pass.
//
// Pixels are converted to floats in [0, 1], blurred horizontally and then
// vertically on the device (samples outside the image count as black), and
// converted back to bytes with round-to-nearest and clamping. Unlike a
// byte-based CPU implementation the intermediate image stays in floating
// point, so results may differ from such a reference by one least-significant
// bit. Row padding bytes are carried over from the source unchanged.
//
// Throws std::invalid_argument if srcImage.m_pitch is too small for its
// width, and whatever check() throws when a CUDA call fails. Resets the CUDA
// device before returning normally.
void BlurImage(const SImageData& srcImage, SImageData &destImage, float xblursigma, float yblursigma, unsigned int xblursize, unsigned int yblursize)
{
    const long width = srcImage.m_width;
    const long height = srcImage.m_height;
    const long pitch = srcImage.m_pitch;

    // Nothing to blur: produce an empty image with matching metadata.
    if (width <= 0 || height <= 0)
    {
        destImage.m_width = width;
        destImage.m_height = height;
        destImage.m_pitch = pitch;
        destImage.m_pixels.clear();
        return;
    }

    if (pitch / 3 < width)
    {
        throw std::invalid_argument("BlurImage: image pitch is smaller than width * 3");
    }

    // Everything below works on whole rows including padding, so host and
    // device buffers have exactly the same layout.
    const size_t n = size_t(height) * size_t(pitch);

    // Stage the source as normalised floats. Bytes missing from a short
    // source buffer are treated as black instead of being read out of bounds.
    std::vector<float> host(n, 0.0f);
    {
        const size_t available = std::min(n, srcImage.m_pixels.size());
        const auto first = srcImage.m_pixels.begin();
        const auto last = first + static_cast<std::vector<uint8_t>::difference_type>(available);
        std::transform(first, last, host.begin(), [](uint8_t e) {
            return e / 255.0f;
        });
    }

    // Scoped so both device buffers are released before the device is reset.
    {
        DeviceFloats d_ipixels(n); // source image, later reused for the final result
        DeviceFloats d_opixels(n); // horizontally blurred intermediate image

        check(cudaMemcpy(d_ipixels.ptr, host.data(), n * sizeof(float), cudaMemcpyHostToDevice));

        // One thread per pixel, capped so the grid is valid on every device;
        // the kernels' grid-stride loops cover whatever the cap leaves over.
        const size_t nPixels = size_t(width) * size_t(height);
        const int nblks = int(std::min<size_t>((nPixels + ntpb - 1) / ntpb, 65535));

        // horizontal blur from d_ipixels to d_opixels
        RunBlurPass(true, d_ipixels.ptr, d_opixels.ptr, GaussianKernelIntegrals(xblursigma, xblursize), nblks, srcImage);

        // vertical blur from d_opixels back into d_ipixels
        RunBlurPass(false, d_opixels.ptr, d_ipixels.ptr, GaussianKernelIntegrals(yblursigma, yblursize), nblks, srcImage);

        check(cudaMemcpy(host.data(), d_ipixels.ptr, n * sizeof(float), cudaMemcpyDeviceToHost));
    }

    check(cudaDeviceReset());

    destImage.m_width = width;
    destImage.m_height = height;
    destImage.m_pitch = pitch;
    destImage.m_pixels.resize(n);

    // Round to nearest and clamp. Plain truncation can turn k / 255.0f * 255.0f
    // into k - 1 because of floating-point rounding.
    std::transform(host.begin(), host.end(), destImage.m_pixels.begin(), [](float e) -> uint8_t {
        const float scaled = e * 255.0f + 0.5f;
        if (!(scaled > 0.0f)) // also catches NaN
            return 0;
        if (scaled >= 255.0f)
            return 255;
        return uint8_t(scaled);
    });
}

// Command-line entry point: <source> <dest> <xblur> <yblur>.
//
// Loads a 24-bit BMP from <source>, blurs it with the given horizontal and
// vertical sigma values and writes the result to <dest>.
//
// Returns 0 on success. On any failure (bad arguments, unreadable source,
// blur error, unwritable destination) prints a message, waits for Enter and
// returns 1.
int main(int argc, char **argv)
{
    float xblursigma = 0.0f, yblursigma = 0.0f;

    const bool showUsage = argc < 5 ||
        (sscanf(argv[3], "%f", &xblursigma) != 1) ||
        (sscanf(argv[4], "%f", &yblursigma) != 1);

    if (showUsage)
    {
        printf("Usage: <source> <dest> <xblur> <yblur>\nBlur values are sigma\n\n");
        WaitForEnter();
        return 1;
    }

    // A non-positive sigma makes the Gaussian divide by zero and yields NaN
    // filter weights, so reject it up front. Written as !(x > 0) so that a
    // parsed NaN is rejected as well.
    if (!(xblursigma > 0.0f) || !(yblursigma > 0.0f))
    {
        printf("Blur sigma values must be greater than zero\n\n");
        WaitForEnter();
        return 1;
    }

    // Only read the file names once argc is known to be large enough.
    char *srcFileName = argv[1];
    char *destFileName = argv[2];

    // calculate pixel sizes, and make sure they are odd
    int xblursize = PixelsNeededForSigma(xblursigma) | 1;
    int yblursize = PixelsNeededForSigma(yblursigma) | 1;

    printf("Attempting to blur a 24 bit image.\n");
    printf(" Source=%s\n Dest=%s\n blur=[%0.1f, %0.1f] px=[%d,%d]\n\n", srcFileName, destFileName, xblursigma, yblursigma, xblursize, yblursize);

    SImageData srcImage;
    if (!LoadImage(srcFileName, srcImage))
    {
        printf("could not read 24 bit bmp file %s\n\n", srcFileName);
        WaitForEnter();
        return 1;
    }

    printf("%s loaded\n", srcFileName);

    SImageData destImage;
    try
    {
        BlurImage(srcImage, destImage, xblursigma, yblursigma, xblursize, yblursize);
    }
    catch (const std::exception& e)
    {
        // BlurImage reports CUDA failures by throwing; turn that into a
        // normal error exit instead of an unhandled-exception abort.
        printf("Could not blur image: %s\n", e.what());
        WaitForEnter();
        return 1;
    }

    if (!SaveImage(destFileName, destImage))
    {
        printf("Could not save blurred image as %s\n", destFileName);
        WaitForEnter();
        return 1;
    }

    printf("Blurred image saved as %s\n", destFileName);
    return 0;
}

</syntaxhighlight>

|}

= Assignment 3 - Optimize =

Jkraitberg

49

edits

Changes

BETTERRED

Navigation menu

Personal tools

Namespaces

Variants

Views

More

Search

Navigation

get involved with CDOT

courses

course projects

links

Tools