Changes

Jump to: navigation, search

BETTERRED

11,368 bytes added, 18:40, 23 March 2017
no edit summary
= Assignment 2 - Parallelize =
 
{| class="wikitable mw-collapsible mw-collapsed"
! Culprit - BlurImage( ... )
|-
|
<syntaxhighlight lang="cpp">
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <array>
#include <vector>
#include <functional>
#include <stdexcept>
#include <windows.h> // for bitmap headers.
#include <algorithm>
 
#include <cuda_runtime.h>
// to remove intellisense highlighting
#include <device_launch_parameters.h>
#include <device_functions.h>
 
// Thread-count tuning constant (the name presumably abbreviates "number of
// threads per block" -- confirm). The commented-out preprocessor branches
// below would select 512 instead of 1024 when __CUDACC_VER_MAJOR__ == 1;
// they are disabled, so 1024 is always used.
//#if defined(__NVCC__) && __CUDACC_VER_MAJOR__ != 1
const int ntpb = 1024;
//#elif defined(__NVCC__) &&__CUDACC_VER_MAJOR__ == 1
//const int ntpb = 512;
//#endif
 
// Single-precision pi. Not referenced anywhere else in this listing.
const float c_pi = 3.14159265359f;
 
// Converts a failed CUDA runtime status into a C++ exception.
//
// error: status code returned by a CUDA runtime API call.
//
// Throws std::runtime_error (which derives from std::exception, so existing
// catch (std::exception&) handlers still match) whose what() text is
// cudaGetErrorString(error). Returns normally when error == cudaSuccess.
void check(cudaError_t error) {
    if (error != cudaSuccess) {
        // std::exception has no standard constructor taking a message string;
        // that overload is a Microsoft extension. std::runtime_error is the
        // portable way to carry the text.
        throw std::runtime_error(cudaGetErrorString(error));
    }
}
 
// In-memory 24-bit bitmap as produced by LoadImage and consumed by SaveImage.
struct SImageData
{
    // Creates an empty image: all dimensions zero and no pixel storage.
    // m_pitch is initialized as well so that a default-constructed image never
    // exposes an indeterminate value.
    SImageData()
        : m_width(0)
        , m_height(0)
        , m_pitch(0)
    { }

    long m_width;   // pixels per row
    long m_height;  // number of rows
    long m_pitch;   // bytes per row: m_width * 3 rounded up to a multiple of 4
    std::vector<uint8_t> m_pixels; // raw row data, 3 bytes per pixel, rows m_pitch bytes apart
};
 
// Prints an exit prompt and blocks until one character has been read from
// standard input, so a console window stays open long enough to be read.
void WaitForEnter()
{
    std::cout << "Press Enter key to exit ... ";

    char discarded;
    std::cin.get(discarded);
}
 
// Loads an uncompressed 24-bit bottom-up BMP file into imageData.
//
// fileName:  path of the bitmap to read.
// imageData: receives width, height, pitch and the raw pixel rows. It is only
//            modified when the whole file was read successfully.
//
// Returns true on success. Returns false when the file cannot be opened, the
// headers cannot be read, the signature is not "BM", the bit depth is not 24,
// the dimensions are not positive, or the pixel data is shorter than expected.
bool LoadImage(const char *fileName, SImageData& imageData)
{
    // open the file if we can
    FILE *file = fopen(fileName, "rb");
    if (!file)
        return false;

    // read the headers if we can
    BITMAPFILEHEADER header;
    BITMAPINFOHEADER infoHeader;
    bool ok =
        fread(&header, sizeof(header), 1, file) == 1 &&
        fread(&infoHeader, sizeof(infoHeader), 1, file) == 1 &&
        header.bfType == 0x4D42 && infoHeader.biBitCount == 24;

    // A non-positive width is invalid, and a non-positive height (zero, or a
    // top-down bitmap) cannot be represented by the rest of this program,
    // which sizes its buffers as m_height * m_pitch.
    ok = ok && infoHeader.biWidth > 0 && infoHeader.biHeight > 0;

    std::vector<uint8_t> pixels;
    long pitch = 0;

    if (ok)
    {
        // Rows hold 3 bytes per pixel (BGR order) and are padded to the next
        // multiple of 4 bytes.
        pitch = infoHeader.biWidth * 3;
        if (pitch & 3)
        {
            pitch &= ~3;
            pitch += 4;
        }

        // biSizeImage is allowed to be 0 for uncompressed bitmaps, and some
        // writers store a value that is too small. Always read at least
        // pitch * height bytes so later row indexing stays inside the buffer.
        const size_t expectedBytes = size_t(pitch) * size_t(infoHeader.biHeight);
        size_t dataBytes = infoHeader.biSizeImage;
        if (dataBytes < expectedBytes)
            dataBytes = expectedBytes;

        pixels.resize(dataBytes);

        ok = fseek(file, header.bfOffBits, SEEK_SET) == 0 &&
             fread(pixels.data(), pixels.size(), 1, file) == 1;
    }

    fclose(file);

    if (!ok)
        return false;

    imageData.m_width = infoHeader.biWidth;
    imageData.m_height = infoHeader.biHeight;
    imageData.m_pitch = pitch;
    imageData.m_pixels.swap(pixels);
    return true;
}
 
// Writes image to fileName as an uncompressed 24-bit BMP.
//
// fileName: path of the file to create or overwrite.
// image:    dimensions and raw pixel rows to write; m_pixels is written as-is
//           and its size is recorded as the image size.
//
// Returns true only when the file was opened and both headers and the pixel
// data were written and flushed successfully; false otherwise.
bool SaveImage(const char *fileName, const SImageData &image)
{
    // open the file if we can
    FILE *file = fopen(fileName, "wb");
    if (!file)
        return false;

    const size_t pixelBytes = image.m_pixels.size();

    // make the header info
    BITMAPFILEHEADER header;
    BITMAPINFOHEADER infoHeader;

    header.bfType = 0x4D42;
    header.bfReserved1 = 0;
    header.bfReserved2 = 0;
    header.bfOffBits = 54; // pixel data starts 54 bytes into the file

    infoHeader.biSize = 40;
    infoHeader.biWidth = image.m_width;
    infoHeader.biHeight = image.m_height;
    infoHeader.biPlanes = 1;
    infoHeader.biBitCount = 24;
    infoHeader.biCompression = 0;
    infoHeader.biSizeImage = static_cast<DWORD>(pixelBytes);
    infoHeader.biXPelsPerMeter = 0;
    infoHeader.biYPelsPerMeter = 0;
    infoHeader.biClrUsed = 0;
    infoHeader.biClrImportant = 0;

    header.bfSize = infoHeader.biSizeImage + header.bfOffBits;

    // Write the data. Every write is checked so that a full disk or similar
    // failure is reported instead of silently producing a truncated file.
    // An empty pixel buffer is skipped: fwrite with a zero size always
    // returns 0 and would look like a failure.
    bool ok =
        fwrite(&header, sizeof(header), 1, file) == 1 &&
        fwrite(&infoHeader, sizeof(infoHeader), 1, file) == 1 &&
        (pixelBytes == 0 ||
         fwrite(image.m_pixels.data(), pixelBytes, 1, file) == 1);

    // fclose flushes buffered data, so its result matters too.
    if (fclose(file) != 0)
        ok = false;

    return ok;
}
 
// Returns how many pixels (taps) a Gaussian kernel with the given sigma needs
// so that it extends out to where the curve has dropped to the cutoff
// fraction of its peak. A Gaussian is technically non-zero everywhere; the
// cutoff trims the tail where contributions are too small to be noticeable.
int PixelsNeededForSigma(float sigma)
{
    const float cutoff = 0.005f; // 0.5%

    // Distance from the centre at which the Gaussian equals the cutoff.
    const float reach = sqrtf(-2.0f * sigma * sigma * log(cutoff));

    return int(floor(1.0f + 2.0f * reach)) + 1;
}
 
// Evaluates the unnormalized Gaussian exp(-x^2 / (2 * sigma^2)) at x.
float Gaussian(float sigma, float x)
{
    const float twoSigmaSquared = 2.0f * sigma * sigma;
    const float exponent = -(x * x) / twoSigmaSquared;
    return expf(exponent);
}
 
// Approximates the integral of Gaussian(sigma, .) over [a, b] with a single
// application of Simpson's rule: (b - a) / 6 * (f(a) + 4 f(mid) + f(b)).
float GaussianSimpsonIntegration(float sigma, float a, float b)
{
    const float midpoint = (a + b) / 2.0f;
    const float weightedSamples =
        Gaussian(sigma, a) + 4.0f * Gaussian(sigma, midpoint) + Gaussian(sigma, b);
    const float scale = (b - a) / 6.0f;
    return scale * weightedSamples;
}
 
// Builds a 1D Gaussian blur kernel with `taps` weights.
//
// Tap i is centred at offset i - taps / 2 (integer division) and its weight is
// the Simpson approximation of the Gaussian integrated over the one-pixel-wide
// interval around that offset. The weights are then divided by their sum so
// that they add up to 1.
std::vector<float> GaussianKernelIntegrals(float sigma, int taps)
{
    std::vector<float> weights;
    const float centre = float(taps / 2);
    float sum = 0.0f;

    for (int tap = 0; tap < taps; ++tap)
    {
        const float offset = float(tap) - centre;
        weights.push_back(GaussianSimpsonIntegration(sigma, offset - 0.5f, offset + 0.5f));
        sum += weights.back();
    }

    // normalize it
    for (float& weight : weights)
    {
        weight /= sum;
    }

    return weights;
}
 
// Returns a pointer to the 3 bytes of the pixel at (x, y), or a pointer to a
// shared all-zero pixel when the coordinate lies outside the image.
const uint8_t* GetPixelOrBlack(const SImageData& image, int x, int y)
{
    static const uint8_t black[3] = { 0, 0, 0 };

    const bool insideColumns = x >= 0 && x < image.m_width;
    const bool insideRows = y >= 0 && y < image.m_height;
    if (!(insideColumns && insideRows))
    {
        return black;
    }

    // Rows are m_pitch bytes apart; each pixel occupies 3 bytes.
    const long byteOffset = (y * image.m_pitch) + x * 3;
    return &image.m_pixels[byteOffset];
}
 
// Device kernel for the horizontal blur pass.
//
// NOTE(review): the convolution itself is not implemented yet. As in the
// version this replaces, the kernel only copies `pixels` to `output`; the
// `intergrals`, `nIntegrals` and `width` arguments are accepted but unused.
//
// pixels:     device buffer of at least pitch * height floats (input).
// output:     device buffer of at least pitch * height floats (result).
// intergrals: device buffer with the Gaussian weights (currently unused).
// nIntegrals: number of weights in `intergrals` (currently unused).
// width:      image width in pixels (currently unused).
// height:     number of rows.
// pitch:      number of float elements per row, padding included.
//
// Launch layout: any 1D or 2D grid/block shape is valid. Threads walk columns
// along x and rows along y in grid-stride loops, so every element in
// [0, pitch) x [0, height) is copied exactly once and nothing outside the
// buffers is touched, whatever the launch dimensions are. No shared memory.
__global__ void horizontal_blur_kernel(const float* pixels, float* output, const float* intergrals, int nIntegrals, int width, int height, int pitch)
{
    // Not consumed until the convolution is implemented.
    (void)intergrals;
    (void)nIntegrals;
    (void)width;

    const int colStep = gridDim.x * blockDim.x;
    const int rowStep = gridDim.y * blockDim.y;

    for (int row = blockIdx.y * blockDim.y + threadIdx.y; row < height; row += rowStep)
    {
        // 64-bit offset so large images cannot overflow the index.
        const size_t rowStart = size_t(row) * size_t(pitch);

        // Adjacent threads along x touch adjacent elements of the same row.
        for (int col = blockIdx.x * blockDim.x + threadIdx.x; col < pitch; col += colStep)
        {
            output[rowStart + col] = pixels[rowStart + col];
        }
    }
}

// Runs the (work-in-progress) GPU blur of srcImage into destImage.
//
// srcImage:   image to process.
// destImage:  receives the same width, height and pitch as srcImage and a
//             pixel buffer of m_pitch * m_height bytes.
// xblursigma: sigma of the horizontal Gaussian.
// yblursigma: sigma of the vertical Gaussian (unused: no vertical pass yet).
// xblursize:  number of taps of the horizontal kernel.
// yblursize:  number of taps of the vertical kernel (unused for now).
//
// NOTE(review): because horizontal_blur_kernel is still a pass-through, the
// result is currently a copy of the source pixels. The serial reference
// implementation is kept as a comment at the end of this function.
//
// Throws whatever check() throws when a CUDA runtime call fails.
// Calls cudaDeviceReset() before returning, as the previous version did.
void BlurImage(const SImageData& srcImage, SImageData &destImage, float xblursigma, float yblursigma, unsigned int xblursize, unsigned int yblursize)
{
    // No vertical pass exists yet.
    (void)yblursigma;
    (void)yblursize;

    // Nothing to upload for an empty image; launching a zero-sized grid would
    // be reported as an invalid configuration.
    if (srcImage.m_height <= 0 || srcImage.m_pitch <= 0)
    {
        destImage.m_width = srcImage.m_width;
        destImage.m_height = srcImage.m_height;
        destImage.m_pitch = srcImage.m_pitch;
        destImage.m_pixels.resize(0);
        return;
    }

    // One float per source byte, padding bytes included, so that the device
    // buffers use the same row pitch as the host image.
    const size_t count = size_t(srcImage.m_pitch) * size_t(srcImage.m_height);
    const size_t bytes = count * sizeof(float);

    // Host staging buffer, reused for the upload and the download. Only as
    // many bytes as the source actually holds are converted; the remainder
    // stays zero.
    std::vector<float> staging(count, 0.0f);
    const size_t available = std::min(count, srcImage.m_pixels.size());
    std::transform(srcImage.m_pixels.begin(),
                   srcImage.m_pixels.begin() + static_cast<std::vector<uint8_t>::difference_type>(available),
                   staging.begin(),
                   [](uint8_t value) { return value / 255.0f; });

    float* d_ipixels = nullptr;   // Device input pixel array
    float* d_opixels = nullptr;   // Device output pixel array
    float* d_integrals = nullptr; // Stores gaussian kernel integrals

    check(cudaMalloc((void**)&d_ipixels, bytes));
    check(cudaMalloc((void**)&d_opixels, bytes));
    // Copy the full pitch * height extent: the buffers are allocated with the
    // padded pitch, so 3 * width * height would miss the tail of the image
    // whenever rows are padded.
    check(cudaMemcpy(d_ipixels, staging.data(), bytes, cudaMemcpyHostToDevice));

    // horizontal blur from d_ipixels to d_opixels
    int nIntegrals = 0;
    {
        // Scoped so that the row is cleared once it's copied
        const std::vector<float> row = GaussianKernelIntegrals(xblursigma, int(xblursize));
        nIntegrals = int(row.size());

        check(cudaMalloc((void**)&d_integrals, row.size() * sizeof(float)));
        check(cudaMemcpy(d_integrals, row.data(), row.size() * sizeof(float), cudaMemcpyHostToDevice));
    }

    // 256 threads per block (a multiple of the warp size), with x running
    // along a row. The grid's y extent is clamped to the hardware limit; the
    // kernel's grid-stride loops pick up any remaining rows.
    const dim3 dimBlock(32, 8);
    const size_t maxGridY = 65535;
    const size_t blocksX = (size_t(srcImage.m_pitch) + dimBlock.x - 1) / dimBlock.x;
    const size_t blocksY = std::min((size_t(srcImage.m_height) + dimBlock.y - 1) / dimBlock.y, maxGridY);
    const dim3 dimGrid((unsigned int)blocksX, (unsigned int)blocksY);

    horizontal_blur_kernel <<<dimGrid, dimBlock >>> (d_ipixels, d_opixels, d_integrals, nIntegrals,
        int(srcImage.m_width), int(srcImage.m_height), int(srcImage.m_pitch));

    // Launch-configuration errors are reported by cudaGetLastError();
    // faults raised while the kernel runs surface at the synchronize.
    check(cudaGetLastError());
    check(cudaDeviceSynchronize());

    check(cudaFree(d_integrals));

    check(cudaMemcpy(staging.data(), d_opixels, bytes, cudaMemcpyDeviceToHost));

    check(cudaFree(d_ipixels));
    check(cudaFree(d_opixels));
    check(cudaDeviceReset());

    destImage.m_width = srcImage.m_width;
    destImage.m_height = srcImage.m_height;
    destImage.m_pitch = srcImage.m_pitch;
    destImage.m_pixels.resize(count);

    // Convert back to bytes. Rounding (instead of truncating) keeps
    // value / 255 * 255 from landing one below the original byte, and the
    // clamp keeps out-of-range or NaN values from overflowing the conversion.
    std::transform(staging.begin(), staging.end(), destImage.m_pixels.begin(),
                   [](float value) -> uint8_t {
                       const float scaled = value * 255.0f;
                       if (!(scaled > 0.0f))
                           return 0;
                       if (scaled >= 255.0f)
                           return 255;
                       return uint8_t(scaled + 0.5f);
                   });

    // TODO: implement the convolution on the device. Serial reference
    // implementation (horizontal pass into a temporary, then vertical pass):
    //
    //// allocate space for copying the image for destImage and tmpImage
    //destImage.m_width = srcImage.m_width;
    //destImage.m_height = srcImage.m_height;
    //destImage.m_pitch = srcImage.m_pitch;
    //destImage.m_pixels.resize(destImage.m_height * destImage.m_pitch);

    //SImageData tmpImage;
    //tmpImage.m_width = srcImage.m_width;
    //tmpImage.m_height = srcImage.m_height;
    //tmpImage.m_pitch = srcImage.m_pitch;
    //tmpImage.m_pixels.resize(tmpImage.m_height * tmpImage.m_pitch);

    //// horizontal blur from srcImage into tmpImage
    //{
    //    auto row = GaussianKernelIntegrals(xblursigma, xblursize);

    //    int startOffset = -1 * int(row.size() / 2);

    //    for (int y = 0; y < tmpImage.m_height; ++y)
    //    {
    //        for (int x = 0; x < tmpImage.m_width; ++x)
    //        {
    //            std::array<float, 3> blurredPixel = { { 0.0f, 0.0f, 0.0f } };
    //            for (unsigned int i = 0; i < row.size(); ++i)
    //            {
    //                const uint8_t *pixel = GetPixelOrBlack(srcImage, x + startOffset + i, y);
    //                blurredPixel[0] += float(pixel[0]) * row[i];
    //                blurredPixel[1] += float(pixel[1]) * row[i];
    //                blurredPixel[2] += float(pixel[2]) * row[i];
    //            }

    //            uint8_t *destPixel = &tmpImage.m_pixels[y * tmpImage.m_pitch + x * 3];

    //            destPixel[0] = uint8_t(blurredPixel[0]);
    //            destPixel[1] = uint8_t(blurredPixel[1]);
    //            destPixel[2] = uint8_t(blurredPixel[2]);
    //        }
    //    }
    //}

    //// vertical blur from tmpImage into destImage
    //{
    //    auto row = GaussianKernelIntegrals(yblursigma, yblursize);

    //    int startOffset = -1 * int(row.size() / 2);

    //    for (int y = 0; y < destImage.m_height; ++y)
    //    {
    //        for (int x = 0; x < destImage.m_width; ++x)
    //        {
    //            std::array<float, 3> blurredPixel = { { 0.0f, 0.0f, 0.0f } };
    //            for (unsigned int i = 0; i < row.size(); ++i)
    //            {
    //                const uint8_t *pixel = GetPixelOrBlack(tmpImage, x, y + startOffset + i);
    //                blurredPixel[0] += float(pixel[0]) * row[i];
    //                blurredPixel[1] += float(pixel[1]) * row[i];
    //                blurredPixel[2] += float(pixel[2]) * row[i];
    //            }

    //            uint8_t *destPixel = &destImage.m_pixels[y * destImage.m_pitch + x * 3];

    //            destPixel[0] = uint8_t(blurredPixel[0]);
    //            destPixel[1] = uint8_t(blurredPixel[1]);
    //            destPixel[2] = uint8_t(blurredPixel[2]);
    //        }
    //    }
    //}
}
 
// Command-line entry point: <source> <dest> <xblur> <yblur>.
//
// Loads the source bitmap, runs BlurImage with the given sigmas and writes
// the result to the destination path.
//
// Returns 0 on success, and 1 after printing a message when the arguments
// are missing or malformed, the source cannot be loaded, the blur throws, or
// the result cannot be saved.
int main(int argc, char **argv)
{
    float xblursigma = 0.0f;
    float yblursigma = 0.0f;

    // Short-circuit evaluation keeps argv[3] / argv[4] from being read when
    // fewer than four arguments were supplied.
    const bool showUsage = argc < 5 ||
        (sscanf(argv[3], "%f", &xblursigma) != 1) ||
        (sscanf(argv[4], "%f", &yblursigma) != 1);

    if (showUsage)
    {
        printf("Usage: <source> <dest> <xblur> <yblur>\nBlur values are sigma\n\n");
        WaitForEnter();
        return 1;
    }

    // Only index argv once the argument count is known to be sufficient;
    // argv[2] lies past the end of the array when the program is started
    // without arguments.
    const char *srcFileName = argv[1];
    const char *destFileName = argv[2];

    // calculate pixel sizes, and make sure they are odd
    const int xblursize = PixelsNeededForSigma(xblursigma) | 1;
    const int yblursize = PixelsNeededForSigma(yblursigma) | 1;

    printf("Attempting to blur a 24 bit image.\n");
    printf(" Source=%s\n Dest=%s\n blur=[%0.1f, %0.1f] px=[%d,%d]\n\n", srcFileName, destFileName, xblursigma, yblursigma, xblursize, yblursize);

    SImageData srcImage;
    if (!LoadImage(srcFileName, srcImage))
    {
        printf("could not read 24 bit bmp file %s\n\n", srcFileName);
        WaitForEnter();
        return 1;
    }
    printf("%s loaded\n", srcFileName);

    SImageData destImage;
    try
    {
        BlurImage(srcImage, destImage, xblursigma, yblursigma, xblursize, yblursize);
    }
    catch (const std::exception& error)
    {
        // check() reports CUDA failures by throwing; without this handler the
        // process would terminate without saying what went wrong.
        printf("Could not blur image: %s\n", error.what());
        WaitForEnter();
        return 1;
    }

    if (!SaveImage(destFileName, destImage))
    {
        printf("Could not save blurred image as %s\n", destFileName);
        WaitForEnter();
        return 1;
    }

    printf("Blurred image saved as %s\n", destFileName);
    return 0;
}
</syntaxhighlight>
 
|}
 
= Assignment 3 - Optimize =
49
edits

Navigation menu