Changes

The B-Team

1,437 bytes removed, 18:20, 30 November 2012

→‎Code

I'm only going to post the kernels that changed. There were also many changes in the host code that helped make the program faster.

// part of common.h

const int BLOCK_SIZE_X = 16;

const int BLOCK_SIZE_Y = 8;

const int BLOCK_SIZE_SSAA = 256;

const int BLOCK_SIZE_RGB = 16;

const int MAX_GRID_SIZE_X = 65536;

const uint8_t MAX_ALIASING_FACTOR = 16;

</pre></big>

// part of main.cpp

cudaError_t error;

int iDevice;

cudaDeviceProp prop;

// Get device information for total global memory

error = cudaGetDevice(&iDevice);

if(error != cudaSuccess)

displayCudeError(error);

error = cudaGetDeviceProperties(&prop, iDevice);

if(error != cudaSuccess)

displayCudeError(error);

// The max amount to do per pass demends on the size of GPU memory and the size of unsigned integer.

// Global memory is devided by two so that both the value array and RGB array can both fit in memory.

DimensionSqType maxPixelsPerPass = (UINT_MAX > (prop.totalGlobalMem / 2)) ? (prop.totalGlobalMem / 2) : UINT_MAX;

// RGB + alpha is 4 BYTEs. Make sure two copies of the larger can fit in device memory.

DimensionType largerType = ((4 * sizeof(BYTE)) < sizeof(ElementType)) ? sizeof(ElementType) : (4 * sizeof(BYTE));

// Divide by two for extra safty.

maxPixelsPerPass /= (largerType * 2);

</pre></big>

__device__ ElementType mandelbrot(ElementType c_i, ElementType c_r, IterationType iterations)

{

ElementType z_r = c_r;

ElementType z_i = c_i;

ElementType z2_r = z_r * z_r;

ElementType z2_i = z_i * z_i;

IterationType n = 0;

while(n < iterations && z2_r + z2_i < 4.0)

{

z_i = 2.0 * z_r * z_i + c_i;

z_r = z2_r - z2_i + c_r;

z2_r = z_r * z_r;

z2_i = z_i * z_i;

n++;

}

z_i = 2.0 * z_r * z_i + c_i;

z_r = z2_r - z2_i + c_r;

z2_r = z_r * z_r;

z2_i = z_i * z_i;

z_i = 2.0 * z_r * z_i + c_i;

z_r = z2_r - z2_i + c_r;

z2_r = z_r * z_r;

z2_i = z_i * z_i;

n += 2;

if(n > iterations)

{

return (ElementType)iterations;

}

else

{

return (ElementType)n + 1.0 - __logf(__logf(__dsqrt_rn(z2_r + z2_i)))/__logf(2.0);;

}

</pre></big>

// Return number of iterations.

__global__ void getFractal(ElementType* img, ElementType yMax, ElementType xMin, ElementType xScale, ElementType yScale, IterationType iterations, DimensionType width, DimensionType height)

{

DimensionType dx = blockIdx.x * BLOCK_SIZE_X + threadIdx.x;

DimensionType dy = blockIdx.y * BLOCK_SIZE_Y + threadIdx.y;

if(dx >= width || dy >= height)

return; ~~// This is fine because so few registers are used~~

img[(DimensionSqType)dy * (DimensionSqType)width + (DimensionSqType)dx] = mandelbrot(yMax - (ElementType)dy * yScale,

xMin + (ElementType)dx * xScale,

iterations);

}

</pre></big>

// Return number of iterations.

__global__ void getFractalSSAA(ElementType* img, DimensionSqType* list, DimensionSqType length, ElementType yMax, ElementType xMin,

ElementType xScale, ElementType yScale, IterationType iterations,

DimensionType width, AlisingFactorType ssaafactor)

{

DimensionType curr = blockIdx.x * BLOCK_SIZE_SSAA + threadIdx.x;

if(curr >= length)

return;

DimensionSqType val = list[curr];

ElementType xSubScale = xScale / ((ElementType)ssaafactor);

ElementType ySubScale = yScale / ((ElementType)ssaafactor);

// Get the centre of the top left subpixel

xMin = xMin + (ElementType)(val % width) * xScale - (xScale / 2.0) + (xSubScale / 2.0);

yMax = yMax - (ElementType)(val / width) * yScale + (yScale / 2.0) - (ySubScale / 2.0);

// Get the values for each pixel in fractal

ElementType subpixels[MAX_ALIASING_FACTOR * MAX_ALIASING_FACTOR];

for(AlisingFactorType x = 0; x < ssaafactor; x++)

{

for(AlisingFactorType y = 0; y < ssaafactor; y++)

{

subpixels[x * ssaafactor + y] = mandelbrot(yMax - ySubScale * y , xMin + xSubScale * x, iterations);

}

AlisingFactorSqType factor2 = (AlisingFactorSqType)ssaafactor * (AlisingFactorSqType)ssaafactor;

if(factor2 % 2 != 0)

{

img[val] = getMedian(subpixels, (AlisingFactorSqType)ssaafactor * (AlisingFactorSqType)ssaafactor / 2, factor2);

}

else

{

img[val] = (getMedian(subpixels, factor2 / 2 - 1, factor2)

+ getMedian(subpixels, factor2 / 2, factor2))

/ 2.0;

}

</pre></big>

~~<big><pre>~~

~~__global__ void getBmpRGB(BYTE* image, ElementType* values, DimensionType width, DimensionType height, IterationType iterations)~~

{

~~DimensionType dy = blockIdx.y * BLOCK_SIZE + threadIdx.y;~~

~~DimensionType dx = blockIdx.x * BLOCK_SIZE + threadIdx.x;~~

~~if(dx >= width || dy >= height)~~

~~return;~~

~~DimensionType c = dy * width + dx;~~

~~BYTE rgbValue[3];~~

~~getRGB(values[c]/(ElementType)iterations, rgbValue);~~

~~image[c*3] = rgbValue[2];~~

~~image[c*3 + 1] = rgbValue[1];~~

~~image[c*3 + 2] = rgbValue[0];~~

}

~~</pre></big>~~

~~<big><pre>~~

~~__global__ void getBmpRGBfromHistorgram(ElementType* map, BYTE* image, ElementType* values, DimensionType width, DimensionType height)~~

{

~~DimensionType dy = blockIdx.y * BLOCK_SIZE + threadIdx.y;~~

~~DimensionType dx = blockIdx.x * BLOCK_SIZE + threadIdx.x;~~

~~if(dx >= width || dy >= height)~~

~~return;~~

~~DimensionType c = dy * width + dx;~~

~~IterationType ival = (IterationType)values[c];~~

~~ElementType colourVal = map[ival] + (values[c] - (ElementType)ival) * (map[ival + 1] - map[ival]);~~

~~BYTE rgbValue[3];~~

~~getRGB(colourVal, rgbValue);~~

~~image[c*3] = rgbValue[2];~~

~~image[c*3 + 1] = rgbValue[1];~~

~~image[c*3 + 2] = rgbValue[0];~~

}

~~</pre></big>~~

= Assignment 2 =

Kmbarnhart

1

edit

CDOT Wiki β

Changes

The B-Team

CDOT Wiki ^β