Difference between revisions of "The B-Team"

From CDOT Wiki
Jump to: navigation, search
Line 1: Line 1:
 
{{GPU610/DPS915 Index | 20123}}
 
{{GPU610/DPS915 Index | 20123}}
 
= The B-Team =
 
= The B-Team =
 +
 
== Team Members ==  
 
== Team Members ==  
 
# [mailto:kmbarnhart@learn.senecac.on.ca?sujbect=DPS915 Kyle Barnhart]
 
# [mailto:kmbarnhart@learn.senecac.on.ca?sujbect=DPS915 Kyle Barnhart]
  
 +
= Assignment 2 =
 +
== Mandelbrot Set ==
 +
=== Repo ===
 +
https://github.com/KyleBarnhart/Fractal/tree/cuda
 +
 +
=== Description ===
 +
The new version of my fractal program is running on the GPU and has been highly parallelized. It is running more than ten times faster than the original version. I am sure this is highly GPU dependent and I am running it on an Nvidia Geforce GTA 660 Ti with 1344 cuda cores. However I think the comparison is fair because my CPU is also very fast. It is an Intel i5 2500k which is 4 cores at 3.3 Ghz stock and is running overclocked to 4.6 Ghz. A slower GPU compared to a slower CPU might be able to expect similar speed ups.
 +
 +
Parallelizing the main Mandelbrot function ended up requiring me to move a few other functions to the GPU and rewriting the anti-aliasing function.  The anti-aliasing previously made many calls to the Mandelbrot function. This was not good for the GPU and I changed it so that it makes a list of points to run the anti-aliasing algorithm over. Another issue with the anti-aliasing was selection the right value to return. I previously found the best quality result is to return the median value. The anti-aliasing required its own Mandelbrot function as well as moving the median function to the device. I also move the function that converts Mandelbrot escape values to RGB values to the device. This was because I found that I could account for half the running time depending on settings.
 +
 +
=== Code ===
 +
<big><pre>
 +
__device__ void swap(ElementType* a, ElementType* b)
 +
{
 +
  ElementType t = *a;
 +
  *a = *b;
 +
  *b = t;
 +
}
 +
</pre></big>
 +
<big><pre>
 +
__device__ ElementType getMedian(ElementType* arr, AlisingFactorSqType median, AlisingFactorSqType n)
 +
{
 +
    AlisingFactorSqType low, high ;
 +
    AlisingFactorSqType middle, ll, hh;
 +
 +
    low = 0 ; high = n-1 ;
 +
    for (;;) {
 +
        if (high <= low) /* One element only */
 +
            return arr[median] ;
 +
 +
        if (high == low + 1) {  /* Two elements only */
 +
            if (arr[low] > arr[high])
 +
                swap(&arr[low], &arr[high]) ;
 +
            return arr[median];
 +
        }
 +
 +
      /* Find median of low, middle and high items; swap into position low */
 +
      middle = (low + high) / 2;
 +
      if (arr[middle] > arr[high])    swap(&arr[middle], &arr[high]) ;
 +
      if (arr[low] > arr[high])      swap(&arr[low], &arr[high]) ;
 +
      if (arr[middle] > arr[low])    swap(&arr[middle], &arr[low]) ;
 +
 +
      /* Swap low item (now in position middle) into position (low+1) */
 +
      swap(&arr[middle], &arr[low+1]) ;
 +
 +
      /* Nibble from each end towards middle, swapping items when stuck */
 +
      ll = low + 1;
 +
      hh = high;
 +
      for (;;) {
 +
          do ll++; while (arr[low] > arr[ll]) ;
 +
          do hh--; while (arr[hh]  > arr[low]) ;
 +
 +
          if (hh < ll)
 +
          break;
 +
 +
          swap(&arr[ll], &arr[hh]) ;
 +
      }
 +
 +
      /* Swap middle item (in position low) back into correct position */
 +
      swap(&arr[low], &arr[hh]) ;
 +
 +
      /* Re-set active partition */
 +
      if (hh <= median)
 +
          low = ll;
 +
      if (hh >= median)
 +
          high = hh - 1;
 +
    }
 +
}
 +
</pre></big>
 +
<big><pre>
 +
__device__ ElementType mandelbrot(ElementType c_i, ElementType c_r, IterationType iterations)
 +
{
 +
  ElementType z_r = c_r;
 +
  ElementType z_i = c_i;
 +
 
 +
  ElementType z2_r = z_r * z_r;
 +
  ElementType z2_i = z_i * z_i;
 +
 
 +
  IterationType n = 0;
 +
 
 +
  while(n < iterations && z2_r + z2_i < 4.0)
 +
  {         
 +
      z_i = 2.0 * z_r * z_i + c_i;
 +
      z_r = z2_r - z2_i + c_r;
 +
 
 +
      z2_r = z_r * z_r;
 +
      z2_i = z_i * z_i;
 +
     
 +
      n++;
 +
  }
 +
 +
  z_i = 2.0 * z_r * z_i + c_i;
 +
  z_r = z2_r - z2_i + c_r;
 +
 
 +
  z2_r = z_r * z_r;
 +
  z2_i = z_i * z_i;
 +
 +
  z_i = 2.0 * z_r * z_i + c_i;
 +
  z_r = z2_r - z2_i + c_r;
 +
 
 +
  z2_r = z_r * z_r;
 +
  z2_i = z_i * z_i;
 +
     
 +
  n += 2;
 +
 +
  if(n > iterations)
 +
  {
 +
      return (ElementType)iterations;
 +
  }
 +
  else
 +
  {
 +
      return (ElementType)n + 1.0 - log(log(sqrt(z2_r + z2_i)))/log(2.0);;
 +
  }
 +
}
 +
</pre></big>
 +
<big><pre>
 +
// Calculate if c is in Mandelbrot set.
 +
// Return number of iterations.
 +
__global__ void getFractal(ElementType* img, ElementType yMax, ElementType xMin, ElementType xScale, ElementType yScale, IterationType iterations, DimensionType width, DimensionType height)
 +
 +
  DimensionType dx = blockIdx.x * BLOCK_SIZE + threadIdx.x;
 +
  DimensionType dy = blockIdx.y * BLOCK_SIZE + threadIdx.y;
 +
 +
  if(dx >= width || dy >= height)
 +
      return;
 +
 
 +
  ElementType c_i = yMax - (ElementType)dy * yScale;
 +
  ElementType c_r = xMin + (ElementType)dx * xScale;
 +
 +
  DimensionSqType c = (DimensionSqType)dy * (DimensionSqType)width + (DimensionSqType)dx;
 +
  img[c] = mandelbrot(c_i, c_r, iterations);
 +
}
 +
</pre></big>
 +
<big><pre>
 +
// Calculate if c is in Mandelbrot set.
 +
// Return number of iterations.
 +
__global__ void getFractalSSAA(ElementType* img, DimensionSqType* list, DimensionSqType length, ElementType yMax, ElementType xMin,
 +
                              ElementType xScale, ElementType yScale, IterationType iterations,
 +
                              DimensionType width, AlisingFactorType ssaafactor)
 +
 +
  DimensionType curr = blockIdx.x * BLOCK_SIZE * BLOCK_SIZE + threadIdx.x;
 +
 +
  if(curr >= length)
 +
      return;
 +
 +
  DimensionType dx = list[curr] % width;
 +
  DimensionType dy = list[curr] / width;
 +
 +
  ElementType c_i = yMax - (ElementType)dy * yScale;
 +
  ElementType c_r = xMin + (ElementType)dx * xScale;
 +
 +
  // Get sub pixel width and height
 +
  ElementType xSubScale = xScale / ((ElementType)ssaafactor);
 +
  ElementType ySubScale = yScale / ((ElementType)ssaafactor);
 +
 
 +
  // Get the centre of the top left subpixel
 +
  xMin = c_r - (xScale / 2.0) + (xSubScale / 2.0);
 +
  yMax = c_i + (yScale / 2.0) - (ySubScale / 2.0);
 +
 +
  AlisingFactorSqType factor2 = (AlisingFactorSqType)ssaafactor * (AlisingFactorSqType)ssaafactor;
 +
 
 +
  // Get the values for each pixel in fractal 
 +
  ElementType subpixels[MAX_ALIASING_FACTOR * MAX_ALIASING_FACTOR];
 +
 
 +
  for(AlisingFactorType x = 0; x < ssaafactor; x++)
 +
  {
 +
      for(AlisingFactorType y = 0; y < ssaafactor; y++)
 +
      {
 +
        subpixels[x * ssaafactor + y] = mandelbrot(yMax - ySubScale * y , xMin + xSubScale * x, iterations);
 +
      }
 +
  }
 +
 +
  if(factor2 % 2 != 0)
 +
  {
 +
      img[list[curr]] = getMedian(subpixels, factor2 / 2,  factor2);
 +
  }
 +
  else
 +
  {
 +
      img[list[curr]] = (getMedian(subpixels, factor2 / 2 - 1,  factor2)
 +
                          + getMedian(subpixels, factor2 / 2,  factor2))
 +
                        / 2.0;
 +
  }
 +
}
 +
</pre></big>
 +
<big><pre>
 +
__device__ void getRGB(ElementType value, BYTE* rgb)
 +
{
 +
  short colourInt = (short)(value * 1791.0f);
 +
 
 +
  BYTE bracket = colourInt / 256;
 +
  BYTE colour = (BYTE)(colourInt % 256);
 +
 +
  switch (bracket)
 +
  {
 +
      case 0:
 +
        rgb[0] = colour;
 +
        rgb[1] = 0;
 +
        rgb[2] = 0;
 +
        break;
 +
         
 +
      case 1:
 +
        rgb[0] = 255;
 +
        rgb[1] = colour;
 +
        rgb[2] = 0;
 +
        break;
 +
         
 +
      case 2:
 +
        rgb[0] = 255 - colour;
 +
        rgb[1] = 255;
 +
        rgb[2] = 0;
 +
        break;
 +
 +
      case 3:
 +
        rgb[0] = 0;
 +
        rgb[1] = 255;
 +
        rgb[2] = colour;
 +
          break;
 +
 +
      case 4:
 +
        rgb[0] = 0;
 +
        rgb[1] = 255 - colour;
 +
        rgb[2] = 255;
 +
        break;
 +
 +
      case 5:
 +
        rgb[0] = colour;
 +
        rgb[1] = 0;
 +
        rgb[2] = 255;
 +
        break;
 +
 +
      case 6:
 +
        rgb[0] = 255 - colour;
 +
        rgb[1] = 0;
 +
        rgb[2] = 255 - colour;
 +
        break;
 +
 +
      default:
 +
        rgb[0] = 0;
 +
        rgb[1] = 0;
 +
        rgb[2] = 0;
 +
          break;   
 +
  }
 +
}
 +
</pre></big>
 +
<big><pre>
 +
__global__ void getBmpRGB(BYTE* image, ElementType* values, DimensionType width, DimensionType height, IterationType iterations)
 +
{
 +
  DimensionType dy = blockIdx.y * BLOCK_SIZE + threadIdx.y; 
 +
  DimensionType dx = blockIdx.x * BLOCK_SIZE + threadIdx.x;
 +
 
 +
  if(dx >= width || dy >= height)
 +
      return;
 +
 +
  DimensionType c = dy * width + dx;
 +
 
 +
  BYTE rgbValue[3];
 +
 +
  getRGB(values[c]/(ElementType)iterations, rgbValue);
 +
     
 +
  image[c*3]      = rgbValue[2];
 +
  image[c*3 + 1]  = rgbValue[1];
 +
  image[c*3 + 2]  = rgbValue[0];
 +
}
 +
</pre></big>
 +
<big><pre>
 +
__global__ void getBmpRGBfromHistorgram(ElementType* map, BYTE* image, ElementType* values, DimensionType width, DimensionType height)
 +
{
 +
  DimensionType dy = blockIdx.y * BLOCK_SIZE + threadIdx.y; 
 +
  DimensionType dx = blockIdx.x * BLOCK_SIZE + threadIdx.x;
 +
 
 +
  if(dx >= width || dy >= height)
 +
      return;
 +
 +
  DimensionType c = dy * width + dx;
 +
 +
  IterationType ival = (IterationType)values[c];
 +
 +
  ElementType colourVal = map[ival] + (values[c] - (ElementType)ival) * (map[ival + 1] - map[ival]);
 +
 
 +
  BYTE rgbValue[3];
 +
 +
  getRGB(colourVal, rgbValue);
 +
     
 +
  image[c*3]      = rgbValue[2];
 +
  image[c*3 + 1]  = rgbValue[1];
 +
  image[c*3 + 2]  = rgbValue[0];
 +
}
 +
</pre></big>
 +
 +
 +
 +
= Assignment 1 =
 
== Mandelbrot Set ==
 
== Mandelbrot Set ==
 
=== Repo ===
 
=== Repo ===

Revision as of 05:37, 21 November 2012


GPU610/DPS915 | Student List | Group and Project Index | Student Resources | Glossary

The B-Team

Team Members

  1. Kyle Barnhart

Assignment 2

Mandelbrot Set

Repo

https://github.com/KyleBarnhart/Fractal/tree/cuda

Description

The new version of my fractal program is running on the GPU and has been highly parallelized. It is running more than ten times faster than the original version. I am sure this is highly GPU dependent and I am running it on an Nvidia Geforce GTA 660 Ti with 1344 cuda cores. However I think the comparison is fair because my CPU is also very fast. It is an Intel i5 2500k which is 4 cores at 3.3 Ghz stock and is running overclocked to 4.6 Ghz. A slower GPU compared to a slower CPU might be able to expect similar speed ups.

Parallelizing the main Mandelbrot function ended up requiring me to move a few other functions to the GPU and rewriting the anti-aliasing function. The anti-aliasing previously made many calls to the Mandelbrot function. This was not good for the GPU and I changed it so that it makes a list of points to run the anti-aliasing algorithm over. Another issue with the anti-aliasing was selection the right value to return. I previously found the best quality result is to return the median value. The anti-aliasing required its own Mandelbrot function as well as moving the median function to the device. I also move the function that converts Mandelbrot escape values to RGB values to the device. This was because I found that I could account for half the running time depending on settings.

Code

__device__ void swap(ElementType* a, ElementType* b)
{
   ElementType t = *a;
   *a = *b;
   *b = t;
}
__device__ ElementType getMedian(ElementType* arr, AlisingFactorSqType median, AlisingFactorSqType n) 
{
    AlisingFactorSqType low, high ;
    AlisingFactorSqType middle, ll, hh;

    low = 0 ; high = n-1 ;
    for (;;) {
        if (high <= low) /* One element only */
            return arr[median] ;

        if (high == low + 1) {  /* Two elements only */
            if (arr[low] > arr[high])
                swap(&arr[low], &arr[high]) ;
            return arr[median];
        }

       /* Find median of low, middle and high items; swap into position low */
       middle = (low + high) / 2;
       if (arr[middle] > arr[high])    swap(&arr[middle], &arr[high]) ;
       if (arr[low] > arr[high])       swap(&arr[low], &arr[high]) ;
       if (arr[middle] > arr[low])     swap(&arr[middle], &arr[low]) ;

       /* Swap low item (now in position middle) into position (low+1) */
       swap(&arr[middle], &arr[low+1]) ;

       /* Nibble from each end towards middle, swapping items when stuck */
       ll = low + 1;
       hh = high;
       for (;;) {
           do ll++; while (arr[low] > arr[ll]) ;
           do hh--; while (arr[hh]  > arr[low]) ;

           if (hh < ll)
           break;

           swap(&arr[ll], &arr[hh]) ;
       }

       /* Swap middle item (in position low) back into correct position */
       swap(&arr[low], &arr[hh]) ;

       /* Re-set active partition */
       if (hh <= median)
           low = ll;
       if (hh >= median)
           high = hh - 1;
    }
}
__device__ ElementType mandelbrot(ElementType c_i, ElementType c_r, IterationType iterations)
{
   ElementType z_r = c_r;
   ElementType z_i = c_i;
   
   ElementType z2_r = z_r * z_r;
   ElementType z2_i = z_i * z_i;
   
   IterationType n = 0;
   
   while(n < iterations && z2_r + z2_i < 4.0)
   {           
      z_i = 2.0 * z_r * z_i + c_i;
      z_r = z2_r - z2_i + c_r;
   
      z2_r = z_r * z_r;
      z2_i = z_i * z_i;
      
      n++;
   }

   z_i = 2.0 * z_r * z_i + c_i;
   z_r = z2_r - z2_i + c_r;
   
   z2_r = z_r * z_r;
   z2_i = z_i * z_i;

   z_i = 2.0 * z_r * z_i + c_i;
   z_r = z2_r - z2_i + c_r;
   
   z2_r = z_r * z_r;
   z2_i = z_i * z_i;
      
   n += 2;

   if(n > iterations)
   {
      return (ElementType)iterations;
   }
   else
   {
      return (ElementType)n + 1.0 - log(log(sqrt(z2_r + z2_i)))/log(2.0);;
   }
}
// Calculate if c is in Mandelbrot set.
// Return number of iterations.
__global__ void getFractal(ElementType* img, ElementType yMax, ElementType xMin, ElementType xScale, ElementType yScale, IterationType iterations, DimensionType width, DimensionType height)
{  
   DimensionType dx = blockIdx.x * BLOCK_SIZE + threadIdx.x;
   DimensionType dy = blockIdx.y * BLOCK_SIZE + threadIdx.y;

   if(dx >= width || dy >= height)
      return;
   
   ElementType c_i = yMax - (ElementType)dy * yScale;
   ElementType c_r = xMin + (ElementType)dx * xScale;

   DimensionSqType c = (DimensionSqType)dy * (DimensionSqType)width + (DimensionSqType)dx;
   img[c] = mandelbrot(c_i, c_r, iterations);
}
// Calculate if c is in Mandelbrot set.
// Return number of iterations.
__global__ void getFractalSSAA(ElementType* img, DimensionSqType* list, DimensionSqType length, ElementType yMax, ElementType xMin,
                               ElementType xScale, ElementType yScale, IterationType iterations,
                               DimensionType width, AlisingFactorType ssaafactor)
{  
   DimensionType curr = blockIdx.x * BLOCK_SIZE * BLOCK_SIZE + threadIdx.x;

   if(curr >= length)
      return;

   DimensionType dx = list[curr] % width;
   DimensionType dy = list[curr] / width;

   ElementType c_i = yMax - (ElementType)dy * yScale;
   ElementType c_r = xMin + (ElementType)dx * xScale;

   // Get sub pixel width and height
   ElementType xSubScale = xScale / ((ElementType)ssaafactor);
   ElementType ySubScale = yScale / ((ElementType)ssaafactor);
   
   // Get the centre of the top left subpixel
   xMin = c_r - (xScale / 2.0) + (xSubScale / 2.0);
   yMax = c_i + (yScale / 2.0) - (ySubScale / 2.0);

   AlisingFactorSqType factor2 = (AlisingFactorSqType)ssaafactor * (AlisingFactorSqType)ssaafactor;
   
   // Get the values for each pixel in fractal   
   ElementType subpixels[MAX_ALIASING_FACTOR * MAX_ALIASING_FACTOR];
   
   for(AlisingFactorType x = 0; x < ssaafactor; x++)
   {
      for(AlisingFactorType y = 0; y < ssaafactor; y++)
      {
         subpixels[x * ssaafactor + y] = mandelbrot(yMax - ySubScale * y , xMin + xSubScale * x, iterations);
      }
   }

   if(factor2 % 2 != 0)
   {
      img[list[curr]] = getMedian(subpixels, factor2 / 2,  factor2);
   }
   else
   {
      img[list[curr]] = (getMedian(subpixels, factor2 / 2 - 1,  factor2)
                           + getMedian(subpixels, factor2 / 2,  factor2))
                         / 2.0;
   }
}
__device__ void getRGB(ElementType value, BYTE* rgb)
{ 
   short colourInt = (short)(value * 1791.0f);
   
   BYTE bracket = colourInt / 256;
   BYTE colour = (BYTE)(colourInt % 256);

   switch (bracket)
   {
      case 0:
         rgb[0] = colour;
         rgb[1] = 0;
         rgb[2] = 0;
         break;
          
      case 1:
         rgb[0] = 255;
         rgb[1] = colour;
         rgb[2] = 0;
         break;
          
      case 2:
         rgb[0] = 255 - colour;
         rgb[1] = 255;
         rgb[2] = 0;
         break;

      case 3:
         rgb[0] = 0;
         rgb[1] = 255;
         rgb[2] = colour;
          break;

      case 4:
         rgb[0] = 0;
         rgb[1] = 255 - colour;
         rgb[2] = 255;
         break;

      case 5:
         rgb[0] = colour;
         rgb[1] = 0;
         rgb[2] = 255;
         break;

      case 6:
         rgb[0] = 255 - colour;
         rgb[1] = 0;
         rgb[2] = 255 - colour;
         break;

      default:
         rgb[0] = 0;
         rgb[1] = 0;
         rgb[2] = 0;
          break;    
   }
}
__global__ void getBmpRGB(BYTE* image, ElementType* values, DimensionType width, DimensionType height, IterationType iterations)
{
   DimensionType dy = blockIdx.y * BLOCK_SIZE + threadIdx.y;  
   DimensionType dx = blockIdx.x * BLOCK_SIZE + threadIdx.x;
   
   if(dx >= width || dy >= height)
      return; 

   DimensionType c = dy * width + dx;
   
   BYTE rgbValue[3];

   getRGB(values[c]/(ElementType)iterations, rgbValue);
      
   image[c*3]      = rgbValue[2];
   image[c*3 + 1]  = rgbValue[1];
   image[c*3 + 2]  = rgbValue[0];
}
__global__ void getBmpRGBfromHistorgram(ElementType* map, BYTE* image, ElementType* values, DimensionType width, DimensionType height)
{
   DimensionType dy = blockIdx.y * BLOCK_SIZE + threadIdx.y;  
   DimensionType dx = blockIdx.x * BLOCK_SIZE + threadIdx.x;
   
   if(dx >= width || dy >= height)
      return; 

   DimensionType c = dy * width + dx;

   IterationType ival = (IterationType)values[c];

   ElementType colourVal = map[ival] + (values[c] - (ElementType)ival) * (map[ival + 1] - map[ival]);
   
   BYTE rgbValue[3];

   getRGB(colourVal, rgbValue);
      
   image[c*3]      = rgbValue[2];
   image[c*3 + 1]  = rgbValue[1];
   image[c*3 + 2]  = rgbValue[0];
}


Assignment 1

Mandelbrot Set

Repo

https://github.com/KyleBarnhart/Fractal

Description

I wrote this program because I could not find a suitable fractal program. This program generates fractal images of the Mandelbrot Set and saves them in the BMP image format. It can generate images of an almost arbitrary size. You also set the locations, number of iterations, and the zoom factor. It uses a smoothing algorithm and a histogram to create smoothly and evenly coloured images. It has selective super sampling for anti-aliasing. And it can generate a sequence of images that can be put together to make a video. In testing the program I made a video (http://www.youtube.com/watch?v=vQigSMuHxuU).

Code

inline ElementType mandelbrot(ElementType c_r, ElementType c_i, IterationType iterations)
{  
   ElementType z_r = c_r;
   ElementType z_i = c_i;
   
   ElementType z2_r = z_r * z_r;
   ElementType z2_i = z_i * z_i;
   
   IterationType n = 0;
   
   while(n < iterations && z2_r + z2_i < 4.0)
   {           
      z_i = 2.0 * z_r * z_i + c_i;
      z_r = z2_r - z2_i + c_r;
   
      z2_r = z_r * z_r;
      z2_i = z_i * z_i;
      
      n++;
   }
   
   z_i = 2.0 * z_r * z_i + c_i;
   z_r = z2_r - z2_i + c_r;
   
   z2_r = z_r * z_r;
   z2_i = z_i * z_i;
   
   z_i = 2.0 * z_r * z_i + c_i;
   z_r = z2_r - z2_i + c_r;
   
   z2_r = z_r * z_r;
   z2_i = z_i * z_i;
   
   n += 2;
   
   if(n > iterations)
   {
      return (ElementType)iterations;
   }
   else
   {
      return (ElementType)n + 1.0 - log(log(sqrt(z2_r + z2_i)))/log(2.0);;
   }
}


Calculating Pi

Repo

https://github.com/KyleBarnhart/Pi

Description

I wrote this program based on reading some articles I found. The program calculates the value of pi by randomly generating points in a one by one area and determining if they are no more than a distance of one from the bottom left corner (0, 0). By randomly generating many uniformly distributed numbers and dividing the number that fall within the circle by the number of iterations you can determine pi. The more iterations that you preform, the closer the value will be to pi.

Code

inline double moreRandom(unsigned iterations)
{
   double result = (double)rand() / (double)RAND_MAX;
   
   for(unsigned i = 1; i < iterations; i++)
   {
      result = (((double)rand()) + result) / (double)RAND_MAX;
   }
   
   return result;
}