Difference between revisions of "TeamDS"

From CDOT Wiki
Jump to: navigation, search
(SDF Brute-Force Method)
(Analysis)
Line 104: Line 104:
  
  
===Analysis===
+
===Bench Marks===
  
Flat profile:
+
Flat profile:
 
Each sample counts as 0.01 seconds.
 
  %  cumulative  self              self    total
 
  time  seconds  seconds    calls  ns/call  ns/call  name
 
  61.38      3.37    3.37                            BlurImage(SImageData const&, SImageData&, float, float, unsigned int, unsigned int)
 
  38.62      5.49    2.12 172032000    12.32    12.32  GetPixelOrBlack(SImageData const&, int, int)
 
  0.00      5.49    0.00      126    0.00    0.00  Gaussian(float, float)
 
  0.00      5.49    0.00      42    0.00    0.00  GaussianSimpsonIntegration(float, float, float)
 
  0.00      5.49    0.00      12    0.00    0.00  void std::vector<float, std::allocator<float> >::_M_insert_aux<float const&>(__gnu_cxx::__normal_iterator<float*, std::vector<float, std::allocator<float> > >, float const&&&)
 
  0.00      5.49    0.00        3    0.00    0.00  std::vector<unsigned char, std::allocator<unsigned char> >::_M_default_append(unsigned int)
 
  0.00      5.49    0.00        2    0.00    0.00  GaussianKernelIntegrals(float, int)
 
  0.00      5.49    0.00        1    0.00    0.00  _GLOBAL__sub_I__Z12WaitForEnterv
 
  
  Call graph:
+
Each sample counts as 0.01 seconds.
+
  %  cumulative  self              self    total         
+
time  seconds  seconds    calls  Ts/call  Ts/call  name   
granularity: each sample hit covers 4 byte(s) for 0.18% of 5.49 seconds
+
100.00      6.90    6.90                            Generate(float const*, float*, int, int, int)
+
  0.00      6.90    0.00        1    0.00    0.00  _GLOBAL__sub_I__Z5Pausev
index % time    self  children    called    name
+
 
 +
 
 +
  Call graph
 +
 
 +
 
 +
granularity: each sample hit covers 4 byte(s) for 0.14% of 6.90 seconds
 +
 
 +
index % time    self  children    called    name
 
                                                 <spontaneous>
 
                                                 <spontaneous>
[1]    100.0    3.37    2.12                BlurImage(SImageData const&, SImageData&, float, float, unsigned int, unsigned int) [1]
+
[1]    100.0    6.90   0.00                 Generate(float const*, float*, int, int, int) [1]
                2.12    0.00 172032000/172032000    GetPixelOrBlack(SImageData const&, int, int) [2]
+
-----------------------------------------------
                0.00    0.00      2/2          GaussianKernelIntegrals(float, int) [11]
+
                0.00    0.00      1/1          __do_global_ctors_aux [9]
                0.00    0.00      2/3          std::vector<unsigned char, std::allocator<unsigned char> >::_M_default_append(unsigned int) [10]
+
[6]     0.0    0.00    0.00      1        _GLOBAL__sub_I__Z5Pausev [6]
-----------------------------------------------
+
-----------------------------------------------
                2.12    0.00 172032000/172032000    BlurImage(SImageData const&, SImageData&, float, float, unsigned int, unsigned int) [1]
+
[2]    38.6   2.12   0.00 172032000        GetPixelOrBlack(SImageData const&, int, int) [2]
+
Index by function name
-----------------------------------------------
+
 
                0.00    0.00    126/126        GaussianSimpsonIntegration(float, float, float) [8]
+
   [6] _GLOBAL__sub_I__Z5Pausev (a1.cpp) [1] Generate(float const*, float*, int, int, int)
[7]      0.0    0.00    0.00    126        Gaussian(float, float) [7]
 
-----------------------------------------------
 
                0.00    0.00      42/42          GaussianKernelIntegrals(float, int) [11]
 
[8]      0.0    0.00    0.00      42        GaussianSimpsonIntegration(float, float, float) [8]
 
                0.00    0.00    126/126        Gaussian(float, float) [7]
 
-----------------------------------------------
 
                0.00    0.00      12/12          GaussianKernelIntegrals(float, int) [11]
 
[9]      0.0    0.00    0.00      12        void std::vector<float, std::allocator<float> >::_M_insert_aux<float const&>(__gnu_cxx::__normal_iterator<float*, std::vector<float, std::allocator<float> > >, float const&&&) [9]
 
-----------------------------------------------
 
                0.00    0.00      1/3          LoadImage(char const*, SImageData&) [15]
 
                0.00    0.00      2/3          BlurImage(SImageData const&, SImageData&, float, float, unsigned int, unsigned int) [1]
 
[10]    0.0    0.00    0.00      3        std::vector<unsigned char, std::allocator<unsigned char> >::_M_default_append(unsigned int) [10]
 
-----------------------------------------------
 
                0.00    0.00      2/2          BlurImage(SImageData const&, SImageData&, float, float, unsigned int, unsigned int) [1]
 
[11]    0.0    0.00    0.00      2        GaussianKernelIntegrals(float, int) [11]
 
                0.00    0.00      42/42          GaussianSimpsonIntegration(float, float, float) [8]
 
                0.00    0.00      12/12          void std::vector<float, std::allocator<float> >::_M_insert_aux<float const&>(__gnu_cxx::__normal_iterator<float*, std::vector<float, std::allocator<float> > >, float const&&&) [9]
 
-----------------------------------------------
 
                0.00    0.00      1/1          __do_global_ctors_aux [18]
 
[12]     0.0    0.00    0.00      1        _GLOBAL__sub_I__Z12WaitForEnterv [12]
 
-----------------------------------------------
 
 
Index by function name
 
 
   [12] _GLOBAL__sub_I__Z12WaitForEnterv (gaussian.cpp) [8] GaussianSimpsonIntegration(float, float, float) [9] void std::vector<float, std::allocator<float> >::_M_insert_aux<float const&>(__gnu_cxx::__normal_iterator<float*, std::vector<float, std::allocator<float> > >, float const&&&)
 
    [2] GetPixelOrBlack(SImageData const&, int, int) [7] Gaussian(float, float) [10] std::vector<unsigned char, std::allocator<unsigned char> >::_M_default_append(unsigned int)
 
  [11] GaussianKernelIntegrals(float, int) [1] BlurImage(SImageData const&, SImageData&, float, float, unsigned int, unsigned int)
 
  
 
=== Assignment 2 ===
 
=== Assignment 2 ===
 
=== Assignment 3 ===
 
=== Assignment 3 ===

Revision as of 00:03, 5 April 2017

Signed Distance Field Generator

Team Members

  1. Dawood Shirzada - Developer

Email All

Progress

Assignment 1

What is Signed Distance Field?

Signed Distance Field, also known as SDF, is a technique developed by Valve that uses low-resolution textures to display fonts and decals that look extremely high resolution. Valve used SDF in the game engines that run games such as Half-Life 2, Counter-Strike 2, etc. SDF is so effective that no matter how far the font or decal is zoomed in, it will always look crisp and sharp while using very small textures. This allows in-game fonts and decals to have much higher quality with low memory usage compared to using regular high-resolution textures.

For more detailed information please read Valve's publication: http://www.valvesoftware.com/publications/2007/SIGGRAPH2007_AlphaTestedMagnification.pdf

Examples from Valve

SDF exmaple from Valve.jpeg


Examples in action

https://youtu.be/CGZRHJvJYIg?t=40


How does Signed Distance Field work?

SDF ONLY works with monochromatic images such as decals and fonts. SDF takes the original texture as an input, creates an SDF version of that texture, and saves it in an image format. The game engine then uses the low-resolution SDF texture instead.


SDFvsOJ.png


The SDF version is actually much different from the original image. The SDF version no longer stores the pixel color intensity like normal images do, but instead stores the distance to the nearest pixel of the opposite color. For example, since monochromatic images only have black and white, for every white pixel you look for the nearest black pixel and store the distance between the two, and vice versa.

To read the SDF version of an image, we need to use a custom shader that can understand the SDF version. With the help of custom shaders, we can do many more effects such as edge glows, drop shadows, soft edges, etc., all at virtually no additional rendering cost. SDF is a huge win when it comes to gaming performance!

How to convert image to SDF version?

It turns out that converting an image to SDF is very computationally expensive. There are, however, many methods that approximate the result and are relatively fast, but the Brute-Force method produces the most accurate result and it is the method that Valve used for their textures. Therefore, I will be using this method as well.

Big-O Complexity

For every pixel in an image, we need to test it against every other pixel. This makes the complexity O(n^2), where n is the number of pixels. For example, a 256x256 image has 65,536 pixels. Each pixel would have to be tested against 65,536 pixels to find the nearest pixel of the opposite color, so it needs 65,536 * 65,536 = 4,294,967,296 array element lookups!

Why I chose this

I chose SDF image conversion because it has lots of potential for parallelization. Since all the operations on a pixel are independent of those on other pixels and read data from one single array, this allows for massive gains when using GPU multi-threading.


SDF Brute-Force Method

// Brute-force signed distance field generation.
//
// For every pixel i of the width*height image `src`, scans every pixel j and
// keeps the smallest localVec.CalcDistance(targetVec) to a pixel of the
// opposite kind, then writes a normalised value to dst[i]:
//   - inside pixel  (src[i] > 0):  dst[i] = 0.5 + 0.5 * min(dist / spread, 1)
//   - outside pixel (otherwise):   dst[i] = 0.5 * (1 - min(dist / spread, 1))
//
// Parameters:
//   src    - input image, width*height floats, row-major (x = i % width,
//            y = i / width); a value > 0 marks an inside pixel.
//   dst    - output buffer, width*height floats, written at every index.
//   width  - image width in pixels.
//   height - image height in pixels.
//   spread - distance at which the output saturates.
//            NOTE(review): assumed to be > 0; not validated here.
//
// Cost is O((width*height)^2) distance evaluations.
// NOTE(review): Vector2::CalcDistance is presumably the Euclidean distance;
// it is declared elsewhere - confirm.
void Generate(const float src[], float dst[], int width, int height, int spread)
{
	const int size = width * height;
	for (int i = 0; i < size; i++)
	{
		Vector2 localVec(i % width, i / width);
		const bool inside = src[i] > 0;

		// If no opposite pixel exists this stays at MAX_FLOAT_VALUE and the
		// clamp below saturates the result.
		float shortestDist = MAX_FLOAT_VALUE;
		for (int j = 0; j < size; j++)
		{
			const float other = src[j];
			// Inside pixels search for outside pixels (exactly 0);
			// outside pixels search for inside pixels (> 0).
			const bool opposite = inside ? (other == 0) : (other > 0);
			if (!opposite) continue;

			Vector2 targetVec(j % width, j / width);
			float dist = localVec.CalcDistance(targetVec);
			if (dist < shortestDist) shortestDist = dist;
		}

		float spread01 = (shortestDist / spread);
		if (spread01 > 1) spread01 = 1; // clamp it

		// Inside pixels use the upper half of the range, outside pixels the lower half.
		dst[i] = inside ? (spread01 * .5f) + 0.5f
		                : (1 - spread01) * .5f;
	}
}


Bench Marks

Flat profile:

Each sample counts as 0.01 seconds.

 %   cumulative   self              self     total           
time   seconds   seconds    calls  Ts/call  Ts/call  name    

100.00 6.90 6.90 Generate(float const*, float*, int, int, int)

 0.00      6.90     0.00        1     0.00     0.00  _GLOBAL__sub_I__Z5Pausev


Call graph


granularity: each sample hit covers 4 byte(s) for 0.14% of 6.90 seconds

index % time self children called name

                                                <spontaneous>

[1] 100.0 6.90 0.00 Generate(float const*, float*, int, int, int) [1]


               0.00    0.00       1/1           __do_global_ctors_aux [9]

[6] 0.0 0.00 0.00 1 _GLOBAL__sub_I__Z5Pausev [6]


Index by function name

  [6] _GLOBAL__sub_I__Z5Pausev (a1.cpp) [1] Generate(float const*, float*, int, int, int)

Assignment 2

Assignment 3