Open main menu

CDOT Wiki β

Changes

GPU610/Cosmosis

10,514 bytes added, 13:38, 13 April 2013
Assignment 3: code
We initially intended on using the fast-math library provided by CUDA. At first our results were marginally faster than our previous code. Though after some optimizations we discovered that our code actually performed better than the fast-math library. With fast-math, it took 0.451889 seconds to process 1000 bodies for 512 samples, conversely without fast-math we got 0.409581 seconds, which is a considerable improvement.
 
==== Optimized Code ====
 
<div style='color:#000020;background:#f6f8ff;'>
<span style='color:#200080; font-weight:bold; '>void</span> __global__ SimCalc<span style='color:#308080; '>(</span>BodyArray a<span style='color:#308080; '>)</span>
<span style='color:#406080; '>{</span>
int_fast32_t idx <span style='color:#308080; '>=</span> blockIdx<span style='color:#308080; '>.</span>x <span style='color:#308080; '>*</span> blockDim<span style='color:#308080; '>.</span>x <span style='color:#308080; '>+</span> threadIdx<span style='color:#308080; '>.</span>x<span style='color:#406080; '>;</span>
<span style='color:#200080; font-weight:bold; '>if</span> <span style='color:#308080; '>(</span>idx <span style='color:#308080; '>&lt;</span> a<span style='color:#308080; '>.</span>size<span style='color:#308080; '>)</span> <span style='color:#406080; '>{</span>
<span style='color:#200080; font-weight:bold; '>const</span> _T G <span style='color:#308080; '>=</span> <span style='color:#008000; '>6.67384</span><span style='color:#006600; '>f</span> <span style='color:#308080; '>*</span> <span style='color:#003060; '>pow</span><span style='color:#308080; '>(</span><span style='color:#008000; '>10.0</span><span style='color:#006600; '>f</span><span style='color:#308080; '>,</span> <span style='color:#308080; '>-</span><span style='color:#008000; '>11.0</span><span style='color:#006600; '>f</span><span style='color:#308080; '>)</span><span style='color:#406080; '>;</span>
<span style='color:#595979; '>//precompute positions at index</span>
<span style='color:#200080; font-weight:bold; '>const</span> _T px <span style='color:#308080; '>=</span> a<span style='color:#308080; '>.</span>array<span style='color:#308080; '>[</span>idx<span style='color:#308080; '>]</span><span style='color:#308080; '>.</span>Position<span style='color:#308080; '>.</span>x<span style='color:#406080; '>;</span>
<span style='color:#200080; font-weight:bold; '>const</span> _T py <span style='color:#308080; '>=</span> a<span style='color:#308080; '>.</span>array<span style='color:#308080; '>[</span>idx<span style='color:#308080; '>]</span><span style='color:#308080; '>.</span>Position<span style='color:#308080; '>.</span>y<span style='color:#406080; '>;</span>
<span style='color:#595979; '>//mass at the index</span>
<span style='color:#200080; font-weight:bold; '>const</span> _T M_idx <span style='color:#308080; '>=</span> G<span style='color:#308080; '>*</span>a<span style='color:#308080; '>.</span>array<span style='color:#308080; '>[</span>idx<span style='color:#308080; '>]</span><span style='color:#308080; '>.</span>Mass<span style='color:#406080; '>;</span>
a<span style='color:#308080; '>.</span>array<span style='color:#308080; '>[</span>idx<span style='color:#308080; '>]</span><span style='color:#308080; '>.</span>Force <span style='color:#308080; '>=</span> vec2_t<span style='color:#308080; '>(</span><span style='color:#308080; '>)</span><span style='color:#406080; '>;</span>
<span style='color:#200080; font-weight:bold; '>for</span> <span style='color:#308080; '>(</span>int_fast32_t j<span style='color:#308080; '>(</span><span style='color:#008c00; '>0</span><span style='color:#308080; '>)</span><span style='color:#406080; '>;</span> j <span style='color:#308080; '>!</span><span style='color:#308080; '>=</span> a<span style='color:#308080; '>.</span>size<span style='color:#406080; '>;</span> <span style='color:#308080; '>+</span><span style='color:#308080; '>+</span>j<span style='color:#308080; '>)</span> <span style='color:#406080; '>{</span>
<span style='color:#200080; font-weight:bold; '>if</span> <span style='color:#308080; '>(</span>idx <span style='color:#308080; '>!</span><span style='color:#308080; '>=</span> j<span style='color:#308080; '>)</span> <span style='color:#406080; '>{</span>
_T dx <span style='color:#308080; '>=</span> a<span style='color:#308080; '>.</span>array<span style='color:#308080; '>[</span>j<span style='color:#308080; '>]</span><span style='color:#308080; '>.</span>Position<span style='color:#308080; '>.</span>x <span style='color:#308080; '>-</span> px<span style='color:#406080; '>;</span>
_T dy <span style='color:#308080; '>=</span> a<span style='color:#308080; '>.</span>array<span style='color:#308080; '>[</span>j<span style='color:#308080; '>]</span><span style='color:#308080; '>.</span>Position<span style='color:#308080; '>.</span>y <span style='color:#308080; '>-</span> py<span style='color:#406080; '>;</span>
_T r <span style='color:#308080; '>=</span> <span style='color:#003060; '>sqrt</span><span style='color:#308080; '>(</span>dx<span style='color:#308080; '>*</span>dx <span style='color:#308080; '>+</span> dy<span style='color:#308080; '>*</span>dy<span style='color:#308080; '>)</span><span style='color:#406080; '>;</span>
_T F <span style='color:#308080; '>=</span> <span style='color:#308080; '>(</span>M_idx<span style='color:#308080; '>*</span>a<span style='color:#308080; '>.</span>array<span style='color:#308080; '>[</span>j<span style='color:#308080; '>]</span><span style='color:#308080; '>.</span>Mass<span style='color:#308080; '>)</span><span style='color:#308080; '>/</span><span style='color:#308080; '>(</span>r<span style='color:#308080; '>*</span>r<span style='color:#308080; '>)</span><span style='color:#406080; '>;</span>
a<span style='color:#308080; '>.</span>array<span style='color:#308080; '>[</span>idx<span style='color:#308080; '>]</span><span style='color:#308080; '>.</span>Force<span style='color:#308080; '>.</span>x <span style='color:#308080; '>+</span><span style='color:#308080; '>=</span> F <span style='color:#308080; '>*</span> <span style='color:#308080; '>(</span>dx <span style='color:#308080; '>/</span> r<span style='color:#308080; '>)</span><span style='color:#406080; '>;</span>
a<span style='color:#308080; '>.</span>array<span style='color:#308080; '>[</span>idx<span style='color:#308080; '>]</span><span style='color:#308080; '>.</span>Force<span style='color:#308080; '>.</span>y <span style='color:#308080; '>+</span><span style='color:#308080; '>=</span> F <span style='color:#308080; '>*</span> <span style='color:#308080; '>(</span>dy <span style='color:#308080; '>/</span> r<span style='color:#308080; '>)</span><span style='color:#406080; '>;</span>
<span style='color:#406080; '>}</span>
<span style='color:#406080; '>}</span>
<span style='color:#406080; '>}</span>
 
<span style='color:#200080; font-weight:bold; '>void</span> __global__ SimTick<span style='color:#308080; '>(</span>BodyArray a<span style='color:#308080; '>,</span> _T dt<span style='color:#308080; '>)</span>
<span style='color:#406080; '>{</span>
<span style='color:#200080; font-weight:bold; '>int</span> idx <span style='color:#308080; '>=</span> blockIdx<span style='color:#308080; '>.</span>x <span style='color:#308080; '>*</span> blockDim<span style='color:#308080; '>.</span>x <span style='color:#308080; '>+</span> threadIdx<span style='color:#308080; '>.</span>x<span style='color:#406080; '>;</span>
<span style='color:#200080; font-weight:bold; '>if</span> <span style='color:#308080; '>(</span>idx <span style='color:#308080; '>&lt;</span> a<span style='color:#308080; '>.</span>size<span style='color:#308080; '>)</span>
<span style='color:#406080; '>{</span>
_T mass <span style='color:#308080; '>=</span> a<span style='color:#308080; '>.</span>array<span style='color:#308080; '>[</span>idx<span style='color:#308080; '>]</span><span style='color:#308080; '>.</span>Mass<span style='color:#406080; '>;</span>
a<span style='color:#308080; '>.</span>array<span style='color:#308080; '>[</span>idx<span style='color:#308080; '>]</span><span style='color:#308080; '>.</span>Velocity<span style='color:#308080; '>.</span>x <span style='color:#308080; '>+</span><span style='color:#308080; '>=</span> dt <span style='color:#308080; '>*</span> <span style='color:#308080; '>(</span>a<span style='color:#308080; '>.</span>array<span style='color:#308080; '>[</span>idx<span style='color:#308080; '>]</span><span style='color:#308080; '>.</span>Force<span style='color:#308080; '>.</span>x <span style='color:#308080; '>/</span> mass<span style='color:#308080; '>)</span><span style='color:#406080; '>;</span>
a<span style='color:#308080; '>.</span>array<span style='color:#308080; '>[</span>idx<span style='color:#308080; '>]</span><span style='color:#308080; '>.</span>Velocity<span style='color:#308080; '>.</span>y <span style='color:#308080; '>+</span><span style='color:#308080; '>=</span> dt <span style='color:#308080; '>*</span> <span style='color:#308080; '>(</span>a<span style='color:#308080; '>.</span>array<span style='color:#308080; '>[</span>idx<span style='color:#308080; '>]</span><span style='color:#308080; '>.</span>Force<span style='color:#308080; '>.</span>y <span style='color:#308080; '>/</span> mass<span style='color:#308080; '>)</span><span style='color:#406080; '>;</span>
a<span style='color:#308080; '>.</span>array<span style='color:#308080; '>[</span>idx<span style='color:#308080; '>]</span><span style='color:#308080; '>.</span>Position<span style='color:#308080; '>.</span>x <span style='color:#308080; '>+</span><span style='color:#308080; '>=</span> dt <span style='color:#308080; '>*</span> a<span style='color:#308080; '>.</span>array<span style='color:#308080; '>[</span>idx<span style='color:#308080; '>]</span><span style='color:#308080; '>.</span>Velocity<span style='color:#308080; '>.</span>x<span style='color:#406080; '>;</span>
a<span style='color:#308080; '>.</span>array<span style='color:#308080; '>[</span>idx<span style='color:#308080; '>]</span><span style='color:#308080; '>.</span>Position<span style='color:#308080; '>.</span>y <span style='color:#308080; '>+</span><span style='color:#308080; '>=</span> dt <span style='color:#308080; '>*</span> a<span style='color:#308080; '>.</span>array<span style='color:#308080; '>[</span>idx<span style='color:#308080; '>]</span><span style='color:#308080; '>.</span>Velocity<span style='color:#308080; '>.</span>y<span style='color:#406080; '>;</span>
<span style='color:#406080; '>}</span>
<span style='color:#406080; '>}</span>
</div>
1
edit