Changes

Jump to: navigation, search

GPU610/Cosmosis

28,552 bytes added, 15:10, 13 April 2013
Dynamic Shared Memory (not implemented)
==== Baseline ====
The profiles below were taken with the following compiler command and hardware configuration:
 
<pre>nvcc main.cpp timer.cpp sim\simbody.cu sim\simulation.cu -DWIN32 -O3</pre>
 
* i5 2500K @ 4.5 GHz
* Nvidia GTX 560Ti
* Raw computations, no graphics drawn to the screen for unbiased results.
* Random position, velocity and mass for each body.
* Brute force algorithm for calculating the forces (O(n^2)).
==== Initial Profiling ====
We initially intended to use the fast-math library provided by CUDA. At first, our results with it were marginally faster than our previous code. After some optimizations, however, we discovered that our code actually performed better without the fast-math library. With fast-math, it took 0.451889 seconds to process 1000 bodies for 512 samples; without fast-math, the same run took 0.409581 seconds, which is a considerable improvement.
 
==== Optimized Code ====
 
<div style='color:#000020;background:#f6f8ff;'>
<span style='color:#200080; font-weight:bold; '>void</span> __global__ SimCalc<span style='color:#308080; '>(</span>BodyArray a<span style='color:#308080; '>)</span>
<span style='color:#406080; '>{</span>
int_fast32_t idx <span style='color:#308080; '>=</span> blockIdx<span style='color:#308080; '>.</span>x <span style='color:#308080; '>*</span> blockDim<span style='color:#308080; '>.</span>x <span style='color:#308080; '>+</span> threadIdx<span style='color:#308080; '>.</span>x<span style='color:#406080; '>;</span>
<span style='color:#200080; font-weight:bold; '>if</span> <span style='color:#308080; '>(</span>idx <span style='color:#308080; '>&lt;</span> a<span style='color:#308080; '>.</span>size<span style='color:#308080; '>)</span> <span style='color:#406080; '>{</span>
<span style='color:#200080; font-weight:bold; '>const</span> _T G <span style='color:#308080; '>=</span> <span style='color:#008000; '>6.67384</span><span style='color:#006600; '>f</span> <span style='color:#308080; '>*</span> <span style='color:#003060; '>pow</span><span style='color:#308080; '>(</span><span style='color:#008000; '>10.0</span><span style='color:#006600; '>f</span><span style='color:#308080; '>,</span> <span style='color:#308080; '>-</span><span style='color:#008000; '>11.0</span><span style='color:#006600; '>f</span><span style='color:#308080; '>)</span><span style='color:#406080; '>;</span>
<span style='color:#595979; '>//precompute positions at index</span>
<span style='color:#200080; font-weight:bold; '>const</span> _T px <span style='color:#308080; '>=</span> a<span style='color:#308080; '>.</span>array<span style='color:#308080; '>[</span>idx<span style='color:#308080; '>]</span><span style='color:#308080; '>.</span>Position<span style='color:#308080; '>.</span>x<span style='color:#406080; '>;</span>
<span style='color:#200080; font-weight:bold; '>const</span> _T py <span style='color:#308080; '>=</span> a<span style='color:#308080; '>.</span>array<span style='color:#308080; '>[</span>idx<span style='color:#308080; '>]</span><span style='color:#308080; '>.</span>Position<span style='color:#308080; '>.</span>y<span style='color:#406080; '>;</span>
<span style='color:#595979; '>//mass at the index</span>
<span style='color:#200080; font-weight:bold; '>const</span> _T M_idx <span style='color:#308080; '>=</span> G<span style='color:#308080; '>*</span>a<span style='color:#308080; '>.</span>array<span style='color:#308080; '>[</span>idx<span style='color:#308080; '>]</span><span style='color:#308080; '>.</span>Mass<span style='color:#406080; '>;</span>
a<span style='color:#308080; '>.</span>array<span style='color:#308080; '>[</span>idx<span style='color:#308080; '>]</span><span style='color:#308080; '>.</span>Force <span style='color:#308080; '>=</span> vec2_t<span style='color:#308080; '>(</span><span style='color:#308080; '>)</span><span style='color:#406080; '>;</span>
<span style='color:#200080; font-weight:bold; '>for</span> <span style='color:#308080; '>(</span>int_fast32_t j<span style='color:#308080; '>(</span><span style='color:#008c00; '>0</span><span style='color:#308080; '>)</span><span style='color:#406080; '>;</span> j <span style='color:#308080; '>!</span><span style='color:#308080; '>=</span> a<span style='color:#308080; '>.</span>size<span style='color:#406080; '>;</span> <span style='color:#308080; '>+</span><span style='color:#308080; '>+</span>j<span style='color:#308080; '>)</span> <span style='color:#406080; '>{</span>
<span style='color:#200080; font-weight:bold; '>if</span> <span style='color:#308080; '>(</span>idx <span style='color:#308080; '>!</span><span style='color:#308080; '>=</span> j<span style='color:#308080; '>)</span> <span style='color:#406080; '>{</span>
_T dx <span style='color:#308080; '>=</span> a<span style='color:#308080; '>.</span>array<span style='color:#308080; '>[</span>j<span style='color:#308080; '>]</span><span style='color:#308080; '>.</span>Position<span style='color:#308080; '>.</span>x <span style='color:#308080; '>-</span> px<span style='color:#406080; '>;</span>
_T dy <span style='color:#308080; '>=</span> a<span style='color:#308080; '>.</span>array<span style='color:#308080; '>[</span>j<span style='color:#308080; '>]</span><span style='color:#308080; '>.</span>Position<span style='color:#308080; '>.</span>y <span style='color:#308080; '>-</span> py<span style='color:#406080; '>;</span>
_T r <span style='color:#308080; '>=</span> <span style='color:#003060; '>sqrt</span><span style='color:#308080; '>(</span>dx<span style='color:#308080; '>*</span>dx <span style='color:#308080; '>+</span> dy<span style='color:#308080; '>*</span>dy<span style='color:#308080; '>)</span><span style='color:#406080; '>;</span>
_T F <span style='color:#308080; '>=</span> <span style='color:#308080; '>(</span>M_idx<span style='color:#308080; '>*</span>a<span style='color:#308080; '>.</span>array<span style='color:#308080; '>[</span>j<span style='color:#308080; '>]</span><span style='color:#308080; '>.</span>Mass<span style='color:#308080; '>)</span><span style='color:#308080; '>/</span><span style='color:#308080; '>(</span>r<span style='color:#308080; '>*</span>r<span style='color:#308080; '>)</span><span style='color:#406080; '>;</span>
a<span style='color:#308080; '>.</span>array<span style='color:#308080; '>[</span>idx<span style='color:#308080; '>]</span><span style='color:#308080; '>.</span>Force<span style='color:#308080; '>.</span>x <span style='color:#308080; '>+</span><span style='color:#308080; '>=</span> F <span style='color:#308080; '>*</span> <span style='color:#308080; '>(</span>dx <span style='color:#308080; '>/</span> r<span style='color:#308080; '>)</span><span style='color:#406080; '>;</span>
a<span style='color:#308080; '>.</span>array<span style='color:#308080; '>[</span>idx<span style='color:#308080; '>]</span><span style='color:#308080; '>.</span>Force<span style='color:#308080; '>.</span>y <span style='color:#308080; '>+</span><span style='color:#308080; '>=</span> F <span style='color:#308080; '>*</span> <span style='color:#308080; '>(</span>dy <span style='color:#308080; '>/</span> r<span style='color:#308080; '>)</span><span style='color:#406080; '>;</span>
<span style='color:#406080; '>}</span>
<span style='color:#406080; '>}</span>
<span style='color:#406080; '>}</span>
<span style='color:#406080; '>}</span>
 
<span style='color:#200080; font-weight:bold; '>void</span> __global__ SimTick<span style='color:#308080; '>(</span>BodyArray a<span style='color:#308080; '>,</span> _T dt<span style='color:#308080; '>)</span>
<span style='color:#406080; '>{</span>
<span style='color:#200080; font-weight:bold; '>int</span> idx <span style='color:#308080; '>=</span> blockIdx<span style='color:#308080; '>.</span>x <span style='color:#308080; '>*</span> blockDim<span style='color:#308080; '>.</span>x <span style='color:#308080; '>+</span> threadIdx<span style='color:#308080; '>.</span>x<span style='color:#406080; '>;</span>
<span style='color:#200080; font-weight:bold; '>if</span> <span style='color:#308080; '>(</span>idx <span style='color:#308080; '>&lt;</span> a<span style='color:#308080; '>.</span>size<span style='color:#308080; '>)</span>
<span style='color:#406080; '>{</span>
_T mass <span style='color:#308080; '>=</span> a<span style='color:#308080; '>.</span>array<span style='color:#308080; '>[</span>idx<span style='color:#308080; '>]</span><span style='color:#308080; '>.</span>Mass<span style='color:#406080; '>;</span>
a<span style='color:#308080; '>.</span>array<span style='color:#308080; '>[</span>idx<span style='color:#308080; '>]</span><span style='color:#308080; '>.</span>Velocity<span style='color:#308080; '>.</span>x <span style='color:#308080; '>+</span><span style='color:#308080; '>=</span> dt <span style='color:#308080; '>*</span> <span style='color:#308080; '>(</span>a<span style='color:#308080; '>.</span>array<span style='color:#308080; '>[</span>idx<span style='color:#308080; '>]</span><span style='color:#308080; '>.</span>Force<span style='color:#308080; '>.</span>x <span style='color:#308080; '>/</span> mass<span style='color:#308080; '>)</span><span style='color:#406080; '>;</span>
a<span style='color:#308080; '>.</span>array<span style='color:#308080; '>[</span>idx<span style='color:#308080; '>]</span><span style='color:#308080; '>.</span>Velocity<span style='color:#308080; '>.</span>y <span style='color:#308080; '>+</span><span style='color:#308080; '>=</span> dt <span style='color:#308080; '>*</span> <span style='color:#308080; '>(</span>a<span style='color:#308080; '>.</span>array<span style='color:#308080; '>[</span>idx<span style='color:#308080; '>]</span><span style='color:#308080; '>.</span>Force<span style='color:#308080; '>.</span>y <span style='color:#308080; '>/</span> mass<span style='color:#308080; '>)</span><span style='color:#406080; '>;</span>
a<span style='color:#308080; '>.</span>array<span style='color:#308080; '>[</span>idx<span style='color:#308080; '>]</span><span style='color:#308080; '>.</span>Position<span style='color:#308080; '>.</span>x <span style='color:#308080; '>+</span><span style='color:#308080; '>=</span> dt <span style='color:#308080; '>*</span> a<span style='color:#308080; '>.</span>array<span style='color:#308080; '>[</span>idx<span style='color:#308080; '>]</span><span style='color:#308080; '>.</span>Velocity<span style='color:#308080; '>.</span>x<span style='color:#406080; '>;</span>
a<span style='color:#308080; '>.</span>array<span style='color:#308080; '>[</span>idx<span style='color:#308080; '>]</span><span style='color:#308080; '>.</span>Position<span style='color:#308080; '>.</span>y <span style='color:#308080; '>+</span><span style='color:#308080; '>=</span> dt <span style='color:#308080; '>*</span> a<span style='color:#308080; '>.</span>array<span style='color:#308080; '>[</span>idx<span style='color:#308080; '>]</span><span style='color:#308080; '>.</span>Velocity<span style='color:#308080; '>.</span>y<span style='color:#406080; '>;</span>
<span style='color:#406080; '>}</span>
<span style='color:#406080; '>}</span>
</div>
 
==== Launch Control ====
 
We used the following calculations to determine the number of threads and blocks to launch with:
 
<div style='color:#000020;background:#f6f8ff;'>
numThreads_ <span style='color:#308080; '>=</span> prop<span style='color:#308080; '>.</span>maxThreadsPerMultiProcessor <span style='color:#308080; '>/</span> maxBlocks<span style='color:#406080; '>;</span>
numBlocks_ <span style='color:#308080; '>=</span> <span style='color:#308080; '>(</span>bodies_<span style='color:#308080; '>.</span>size<span style='color:#308080; '>(</span><span style='color:#308080; '>)</span> <span style='color:#308080; '>+</span> numThreads_ <span style='color:#308080; '>-</span> <span style='color:#008c00; '>1</span><span style='color:#308080; '>)</span> <span style='color:#308080; '>/</span> numThreads_<span style='color:#406080; '>;</span>
numThreads_ <span style='color:#308080; '>=</span> <span style='color:#308080; '>(</span>numThreads_ <span style='color:#308080; '>+</span> <span style='color:#008c00; '>1</span><span style='color:#308080; '>)</span> <span style='color:#308080; '>&amp;</span> <span style='color:#308080; '>~</span><span style='color:#008c00; '>1</span><span style='color:#406080; '>;</span>
</div>
 
==== Dynamic Shared Memory (not implemented) ====
 
This is the roundabout approach we came up with for sending the bodies to the kernel in chunks, so that the shared memory requested by a kernel launch is never greater than the maximum shared memory size of the GPU:
<div style='color:#000000;background:#ffffff;'>
CHUNKSIZE <span style='color:#808030; '>=</span> <span style='color:#008c00; '>512</span><span style='color:#800080; '>;</span>
shared_ <span style='color:#808030; '>=</span> CHUNKSIZE <span style='color:#808030; '>*</span> <span style='color:#800000; font-weight:bold; '>sizeof</span><span style='color:#808030; '>(</span>SimBody<span style='color:#808030; '>)</span><span style='color:#800080; '>;</span>
<span style='color:#800000; font-weight:bold; '>while</span> <span style='color:#808030; '>(</span>chunks <span style='color:#808030; '>></span> <span style='color:#008c00; '>0</span><span style='color:#808030; '>)</span>
<span style='color:#800080; '>{</span>
BodyArray ba <span style='color:#808030; '>=</span> <span style='color:#800080; '>{</span> <span style='color:#808030; '>&amp;</span>arr<span style='color:#808030; '>.</span><span style='color:#603000; '>array</span><span style='color:#808030; '>[</span>index<span style='color:#808030; '>]</span><span style='color:#808030; '>,</span> CHUNKSIZE <span style='color:#800080; '>}</span><span style='color:#800080; '>;</span>
SimCalc <span style='color:#808030; '>&lt;</span><span style='color:#808030; '>&lt;</span><span style='color:#808030; '>&lt;</span> numBlocks_<span style='color:#808030; '>,</span> numThreads_<span style='color:#808030; '>,</span> shared_ <span style='color:#808030; '>></span><span style='color:#808030; '>></span><span style='color:#808030; '>></span><span style='color:#808030; '>(</span>ba<span style='color:#808030; '>)</span><span style='color:#800080; '>;</span>
cudaThreadSynchronize<span style='color:#808030; '>(</span><span style='color:#808030; '>)</span><span style='color:#800080; '>;</span>
SimTick <span style='color:#808030; '>&lt;</span><span style='color:#808030; '>&lt;</span><span style='color:#808030; '>&lt;</span> numBlocks_<span style='color:#808030; '>,</span> numThreads_<span style='color:#808030; '>,</span> shared_ <span style='color:#808030; '>></span><span style='color:#808030; '>></span><span style='color:#808030; '>></span><span style='color:#808030; '>(</span>ba<span style='color:#808030; '>,</span> timeStep<span style='color:#808030; '>)</span><span style='color:#800080; '>;</span>
cudaThreadSynchronize<span style='color:#808030; '>(</span><span style='color:#808030; '>)</span><span style='color:#800080; '>;</span>
index <span style='color:#808030; '>+</span><span style='color:#808030; '>=</span> CHUNKSIZE<span style='color:#800080; '>;</span>
<span style='color:#808030; '>-</span><span style='color:#808030; '>-</span>chunks<span style='color:#800080; '>;</span>
<span style='color:#800080; '>}</span>
chunks <span style='color:#808030; '>=</span> arr<span style='color:#808030; '>.</span>size <span style='color:#808030; '>/</span> CHUNKSIZE <span style='color:#808030; '>+</span> <span style='color:#008c00; '>1</span><span style='color:#800080; '>;</span>
index <span style='color:#808030; '>=</span> <span style='color:#008c00; '>0</span><span style='color:#800080; '>;</span>
</div>
It handles the calculations in chunks so that the kernel can process more than 1175 bodies on devices with compute capability 3.x.
 
Here is what the shared-memory kernels would look like (not implemented because they are not correct):
<div style='color:#000000;background:#ffffff;'>
<span style='color:#800000; font-weight:bold; '>void</span> __global__ SimCalc<span style='color:#808030; '>(</span>BodyArray a<span style='color:#808030; '>)</span>
<span style='color:#800080; '>{</span>
int_fast32_t idx <span style='color:#808030; '>=</span> blockIdx<span style='color:#808030; '>.</span>x <span style='color:#808030; '>*</span> blockDim<span style='color:#808030; '>.</span>x <span style='color:#808030; '>+</span> threadIdx<span style='color:#808030; '>.</span>x<span style='color:#800080; '>;</span>
<span style='color:#800000; font-weight:bold; '>int</span> tid <span style='color:#808030; '>=</span> threadIdx<span style='color:#808030; '>.</span>x<span style='color:#800080; '>;</span>
<span style='color:#800000; font-weight:bold; '>extern</span> __shared__ SimBody sa<span style='color:#808030; '>[</span><span style='color:#808030; '>]</span><span style='color:#800080; '>;</span>
<span style='color:#800000; font-weight:bold; '>if</span> <span style='color:#808030; '>(</span>idx <span style='color:#808030; '>></span><span style='color:#808030; '>=</span> a<span style='color:#808030; '>.</span>size<span style='color:#808030; '>)</span>
<span style='color:#800000; font-weight:bold; '>return</span><span style='color:#800080; '>;</span>
sa<span style='color:#808030; '>[</span>tid<span style='color:#808030; '>]</span> <span style='color:#808030; '>=</span> a<span style='color:#808030; '>.</span><span style='color:#603000; '>array</span><span style='color:#808030; '>[</span>idx<span style='color:#808030; '>]</span><span style='color:#800080; '>;</span>
__syncthreads<span style='color:#808030; '>(</span><span style='color:#808030; '>)</span><span style='color:#800080; '>;</span>
<span style='color:#800000; font-weight:bold; '>const</span> _T G <span style='color:#808030; '>=</span> <span style='color:#008000; '>6.67384</span><span style='color:#006600; '>f</span> <span style='color:#808030; '>*</span> <span style='color:#603000; '>pow</span><span style='color:#808030; '>(</span><span style='color:#008000; '>10.0</span><span style='color:#006600; '>f</span><span style='color:#808030; '>,</span> <span style='color:#808030; '>-</span><span style='color:#008000; '>11.0</span><span style='color:#006600; '>f</span><span style='color:#808030; '>)</span><span style='color:#800080; '>;</span>
<span style='color:#696969; '>//precompute positions at index</span>
<span style='color:#800000; font-weight:bold; '>const</span> _T px <span style='color:#808030; '>=</span> sa<span style='color:#808030; '>[</span>tid<span style='color:#808030; '>]</span><span style='color:#808030; '>.</span>Position<span style='color:#808030; '>.</span>x<span style='color:#800080; '>;</span>
<span style='color:#800000; font-weight:bold; '>const</span> _T py <span style='color:#808030; '>=</span> sa<span style='color:#808030; '>[</span>tid<span style='color:#808030; '>]</span><span style='color:#808030; '>.</span>Position<span style='color:#808030; '>.</span>y<span style='color:#800080; '>;</span>
<span style='color:#696969; '>//mass at the index</span>
<span style='color:#800000; font-weight:bold; '>const</span> _T M_idx <span style='color:#808030; '>=</span> G<span style='color:#808030; '>*</span>sa<span style='color:#808030; '>[</span>tid<span style='color:#808030; '>]</span><span style='color:#808030; '>.</span>Mass<span style='color:#800080; '>;</span>
sa<span style='color:#808030; '>[</span>tid<span style='color:#808030; '>]</span><span style='color:#808030; '>.</span>Force <span style='color:#808030; '>=</span> vec2_t<span style='color:#808030; '>(</span><span style='color:#808030; '>)</span><span style='color:#800080; '>;</span>
<span style='color:#800000; font-weight:bold; '>for</span> <span style='color:#808030; '>(</span>int_fast32_t j<span style='color:#808030; '>(</span><span style='color:#008c00; '>0</span><span style='color:#808030; '>)</span><span style='color:#800080; '>;</span> j <span style='color:#808030; '>!</span><span style='color:#808030; '>=</span> a<span style='color:#808030; '>.</span>size<span style='color:#800080; '>;</span> <span style='color:#808030; '>+</span><span style='color:#808030; '>+</span>j<span style='color:#808030; '>)</span> <span style='color:#800080; '>{</span>
<span style='color:#800000; font-weight:bold; '>if</span> <span style='color:#808030; '>(</span>idx <span style='color:#808030; '>!</span><span style='color:#808030; '>=</span> j<span style='color:#808030; '>)</span> <span style='color:#800080; '>{</span>
_T dx <span style='color:#808030; '>=</span> a<span style='color:#808030; '>.</span><span style='color:#603000; '>array</span><span style='color:#808030; '>[</span>j<span style='color:#808030; '>]</span><span style='color:#808030; '>.</span>Position<span style='color:#808030; '>.</span>x <span style='color:#808030; '>-</span> px<span style='color:#800080; '>;</span>
_T dy <span style='color:#808030; '>=</span> a<span style='color:#808030; '>.</span><span style='color:#603000; '>array</span><span style='color:#808030; '>[</span>j<span style='color:#808030; '>]</span><span style='color:#808030; '>.</span>Position<span style='color:#808030; '>.</span>y <span style='color:#808030; '>-</span> py<span style='color:#800080; '>;</span>
_T r <span style='color:#808030; '>=</span> <span style='color:#603000; '>sqrt</span><span style='color:#808030; '>(</span>dx<span style='color:#808030; '>*</span>dx <span style='color:#808030; '>+</span> dy<span style='color:#808030; '>*</span>dy<span style='color:#808030; '>)</span><span style='color:#800080; '>;</span>
_T F <span style='color:#808030; '>=</span> <span style='color:#808030; '>(</span>M_idx<span style='color:#808030; '>*</span>a<span style='color:#808030; '>.</span><span style='color:#603000; '>array</span><span style='color:#808030; '>[</span>j<span style='color:#808030; '>]</span><span style='color:#808030; '>.</span>Mass<span style='color:#808030; '>)</span><span style='color:#808030; '>/</span><span style='color:#808030; '>(</span>r<span style='color:#808030; '>*</span>r<span style='color:#808030; '>)</span><span style='color:#800080; '>;</span>
sa<span style='color:#808030; '>[</span>tid<span style='color:#808030; '>]</span><span style='color:#808030; '>.</span>Force<span style='color:#808030; '>.</span>x <span style='color:#808030; '>+</span><span style='color:#808030; '>=</span> F <span style='color:#808030; '>*</span> <span style='color:#808030; '>(</span>dx <span style='color:#808030; '>/</span> r<span style='color:#808030; '>)</span><span style='color:#800080; '>;</span>
sa<span style='color:#808030; '>[</span>tid<span style='color:#808030; '>]</span><span style='color:#808030; '>.</span>Force<span style='color:#808030; '>.</span>y <span style='color:#808030; '>+</span><span style='color:#808030; '>=</span> F <span style='color:#808030; '>*</span> <span style='color:#808030; '>(</span>dy <span style='color:#808030; '>/</span> r<span style='color:#808030; '>)</span><span style='color:#800080; '>;</span>
<span style='color:#800080; '>}</span>
__syncthreads<span style='color:#808030; '>(</span><span style='color:#808030; '>)</span><span style='color:#800080; '>;</span>
<span style='color:#800080; '>}</span>
a<span style='color:#808030; '>.</span><span style='color:#603000; '>array</span><span style='color:#808030; '>[</span>idx<span style='color:#808030; '>]</span> <span style='color:#808030; '>=</span> sa<span style='color:#808030; '>[</span>tid<span style='color:#808030; '>]</span><span style='color:#800080; '>;</span>
<span style='color:#800080; '>}</span>
 
<span style='color:#800000; font-weight:bold; '>void</span> __global__ SimTick<span style='color:#808030; '>(</span>BodyArray a<span style='color:#808030; '>,</span> _T dt<span style='color:#808030; '>)</span>
<span style='color:#800080; '>{</span>
<span style='color:#800000; font-weight:bold; '>int</span> idx <span style='color:#808030; '>=</span> blockIdx<span style='color:#808030; '>.</span>x <span style='color:#808030; '>*</span> blockDim<span style='color:#808030; '>.</span>x <span style='color:#808030; '>+</span> threadIdx<span style='color:#808030; '>.</span>x<span style='color:#800080; '>;</span>
<span style='color:#800000; font-weight:bold; '>int</span> tid <span style='color:#808030; '>=</span> threadIdx<span style='color:#808030; '>.</span>x<span style='color:#800080; '>;</span>
<span style='color:#800000; font-weight:bold; '>extern</span> __shared__ SimBody sa<span style='color:#808030; '>[</span><span style='color:#808030; '>]</span><span style='color:#800080; '>;</span>
<span style='color:#800000; font-weight:bold; '>if</span> <span style='color:#808030; '>(</span>idx <span style='color:#808030; '>></span><span style='color:#808030; '>=</span> a<span style='color:#808030; '>.</span>size<span style='color:#808030; '>)</span>
<span style='color:#800000; font-weight:bold; '>return</span><span style='color:#800080; '>;</span>
sa<span style='color:#808030; '>[</span>tid<span style='color:#808030; '>]</span> <span style='color:#808030; '>=</span> a<span style='color:#808030; '>.</span><span style='color:#603000; '>array</span><span style='color:#808030; '>[</span>idx<span style='color:#808030; '>]</span><span style='color:#800080; '>;</span>
__syncthreads<span style='color:#808030; '>(</span><span style='color:#808030; '>)</span><span style='color:#800080; '>;</span>
_T mass <span style='color:#808030; '>=</span> sa<span style='color:#808030; '>[</span>tid<span style='color:#808030; '>]</span><span style='color:#808030; '>.</span>Mass<span style='color:#800080; '>;</span>
sa<span style='color:#808030; '>[</span>tid<span style='color:#808030; '>]</span><span style='color:#808030; '>.</span>Velocity<span style='color:#808030; '>.</span>x <span style='color:#808030; '>+</span><span style='color:#808030; '>=</span> dt <span style='color:#808030; '>*</span> <span style='color:#808030; '>(</span>sa<span style='color:#808030; '>[</span>tid<span style='color:#808030; '>]</span><span style='color:#808030; '>.</span>Force<span style='color:#808030; '>.</span>x <span style='color:#808030; '>/</span> mass<span style='color:#808030; '>)</span><span style='color:#800080; '>;</span>
sa<span style='color:#808030; '>[</span>tid<span style='color:#808030; '>]</span><span style='color:#808030; '>.</span>Velocity<span style='color:#808030; '>.</span>y <span style='color:#808030; '>+</span><span style='color:#808030; '>=</span> dt <span style='color:#808030; '>*</span> <span style='color:#808030; '>(</span>sa<span style='color:#808030; '>[</span>tid<span style='color:#808030; '>]</span><span style='color:#808030; '>.</span>Force<span style='color:#808030; '>.</span>y <span style='color:#808030; '>/</span> mass<span style='color:#808030; '>)</span><span style='color:#800080; '>;</span>
sa<span style='color:#808030; '>[</span>tid<span style='color:#808030; '>]</span><span style='color:#808030; '>.</span>Position<span style='color:#808030; '>.</span>x <span style='color:#808030; '>+</span><span style='color:#808030; '>=</span> dt <span style='color:#808030; '>*</span> sa<span style='color:#808030; '>[</span>tid<span style='color:#808030; '>]</span><span style='color:#808030; '>.</span>Velocity<span style='color:#808030; '>.</span>x<span style='color:#800080; '>;</span>
sa<span style='color:#808030; '>[</span>tid<span style='color:#808030; '>]</span><span style='color:#808030; '>.</span>Position<span style='color:#808030; '>.</span>y <span style='color:#808030; '>+</span><span style='color:#808030; '>=</span> dt <span style='color:#808030; '>*</span> sa<span style='color:#808030; '>[</span>tid<span style='color:#808030; '>]</span><span style='color:#808030; '>.</span>Velocity<span style='color:#808030; '>.</span>y<span style='color:#800080; '>;</span>
__syncthreads<span style='color:#808030; '>(</span><span style='color:#808030; '>)</span><span style='color:#800080; '>;</span>
a<span style='color:#808030; '>.</span><span style='color:#603000; '>array</span><span style='color:#808030; '>[</span>idx<span style='color:#808030; '>]</span> <span style='color:#808030; '>=</span> sa<span style='color:#808030; '>[</span>tid<span style='color:#808030; '>]</span><span style='color:#800080; '>;</span>
<span style='color:#800080; '>}</span>
</div>
1
edit

Navigation menu