Difference between revisions of "Team failure"
(→Progress) |
(→Team Members) |
||
(3 intermediate revisions by the same user not shown) | |||
Line 4: | Line 4: | ||
# [mailto:jmiannandrea@senecacollege.ca?subject=gpu John Iannandrea], TBB Heat diffusion | # [mailto:jmiannandrea@senecacollege.ca?subject=gpu John Iannandrea], TBB Heat diffusion | ||
# [mailto:@senecacollege.ca?subject=gpu Colin Campbell], OpenMP Heat diffusion | # [mailto:@senecacollege.ca?subject=gpu Colin Campbell], OpenMP Heat diffusion | ||
− | # [mailto:@senecacollege.ca?subject=gpu Mateya Lucic], Cilk Plus Heat diffusion | + | # [mailto:mlucic3@senecacollege.ca?subject=gpu Mateya Lucic], Cilk Plus Heat diffusion |
[mailto:jmiannandrea@senecacollege.ca,mlucic3@senecacollege.ca?subject=GPU Email All] | [mailto:jmiannandrea@senecacollege.ca,mlucic3@senecacollege.ca?subject=GPU Email All] | ||
Line 166: | Line 166: | ||
=== Results === | === Results === | ||
+ | |||
+ | What we found was that all the parallelization methods were all very similar. We also tested this with cuda and found cuda to be the fastest. | ||
+ | |||
+ | [[Image:GeyIa97.png|640px]] | ||
+ | |||
+ | [[Image:TP4107j.png|300px]] |
Latest revision as of 21:20, 14 April 2016
GPU621/DPS921 | Participants | Groups and Projects | Resources | Glossary
Contents
Team Failure
Team Members
- John Iannandrea, TBB Heat diffusion
- Colin Campbell, OpenMP Heat diffusion
- Mateya Lucic, Cilk Plus Heat diffusion
Assignment
Our assignment was to implement OpenMP, TBB, and Cilk Plus versions of a 2D heat diffusion algorithm.
Serial
This is the serial version of the code that we parallelized.
class SerialDiffuser : public IDiffuser { protected: void evolveTimestep(){ for (int row = 1; row < N - 1; row++) { for (int col = 1; col < N - 1; col++) { float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta; float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta; u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy); } } } public: SerialDiffuser(int _N, int _T) : IDiffuser(_N, _T) {} void init(){ for (int row = 0; row < N; row++) { for (int col = 0; col < N; col++) { if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1) & (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05)) ui[row * N + col] = 1.0; } } } void compute(){ for (int m = 1; m < timeSteps; m++) { evolveTimestep(); std::copy(u, u + N * N, ui); } } };
Omp
class OMPDiffuser : public IDiffuser { protected: void evolveTimestep(){ #pragma omp parallel for for (int row = 1; row < N - 1; row++) { for (int col = 1; col < N - 1; col++) { float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta; float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta; u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy); } } } public: OMPDiffuser(int _N, int _T) : IDiffuser(_N, _T) {} void init(){ #pragma omp parallel for for (int row = 0; row < N; row++) { for (int col = 0; col < N; col++) { if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1) & (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05)) ui[row * N + col] = 1.0; } } } void compute(){ for (int m = 1; m < timeSteps; m++) { evolveTimestep(); std::copy(u, u + N * N, ui); } } };
Cilk
class CilkDiffuser : public IDiffuser { protected: void evolveTimestep(){ cilk_for(int row = 1; row < N - 1; row++) { for (int col = 1; col < N - 1; col++) { float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta; float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta; u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy); } } } public: CilkDiffuser(int _N, int _T) : IDiffuser(_N, _T) {} void init(){ cilk_for(int row = 0; row < N; row++) { for (int col = 0; col < N; col++) { if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1) & (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05)) ui[row * N + col] = 1.0; } } } void compute(){ cilk_for(int m = 1; m < timeSteps; m++) { evolveTimestep(); u[0:N*N] = ui[0:N*N]; } } };
TBB
class TBBEvolve { float* u; float* ui; float delta, deltaT; const float diff = 0.5; int N; public: TBBEvolve(float* _u, float* _ui, float d, float dt, float n) : u(_u), ui(_ui), delta(d), deltaT(dt), N(n) {} void operator()(tbb::blocked_range2d<int> r) const{ for (int row = r.rows().begin(); row < r.rows().end(); row++) { #pragma simd for (int col = r.cols().begin(); col < r.cols().end(); col++) { float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta; float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta; u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy); } } } }; class TBBDiffuser : public IDiffuser { protected: void evolveTimestep(){ } public: TBBDiffuser(int _N, int _T) : IDiffuser(_N, _T) {} void init(){ for (int row = 0; row < N; row++) { for (int col = 0; col < N; col++) { if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1) & (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05)) ui[row * N + col] = 1.0; } } } void compute(){ for (int m = 1; m < timeSteps; m++) { tbb::blocked_range2d<int> r(1, N - 1, 1, N - 1); tbb::parallel_for(r, TBBEvolve(u, ui, delta, deltaT, N)); } } };
Results
We found that all of the parallelization methods performed very similarly. We also tested a CUDA version and found it to be the fastest.