Difference between revisions of "Team failure"
(Created page with '{{GPU621/DPS921 Index | 20161}} = Team Failure = == Team Members == # [mailto:jmiannandrea@senecacollege.ca?subject=gpu John Iannandrea], Some responsibility # [mailto:@senecac…') |
(→Team Members) |
||
(5 intermediate revisions by 2 users not shown) | |||
Line 2: | Line 2: | ||
= Team Failure = | = Team Failure = | ||
== Team Members == | == Team Members == | ||
− | # [mailto:jmiannandrea@senecacollege.ca?subject=gpu John Iannandrea], | + | # [mailto:jmiannandrea@senecacollege.ca?subject=gpu John Iannandrea], TBB Heat diffusion |
− | # [mailto:@senecacollege.ca?subject=gpu Colin Campbell], | + | # [mailto:@senecacollege.ca?subject=gpu Colin Campbell], OpenMP Heat diffusion |
− | # . | + | # [mailto:mlucic3@senecacollege.ca?subject=gpu Mateya Lucic], Cilk Plus Heat diffusion |
− | [mailto:jmiannandrea@senecacollege.ca,@senecacollege.ca?subject=GPU Email All] | + | [mailto:jmiannandrea@senecacollege.ca,mlucic3@senecacollege.ca?subject=GPU Email All] |
− | == | + | == Assignment == |
− | === | + | Our assignment was to implement OpenMP, TBB, and Cilk Plus versions of a 2D heat diffusion algorithm. |
− | === | + | |
− | === | + | === Serial === |
+ | This is the serial version of the code that we parallelized. | ||
+ | |||
+ | |||
+ | <pre>class SerialDiffuser : public IDiffuser { /* Single-threaded version of the diffusion solver. N, timeSteps, dir, delta, deltaT, diff, u and ui are not declared here; presumably inherited from IDiffuser. */ | ||
+ | protected: | ||
+ | void evolveTimestep(){ /* Computes one update step: reads the field ui and writes the result into u. */ | ||
+ | for (int row = 1; row < N - 1; row++) { /* Index 0 and N-1 are skipped so the row+/-1 and col+/-1 reads stay inside the N*N array. */ | ||
+ | for (int col = 1; col < N - 1; col++) { | ||
+ | float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta; /* Second difference across neighbouring rows, divided by delta. */ | ||
+ | float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta; /* Second difference across neighbouring columns, divided by delta. */ | ||
+ | |||
+ | u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy); /* New value = old value + deltaT * diff * (uxx + uyy). */ | ||
+ | } | ||
+ | } | ||
+ | } | ||
+ | public: | ||
+ | SerialDiffuser(int _N, int _T) : IDiffuser(_N, _T) {} /* Forwards both arguments to the IDiffuser constructor. */ | ||
+ | void init(){ /* Sets ui to 1.0 on cells whose squared distance from (0.5, 0.5), in dir-scaled index coordinates, lies in [0.05, 0.1]; other cells are not written. */ | ||
+ | for (int row = 0; row < N; row++) { | ||
+ | for (int col = 0; col < N; col++) { | ||
+ | if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1) | ||
+ | & (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05)) /* NOTE(review): bitwise AND of two bool results; logical AND would give the same value and is the usual spelling. */ | ||
+ | ui[row * N + col] = 1.0; | ||
+ | } | ||
+ | } | ||
+ | } | ||
+ | void compute(){ /* Runs the time-stepping loop. */ | ||
+ | for (int m = 1; m < timeSteps; m++) { /* m starts at 1, so the body executes timeSteps - 1 times. */ | ||
+ | evolveTimestep(); | ||
+ | std::copy(u, u + N * N, ui); /* The freshly computed field becomes the input of the next step. */ | ||
+ | } | ||
+ | } | ||
+ | };</pre> | ||
+ | |||
+ | ==== Omp ==== | ||
+ | |||
+ | |||
+ | <pre>class OMPDiffuser : public IDiffuser { /* OpenMP version of the diffusion solver. N, timeSteps, dir, delta, deltaT, diff, u and ui are not declared here; presumably inherited from IDiffuser. */ | ||
+ | protected: | ||
+ | void evolveTimestep(){ /* Computes one update step: reads the field ui and writes the result into u. */ | ||
+ | #pragma omp parallel for | ||
+ | for (int row = 1; row < N - 1; row++) { /* The pragma above applies to this row loop; each iteration writes only its own row of u and only reads ui. */ | ||
+ | for (int col = 1; col < N - 1; col++) { /* Index 0 and N-1 are skipped so the +/-1 neighbour reads stay inside the N*N array. */ | ||
+ | float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta; /* Second difference across neighbouring rows, divided by delta. */ | ||
+ | float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta; /* Second difference across neighbouring columns, divided by delta. */ | ||
+ | |||
+ | u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy); /* New value = old value + deltaT * diff * (uxx + uyy). */ | ||
+ | } | ||
+ | } | ||
+ | } | ||
+ | public: | ||
+ | OMPDiffuser(int _N, int _T) : IDiffuser(_N, _T) {} /* Forwards both arguments to the IDiffuser constructor. */ | ||
+ | void init(){ /* Sets ui to 1.0 on cells whose squared distance from (0.5, 0.5), in dir-scaled index coordinates, lies in [0.05, 0.1]; other cells are not written. */ | ||
+ | #pragma omp parallel for | ||
+ | for (int row = 0; row < N; row++) { /* The pragma above applies to this row loop; each iteration writes only its own row of ui. */ | ||
+ | for (int col = 0; col < N; col++) { | ||
+ | if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1) | ||
+ | & (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05)) /* NOTE(review): bitwise AND of two bool results; logical AND would give the same value and is the usual spelling. */ | ||
+ | ui[row * N + col] = 1.0; | ||
+ | } | ||
+ | } | ||
+ | } | ||
+ | void compute(){ /* Runs the time-stepping loop; the steps themselves run one after another. */ | ||
+ | for (int m = 1; m < timeSteps; m++) { /* m starts at 1, so the body executes timeSteps - 1 times. */ | ||
+ | evolveTimestep(); | ||
+ | std::copy(u, u + N * N, ui); /* The freshly computed field becomes the input of the next step. */ | ||
+ | } | ||
+ | } | ||
+ | };</pre> | ||
+ | |||
+ | ==== Cilk ==== | ||
+ | |||
+ | <pre> | ||
+ | class CilkDiffuser : public IDiffuser { /* Cilk Plus version of the diffusion solver. N, timeSteps, dir, delta, deltaT, diff, u and ui are not declared here; presumably inherited from IDiffuser. */ | ||
+ | protected: | ||
+ | void evolveTimestep(){ /* Computes one update step: reads the field ui and writes the result into u. */ | ||
+ | cilk_for(int row = 1; row < N - 1; row++) { /* Row iterations may run in parallel; each writes only its own row of u and only reads ui. */ | ||
+ | for (int col = 1; col < N - 1; col++) { /* Index 0 and N-1 are skipped so the +/-1 neighbour reads stay inside the N*N array. */ | ||
+ | float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta; /* Second difference across neighbouring rows, divided by delta. */ | ||
+ | float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta; /* Second difference across neighbouring columns, divided by delta. */ | ||
+ | |||
+ | u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy); /* New value = old value + deltaT * diff * (uxx + uyy). */ | ||
+ | } | ||
+ | } | ||
+ | } | ||
+ | public: | ||
+ | CilkDiffuser(int _N, int _T) : IDiffuser(_N, _T) {} /* Forwards both arguments to the IDiffuser constructor. */ | ||
+ | void init(){ /* Sets ui to 1.0 on cells whose squared distance from (0.5, 0.5), in dir-scaled index coordinates, lies in [0.05, 0.1]; other cells are not written. */ | ||
+ | cilk_for(int row = 0; row < N; row++) { /* Row iterations may run in parallel; each writes only its own row of ui. */ | ||
+ | for (int col = 0; col < N; col++) { | ||
+ | if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1) | ||
+ | & (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05)) /* NOTE(review): bitwise AND of two bool results; logical AND would give the same value and is the usual spelling. */ | ||
+ | ui[row * N + col] = 1.0; | ||
+ | } | ||
+ | } | ||
+ | } | ||
+ | void compute(){ /* Runs the time-stepping loop. */ | ||
+ | cilk_for(int m = 1; m < timeSteps; m++) { /* NOTE(review): each step needs the previous step's result, yet this loop is a cilk_for, so steps may run concurrently on the same u and ui; looks like a data race, confirm a plain for was intended. */ | ||
+ | evolveTimestep(); | ||
+ | u[0:N*N] = ui[0:N*N]; /* NOTE(review): this copies ui into u, the opposite direction from the std::copy(u, ..., ui) used by SerialDiffuser and OMPDiffuser, so ui is never advanced; confirm. */ | ||
+ | } | ||
+ | } | ||
+ | }; | ||
+ | </pre> | ||
+ | |||
+ | ==== TBB ==== | ||
+ | |||
+ | |||
+ | <pre>class TBBEvolve { /* Function object passed to tbb::parallel_for; applies the diffusion update to one 2D sub-range of the grid. */ | ||
+ | float* u; /* Output field (written). */ | ||
+ | float* ui; /* Input field (read only). */ | ||
+ | float delta, deltaT; | ||
+ | const float diff = 0.5; /* Fixed here rather than taken from the caller. NOTE(review): confirm it matches the diff used by the other diffuser classes. */ | ||
+ | int N; /* Row stride of the flattened N*N arrays. */ | ||
+ | public: | ||
+ | TBBEvolve(float* _u, float* _ui, float d, float dt, float n) : u(_u), ui(_ui), delta(d), deltaT(dt), N(n) {} /* NOTE(review): n is declared float but initialises the int member N; an int parameter looks intended. */ | ||
+ | void operator()(tbb::blocked_range2d<int> r) const{ /* Updates every cell inside r, reading ui and writing u. */ | ||
+ | for (int row = r.rows().begin(); row < r.rows().end(); row++) { | ||
+ | #pragma simd | ||
+ | for (int col = r.cols().begin(); col < r.cols().end(); col++) { /* The range must exclude index 0 and N-1, because the +/-1 neighbour reads are not bounds-checked here. */ | ||
+ | float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta; /* Second difference across neighbouring rows, divided by delta. */ | ||
+ | float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta; /* Second difference across neighbouring columns, divided by delta. */ | ||
+ | |||
+ | u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy); /* New value = old value + deltaT * diff * (uxx + uyy). */ | ||
+ | } | ||
+ | } | ||
+ | } | ||
+ | }; | ||
+ | |||
+ | class TBBDiffuser : public IDiffuser { /* TBB version of the diffusion solver. N, timeSteps, dir, delta, deltaT, u and ui are not declared here; presumably inherited from IDiffuser. */ | ||
+ | protected: | ||
+ | void evolveTimestep(){ /* Intentionally or accidentally empty: the step is issued directly from compute() instead. */ | ||
+ | |||
+ | } | ||
+ | public: | ||
+ | TBBDiffuser(int _N, int _T) : IDiffuser(_N, _T) {} /* Forwards both arguments to the IDiffuser constructor. */ | ||
+ | void init(){ /* Sets ui to 1.0 on cells whose squared distance from (0.5, 0.5), in dir-scaled index coordinates, lies in [0.05, 0.1]; other cells are not written. Runs serially. */ | ||
+ | for (int row = 0; row < N; row++) { | ||
+ | for (int col = 0; col < N; col++) { | ||
+ | if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1) | ||
+ | & (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05)) /* NOTE(review): bitwise AND of two bool results; logical AND would give the same value and is the usual spelling. */ | ||
+ | ui[row * N + col] = 1.0; | ||
+ | } | ||
+ | } | ||
+ | } | ||
+ | void compute(){ /* Runs the time-stepping loop, one tbb::parallel_for per step. */ | ||
+ | for (int m = 1; m < timeSteps; m++) { /* m starts at 1, so the body executes timeSteps - 1 times. */ | ||
+ | tbb::blocked_range2d<int> r(1, N - 1, 1, N - 1); /* Interior cells only: rows and columns 1 .. N-2. */ | ||
+ | tbb::parallel_for(r, TBBEvolve(u, ui, delta, deltaT, N)); /* NOTE(review): unlike SerialDiffuser and OMPDiffuser, u is never copied back into ui afterwards, so every iteration recomputes the same step; confirm. */ | ||
+ | } | ||
+ | } | ||
+ | };</pre> | ||
+ | |||
+ | |||
+ | === Results === | ||
+ | |||
+ | We found that all three parallelization methods (OpenMP, Cilk Plus, and TBB) performed very similarly. We also tested a CUDA version and found it to be the fastest. | ||
+ | |||
+ | [[Image:GeyIa97.png|640px]] | ||
+ | |||
+ | [[Image:TP4107j.png|300px]] |
Latest revision as of 21:20, 14 April 2016
GPU621/DPS921 | Participants | Groups and Projects | Resources | Glossary
Contents
Team Failure
Team Members
- John Iannandrea, TBB Heat diffusion
- Colin Campbell, OpenMP Heat diffusion
- Mateya Lucic, Cilk Plus Heat diffusion
Assignment
Our assignment was to implement OpenMP, TBB, and Cilk Plus versions of a 2D heat diffusion algorithm.
Serial
This is the serial version of the code that we parallelized.
class SerialDiffuser : public IDiffuser { protected: void evolveTimestep(){ for (int row = 1; row < N - 1; row++) { for (int col = 1; col < N - 1; col++) { float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta; float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta; u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy); } } } public: SerialDiffuser(int _N, int _T) : IDiffuser(_N, _T) {} void init(){ for (int row = 0; row < N; row++) { for (int col = 0; col < N; col++) { if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1) & (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05)) ui[row * N + col] = 1.0; } } } void compute(){ for (int m = 1; m < timeSteps; m++) { evolveTimestep(); std::copy(u, u + N * N, ui); } } };
Omp
class OMPDiffuser : public IDiffuser { protected: void evolveTimestep(){ #pragma omp parallel for for (int row = 1; row < N - 1; row++) { for (int col = 1; col < N - 1; col++) { float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta; float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta; u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy); } } } public: OMPDiffuser(int _N, int _T) : IDiffuser(_N, _T) {} void init(){ #pragma omp parallel for for (int row = 0; row < N; row++) { for (int col = 0; col < N; col++) { if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1) & (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05)) ui[row * N + col] = 1.0; } } } void compute(){ for (int m = 1; m < timeSteps; m++) { evolveTimestep(); std::copy(u, u + N * N, ui); } } };
Cilk
class CilkDiffuser : public IDiffuser { protected: void evolveTimestep(){ cilk_for(int row = 1; row < N - 1; row++) { for (int col = 1; col < N - 1; col++) { float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta; float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta; u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy); } } } public: CilkDiffuser(int _N, int _T) : IDiffuser(_N, _T) {} void init(){ cilk_for(int row = 0; row < N; row++) { for (int col = 0; col < N; col++) { if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1) & (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05)) ui[row * N + col] = 1.0; } } } void compute(){ cilk_for(int m = 1; m < timeSteps; m++) { evolveTimestep(); u[0:N*N] = ui[0:N*N]; } } };
TBB
class TBBEvolve { float* u; float* ui; float delta, deltaT; const float diff = 0.5; int N; public: TBBEvolve(float* _u, float* _ui, float d, float dt, float n) : u(_u), ui(_ui), delta(d), deltaT(dt), N(n) {} void operator()(tbb::blocked_range2d<int> r) const{ for (int row = r.rows().begin(); row < r.rows().end(); row++) { #pragma simd for (int col = r.cols().begin(); col < r.cols().end(); col++) { float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta; float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta; u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy); } } } }; class TBBDiffuser : public IDiffuser { protected: void evolveTimestep(){ } public: TBBDiffuser(int _N, int _T) : IDiffuser(_N, _T) {} void init(){ for (int row = 0; row < N; row++) { for (int col = 0; col < N; col++) { if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1) & (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05)) ui[row * N + col] = 1.0; } } } void compute(){ for (int m = 1; m < timeSteps; m++) { tbb::blocked_range2d<int> r(1, N - 1, 1, N - 1); tbb::parallel_for(r, TBBEvolve(u, ui, delta, deltaT, N)); } } };
Results
What we found was that all three parallelization methods (OpenMP, Cilk Plus, and TBB) performed very similarly. We also tested a CUDA version and found it to be the fastest.