Team failure
GPU621/DPS921 | Participants | Groups and Projects | Resources | Glossary
Contents
Team Failure
Team Members
- John Iannandrea, TBB Heat diffusion
- Colin Campbell, OpenMP Heat diffusion
- Mateya Lucic, Cilk Plus Heat diffusion
Assignment
Our assignment was to implement OpenMP, TBB, and Cilk Plus versions of a 2D heat diffusion algorithm.
Serial
This is the serial version of the code that we parallelized.
class SerialDiffuser : public IDiffuser { protected: void evolveTimestep(){ for (int row = 1; row < N - 1; row++) { for (int col = 1; col < N - 1; col++) { float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta; float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta; u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy); } } } public: SerialDiffuser(int _N, int _T) : IDiffuser(_N, _T) {} void init(){ for (int row = 0; row < N; row++) { for (int col = 0; col < N; col++) { if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1) & (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05)) ui[row * N + col] = 1.0; } } } void compute(){ for (int m = 1; m < timeSteps; m++) { evolveTimestep(); std::copy(u, u + N * N, ui); } } };
Omp
class OMPDiffuser : public IDiffuser { protected: void evolveTimestep(){ #pragma omp parallel for for (int row = 1; row < N - 1; row++) { for (int col = 1; col < N - 1; col++) { float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta; float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta; u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy); } } } public: OMPDiffuser(int _N, int _T) : IDiffuser(_N, _T) {} void init(){ #pragma omp parallel for for (int row = 0; row < N; row++) { for (int col = 0; col < N; col++) { if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1) & (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05)) ui[row * N + col] = 1.0; } } } void compute(){ for (int m = 1; m < timeSteps; m++) { evolveTimestep(); std::copy(u, u + N * N, ui); } } };
Cilk
class CilkDiffuser : public IDiffuser { protected: void evolveTimestep(){ cilk_for(int row = 1; row < N - 1; row++) { for (int col = 1; col < N - 1; col++) { float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta; float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta; u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy); } } } public: CilkDiffuser(int _N, int _T) : IDiffuser(_N, _T) {} void init(){ cilk_for(int row = 0; row < N; row++) { for (int col = 0; col < N; col++) { if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1) & (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05)) ui[row * N + col] = 1.0; } } } void compute(){ cilk_for(int m = 1; m < timeSteps; m++) { evolveTimestep(); u[0:N*N] = ui[0:N*N]; } } };
TBB
class TBBEvolve { float* u; float* ui; float delta, deltaT; const float diff = 0.5; int N; public: TBBEvolve(float* _u, float* _ui, float d, float dt, float n) : u(_u), ui(_ui), delta(d), deltaT(dt), N(n) {} void operator()(tbb::blocked_range2d<int> r) const{ for (int row = r.rows().begin(); row < r.rows().end(); row++) { #pragma simd for (int col = r.cols().begin(); col < r.cols().end(); col++) { float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta; float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta; u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy); } } } }; class TBBDiffuser : public IDiffuser { protected: void evolveTimestep(){ } public: TBBDiffuser(int _N, int _T) : IDiffuser(_N, _T) {} void init(){ for (int row = 0; row < N; row++) { for (int col = 0; col < N; col++) { if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1) & (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05)) ui[row * N + col] = 1.0; } } } void compute(){ for (int m = 1; m < timeSteps; m++) { tbb::blocked_range2d<int> r(1, N - 1, 1, N - 1); tbb::parallel_for(r, TBBEvolve(u, ui, delta, deltaT, N)); } } };
Results
We found that all of the parallelization methods performed very similarly. We also tested a CUDA implementation and found it to be the fastest.