Team failure

From CDOT Wiki
Jump to: navigation, search


GPU621/DPS921 | Participants | Groups and Projects | Resources | Glossary

Team Failure

Team Members

  1. John Iannandrea, TBB Heat diffusion
  2. Colin Campbell, OpenMP Heat diffusion
  3. Mateya Lucic, Cilk Plus Heat diffusion

Email All

Assignment

Our assignment was to implement OMP, TBB, and Cilk Plus versions of a 2d diffusion algorithm.

Serial

This is the serial version of the code we have parallelized


class SerialDiffuser : public IDiffuser {
protected:
	void evolveTimestep(){
		for (int row = 1; row < N - 1; row++) {
			for (int col = 1; col < N - 1; col++) {
				float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta;
				float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta;

				u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy);
			}
		}
	}
public:
	SerialDiffuser(int _N, int _T) : IDiffuser(_N, _T) {}
	void init(){
		for (int row = 0; row < N; row++) {
			for (int col = 0; col < N; col++) {
				if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1)
					& (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05))
					ui[row * N + col] = 1.0;
			}
		}
	}
	void compute(){
		for (int m = 1; m < timeSteps; m++) {
			evolveTimestep();
			std::copy(u, u + N * N, ui);
		}
	}
};

Omp

class OMPDiffuser : public IDiffuser {
protected:
	void evolveTimestep(){
		#pragma omp parallel for
		for (int row = 1; row < N - 1; row++) {
			for (int col = 1; col < N - 1; col++) {
				float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta;
				float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta;

				u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy);
			}
		}
	}
public:
	OMPDiffuser(int _N, int _T) : IDiffuser(_N, _T) {}
	void init(){
		#pragma omp parallel for
		for (int row = 0; row < N; row++) {
			for (int col = 0; col < N; col++) {
				if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1)
					& (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05))
					ui[row * N + col] = 1.0;
			}
		}
	}
	void compute(){
		for (int m = 1; m < timeSteps; m++) {
			evolveTimestep();
			std::copy(u, u + N * N, ui);
		}
	}
};

Cilk

class CilkDiffuser : public IDiffuser {
protected:
	void evolveTimestep(){
		cilk_for(int row = 1; row < N - 1; row++) {
			for (int col = 1; col < N - 1; col++) {
				float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta;
				float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta;

				u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy);
			}
		}
	}
public:
	CilkDiffuser(int _N, int _T) : IDiffuser(_N, _T) {}
	void init(){
		cilk_for(int row = 0; row < N; row++) {
			for (int col = 0; col < N; col++) {
				if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1)
					& (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05))
					ui[row * N + col] = 1.0;
			}
		}
	}
	void compute(){
		cilk_for(int m = 1; m < timeSteps; m++) {
			evolveTimestep();
			u[0:N*N] = ui[0:N*N];
		}
	}
};

TBB

class TBBEvolve {
	float* u;
	float* ui;
	float delta, deltaT;
	const float diff = 0.5;
	int N;
public:
	TBBEvolve(float* _u, float* _ui, float d, float dt, float n) : u(_u), ui(_ui), delta(d), deltaT(dt), N(n) {}
	void operator()(tbb::blocked_range2d<int> r) const{
		for (int row = r.rows().begin(); row < r.rows().end(); row++) {
			#pragma simd
			for (int col = r.cols().begin(); col < r.cols().end(); col++) {
				float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta;
				float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta;

				u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy);
			}
		}
	}
};

class TBBDiffuser : public IDiffuser {
protected:
	void evolveTimestep(){

	}
public:
	TBBDiffuser(int _N, int _T) : IDiffuser(_N, _T) {}
	void init(){
		for (int row = 0; row < N; row++) {
			for (int col = 0; col < N; col++) {
				if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1)
					& (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05))
					ui[row * N + col] = 1.0;
			}
		}
	}
	void compute(){
		for (int m = 1; m < timeSteps; m++) {
			tbb::blocked_range2d<int> r(1, N - 1, 1, N - 1);
			tbb::parallel_for(r, TBBEvolve(u, ui, delta, deltaT, N));
		}
	}
};


Results

What we found was that all the parallelization methods were all very similar. We also tested this with cuda and found cuda to be the fastest.

GeyIa97.png

TP4107j.png