Latest revision as of 21:20, 14 April 2016

GPU621/DPS921 | Participants | Groups and Projects | Resources | Glossary

Team Failure

Team Members

John Iannandrea, TBB Heat diffusion
Colin Campbell, OpenMP Heat diffusion
Mateya Lucic, Cilk Plus Heat diffusion

Email All

Assignment

Our assignment was to implement OpenMP, TBB, and Cilk Plus versions of a 2D heat diffusion algorithm.

Serial

This is the serial version of the code that we parallelized.

class SerialDiffuser : public IDiffuser {
protected:
	void evolveTimestep(){
		for (int row = 1; row < N - 1; row++) {
			for (int col = 1; col < N - 1; col++) {
				float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta;
				float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta;

				u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy);
			}
		}
	}
public:
	SerialDiffuser(int _N, int _T) : IDiffuser(_N, _T) {}
	void init(){
		for (int row = 0; row < N; row++) {
			for (int col = 0; col < N; col++) {
				if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1)
					& (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05))
					ui[row * N + col] = 1.0;
			}
		}
	}
	void compute(){
		for (int m = 1; m < timeSteps; m++) {
			evolveTimestep();
			std::copy(u, u + N * N, ui);
		}
	}
};

Omp

class OMPDiffuser : public IDiffuser {
protected:
	void evolveTimestep(){
		#pragma omp parallel for
		for (int row = 1; row < N - 1; row++) {
			for (int col = 1; col < N - 1; col++) {
				float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta;
				float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta;

				u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy);
			}
		}
	}
public:
	OMPDiffuser(int _N, int _T) : IDiffuser(_N, _T) {}
	void init(){
		#pragma omp parallel for
		for (int row = 0; row < N; row++) {
			for (int col = 0; col < N; col++) {
				if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1)
					& (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05))
					ui[row * N + col] = 1.0;
			}
		}
	}
	void compute(){
		for (int m = 1; m < timeSteps; m++) {
			evolveTimestep();
			std::copy(u, u + N * N, ui);
		}
	}
};

Cilk

class CilkDiffuser : public IDiffuser {
protected:
	void evolveTimestep(){
		cilk_for(int row = 1; row < N - 1; row++) {
			for (int col = 1; col < N - 1; col++) {
				float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta;
				float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta;

				u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy);
			}
		}
	}
public:
	CilkDiffuser(int _N, int _T) : IDiffuser(_N, _T) {}
	void init(){
		cilk_for(int row = 0; row < N; row++) {
			for (int col = 0; col < N; col++) {
				if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1)
					& (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05))
					ui[row * N + col] = 1.0;
			}
		}
	}
	void compute(){
		cilk_for(int m = 1; m < timeSteps; m++) {
			evolveTimestep();
			u[0:N*N] = ui[0:N*N];
		}
	}
};

TBB

class TBBEvolve {
	float* u;
	float* ui;
	float delta, deltaT;
	const float diff = 0.5;
	int N;
public:
	TBBEvolve(float* _u, float* _ui, float d, float dt, float n) : u(_u), ui(_ui), delta(d), deltaT(dt), N(n) {}
	void operator()(tbb::blocked_range2d<int> r) const{
		for (int row = r.rows().begin(); row < r.rows().end(); row++) {
			#pragma simd
			for (int col = r.cols().begin(); col < r.cols().end(); col++) {
				float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta;
				float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta;

				u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy);
			}
		}
	}
};

class TBBDiffuser : public IDiffuser {
protected:
	void evolveTimestep(){

	}
public:
	TBBDiffuser(int _N, int _T) : IDiffuser(_N, _T) {}
	void init(){
		for (int row = 0; row < N; row++) {
			for (int col = 0; col < N; col++) {
				if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1)
					& (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05))
					ui[row * N + col] = 1.0;
			}
		}
	}
	void compute(){
		for (int m = 1; m < timeSteps; m++) {
			tbb::blocked_range2d<int> r(1, N - 1, 1, N - 1);
			tbb::parallel_for(r, TBBEvolve(u, ui, delta, deltaT, N));
		}
	}
};

Results

We found that all of the parallelization methods performed very similarly. We also tested a CUDA version and found it to be the fastest.

Difference between revisions of "Team failure"

Latest revision as of 21:20, 14 April 2016

Contents

Team Failure

Team Members

Assignment

Serial

Omp

Cilk

TBB

Results

Navigation menu

Personal tools

Namespaces

Variants

Views

More

Search

Navigation

get involved with CDOT

courses

course projects

links

Tools

@@ Line 2: / Line 2: @@
 = Team Failure =
 == Team Members ==
-# [mailto:jmiannandrea@senecacollege.ca?subject=gpu John Iannandrea], Some responsibility
+# [mailto:jmiannandrea@senecacollege.ca?subject=gpu John Iannandrea], TBB Heat diffusion
-# [mailto:@senecacollege.ca?subject=gpu Colin Campbell], Some other responsibility
+# [mailto:@senecacollege.ca?subject=gpu Colin Campbell], OpenMP Heat diffusion
-# ...
+# [mailto:mlucic3@senecacollege.ca?subject=gpu Mateya Lucic], Cilk Plus Heat diffusion
-[mailto:jmiannandrea@senecacollege.ca,@senecacollege.ca?subject=GPU Email All]
+[mailto:jmiannandrea@senecacollege.ca,mlucic3@senecacollege.ca?subject=GPU Email All]
-== Progress ==
+== Assignment ==
-=== Assignment 1 ===
+Our assignment was to implement OMP, TBB, and Cilk Plus versions of a 2d diffusion algorithm.
-=== Assignment 2 ===
-=== Assignment 3 ===
+=== Serial ===
+This is the serial version of the code we have parallelized
+<pre>class SerialDiffuser : public IDiffuser {
+protected:
+	void evolveTimestep(){
+		for (int row = 1; row < N - 1; row++) {
+			for (int col = 1; col < N - 1; col++) {
+				float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta;
+				float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta;
+				u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy);
+			}
+		}
+	}
+public:
+	SerialDiffuser(int _N, int _T) : IDiffuser(_N, _T) {}
+	void init(){
+		for (int row = 0; row < N; row++) {
+			for (int col = 0; col < N; col++) {
+				if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1)
+					& (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05))
+					ui[row * N + col] = 1.0;
+			}
+		}
+	}
+	void compute(){
+		for (int m = 1; m < timeSteps; m++) {
+			evolveTimestep();
+			std::copy(u, u + N * N, ui);
+		}
+	}
+};</pre>
+==== Omp ====
+<pre>class OMPDiffuser : public IDiffuser {
+protected:
+	void evolveTimestep(){
+		#pragma omp parallel for
+		for (int row = 1; row < N - 1; row++) {
+			for (int col = 1; col < N - 1; col++) {
+				float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta;
+				float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta;
+				u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy);
+			}
+		}
+	}
+public:
+	OMPDiffuser(int _N, int _T) : IDiffuser(_N, _T) {}
+	void init(){
+		#pragma omp parallel for
+		for (int row = 0; row < N; row++) {
+			for (int col = 0; col < N; col++) {
+				if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1)
+					& (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05))
+					ui[row * N + col] = 1.0;
+			}
+		}
+	}
+	void compute(){
+		for (int m = 1; m < timeSteps; m++) {
+			evolveTimestep();
+			std::copy(u, u + N * N, ui);
+		}
+	}
+};</pre>
+==== Cilk ====
+<pre>
+class CilkDiffuser : public IDiffuser {
+protected:
+	void evolveTimestep(){
+		cilk_for(int row = 1; row < N - 1; row++) {
+			for (int col = 1; col < N - 1; col++) {
+				float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta;
+				float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta;
+				u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy);
+			}
+		}
+	}
+public:
+	CilkDiffuser(int _N, int _T) : IDiffuser(_N, _T) {}
+	void init(){
+		cilk_for(int row = 0; row < N; row++) {
+			for (int col = 0; col < N; col++) {
+				if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1)
+					& (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05))
+					ui[row * N + col] = 1.0;
+			}
+		}
+	}
+	void compute(){
+		cilk_for(int m = 1; m < timeSteps; m++) {
+			evolveTimestep();
+			u[0:N*N] = ui[0:N*N];
+		}
+	}
+};
+</pre>
+==== TBB ====
+<pre>class TBBEvolve {
+	float* u;
+	float* ui;
+	float delta, deltaT;
+	const float diff = 0.5;
+	int N;
+public:
+	TBBEvolve(float* _u, float* _ui, float d, float dt, float n) : u(_u), ui(_ui), delta(d), deltaT(dt), N(n) {}
+	void operator()(tbb::blocked_range2d<int> r) const{
+		for (int row = r.rows().begin(); row < r.rows().end(); row++) {
+			#pragma simd
+			for (int col = r.cols().begin(); col < r.cols().end(); col++) {
+				float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta;
+				float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta;
+				u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy);
+			}
+		}
+	}
+};
+class TBBDiffuser : public IDiffuser {
+protected:
+	void evolveTimestep(){
+	}
+public:
+	TBBDiffuser(int _N, int _T) : IDiffuser(_N, _T) {}
+	void init(){
+		for (int row = 0; row < N; row++) {
+			for (int col = 0; col < N; col++) {
+				if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1)
+					& (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05))
+					ui[row * N + col] = 1.0;
+			}
+		}
+	}
+	void compute(){
+		for (int m = 1; m < timeSteps; m++) {
+			tbb::blocked_range2d<int> r(1, N - 1, 1, N - 1);
+			tbb::parallel_for(r, TBBEvolve(u, ui, delta, deltaT, N));
+		}
+	}
+};</pre>
+=== Results ===
+What we found was that all the parallelization methods were all very similar. We also tested this with cuda and found cuda to be the fastest.
+[[Image:GeyIa97.png|640px]]
+[[Image:TP4107j.png|300px]]