Difference between revisions of "Team failure"

From CDOT Wiki
Jump to: navigation, search
(Team Members)
(Team Members)
 
(4 intermediate revisions by the same user not shown)
Line 4: Line 4:
 
# [mailto:jmiannandrea@senecacollege.ca?subject=gpu John Iannandrea], TBB Heat diffusion
 
# [mailto:jmiannandrea@senecacollege.ca?subject=gpu John Iannandrea], TBB Heat diffusion
 
# [mailto:@senecacollege.ca?subject=gpu Colin Campbell], OpenMP Heat diffusion
 
# [mailto:@senecacollege.ca?subject=gpu Colin Campbell], OpenMP Heat diffusion
# [mailto:@senecacollege.ca?subject=gpu Mateya Lucic], Cilk Plus Heat diffusion
+
# [mailto:mlucic3@senecacollege.ca?subject=gpu Mateya Lucic], Cilk Plus Heat diffusion
 
[mailto:jmiannandrea@senecacollege.ca,mlucic3@senecacollege.ca?subject=GPU Email All]
 
[mailto:jmiannandrea@senecacollege.ca,mlucic3@senecacollege.ca?subject=GPU Email All]
  
== Progress ==
+
== Assignment ==
=== Assignment 1 ===
+
Our assignment was to implement OpenMP, TBB, and Cilk Plus versions of a 2D diffusion algorithm.
=== Assignment 2 ===
+
 
=== Assignment 3 ===
+
=== Serial ===
 +
This is the serial version of the code that we parallelized.
 +
 
 +
 
 +
<pre>class SerialDiffuser : public IDiffuser {
 +
protected:
 +
void evolveTimestep(){
 +
for (int row = 1; row < N - 1; row++) {
 +
for (int col = 1; col < N - 1; col++) {
 +
float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta;
 +
float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta;
 +
 
 +
u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy);
 +
}
 +
}
 +
}
 +
public:
 +
SerialDiffuser(int _N, int _T) : IDiffuser(_N, _T) {}
 +
void init(){
 +
for (int row = 0; row < N; row++) {
 +
for (int col = 0; col < N; col++) {
 +
if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1)
 +
& (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05))
 +
ui[row * N + col] = 1.0;
 +
}
 +
}
 +
}
 +
void compute(){
 +
for (int m = 1; m < timeSteps; m++) {
 +
evolveTimestep();
 +
std::copy(u, u + N * N, ui);
 +
}
 +
}
 +
};</pre>
 +
 
 +
==== Omp ====
 +
 
 +
 
 +
<pre>class OMPDiffuser : public IDiffuser {
 +
protected:
 +
void evolveTimestep(){
 +
#pragma omp parallel for
 +
for (int row = 1; row < N - 1; row++) {
 +
for (int col = 1; col < N - 1; col++) {
 +
float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta;
 +
float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta;
 +
 
 +
u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy);
 +
}
 +
}
 +
}
 +
public:
 +
OMPDiffuser(int _N, int _T) : IDiffuser(_N, _T) {}
 +
void init(){
 +
#pragma omp parallel for
 +
for (int row = 0; row < N; row++) {
 +
for (int col = 0; col < N; col++) {
 +
if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1)
 +
& (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05))
 +
ui[row * N + col] = 1.0;
 +
}
 +
}
 +
}
 +
void compute(){
 +
for (int m = 1; m < timeSteps; m++) {
 +
evolveTimestep();
 +
std::copy(u, u + N * N, ui);
 +
}
 +
}
 +
};</pre>
 +
 
 +
==== Cilk ====
 +
 
 +
<pre>
 +
class CilkDiffuser : public IDiffuser {
 +
protected:
 +
void evolveTimestep(){
 +
cilk_for(int row = 1; row < N - 1; row++) {
 +
for (int col = 1; col < N - 1; col++) {
 +
float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta;
 +
float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta;
 +
 
 +
u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy);
 +
}
 +
}
 +
}
 +
public:
 +
CilkDiffuser(int _N, int _T) : IDiffuser(_N, _T) {}
 +
void init(){
 +
cilk_for(int row = 0; row < N; row++) {
 +
for (int col = 0; col < N; col++) {
 +
if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1)
 +
& (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05))
 +
ui[row * N + col] = 1.0;
 +
}
 +
}
 +
}
 +
void compute(){
 +
cilk_for(int m = 1; m < timeSteps; m++) {
 +
evolveTimestep();
 +
u[0:N*N] = ui[0:N*N];
 +
}
 +
}
 +
};
 +
</pre>
 +
 
 +
==== TBB ====
 +
 
 +
 
 +
<pre>class TBBEvolve {
 +
float* u;
 +
float* ui;
 +
float delta, deltaT;
 +
const float diff = 0.5;
 +
int N;
 +
public:
 +
TBBEvolve(float* _u, float* _ui, float d, float dt, float n) : u(_u), ui(_ui), delta(d), deltaT(dt), N(n) {}
 +
void operator()(tbb::blocked_range2d<int> r) const{
 +
for (int row = r.rows().begin(); row < r.rows().end(); row++) {
 +
#pragma simd
 +
for (int col = r.cols().begin(); col < r.cols().end(); col++) {
 +
float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta;
 +
float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta;
 +
 
 +
u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy);
 +
}
 +
}
 +
}
 +
};
 +
 
 +
class TBBDiffuser : public IDiffuser {
 +
protected:
 +
void evolveTimestep(){
 +
 
 +
}
 +
public:
 +
TBBDiffuser(int _N, int _T) : IDiffuser(_N, _T) {}
 +
void init(){
 +
for (int row = 0; row < N; row++) {
 +
for (int col = 0; col < N; col++) {
 +
if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1)
 +
& (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05))
 +
ui[row * N + col] = 1.0;
 +
}
 +
}
 +
}
 +
void compute(){
 +
for (int m = 1; m < timeSteps; m++) {
 +
tbb::blocked_range2d<int> r(1, N - 1, 1, N - 1);
 +
tbb::parallel_for(r, TBBEvolve(u, ui, delta, deltaT, N));
 +
}
 +
}
 +
};</pre>
 +
 
 +
 
 +
=== Results ===
 +
 
 +
We found that all of the parallelization methods performed very similarly. We also tested a CUDA version and found it to be the fastest.
 +
 
 +
[[Image:GeyIa97.png|640px]]
 +
 
 +
[[Image:TP4107j.png|300px]]

Latest revision as of 21:20, 14 April 2016


GPU621/DPS921 | Participants | Groups and Projects | Resources | Glossary

Team Failure

Team Members

  1. John Iannandrea, TBB Heat diffusion
  2. Colin Campbell, OpenMP Heat diffusion
  3. Mateya Lucic, Cilk Plus Heat diffusion

Email All

Assignment

Our assignment was to implement OpenMP, TBB, and Cilk Plus versions of a 2D diffusion algorithm.

Serial

This is the serial version of the code that we parallelized.


class SerialDiffuser : public IDiffuser {
protected:
	void evolveTimestep(){
		for (int row = 1; row < N - 1; row++) {
			for (int col = 1; col < N - 1; col++) {
				float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta;
				float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta;

				u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy);
			}
		}
	}
public:
	SerialDiffuser(int _N, int _T) : IDiffuser(_N, _T) {}
	void init(){
		for (int row = 0; row < N; row++) {
			for (int col = 0; col < N; col++) {
				if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1)
					& (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05))
					ui[row * N + col] = 1.0;
			}
		}
	}
	void compute(){
		for (int m = 1; m < timeSteps; m++) {
			evolveTimestep();
			std::copy(u, u + N * N, ui);
		}
	}
};

Omp

class OMPDiffuser : public IDiffuser {
protected:
	void evolveTimestep(){
		#pragma omp parallel for
		for (int row = 1; row < N - 1; row++) {
			for (int col = 1; col < N - 1; col++) {
				float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta;
				float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta;

				u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy);
			}
		}
	}
public:
	OMPDiffuser(int _N, int _T) : IDiffuser(_N, _T) {}
	void init(){
		#pragma omp parallel for
		for (int row = 0; row < N; row++) {
			for (int col = 0; col < N; col++) {
				if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1)
					& (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05))
					ui[row * N + col] = 1.0;
			}
		}
	}
	void compute(){
		for (int m = 1; m < timeSteps; m++) {
			evolveTimestep();
			std::copy(u, u + N * N, ui);
		}
	}
};

Cilk

class CilkDiffuser : public IDiffuser {
protected:
	void evolveTimestep(){
		cilk_for(int row = 1; row < N - 1; row++) {
			for (int col = 1; col < N - 1; col++) {
				float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta;
				float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta;

				u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy);
			}
		}
	}
public:
	CilkDiffuser(int _N, int _T) : IDiffuser(_N, _T) {}
	void init(){
		cilk_for(int row = 0; row < N; row++) {
			for (int col = 0; col < N; col++) {
				if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1)
					& (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05))
					ui[row * N + col] = 1.0;
			}
		}
	}
	void compute(){
		cilk_for(int m = 1; m < timeSteps; m++) {
			evolveTimestep();
			u[0:N*N] = ui[0:N*N];
		}
	}
};

TBB

class TBBEvolve {
	float* u;
	float* ui;
	float delta, deltaT;
	const float diff = 0.5;
	int N;
public:
	TBBEvolve(float* _u, float* _ui, float d, float dt, float n) : u(_u), ui(_ui), delta(d), deltaT(dt), N(n) {}
	void operator()(tbb::blocked_range2d<int> r) const{
		for (int row = r.rows().begin(); row < r.rows().end(); row++) {
			#pragma simd
			for (int col = r.cols().begin(); col < r.cols().end(); col++) {
				float uxx = (ui[(row + 1) * N + col] - (2 * ui[row * N + col]) + ui[(row - 1) * N + col]) / delta;
				float uyy = (ui[row * N + (col + 1)] - (2 * ui[row * N + col]) + ui[row * N + (col - 1)]) / delta;

				u[row * N + col] = ui[row * N + col] + deltaT * diff * (uxx + uyy);
			}
		}
	}
};

class TBBDiffuser : public IDiffuser {
protected:
	void evolveTimestep(){

	}
public:
	TBBDiffuser(int _N, int _T) : IDiffuser(_N, _T) {}
	void init(){
		for (int row = 0; row < N; row++) {
			for (int col = 0; col < N; col++) {
				if ((pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) <= 0.1)
					& (pow(row * dir - 0.5, 2) + pow(col * dir - 0.5, 2) >= 0.05))
					ui[row * N + col] = 1.0;
			}
		}
	}
	void compute(){
		for (int m = 1; m < timeSteps; m++) {
			tbb::blocked_range2d<int> r(1, N - 1, 1, N - 1);
			tbb::parallel_for(r, TBBEvolve(u, ui, delta, deltaT, N));
		}
	}
};


Results

We found that all of the parallelization methods performed very similarly. We also tested a CUDA version and found it to be the fastest.

GeyIa97.png

TP4107j.png