Revision as of 11:54, 8 March 2013

Team Carlos

Team Members

Carlos Conejo

Progress

Assignment 1

For Assignment 1, I profiled a summed-area table (SAT) program. Here is the code:

#include <iostream>

#include <cstdlib>



using namespace std;



/*
 * Allocates the rows of a square matrix.
 *
 * a    - array with room for at least `size` row pointers; entries
 *        0 .. size-1 are overwritten with newly allocated arrays of
 *        `size` floats each
 * size - number of rows and number of columns
 *
 * The elements are left uninitialized, and this function never releases
 * the rows it allocates.
 */
void createMatrice(float** a, int size){
	int row = 0;
	while(row < size){
		a[row] = new float[size];
		++row;
	}
}



/*
 * Fills every element of a size x size matrix with a pseudo-random value
 * in the range [0, 1], produced by scaling rand() by 1 / RAND_MAX.
 *
 * a    - array of `size` row pointers, each pointing at `size` floats
 * size - number of rows and number of columns
 *
 * Elements are written row by row, so the values depend on the current
 * rand() sequence (no seeding is done here).
 */
void initializeMatrice(float** a, int size){
	const float scale = 1.0 / RAND_MAX;
	for(int row = 0; row < size; ++row){
		float* cells = a[row];
		for(int col = 0; col < size; ++col)
			cells[col] = std::rand() * scale;
	}
}



/*
 * Builds the summed-area table of matrix `a` into matrix `b`.
 *
 * For every cell (i, j), b[i][j] receives the sum of a[k][m] over all rows
 * k from i to size-1 and all columns m from 0 to j.
 *
 * a    - input matrix: `size` row pointers, each to `size` floats
 * b    - output matrix with the same layout; every element is overwritten
 * size - number of rows and number of columns
 *
 * Each output cell is recomputed from scratch by walking its whole
 * rectangle, so the cost grows with the fourth power of `size`.
 */
void summarizedAreaTable(float** a, float** b, int size){
	for(int top = size - 1; top >= 0; --top){
		float* out = b[top];
		for(int right = 0; right < size; ++right){
			float total = 0.0f; // fresh accumulator for this output cell
			for(int row = top; row < size; ++row){
				const float* src = a[row];
				for(int col = 0; col <= right; ++col)
					total += src[col];
			}
			out[right] = total;
		}
	}
}



/*
 * Entry point: expects exactly one argument, the matrix dimension.
 *
 * Allocates two size x size matrices, fills the first with random values,
 * computes its summed-area table into the second, then releases both.
 *
 * Returns 0 after a successful run and after the two usage messages
 * (missing argument / too many arguments); returns 1 when the argument is
 * not a positive integer.
 */
int main(int argc, char* argv[]){
	if(argc < 2){
		std::cout << "Please provide a size" << std::endl; // when no arguments are supplied
		return 0;
	}
	if(argc > 2){
		std::cout << "Only one size is allowed" << std::endl; // when more than one argument(the program name + one or more arguments) is supplied
		return 0;
	}

	// atoi() yields 0 for non-numeric text, and a negative value would be
	// passed straight to new[]; reject both before allocating anything.
	int size = std::atoi(argv[1]);
	if(size <= 0){
		std::cout << "Size must be a positive integer" << std::endl;
		return 1;
	}

	float **a = new float*[size];
	float **b = new float*[size];

	createMatrice(a,size); // allocates the rows of matrice a
	createMatrice(b,size); // allocates the rows of matrice b
	initializeMatrice(a,size); // fills a with random values
	summarizedAreaTable(a,b,size); // Does the SAT on a and stores it on b

	// Release every row first, then the arrays of row pointers.
	for(int i = 0; i < size; i++){
		delete [] a[i];
		delete [] b[i];
	}
	delete [] a;
	delete [] b;

	std::cout << "Finished" << std::endl;
	return 0;
}


---- Profiling Results for the summarizedAreaTable() function ------



		Problem Size                          Seconds

			250                                  1.50

			500                                 25.87

			750                                173.99 

		   1000	                               658.34

   

--------------------------------------------------------------------

Assignment 2


#include <iostream>

#include <cstdlib>

#include <cuda_runtime.h>



using namespace std;



/*
 * Fills a flat size x size matrix with pseudo-random values in [0, 1],
 * produced by scaling rand() by 1 / RAND_MAX.
 *
 * a    - host array holding at least size * size floats
 * size - matrix dimension; size * size consecutive elements are written
 *
 * No seeding is done here, so the values follow the current rand() sequence.
 */
void initializeMatrix(float* a, int size){
	const float scale = 1.0 / RAND_MAX;
	int remaining = size * size;
	float* cell = a;
	while(remaining-- > 0)
		*cell++ = std::rand() * scale;
}



/*
 * Prints a flat size x size matrix to standard output.
 *
 * matrix - one-character label printed in the header line "<label> is: "
 * a      - host array holding size * size floats, one row after another
 * size   - matrix dimension
 *
 * Each row goes on its own line with a space after every element. When
 * `size` is zero or negative only the header line is printed; iterating by
 * row and column guarantees termination for any `size`.
 */
void matrixDisplay(char matrix, float* a, int size){
	std::cout << matrix << " is: " << std::endl;
	for(int row = 0; row < size; ++row){
		const float* cells = a + row * size; // first element of this row
		for(int col = 0; col < size; ++col)
			std::cout << cells[col] << " ";
		std::cout << std::endl;
	}
}



/*
 * Computes one element of the summed-area table per thread.
 *
 * A    - device pointer to the input matrix, size * size floats stored one
 *        row after another
 * B    - device pointer to the output matrix with the same layout
 * size - matrix dimension
 *
 * Launch layout: one-dimensional grid of one-dimensional blocks (only the
 * .x components are used) providing at least size * size threads in total;
 * surplus threads do nothing. No shared memory is used.
 *
 * Thread idx handles the cell at row idx / size, column idx % size and
 * stores in B[idx] the sum of A over every row from that row down to the
 * last row, and every column from 0 up to that column.
 *
 * All index arithmetic is done in int, so size * size must fit in an int.
 */
__global__ void SummedAreaTable(float* A, float* B, int size){
	int idx = blockIdx.x * blockDim.x + threadIdx.x;

	// size > 0 keeps the modulo below well defined for any argument.
	if(size > 0 && idx < (size*size)){
		int col = idx % size;      // column of this thread's cell
		int rowStart = idx - col;  // flat index of the first element of its row
		float sum = 0.0f;

		// i walks column `col` from the last row up to this thread's row;
		// j then walks that row leftwards from column `col` to column 0.
		for(int i = (size*size) - size + col; i >= rowStart; i -= size)
			for(int j = i; j >= i - col; j--)
				sum += A[j];

		B[idx] = sum;
	}
}



/* Prints the CUDA error text when status is not cudaSuccess; returns true on success. */
static bool cudaOk(cudaError_t status){
	if(status == cudaSuccess)
		return true;
	std::cout << cudaGetErrorString(status) << std::endl;
	return false;
}

/* Frees the device buffers A/B and the host buffers a/b (null pointers are ignored) and hands back `code`. */
static int releaseBuffers(float* a, float* b, float* A, float* B, int code){
	cudaFree(A);
	cudaFree(B);
	delete [] a;
	delete [] b;
	return code;
}

/*
 * Entry point: expects exactly one argument, the matrix dimension.
 *
 * Fills a size x size host matrix with random values, copies it to the
 * device, runs the SummedAreaTable kernel with one thread per element and
 * copies the result back.
 *
 * Exit codes:
 *   0 - success, or one of the two usage messages was printed
 *   1 - the size argument is not in the accepted range
 *   2 - a CUDA query, allocation or copy failed, or the matrix needs more
 *       blocks than the device allows in one launch
 *   3 - the kernel launch or its execution failed
 */
int main(int argc, char* argv[]){
	if(argc < 2){
		std::cout << "Please provide a size" << std::endl; // when no arguments are supplied
		return 0;
	}
	if(argc > 2){
		std::cout << "Only one size is allowed" << std::endl; // when more than one argument(the program name + one or more arguments) is supplied
		return 0;
	}

	// The kernel indexes with int, so size * size (plus the rounding term
	// used for the block count) must stay below INT_MAX; 46340 is
	// floor(sqrt(2^31 - 1)). atoi() yields 0 for non-numeric text.
	const int maxSize = 46340;
	int size = std::atoi(argv[1]);
	if(size <= 0 || size > maxSize){
		std::cout << "Size must be an integer between 1 and " << maxSize << std::endl;
		return 1;
	}
	int tSize = size * size;
	std::size_t bytes = (std::size_t)tSize * sizeof(float);

	/* Gets the device limits for threads per block and blocks per grid */
	int d = 0;
	cudaDeviceProp prop;
	if(!cudaOk(cudaGetDevice(&d)) || !cudaOk(cudaGetDeviceProperties(&prop, d)))
		return 2;

	/* One thread per element: a single partial block for small matrices,
	   otherwise full blocks with the count rounded up */
	int Threads = tSize < prop.maxThreadsPerBlock ? tSize : prop.maxThreadsPerBlock;
	int Blocks = (tSize + Threads - 1) / Threads;
	if(Blocks > prop.maxGridSize[0]){
		std::cout << "Matrix is too large for a single kernel launch on this device" << std::endl;
		return 2;
	}

	float *a = new float[tSize];
	float *b = new float[tSize];
	float *A = 0; // stays null until cudaMalloc succeeds, so cleanup is always safe
	float *B = 0;

	initializeMatrix(a,size); // initializes the matrix a

	/* Allocates both device matrices and copies the host matrix a into the device matrix A */
	if(!cudaOk(cudaMalloc((void**)&A, bytes)) ||
	   !cudaOk(cudaMalloc((void**)&B, bytes)) ||
	   !cudaOk(cudaMemcpy(A, a, bytes, cudaMemcpyHostToDevice)))
		return releaseBuffers(a, b, A, B, 2);

	/* Performs the SAT on the device on A and stores it on B */
	SummedAreaTable<<<Blocks,Threads>>>(A,B,size);
	// cudaGetLastError() reports a rejected launch configuration;
	// cudaDeviceSynchronize() waits for the kernel and reports execution faults.
	if(!cudaOk(cudaGetLastError()) || !cudaOk(cudaDeviceSynchronize()))
		return releaseBuffers(a, b, A, B, 3);

	/* copies device matrix B into host matrix b */
	if(!cudaOk(cudaMemcpy(b, B, bytes, cudaMemcpyDeviceToHost)))
		return releaseBuffers(a, b, A, B, 2);

	//matrixDisplay('a', a, size); //uncomment/comment to display/not display the matrix a
	//matrixDisplay('b', b, size); //uncomment/comment to display/not display the matrix b

	/* deallocates space in both host memory and device memory */
	releaseBuffers(a, b, A, B, 0);
	cudaDeviceReset();
	std::cout << "Finished" << std::endl;
	return 0;
}



Size
A1  (CPU)
A2  (GPU)

100
0.03
0.0034

200
0.61
0.0445

300
3.08
0.2124

400
9.66
0.6549

500
24.17
1.58

600
54.4
3.268

700
113.17
5.976

Difference between revisions of "Carlos"

Revision as of 11:54, 8 March 2013

Contents

Team Carlos

Team Members

Progress

Assignment 1

Assignment 2

Navigation menu

Personal tools

Namespaces

Variants

Views

More

Search

Navigation

get involved with CDOT

courses

course projects

links

Tools