Changes

← Older edit

DPS915/M-N-M

7,798 bytes added, 16:55, 12 April 2013

→‎Final version's errors, warnings and observations

</pre>

~~=== Assignment 1 ===~~

==== Muhammad Ahsan: Prime Number Generator( 1,000,000,000 primes) ====

</pre>

~~=== Assignment 1 ===~~

==== Nitin Prakash Panicker: LZW File Compression ====

</pre>

==== Source Code for LZW File Compression====

=== Assignment 2 ===

~~<h3>~~==== Source code for prime number generator we will be putting on the gpu~~</h3>~~====

<pre>

</pre>

~~<h3>~~==== Version of prime generator running on GPU~~</h3>~~====

<pre>

</pre>

~~<h3>~~ ==== Almost Final version ~~</h3>~~====

<pre>

# include <cmath> // This library enable the use of sqrt.

=== Assignment 3 ===

==== Cuda Version:First Attempt ====

<pre>

#include <climits>   // INT_MAX
#include <cmath>     // This library enables the use of sqrt.
#include <cstdio>    // getchar
#include <cstdlib>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <new>       // std::nothrow

#include <cuda_runtime.h>

//#include <times.h>

using namespace std;

// Returns the current clock() reading converted from clock ticks to
// milliseconds. Both divisions are integer divisions, so any fractional
// millisecond is dropped.
inline clock_t getMilliSecs() {
    const clock_t ticksPerMillisecond = CLOCKS_PER_SEC / 1000;
    const clock_t ticksNow = clock();
    return ticksNow / ticksPerMillisecond;
}

/**
 * Kernel: trial-division primality test, one candidate value per thread.
 *
 * Launch layout: a 1-D grid of 1-D blocks containing at least x threads in
 * total. The thread with flat index idx tests the value idx + 1, so the launch
 * covers the values 1..x. No shared memory and no barriers are used.
 *
 * prime     initial value of the per-thread primality flag; pass true.
 * number2   largest divisor to try. floor(sqrt(x)) is sufficient; a smaller
 *           value lets some composite numbers through.
 * x         number of candidates, which is also the number of ints in primes_d.
 * primes_d  device array of x ints. Slot idx receives idx + 1 when that value
 *           is prime and 0 otherwise, so every slot below x is written.
 */
__global__ void primegen(bool prime, int number2, int x, int *primes_d)
{
    // Widen before multiplying so a large grid cannot overflow the index.
    const long long flat = (long long)blockIdx.x * blockDim.x + threadIdx.x;

    // The grid is rounded up to whole blocks; surplus threads must not write.
    if (flat >= x)
    {
        return;
    }

    const int idx = (int)flat;
    const int candidate = idx + 1;

    // 1 is not a prime number.
    if (candidate < 2)
    {
        prime = false;
    }

    // A composite number always has a divisor no larger than its square root,
    // so the search stops once j exceeds candidate / j. Comparing with the
    // quotient instead of j * j avoids integer overflow. Starting at 2 also
    // removes the modulo-by-zero the thread with index 0 would otherwise hit.
    for (int j = 2; prime && j <= number2 && j <= candidate / j; j++)
    {
        if (candidate % j == 0)
        {
            prime = false;
        }
    }

    // Each thread owns exactly one slot, so there are no conflicting writes.
    primes_d[idx] = prime ? candidate : 0;
}

void primenum(long double); // Prototype: the definition follows main().

/**
 * Entry point: asks for an upper bound on standard input and prints every
 * prime number up to that bound by calling primenum().
 *
 * Returns 0 on success, or 1 when the input cannot be read as a number.
 */
int main()
{
    long double x = 0;

    cout<<"\n This program will generate all prime numbers up to the"<<"\n number you have entered below...\n";
    cout<<"\n Please enter a number: ";

    // Without this check a failed extraction would carry on silently with
    // x == 0 and report an empty list of primes.
    if (!(cin >> x))
    {
        cerr << "\n That is not a valid number.\n";
        return 1;
    }

    cout<<"\n Here are all the prime numbers up to "<<x<<".\n";

    primenum(x); // function invocation...

    return 0;
}

// Prints a failed CUDA status together with the step that produced it.
// Returns true when status is cudaSuccess and false otherwise.
static bool cudaStepOk(cudaError_t status, const char *step)
{
    if (status == cudaSuccess)
    {
        return true;
    }
    cout << step << ": " << cudaGetErrorString(status) << endl;
    return false;
}

// Runs primegen over the values 1..n with nThreads threads per block and
// copies the n results into primes_h. Returns false, after printing the
// failing step, as soon as a CUDA call fails. The device buffer is released
// on every path.
static bool runPrimegen(int n, int number2, int nThreads, int *primes_h)
{
    const size_t bytes = (size_t)n * sizeof(int);
    int *primes_d = NULL;

    if (!cudaStepOk(cudaMalloc((void**)&primes_d, bytes), "cudaMalloc"))
    {
        return false;
    }

    // Zero the buffer through the device pointer itself (not its address) so
    // that a slot the kernel does not write can never be reported as a prime.
    bool ok = cudaStepOk(cudaMemset(primes_d, 0, bytes), "cudaMemset");

    if (ok)
    {
        // Ceiling division, widened so that n + nThreads cannot overflow.
        const int blocks = (int)(((long long)n + nThreads - 1) / nThreads);
        primegen<<<blocks, nThreads>>>(true, number2, n, primes_d);
        // A launch does not return a status; configuration errors show up here.
        ok = cudaStepOk(cudaGetLastError(), "primegen launch");
    }

    if (ok)
    {
        // cudaMemcpy blocks until the kernel has finished, so errors raised
        // while the kernel was running are reported by this call.
        ok = cudaStepOk(cudaMemcpy(primes_h, primes_d, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy");
    }

    cudaFree(primes_d);
    return ok;
}

/**
 * Determines the prime numbers up to x on the GPU and prints them one per
 * line, followed by the elapsed time in milliseconds. Waits for a character
 * on standard input before returning.
 *
 * x is truncated to an int. If it exceeds INT_MAX, or the number of threads a
 * single 1-D launch can create (maxThreadsPerBlock * maxGridSize[0]), it is
 * reduced to that limit and the reduced value is printed. Values below 1
 * (and NaN) produce no primes.
 */
void primenum(long double x)
{
    clock_t start = getMilliSecs();

    // Clamp before converting: an out-of-range long double to int conversion
    // is undefined. NaN fails both comparisons and leaves n at 0.
    int n = 0;
    if (x >= (long double)INT_MAX)
    {
        n = INT_MAX;
    }
    else if (x >= 1)
    {
        n = (int)x;
    }

    int d = 0;
    cudaDeviceProp prop;
    bool ok = cudaStepOk(cudaGetDevice(&d), "cudaGetDevice")
           && cudaStepOk(cudaGetDeviceProperties(&prop, d), "cudaGetDeviceProperties");

    int nThreads = 0;
    if (ok)
    {
        nThreads = prop.maxThreadsPerBlock;

        // Computed in 64 bits: the product does not fit in an int on devices
        // whose maxGridSize[0] is close to INT_MAX.
        const long long n_max = (long long)nThreads * prop.maxGridSize[0];
        if (n > n_max)
        {
            n = (int)n_max;
            cout << "n reduced to " << n << endl;
        }
    }

    if (ok && n > 0)
    {
        // Largest divisor the kernel has to try for any value up to n.
        const int number2 = (int)floor(sqrt((long double)n));

        // Array to hold the generated primes on the host.
        int *primes_h = new (std::nothrow) int[n];
        if (primes_h == NULL)
        {
            cout << "not enough host memory for " << n << " values" << endl;
        }
        else
        {
            if (runPrimegen(n, number2, nThreads, primes_h))
            {
                // Display the primes; composite slots hold 0 and are skipped.
                for (int i = 0; i < n; i++)
                {
                    if (primes_h[i] >= 2 && primes_h[i] <= n)
                    {
                        cout << primes_h[i] << endl;
                    }
                }
            }
            delete [] primes_h;
        }
    }

    cout << "Elapsed time: " << (getMilliSecs() - start) << "ms" << endl;

    getchar();
}

</pre>

==== Conclusion: Logical Error ====

[[Image:gpuA3error.png|thumb|widthpx| ]]

The first attempt at the prime number generator ran into a logical error: it did not generate the prime numbers correctly and instead printed every number.

==== Cuda Version: Attempt Two ====

This attempt gave the run-time error "invalid argument", and the logical error still persisted.

==== Final Cuda version ====

<pre>

#include <cstdio>

#include <cstdlib>

#include <iostream>

#include <ctime>

#include <cuda_runtime.h>

using namespace std;

/**
 * Evaluates a CUDA runtime call exactly once and checks its return value.
 * If the call did not return cudaSuccess, the error string, line and file
 * are written to stderr and the application exits with status 1.
 *
 * The body is wrapped in do { } while (0) so that the macro followed by a
 * semicolon forms a single statement and is safe inside an unbraced if/else.
 * The argument is parenthesised so that any expression can be passed.
 */
#define CUDA_CHECK_RETURN(value) do {                                      \
    cudaError_t _m_cudaStat = (value);                                     \
    if (_m_cudaStat != cudaSuccess) {                                      \
        fprintf(stderr, "Error %s at line %d in file %s\n",                \
                cudaGetErrorString(_m_cudaStat), __LINE__, __FILE__);      \
        exit(1);                                                           \
    }                                                                      \
} while (0)

/**
 * Device helper: returns true when the odd value (>= 3) has no odd divisor p
 * with 3 <= p <= sqrt(value), i.e. when it is prime. The loop compares p with
 * value / p rather than p * p so the test cannot overflow.
 */
static __device__ bool hasNoOddDivisor(int value)
{
    for (int p = 3; p <= value / p; p += 2) {
        if (value % p == 0) {
            return false;
        }
    }
    return true;
}

/**
 * Kernel code to generate and detect primes.
 *
 * Element 0 of num always receives 2. Element k (k >= 1) stands for the odd
 * number 2 * k + 1 and receives that number when it is prime, or 0 when it
 * is not.
 *
 * Launch layout: a 1-D grid of 1-D blocks of any size. Each thread starts at
 * its flat index and advances by the total number of launched threads, so
 * every element below size is written exactly once by exactly one thread.
 * Because no element is shared between threads, no shared memory and no
 * barriers are needed.
 *
 * num        device array of size ints (output).
 * blockNum   number of blocks in the launch. Kept for interface
 *            compatibility; the stride is read from gridDim instead.
 * threadNum  number of threads per block. Kept for interface compatibility;
 *            the stride is read from blockDim instead.
 * size       number of elements in num. Must not exceed 2^30 so that
 *            2 * k + 1 fits in an int.
 */
__global__ void prime(int *num, int blockNum, int threadNum, int size) {
    // Widen before multiplying so large launches cannot overflow the index.
    const long long first = (long long)blockIdx.x * blockDim.x + threadIdx.x;
    const long long stride = (long long)gridDim.x * blockDim.x;

    for (long long k = first; k < size; k += stride) {
        if (k == 0) {
            // Slot 0 would stand for 1, which is not prime; it holds 2 instead.
            num[0] = 2;
            continue;
        }

        const int value = 2 * (int)k + 1;
        num[k] = hasNoOddDivisor(value) ? value : 0;
    }
}

/**
 * Entry point of the prime generator.
 *
 * argv[1] is the number of candidates n. The candidates are 2 followed by the
 * odd numbers 3, 5, ..., 2 * n - 1. Every candidate that is prime is printed
 * on its own line, followed by the time taken.
 *
 * Returns 0 on success and 1 on a usage error. A failing CUDA call exits the
 * application through CUDA_CHECK_RETURN.
 */
int main(int argc, char* argv[]) {
    if(argc != 2) {
        cout << "Incorrect no of arguments" << endl;
        return 1;
    }

    int n = atoi(argv[1]);

    // atoi returns 0 for text that is not a number. Above 2^30 candidates the
    // value 2 * k + 1 stored by the kernel would no longer fit in an int.
    if (n <= 0 || n > (1 << 30)) {
        cout << "The argument must be an integer between 1 and " << (1 << 30) << endl;
        return 1;
    }

    /**
     * Get the properties of the device in use
     */
    int d = 0;
    cudaDeviceProp prop;
    CUDA_CHECK_RETURN(cudaGetDevice(&d));
    CUDA_CHECK_RETURN(cudaGetDeviceProperties(&prop, d));

    /**
     * Launch configuration: enough blocks to give every element its own
     * thread, capped at the largest grid the device accepts.
     */
    int numberOfThreads = prop.maxThreadsPerBlock < 256 ? prop.maxThreadsPerBlock : 256;
    long long blocksWanted = ((long long)n + numberOfThreads - 1) / numberOfThreads;
    int numberOfBlocks = blocksWanted > prop.maxGridSize[0] ? prop.maxGridSize[0] : (int)blocksWanted;

    /**
     * The host copy lives on the heap: a variable-length stack array is not
     * standard C++ and overflows the stack for large n.
     */
    int *host = new int[n];
    int *device = NULL;

    /**
     * Start timer
     */
    clock_t cb, ce;
    cb = clock();

    /**
     * Allocate memory on the device
     */
    CUDA_CHECK_RETURN(cudaMalloc((void**) &device, sizeof(int) * n));

    /**
     * Call kernel with appropriate grid and thread size. A launch returns no
     * status, so configuration errors are picked up with cudaGetLastError().
     */
    prime<<<numberOfBlocks, numberOfThreads>>>(device, numberOfBlocks, numberOfThreads, n);
    CUDA_CHECK_RETURN(cudaGetLastError());

    /**
     * Copy results back to host (blocks until the kernel has finished)
     */
    CUDA_CHECK_RETURN(cudaMemcpy(host, device, sizeof(int) * n, cudaMemcpyDeviceToHost));

    /**
     * Free memory on device
     */
    CUDA_CHECK_RETURN(cudaFree(device));

    /**
     * Output values; entries set to 0 are not prime
     */
    for (int i = 0; i < n; i++)
        if (host[i] != 0)
            cout << host[i] << endl;

    /**
     * Stop timer
     */
    ce = clock();
    cout << "Prime generation - took " << double(ce - cb)/CLOCKS_PER_SEC << " seconds" << endl;

    delete[] host;
    return 0;
}

</pre>

[[Image:manualDelete.png|thumb|200px|Manual Delete Warning]]

===== Final version's errors, warnings and observations =====

* If a number over 515 is entered as the launch argument, the program will display random values at the end of the list of prime numbers

* When attempting to delete the host array manually in the program, a warning is displayed

[[Image:ManualCrash.png|thumb|200px|Manual Delete Crash]]

* The program crashes at the end if the host array is manually deleted

===== Successful run of Prime generation =====

[[Image:PrimeSuccessfulRun.png]]

Mohamed Baig

1

edit

Changes

DPS915/M-N-M

Navigation menu

Personal tools

Namespaces

Variants

Views

More

Search

Navigation

get involved with CDOT

courses

course projects

links

Tools