Changes

Jump to: navigation, search

DPS915/M-N-M

7,798 bytes added, 16:55, 12 April 2013
Final version's errors, warnings and observations
</pre>
=== Assignment 1 ===
==== Muhammad Ahsan: Prime Number Generator( 1,000,000,000 primes) ====
</pre>
=== Assignment 1 ===
==== Nitin Prakash Panicker: LZW File Compression ====
</pre>
==== Source Code for LZW File Compression====
=== Assignment 2 ===
<h3>==== Source code for prime number generator we will be putting on the gpu</h3>====
<pre>
</pre>
<h3>==== Version of prime generator running on GPU</h3>====
<pre>
</pre>
<h3> ==== Almost Final version </h3>====
<pre>
# include <cmath> // This library enable the use of sqrt.
=== Assignment 3 ===
==== Cuda Version:First Attempt ====
 
<pre>
 
# include <cmath> // This library enable the use of sqrt.
 
# include <iostream>
 
# include <ctime>
 
#include<iomanip>
 
#include<cstdlib>
 
# include <cuda_runtime.h>
 
//#include <times.h>
 
 
using namespace std;
 
 
inline clock_t getMilliSecs() {
 
return clock() / (CLOCKS_PER_SEC / 1000);
 
}
 
 
 
__global__ void primegen(bool prime, int number2,int x,int *primes_d)
 
{
 
int c = 0;
 
int idx = blockIdx.x * blockDim.x + threadIdx.x;
 
 
for ( int i=1; i <= x; i++)
 
{
 
if( i!= idx && i%idx == 0 )
 
{
 
prime = false;
 
break;
 
}
 
 
if(prime)
 
{
 
primes_d[c]=i;
 
c += 1;
 
}
 
prime = true;
 
}
 
 
 
}
 
 
 
 
/*for (int i = 1; i <= x; i++)
 
{
 
for ( int j = 2; j <= number2; j++)
 
{
 
if ( i!=j && i % j == 0 )
 
{
 
prime = false;
 
break;
 
}
 
}
 
if (prime)
 
{
 
primes_d[c]=i;
 
c += 1;
 
}
 
prime = true;
 
 
} */
 
 
 
 
 
void primenum(long double); // Prototype...
 
 
 
 
int main()
 
{
 
long double x = 0;
 
cout<<"\n This program will generate all prime numbers up to the"<<"\n number you have entered below...\n";
 
cout<<"\n Please enter a number: ";
 
cin>> x;
 
cout<<"\n Here are all the prime numbers up to "<<x<<".\n";
 
primenum(x); //function invocation...
 
//cout<<endl<<"\nThere are "<<c
 
//<<" prime numbers less than or equal to "<<x<<".\n\n";
 
return 0;
 
}
 
 
// This function will determine the primenumbers up to num.
 
void primenum(long double x)
 
{
 
int n = x;
 
int d;
 
bool prime = true;
 
//struct tms start_time, stop_time;
 
int number2;
 
number2 =(int) floor (sqrt (x));
 
clock_t start = getMilliSecs();
 
 
cudaDeviceProp prop;
 
cudaGetDevice(&d);
 
cudaGetDeviceProperties(&prop, d);
 
int nThreads = prop.maxThreadsDim[0];
 
int n_max = nThreads * prop.maxGridSize[0];
 
if ( n> n_max) {
 
n = n_max;
 
cout << "n reduced to " << n << endl;
 
}
 
 
//Array to hold generated primes on host
 
int *primes_h = new int[(int)x];
 
 
//Device array to hold the primes on the device
 
int *primes_d = new int[(int)x];
 
 
//allocate device memory and initialize device memory
 
cudaMalloc((void**)&primes_d, (int)x * sizeof(int));
 
 
// cudaMalloc((void**)&c_d, sizeof(int));
 
cudaMemset(&primes_d,0,x * sizeof(int));
 
 
//error checking
 
cudaError_t error ;
 
 
//Kernal goes here
 
primegen<<<(n + nThreads - 1) / nThreads, nThreads>>>(prime,number2,(int)x,primes_d);
 
 
// extract error code from the kernel's execution
 
 
error = cudaGetLastError();
 
if (error != cudaSuccess) {
 
cout << cudaGetErrorString(error) << endl;
 
}
 
 
//copy the array holding primes from device to host
 
 
error =cudaMemcpy(primes_h, primes_d, ((int)x) * sizeof(int), cudaMemcpyDeviceToHost);
 
 
if (error != cudaSuccess) {
 
cout << cudaGetErrorString(error) << endl;
 
}
 
// cudaMemcpy(c_h, c_d, sizeof(int), cudaMemcpyDeviceToHost);
 
//display the primes
 
for(int i=0; i<(int)x ; i++){
 
if(primes_h[i]>=2 && primes_h[i]<=(int)x){
 
cout<<primes_h[i]<<endl;
 
}
 
}
 
cout << "Elapsed time: " << (getMilliSecs() - start) << "ms" << endl;
 
// cout<< "time: "<< (stop_s-start_s)/double(CLOCKS_PER_SEC)<<endl;
 
//free allocated memory
 
 
delete [] primes_h;
 
cudaFree(primes_d);
 
 
getchar();
 
}
 
</pre>
==== Conclusion: Logical Error ====
 
[[Image:gpuA3error.png|thumb|widthpx| ]]
 
The prime number generated seems to have run into some logical error. It does not generate the prime numbers correctly. Instead spits out all numbers.
 
==== Cuda Version: Attempt Two ====
Gives a run time error "invalid argument". Logical error still persists.
 
==== Final Cuda version ====
<pre>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <ctime>
#include <cuda_runtime.h>
 
using namespace std;
 
/**
* This macro checks return value of the CUDA runtime call and exits
* the application if the call failed.
*/
#define CUDA_CHECK_RETURN(value) { \
cudaError_t _m_cudaStat = value; \
if (_m_cudaStat != cudaSuccess) { \
fprintf(stderr, "Error %s at line %d in file %s\n", \
cudaGetErrorString(_m_cudaStat), __LINE__, __FILE__); \
exit(1); \
} }
 
/**
* Kernel code to generate and detect primes
*/
__global__ void prime(int *num, int blockNum, int threadNum, int size) {
const int tid = blockIdx.x * blockDim.x + threadIdx.x;
const int bid = blockIdx.y * blockDim.y + threadIdx.y;
__syncthreads();
 
/**
* Generate prime numbers and store them in the array.
* The first element is always 2
*/
if(tid == 0) {
num[tid] = 2;
} else {
num[tid] = 2 * tid + 1;
}
 
int tmp = bid * threadNum + tid;
 
int step1 = 2 * tmp + 3;
int step2 = tmp + 1;
 
while(tmp < size) {
int i = 1;
/**
* Check if an element is not prime, if it isn't set it to 0.
*/
while((step1 * i + step2) < size) {
num[step1 * i + step2] = 0;
i++;
}
tmp += blockNum * threadNum;
__syncthreads();
}
}
 
int main(int argc, char* argv[]) {
if(argc != 2) {
cout << "Incorrect no of arguments" << endl;
return 1;
}
int n = atoi(argv[1]);
 
/**
* variable declarations
*/
int *device;
int host[n];
int d;
cudaDeviceProp prop;
 
/**
* Get the properties of the device in use
*/
cudaGetDevice(&d);
cudaGetDeviceProperties(&prop, d);
int numberOfBlocks = 8;
int maxThreadsPerBlock = prop.maxThreadsPerBlock;
int numberOfThreads = maxThreadsPerBlock/numberOfBlocks;
 
/**
* Start timer
*/
clock_t cb, ce;
cb = clock();
 
/**
* Allocate memory on the device
*/
CUDA_CHECK_RETURN(cudaMalloc((void**) &device, sizeof(int) * n));
 
/**
* Call kernel with appropriate grid and thread size
*/
prime<<<numberOfBlocks, numberOfThreads>>>(device, numberOfBlocks, numberOfThreads, n);
 
/**
* Copy results back to host
*/
CUDA_CHECK_RETURN(cudaMemcpy(&host, device, sizeof(int) * n, cudaMemcpyDeviceToHost));
 
/**
* Free memory on device
*/
CUDA_CHECK_RETURN(cudaFree(device));
 
/**
* Output values
*/
for (int i = 0; i < n; i++)
if (host[i] != 0)
cout << host[i] << endl;
 
/**
* Stop timer
*/
ce = clock();
cout << "Prime generation - took " << double(ce - cb)/CLOCKS_PER_SEC << " seconds" << endl;
}
</pre>
[[Image:manualDelete.png|thumb|200px|Manual Delete Warning]]
===== Final version's errors, warnings and observations =====
* If a number over 515 is entered as the launch argument, the program will display random values at the end of the list of prime numbers
* When attempting to delete the host array manually in the program, a warning is displayed
[[Image:ManualCrash.png|thumb|200px|Manual Delete Crash]]
* The program crashes at the end if the host array is manually deleted
 
===== Successful run of Prime generation =====
[[Image:PrimeSuccessfulRun.png]]

Navigation menu