Changes

Jump to: navigation, search

DPS915/M-N-M

10,881 bytes added, 16:55, 12 April 2013
Final version's errors, warnings and observations
</pre>
=== Assignment 1 ===
==== Muhammad Ahsan: Prime Number Generator (1,000,000,000 primes) ====
</pre>
=== Assignment 1 ===
==== Nitin Prakash Panicker: LZW File Compression ====
</pre>
==== Source Code for LZW File Compression====
=== Assignment 2 ===
==== Source code for prime number generator we will be putting on the GPU ====
<pre>
</pre>
==== Version of prime generator running on GPU ====
<pre>
</pre>
==== Almost Final version ====
<pre>
#include <climits>   // INT_MAX, used to validate the requested upper bound
#include <cmath>     // sqrt and floor
#include <iostream>
#include <ctime>
#include <iomanip>
#include <cstdlib>
#include <cuda_runtime.h>
//#include <times.h>

using namespace std;

/**
 * Returns the processor time consumed by the program so far, in milliseconds.
 *
 * The scaling multiplies before dividing. Dividing by
 * (CLOCKS_PER_SEC / 1000) would divide by zero when CLOCKS_PER_SEC < 1000
 * and lose precision when it is not a multiple of 1000.
 */
inline clock_t getMilliSecs() {
    return (clock_t)((double)clock() * 1000.0 / CLOCKS_PER_SEC);
}

/**
 * Serial trial-division prime generator that runs on the device.
 *
 * Launch layout: intended for <<<1,1>>>. The algorithm is serial, so if more
 * threads are launched only global thread 0 does any work; the others return
 * immediately instead of racing on the same output slots.
 *
 * @param prime     Kept for interface compatibility; used only as the
 *                  per-candidate "still prime" flag and reset for every
 *                  candidate, so the value passed in does not matter.
 * @param number2   Largest divisor to try; callers pass floor(sqrt(x)).
 * @param x         Inclusive upper bound of the candidates tested.
 * @param primes_d  Device array with room for at least x ints. The primes
 *                  found are stored contiguously from index 0; entries past
 *                  the last prime are not written.
 */
__global__ void primegen(bool prime, int number2, int x, int *primes_d)
{
    if (blockIdx.x * blockDim.x + threadIdx.x != 0)
        return;

    int c = 0;  // number of primes stored so far

    // Start at 2: 1 is not a prime and must not be recorded as one.
    for (int i = 2; i <= x; i++)
    {
        prime = true;

        // "j <= i / j" is "j * j <= i" without the risk of int overflow.
        // A composite i always has a divisor in that range, and j can never
        // equal i there, so no separate i != j test is needed.
        for (int j = 2; j <= number2 && j <= i / j; j++)
        {
            if (i % j == 0)
            {
                prime = false;
                break;
            }
        }

        if (prime)
        {
            primes_d[c] = i;
            c += 1;
        }
    }
}

void primenum(long double); // Prototype...

/**
 * Prompts for an upper bound, validates it and prints every prime up to it.
 *
 * @return 0 on success, 1 when the input is not a usable number.
 */
int main()
{
    long double x = 0;

    cout<<"\n This program will generate all prime numbers up to the"<<"\n number you have entered below...\n";
    cout<<"\n Please enter a number: ";

    // Reject unreadable input, NaN, negative values and anything that would
    // not survive the (int) conversions performed further down the line.
    if (!(cin >> x) || !(x >= 0) || x > INT_MAX)
    {
        cout<<"\n Please enter a non-negative number no larger than "<<INT_MAX<<".\n";
        return 1;
    }

    cout<<"\n Here are all the prime numbers up to "<<x<<".\n";
    primenum(x); //function invocation...
    //cout<<endl<<"\nThere are "<<c
    //<<" prime numbers less than or equal to "<<x<<".\n\n";
    return 0;
}

// This function will determine the prime numbers up to x.
/**
 * Generates the primes up to x on the GPU, prints them one per line and then
 * reports the elapsed processor time.
 *
 * @param x  Inclusive upper bound. Values below 2 produce no primes and skip
 *           the GPU work entirely (this also keeps a negative value away
 *           from new[] and sqrt).
 *
 * Every CUDA call is checked; the first failure is printed once and the
 * results are not displayed, since the host buffer would be uninitialised.
 */
void primenum(long double x)
{
    const clock_t start = getMilliSecs();
    const int limit = (int)x;

    if (limit >= 2)
    {
        const int number2 = (int) floor(sqrt(x));
        const size_t bytes = (size_t)limit * sizeof(int);

        // Array to hold generated primes on host
        int *primes_h = new int[limit];

        // Device array to hold the primes on the device. It is obtained from
        // cudaMalloc only; allocating it with new[] first would leak that
        // host block as soon as cudaMalloc overwrote the pointer.
        int *primes_d = NULL;

        cudaError_t error = cudaMalloc((void**)&primes_d, bytes);

        // Zero the buffer so that slots the kernel does not fill read as 0.
        // cudaMemset takes the device pointer itself, not its address.
        if (error == cudaSuccess)
            error = cudaMemset(primes_d, 0, bytes);

        if (error == cudaSuccess)
        {
            primegen<<<1,1>>>(true, number2, limit, primes_d);
            // extract error code from the kernel's launch
            error = cudaGetLastError();
        }

        // Copy the primes back; this blocking copy also waits for the kernel
        // and reports any error raised while it was executing.
        if (error == cudaSuccess)
            error = cudaMemcpy(primes_h, primes_d, bytes, cudaMemcpyDeviceToHost);

        if (error == cudaSuccess)
        {
            // display the primes
            for (int i = 0; i < limit; i++)
            {
                if (primes_h[i] >= 2 && primes_h[i] <= limit)
                    cout << primes_h[i] << endl;
            }
        }
        else
        {
            cout << cudaGetErrorString(error) << endl;
        }

        // free allocated memory (cudaFree accepts a null pointer)
        delete [] primes_h;
        cudaFree(primes_d);
    }

    cout << "Elapsed time: " << (getMilliSecs() - start) << "ms" << endl;

    getchar();
}
</pre>
=== Assignment 3 ===
==== Cuda Version: First Attempt ====
<pre>
 # include <cmath> // Math library: enables the use of sqrt.
 # include <iostream>
 # include <ctime>
 #include<iomanip>
 #include<cstdlib>
 # include <cuda_runtime.h>
 //#include <times.h>

 using namespace std;

 // Returns clock() scaled by (CLOCKS_PER_SEC / 1000), i.e. processor time in
 // milliseconds when CLOCKS_PER_SEC is a multiple of 1000.
 inline clock_t getMilliSecs() {
     return clock() / (CLOCKS_PER_SEC / 1000);
 }

 // First attempt at spreading the divisor test over threads: each thread
 // takes its global index idx as the single divisor to try.
 //
 //   prime     - flag passed by value; every thread works on its own copy
 //   number2   - not used by this kernel (only by the commented-out loop below)
 //   x         - inclusive upper bound of the candidates i
 //   primes_d  - device output array, indexed with the thread-local counter c
 //
 // NOTE(review): as written this cannot produce the list of primes:
 //   * c is a per-thread local that starts at 0, so every thread writes to
 //     primes_d[0], primes_d[1], ... - the same slots as every other thread,
 //     with no synchronisation between them.
 //   * The break leaves the whole for loop, so a thread stops at the first i
 //     (other than idx itself) that idx divides, which is i == 2 * idx.
 //     Until then it stores every i it visits. Any thread with 2 * idx > x
 //     therefore stores 1, 2, ..., x in order.
 //   * For the thread with idx == 0 the expression i % idx is a modulo by zero.
 __global__ void primegen(bool prime, int number2,int x,int *primes_d)
 {
     int c = 0;
     int idx = blockIdx.x * blockDim.x + threadIdx.x;

     for ( int i=1; i <= x; i++)
     {
         // Leaves the loop (not just this iteration) when idx divides i.
         if( i!= idx && i%idx == 0 )
         {
             prime = false;
             break;
         }

         // prime is only ever false on the path that breaks above, so apart
         // from the value passed in for the first iteration this is always taken.
         if(prime)
         {
             primes_d[c]=i;
             c += 1;
         }
         prime = true;
     }


 }

 // Serial loop nest kept for reference: it tests each i against every
 // divisor j in 2..number2.
 /*for (int i = 1; i <= x; i++)
 {
 for ( int j = 2; j <= number2; j++)
 {
 if ( i!=j && i % j == 0 )
 {
 prime = false;
 break;
 }
 }
 if (prime)
 {
 primes_d[c]=i;
 c += 1;
 }
 prime = true;

 } */

 void primenum(long double); // Prototype...

 // Reads the upper bound x from standard input, prints a banner and hands x
 // to primenum(). The value read is not validated.
 int main()
 {
     long double x = 0;
     cout<<"\n This program will generate all prime numbers up to the"<<"\n number you have entered below...\n";
     cout<<"\n Please enter a number: ";
     cin>> x;
     cout<<"\n Here are all the prime numbers up to "<<x<<".\n";
     primenum(x); //function invocation...
     //cout<<endl<<"\nThere are "<<c
     //<<" prime numbers less than or equal to "<<x<<".\n\n";
     return 0;
 }

 // This function will determine the prime numbers up to x.
/**
 * Host driver for the multi-threaded primegen kernel: sizes the launch from
 * the device limits, runs the kernel over the candidates up to x, prints
 * every returned value in [2, x] and reports the elapsed processor time.
 *
 * @param x  Inclusive upper bound. Values below 2 skip the GPU work entirely
 *           (this also keeps a negative value away from new[] and sqrt).
 *
 * Every CUDA call is checked; the first failure is printed once and the
 * results are not displayed, since the host buffer would be uninitialised.
 */
void primenum(long double x)
{
    const clock_t start = getMilliSecs();
    const int limit = (int)x;

    if (limit >= 2)
    {
        const int number2 = (int) floor(sqrt(x));
        const size_t bytes = (size_t)limit * sizeof(int);

        int d = 0;
        cudaDeviceProp prop;
        cudaError_t error = cudaGetDevice(&d);
        if (error == cudaSuccess)
            error = cudaGetDeviceProperties(&prop, d);

        // Array to hold generated primes on host
        int *primes_h = new int[limit];

        // Device array to hold the primes on the device. It is obtained from
        // cudaMalloc only; allocating it with new[] first would leak that
        // host block as soon as cudaMalloc overwrote the pointer.
        int *primes_d = NULL;

        if (error == cudaSuccess)
            error = cudaMalloc((void**)&primes_d, bytes);

        // cudaMemset takes the device pointer itself, not its address.
        if (error == cudaSuccess)
            error = cudaMemset(primes_d, 0, bytes);

        if (error == cudaSuccess)
        {
            // A 1-D block is limited both by the x-dimension limit and by the
            // total threads-per-block limit; honour the smaller of the two.
            const int nThreads = prop.maxThreadsDim[0] < prop.maxThreadsPerBlock
                               ? prop.maxThreadsDim[0]
                               : prop.maxThreadsPerBlock;

            // 64-bit arithmetic: nThreads * maxGridSize[0] does not fit in an
            // int on devices whose grid limit is 2^31 - 1.
            const long long n_max = (long long)nThreads * prop.maxGridSize[0];
            long long n = limit;
            if (n > n_max)
            {
                n = n_max;
                cout << "n reduced to " << n << endl;
            }
            const unsigned int nBlocks = (unsigned int)((n + nThreads - 1) / nThreads);

            // Kernel goes here
            primegen<<<nBlocks, nThreads>>>(true, number2, limit, primes_d);

            // extract error code from the kernel's launch
            error = cudaGetLastError();
        }

        // Copy the results back; this blocking copy also waits for the kernel
        // and reports any error raised while it was executing.
        if (error == cudaSuccess)
            error = cudaMemcpy(primes_h, primes_d, bytes, cudaMemcpyDeviceToHost);

        if (error == cudaSuccess)
        {
            // display the returned values
            for (int i = 0; i < limit; i++)
            {
                if (primes_h[i] >= 2 && primes_h[i] <= limit)
                    cout << primes_h[i] << endl;
            }
        }
        else
        {
            cout << cudaGetErrorString(error) << endl;
        }

        // free allocated memory (cudaFree accepts a null pointer)
        delete [] primes_h;
        cudaFree(primes_d);
    }

    cout << "Elapsed time: " << (getMilliSecs() - start) << "ms" << endl;

    getchar();
}
</pre>
==== Conclusion: Logical Error ====
[[Image:gpuA3error.png|thumb|widthpx| ]]
The prime number generator seems to have run into a logical error. It does not generate the prime numbers correctly; instead it spits out all numbers.
==== Cuda Version: Attempt Two ====
Gives a run time error "invalid argument". Logical error still persists.
==== Final Cuda version ====
<pre>
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <ctime>
#include <vector>
#include <cuda_runtime.h>

using namespace std;

/**
 * This macro checks return value of the CUDA runtime call and exits
 * the application if the call failed.
 * Wrapped in do/while(0) so it behaves as a single statement after if/else.
 */
#define CUDA_CHECK_RETURN(value) do {                                   \
    cudaError_t _m_cudaStat = (value);                                  \
    if (_m_cudaStat != cudaSuccess) {                                   \
        fprintf(stderr, "Error %s at line %d in file %s\n",             \
                cudaGetErrorString(_m_cudaStat), __LINE__, __FILE__);   \
        exit(1);                                                        \
    } } while (0)

/**
 * Kernel code to generate the candidates.
 *
 * Fills num[0] with 2 and num[k] with the odd number 2 * k + 1 for every
 * other k < size. Uses a grid-stride loop, so any 1-D launch configuration
 * covers the whole array and no thread writes out of bounds.
 *
 * Precondition: 2 * size + 1 fits in an int.
 */
__global__ void initCandidates(int *num, int size) {
    const long long stride = (long long)gridDim.x * blockDim.x;
    for (long long k = (long long)blockIdx.x * blockDim.x + threadIdx.x;
         k < size; k += stride) {
        num[k] = (k == 0) ? 2 : (int)(2 * k + 1);
    }
}

/**
 * Kernel code to detect primes: sets every composite candidate to 0.
 *
 * Precondition: num was filled by initCandidates in an EARLIER launch on the
 * same stream. The launch boundary is what orders all initialising writes
 * before any zeroing write; doing both in one launch races across blocks.
 *
 * Index b (b >= 1) stands for the odd number p = 2 * b + 1. The odd multiples
 * of p sit p slots apart, and the first one not already struck by a smaller
 * base is p * p at index 2 * b * (b + 1).
 *
 * Works for any 1-D launch configuration (grid-stride over the bases).
 * Several threads may store 0 to the same slot; every such store writes the
 * same value, so the result does not depend on their order.
 *
 * @param num       Device array of size candidates.
 * @param blockNum  Unused; kept for interface compatibility (the stride is
 *                  taken from gridDim / blockDim so it always matches the
 *                  actual launch).
 * @param threadNum Unused; kept for interface compatibility.
 * @param size      Number of elements in num.
 */
__global__ void prime(int *num, int blockNum, int threadNum, int size) {
    (void)blockNum;
    (void)threadNum;

    const long long stride = (long long)gridDim.x * blockDim.x;
    const long long first = (long long)blockIdx.x * blockDim.x + threadIdx.x;

    for (long long b = first + 1; ; b += stride) {
        const long long p = 2 * b + 1;
        long long k = 2 * b * (b + 1);  // index of p * p

        // k grows with b, so once p * p is past the end of the array every
        // later base handled by this thread is past it too.
        if (k >= size)
            break;

        /**
         * Check if an element is not prime, if it isn't set it to 0.
         */
        for (; k < size; k += p)
            num[k] = 0;
    }
}

/**
 * Usage: program <n>
 * Tests the n candidates 2, 3, 5, 7, ..., 2 * n - 1 and prints the primes.
 */
int main(int argc, char* argv[]) {
    if (argc != 2) {
        cout << "Incorrect no of arguments" << endl;
        return 1;
    }

    const int n = atoi(argv[1]);
    // atoi yields 0 for non-numeric text; the upper limit keeps 2 * n + 1
    // representable as an int.
    if (n <= 0 || n > INT_MAX / 2) {
        cout << "Number of candidates must be between 1 and " << INT_MAX / 2 << endl;
        return 1;
    }

    /**
     * variable declarations
     * The host buffer lives on the heap and releases itself; a stack array
     * of n ints overflows the stack for large n and must never be deleted.
     */
    int *device = NULL;
    vector<int> host(n);
    int d = 0;
    cudaDeviceProp prop;

    /**
     * Get the properties of the device in use
     */
    CUDA_CHECK_RETURN(cudaGetDevice(&d));
    CUDA_CHECK_RETURN(cudaGetDeviceProperties(&prop, d));

    const int numberOfThreads =
        prop.maxThreadsPerBlock < 256 ? prop.maxThreadsPerBlock : 256;
    int numberOfBlocks = (n + numberOfThreads - 1) / numberOfThreads;
    if (numberOfBlocks > prop.maxGridSize[0])
        numberOfBlocks = prop.maxGridSize[0];  // grid-stride loops cover the rest

    /**
     * Start timer
     */
    clock_t cb, ce;
    cb = clock();

    /**
     * Allocate memory on the device
     */
    CUDA_CHECK_RETURN(cudaMalloc((void**) &device, sizeof(int) * n));

    /**
     * Call kernels with appropriate grid and thread size.
     * Two launches on the same stream: the second starts only after the
     * first has finished, so every candidate is written before any is zeroed.
     */
    initCandidates<<<numberOfBlocks, numberOfThreads>>>(device, n);
    CUDA_CHECK_RETURN(cudaGetLastError());
    prime<<<numberOfBlocks, numberOfThreads>>>(device, numberOfBlocks, numberOfThreads, n);
    CUDA_CHECK_RETURN(cudaGetLastError());

    /**
     * Copy results back to host (blocking; also reports kernel faults)
     */
    CUDA_CHECK_RETURN(cudaMemcpy(&host[0], device, sizeof(int) * n, cudaMemcpyDeviceToHost));

    /**
     * Free memory on device
     */
    CUDA_CHECK_RETURN(cudaFree(device));

    /**
     * Output values
     */
    for (int i = 0; i < n; i++)
        if (host[i] != 0)
            cout << host[i] << endl;

    /**
     * Stop timer
     */
    ce = clock();
    cout << "Prime generation - took " << double(ce - cb)/CLOCKS_PER_SEC << " seconds" << endl;

    return 0;
}
</pre>
[[Image:manualDelete.png|thumb|200px|Manual Delete Warning]]
===== Final version's errors, warnings and observations =====
* If a number over 515 is entered as the launch argument, the program will display random values at the end of the list of prime numbers
* When attempting to delete the host array manually in the program, a warning is displayed
[[Image:ManualCrash.png|thumb|200px|Manual Delete Crash]]
* The program crashes at the end if the host array is manually deleted
===== Successful run of Prime generation =====
[[Image:PrimeSuccessfulRun.png]]

Navigation menu