Changes

Jump to: navigation, search

DPS915/M-N-M

520 bytes removed, 16:55, 12 April 2013
Final version's errors, warnings and observations
==== Conclusion: Logical Error ====
[[Image:gpuA3error.png|thumb|widthpx| ]]
The prime number generator seems to have run into a logical error: it does not generate the prime numbers correctly and instead prints out all of the numbers.
The program gives a runtime error, "invalid argument". The logical error still persists.
==== Final Cuda version ====
<pre>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <ctime>
#include <cuda_runtime.h>
# include <cmath> // This library enable the use of sqrt.using namespace std;
/** * This macro checks return value of the CUDA runtime call and exits * the application if the call failed. */#define CUDA_CHECK_RETURN(value) { \ cudaError_t _m_cudaStat = value; \ if (_m_cudaStat != cudaSuccess) { \ fprintf(stderr, "Error %s at line %d in file %s\n", \ cudaGetErrorString(_m_cudaStat), __LINE__, __FILE__); \ exit(1); \ # include <iostream>} }
# include <ctime>/** * Kernel code to generate and detect primes */__global__ void prime(int *num, int blockNum, int threadNum, int size) { const int tid = blockIdx.x * blockDim.x + threadIdx.x; const int bid = blockIdx.y * blockDim.y + threadIdx.y; __syncthreads();
#include<iomanip>  #include<cstdlib>  # include <cuda_runtime.h>  //#include <times.h>    using namespace std;    inline clock_t getMilliSecs() {  return clock() / (CLOCKS_PER_SEC / 1000);  }    __global__ void primegen(bool prime, int number2,int x,int *primes_d,int index)  {* int c = 0;  int idx = blockIdx.x * blockDim.x + threadIdxGenerate prime numbers and store them in the array.x; * The first element is always 2 *// number2 =(int) floor (sqrt (x));  // cout<< "value of idx" << idx <<endl;  /* if( !(x!=idx && x%idx tid == 0 ) )  primes_d num[ctid] = x2; } else { c+ num[tid] = 2 * tid +1   } */
int tmp = bid * threadNum + tid;
primes_d[idx] int step1 = 666;  c+2 * tmp +3       }       /*for ( int i step2 = tmp + 1; i <= x; i++)  {  for ( int j = 2; j <= number2; j++)  {  if ( i!=j && i % j == 0 )  {  prime = false;  break;  }
while(tmp < size) {
int i = 1;
/**
* Check if an element is not prime, if it isn't set it to 0.
*/
while((step1 * i + step2) < size) {
num[step1 * i + step2] = 0;
i++;
}
tmp += blockNum * threadNum;
__syncthreads();
}
}
if int main(prime)  {  primes_dint argc, char* argv[c]=i;  c += 1;  }  prime = true;     } */        void primenum(long double); // Prototype...      { int mainif(argc != 2 long double x = 0;  cout<<"\n This program will generate all prime numbers up to theIncorrect no of arguments"<<"\n number you have entered below...\n";  cout<<"\n Please enter a number: ";  cin>> x;  cout<<"\n Here are all the prime numbers up to "<<x<<".\n";  primenum(x); //function invocation...  //cout<<endl<<"\nThere are "<<c  //<<" prime numbers less than or equal to "<<x<<".\n\n";  return 01
}
int n = atoi(argv[1]);
/** * variable declarations */ int *device; int host[n]; int d; cudaDeviceProp prop;
/** * Get the properties of the device in use */ This function will determine the primenumbers up to num cudaGetDevice(&d); cudaGetDeviceProperties(&prop, d); int numberOfBlocks = 8; int maxThreadsPerBlock = prop.maxThreadsPerBlock; int numberOfThreads = maxThreadsPerBlock/numberOfBlocks;
void primenum/** * Start timer */ clock_t cb, ce; cb = clock(long double x);
{ /** * Allocate memory on the device */ CUDA_CHECK_RETURN(cudaMalloc((void**) &device, sizeof(int) * n));
int /** * Call kernel with appropriate grid and thread size */ prime<<<numberOfBlocks, numberOfThreads>>>(device, numberOfBlocks, numberOfThreads, n = x);
/** * Copy results back to host */ CUDA_CHECK_RETURN(cudaMemcpy(&host, device, sizeof(int d) * n, cudaMemcpyDeviceToHost));
bool prime = true /** * Free memory on device */ CUDA_CHECK_RETURN(cudaFree(device));
/** * Output values */struct tms start_time, stop_time for (int i = 0; i < n; i++) if (host[i] != 0) cout << host[i] << endl;
int number2; /** * Stop timer *// number2 ce =clock(int) floor (sqrt ; cout << "Prime generation - took " << double(x)ce - cb)/CLOCKS_PER_SEC << " seconds" << endl;}</pre> clock_t start [[Image:manualDelete.png|thumb|200px|Manual Delete Warning]]===== Final version's errors, warnings and observations ===== getMilliSecs();* If a number over 515 is entered as the launch argument, the program will display random values at the end of the list of prime numbers* When attempting to delete the host array manually in the program, a warning is displayed[[Image:ManualCrash.png|thumb|200px|Manual Delete Crash]]* The program crashes at the end if the host array is manually deleted
  cudaDeviceProp prop;  cudaGetDevice(&d);  cudaGetDeviceProperties(&prop, d);  int nThreads = prop.maxThreadsDim[0];  int n_max = nThreads * prop.maxGridSize[0];  if ( n> n_max) {  n = n_max;  cout << "n reduced to " << n << endl;  }     //Array to hold generated primes on host  int *primes_h = new int[(int)x];    //Device array to hold the primes on the device  int *primes_d = new int[(int)x];    //allocate device memory and initialize device memory  cudaMalloc((void**)&primes_d, (int)x * sizeof(int));    // cudaMalloc((void**)&c_d, sizeof(int));  cudaMemset(&primes_d,0,x * sizeof(int));    primes_h[0] = 666;  //error checking  cudaError_t error ;  for(int i=0; i<(int)x ; i++){  //if(primes_h[i]>=2 && primes_h[i]<=(int)x){    cout<<"BEFORE:"<<primes_h[i]<<endl;  //}  }  //Kernal goes here  //for ( int i=2; i<= x; i++)  //{  int i=10;  number2 =(int) floor (sqrt ((long double)i));  // primegen<<<1, x>>>(prime,number2,(int)i,primes_d,666);  // primegen<<<1, x>>>(prime,number2,(int)i,primes_d,666);  cudaDeviceSynchronize();  //}  // extract error code from the kernel's execution    error Successful run of Prime generation = cudaGetLastError();  if (error != cudaSuccess) {  cout << cudaGetErrorString(error) << endl;  }    //copy the array holding primes from device to host    error =cudaMemcpy(primes_h, primes_d, ((int)x) * sizeof(int), cudaMemcpyDeviceToHost);    if (error != cudaSuccess) {  cout << cudaGetErrorString(error) << endl;  }  // cudaMemcpy(c_h, c_d, sizeof(int), cudaMemcpyDeviceToHost);  //display the primes  for(int i=0; i<(int)x ; i++){  //if(primes_h[i]>=2 && primes_h[i]<=(int)x){  cout<<primes_h[iImage:PrimeSuccessfulRun.png]<<endl;  //}  }  cout << "Elapsed time: " << (getMilliSecs() - start) << "ms" << endl;  // cout<< "time: "<< (stop_s-start_s)/double(CLOCKS_PER_SEC)<<endl;  //free allocated memory    delete [] primes_h;  cudaFree(primes_d);    getchar();  } </pre>

Navigation menu