==== Conclusion: Logical Error ====
[[Image:gpuA3error.png|thumb|200px|Logical error output]]
The prime number generator seems to have run into a logical error: it does not generate the prime numbers correctly and instead outputs every number.
The program also reports the run-time error "invalid argument", and the logical error still persists.
==== Final Cuda version ====
<pre>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <ctime>
#include <cuda_runtime.h>
# include <cmath> // This library enable the use of sqrt.using namespace std;
/** * This macro checks return value of the CUDA runtime call and exits * the application if the call failed. */#define CUDA_CHECK_RETURN(value) { \ cudaError_t _m_cudaStat = value; \ if (_m_cudaStat != cudaSuccess) { \ fprintf(stderr, "Error %s at line %d in file %s\n", \ cudaGetErrorString(_m_cudaStat), __LINE__, __FILE__); \ exit(1); \ # include <iostream>} }
# include <ctime>/** * Kernel code to generate and detect primes */__global__ void prime(int *num, int blockNum, int threadNum, int size) { const int tid = blockIdx.x * blockDim.x + threadIdx.x; const int bid = blockIdx.y * blockDim.y + threadIdx.y; __syncthreads();
#include<iomanip> #include<cstdlib> # include <cuda_runtime.h> //#include <times.h> using namespace std; inline clock_t getMilliSecs() { return clock() / (CLOCKS_PER_SEC / 1000); } __global__ void primegen(bool prime, int number2,int x,int *primes_d,int index) {* int c = 0; int idx = blockIdx.x * blockDim.x + threadIdxGenerate prime numbers and store them in the array.x; * The first element is always 2 *// number2 =(int) floor (sqrt (x)); // cout<< "value of idx" << idx <<endl; /* if( !(x!=idx && x%idx tid == 0 ) ) { primes_d num[ctid] = x2; } else { c+ num[tid] = 2 * tid +1; } */
int tmp = bid * threadNum + tid;
primes_d[idx] int step1 = 666; c+2 * tmp +3; } /*for ( int i step2 = tmp + 1; i <= x; i++) { for ( int j = 2; j <= number2; j++) { if ( i!=j && i % j == 0 ) { prime = false; break; }
while(tmp < size) {
int i = 1;
/**
* Check if an element is not prime, if it isn't set it to 0.
*/
while((step1 * i + step2) < size) {
num[step1 * i + step2] = 0;
i++;
}
tmp += blockNum * threadNum;
__syncthreads();
}
}
if int main(prime) { primes_dint argc, char* argv[c]=i; c += 1; } prime = true; } */ void primenum(long double); // Prototype... { int mainif(argc != 2) { long double x = 0; cout<<"\n This program will generate all prime numbers up to theIncorrect no of arguments"<<"\n number you have entered below...\n"; cout<<"\n Please enter a number: "; cin>> x; cout<<"\n Here are all the prime numbers up to "<<x<<".\n"; primenum(x); //function invocation... //cout<<endl<<"\nThere are "<<c //<<" prime numbers less than or equal to "<<x<<".\n\n"; return 01;
}
int n = atoi(argv[1]);
/** * variable declarations */ int *device; int host[n]; int d; cudaDeviceProp prop;
/** * Get the properties of the device in use */ This function will determine the primenumbers up to num cudaGetDevice(&d); cudaGetDeviceProperties(&prop, d); int numberOfBlocks = 8; int maxThreadsPerBlock = prop.maxThreadsPerBlock; int numberOfThreads = maxThreadsPerBlock/numberOfBlocks;
void primenum/** * Start timer */ clock_t cb, ce; cb = clock(long double x);
{ /** * Allocate memory on the device */ CUDA_CHECK_RETURN(cudaMalloc((void**) &device, sizeof(int) * n));
int /** * Call kernel with appropriate grid and thread size */ prime<<<numberOfBlocks, numberOfThreads>>>(device, numberOfBlocks, numberOfThreads, n = x);
/** * Copy results back to host */ CUDA_CHECK_RETURN(cudaMemcpy(&host, device, sizeof(int d) * n, cudaMemcpyDeviceToHost));
bool prime = true /** * Free memory on device */ CUDA_CHECK_RETURN(cudaFree(device));
/** * Output values */struct tms start_time, stop_time for (int i = 0; i < n; i++) if (host[i] != 0) cout << host[i] << endl;
int number2; /** * Stop timer *// number2 ce =clock(int) floor (sqrt ; cout << "Prime generation - took " << double(x)ce - cb)/CLOCKS_PER_SEC << " seconds" << endl;}</pre> clock_t start [[Image:manualDelete.png|thumb|200px|Manual Delete Warning]]===== Final version's errors, warnings and observations ===== getMilliSecs();* If a number over 515 is entered as the launch argument, the program will display random values at the end of the list of prime numbers* When attempting to delete the host array manually in the program, a warning is displayed[[Image:ManualCrash.png|thumb|200px|Manual Delete Crash]]* The program crashes at the end if the host array is manually deleted
cudaDeviceProp prop; cudaGetDevice(&d); cudaGetDeviceProperties(&prop, d); int nThreads = prop.maxThreadsDim[0]; int n_max = nThreads * prop.maxGridSize[0]; if ( n> n_max) { n = n_max; cout << "n reduced to " << n << endl; } //Array to hold generated primes on host int *primes_h = new int[(int)x]; //Device array to hold the primes on the device int *primes_d = new int[(int)x]; //allocate device memory and initialize device memory cudaMalloc((void**)&primes_d, (int)x * sizeof(int)); // cudaMalloc((void**)&c_d, sizeof(int)); cudaMemset(&primes_d,0,x * sizeof(int)); primes_h[0] = 666; //error checking cudaError_t error ; for(int i=0; i<(int)x ; i++){ //if(primes_h[i]>=2 && primes_h[i]<=(int)x){ cout<<"BEFORE:"<<primes_h[i]<<endl; //} } //Kernal goes here //for ( int i=2; i<= x; i++) //{ int i=10; number2 =(int) floor (sqrt ((long double)i)); // primegen<<<1, x>>>(prime,number2,(int)i,primes_d,666); // primegen<<<1, x>>>(prime,number2,(int)i,primes_d,666); cudaDeviceSynchronize(); //} // extract error code from the kernel's execution error Successful run of Prime generation = cudaGetLastError(); if (error != cudaSuccess) { cout << cudaGetErrorString(error) << endl; } //copy the array holding primes from device to host error =cudaMemcpy(primes_h, primes_d, ((int)x) * sizeof(int), cudaMemcpyDeviceToHost); if (error != cudaSuccess) { cout << cudaGetErrorString(error) << endl; } // cudaMemcpy(c_h, c_d, sizeof(int), cudaMemcpyDeviceToHost); //display the primes for(int i=0; i<(int)x ; i++){ //if(primes_h[i]>=2 && primes_h[i]<=(int)x){ cout<<primes_h[iImage:PrimeSuccessfulRun.png]<<endl; //} } cout << "Elapsed time: " << (getMilliSecs() - start) << "ms" << endl; // cout<< "time: "<< (stop_s-start_s)/double(CLOCKS_PER_SEC)<<endl; //free allocated memory delete [] primes_h; cudaFree(primes_d); getchar(); } </pre>