Changes

← Older edit

DPS915/M-N-M

520 bytes removed, 16:55, 12 April 2013

→‎Final version's errors, warnings and observations

==== Conclusion: Logical Error ====

[[Image:gpuA3error.png|thumb|200px|Logical error output]]

The prime number generator appears to contain a logical error: it does not generate the prime numbers correctly and instead outputs every number.

The program also gives the run-time error "invalid argument", and the logical error still persists.

==== Final Cuda version ====

<pre>

#include <cstdio>

#include <cstdlib>

#include <iostream>

#include <ctime>

#include <cuda_runtime.h>

~~# include <cmath> // This library enable the use of sqrt.~~using namespace std;

/** * This macro checks return value of the CUDA runtime call and exits * the application if the call failed. */#define CUDA_CHECK_RETURN(value) { \ cudaError_t _m_cudaStat = value; \ if (_m_cudaStat != cudaSuccess) { \ fprintf(stderr, "Error %s at line %d in file %s\n", \ cudaGetErrorString(_m_cudaStat), __LINE__, __FILE__); \ exit(1); \ ~~# include <iostream>~~} }

~~# include <ctime>~~/** * Kernel code to generate and detect primes */__global__ void prime(int *num, int blockNum, int threadNum, int size) { const int tid = blockIdx.x * blockDim.x + threadIdx.x; const int bid = blockIdx.y * blockDim.y + threadIdx.y; __syncthreads();

~~#include<iomanip>~~ ~~#include<cstdlib>~~ ~~# include <cuda_runtime.h>~~ /~~/#include <times.h>~~ ~~using namespace std;~~ ~~inline clock_t getMilliSecs() {~~ ~~return clock() / (CLOCKS_PER_SEC / 1000);~~ } ~~__global__ void primegen(bool prime, int number2,int x,int~~ *~~primes_d,int index)~~ {* ~~int c = 0;~~ ~~int idx = blockIdx.x~~ * ~~blockDim.x + threadIdx~~Generate prime numbers and store them in the array.x; * The first element is always 2 */~~/ number2 =(int) floor (sqrt (x));~~ ~~// cout<< "value of idx" << idx <<endl;~~ /* if( ~~!(x!=idx && x%idx~~ tid == 0 ) ) { ~~primes_d~~ num[ctid] = x2; } else { c+ num[tid] = 2 * tid +1; } */

int tmp = bid * threadNum + tid;

~~primes_d[idx]~~ int step1 = ~~666;~~ c+2 * tmp +3; } ~~/*for (~~ int i step2 = tmp + 1; ~~i <= x; i++)~~ { ~~for ( int j = 2; j <= number2; j++)~~ { ~~if ( i!=j && i % j == 0 )~~ { ~~prime = false;~~ ~~break;~~ }

while(tmp < size) {

int i = 1;

/**

* Check if an element is not prime, if it isn't set it to 0.

*/

while((step1 * i + step2) < size) {

num[step1 * i + step2] = 0;

i++;

}

tmp += blockNum * threadNum;

__syncthreads();

}

if int main(~~prime)~~ { ~~primes_d~~int argc, char* argv[c]~~=i;~~ ~~c += 1;~~ } ~~prime = true;~~ ~~} */~~ ~~void primenum(long double~~)~~; // Prototype...~~ { ~~int main~~if(argc != 2) { ~~long double x = 0;~~ cout<<"~~\n This program will generate all prime numbers up to the~~Incorrect no of arguments"~~<<"\n number you have entered below...\n";~~ ~~cout<<"\n Please enter a number: ";~~ ~~cin>> x;~~ ~~cout<<"\n Here are all the prime numbers up to "<<x<<".\n";~~ ~~primenum(x); //function invocation...~~ ~~//cout~~<<endl~~<<"\nThere are "<<c~~ ~~//<<" prime numbers less than or equal to "<<x<<".\n\n"~~; return 01;

}

int n = atoi(argv[1]);

/** * variable declarations */ int *device; int host[n]; int d; cudaDeviceProp prop;

/** * Get the properties of the device in use */ ~~This function will determine the primenumbers up to num~~ cudaGetDevice(&d); cudaGetDeviceProperties(&prop, d); int numberOfBlocks = 8; int maxThreadsPerBlock = prop.maxThreadsPerBlock; int numberOfThreads = maxThreadsPerBlock/numberOfBlocks;

~~void primenum~~/** * Start timer */ clock_t cb, ce; cb = clock(~~long double x~~);

{ /** * Allocate memory on the device */ CUDA_CHECK_RETURN(cudaMalloc((void**) &device, sizeof(int) * n));

~~int~~ /** * Call kernel with appropriate grid and thread size */ prime<<<numberOfBlocks, numberOfThreads>>>(device, numberOfBlocks, numberOfThreads, n ~~= x~~);

/** * Copy results back to host */ CUDA_CHECK_RETURN(cudaMemcpy(&host, device, sizeof(int d) * n, cudaMemcpyDeviceToHost));

~~bool prime = true~~ /** * Free memory on device */ CUDA_CHECK_RETURN(cudaFree(device));

/** * Output values */~~struct tms start_time, stop_time~~ for (int i = 0; i < n; i++) if (host[i] != 0) cout << host[i] << endl;

~~int number2;~~ /** * Stop timer */~~/ number2~~ ce =clock(~~int~~) ~~floor (sqrt~~ ; cout << "Prime generation - took " << double(x)ce - cb)/CLOCKS_PER_SEC << " seconds" << endl;}</pre> ~~clock_t start~~ [[Image:manualDelete.png|thumb|200px|Manual Delete Warning]]===== Final version's errors, warnings and observations ===== ~~getMilliSecs();~~* If a number over 515 is entered as the launch argument, the program will display random values at the end of the list of prime numbers* When attempting to delete the host array manually in the program, a warning is displayed[[Image:ManualCrash.png|thumb|200px|Manual Delete Crash]]* The program crashes at the end if the host array is manually deleted

~~cudaDeviceProp prop;~~ ~~cudaGetDevice(&d);~~ ~~cudaGetDeviceProperties(&prop, d);~~ ~~int nThreads~~ = ~~prop.maxThreadsDim[0];~~ ~~int n_max = nThreads * prop.maxGridSize[0];~~ ~~if ( n> n_max) {~~ ~~n = n_max;~~ ~~cout << "n reduced to " << n << endl;~~ } ~~//Array to hold generated primes on host~~ ~~int *primes_h = new int[(int)x];~~ ~~//Device array to hold the primes on the device~~ ~~int *primes_d = new int[(int)x];~~ ~~//allocate device memory and initialize device memory~~ ~~cudaMalloc((void**)&primes_d, (int)x * sizeof(int));~~ ~~// cudaMalloc((void**)&c_d, sizeof(int));~~ ~~cudaMemset(&primes_d,0,x * sizeof(int));~~ ~~primes_h[0] = 666;~~ ~~//error checking~~ ~~cudaError_t error ;~~ ~~for(int i=0; i<(int)x ; i++){~~ ~~//if(primes_h[i]>=2 && primes_h[i]<=(int)x){~~ ~~cout<<"BEFORE:"<<primes_h[i]<<endl;~~ ~~//}~~ } ~~//Kernal goes here~~ ~~//for ( int i~~=~~2; i<~~= ~~x; i++)~~ ~~//{~~ ~~int i~~=~~10;~~ ~~number2~~ =~~(int) floor (sqrt ((long double)i));~~ ~~// primegen<<<1, x>>>(prime,number2,(int)i,primes_d,666);~~ ~~// primegen<<<1, x>>>(prime,number2,(int)i,primes_d,666);~~ ~~cudaDeviceSynchronize();~~ ~~//}~~ ~~// extract error code from the kernel's execution~~ ~~error~~ Successful run of Prime generation = ~~cudaGetLastError();~~ ~~if (error !~~= ~~cudaSuccess) {~~ ~~cout << cudaGetErrorString(error) << endl;~~ } ~~//copy the array holding primes from device to host~~ ~~error~~ =~~cudaMemcpy(primes_h, primes_d, ((int)x) * sizeof(int), cudaMemcpyDeviceToHost);~~ ~~if (error !~~= ~~cudaSuccess) {~~ ~~cout << cudaGetErrorString(error) << endl;~~ } ~~// cudaMemcpy(c_h, c_d, sizeof(int), cudaMemcpyDeviceToHost);~~ ~~//display the primes~~ ~~for(int i~~=~~0; i<(int)x ; i++){~~ ~~//if(primes_h~~[~~i]>=2 && primes_h~~[~~i]<=(int)x){~~ ~~cout<<primes_h[i~~Image:PrimeSuccessfulRun.png]~~<<endl;~~ ~~//}~~ } ~~cout << "Elapsed time: " << (getMilliSecs() - start) << "ms" << endl;~~ ~~// cout<< "time: "<< (stop_s-start_s)/double(CLOCKS_PER_SEC)<<endl;~~ 
~~//free allocated memory~~ ~~delete [~~] ~~primes_h;~~ ~~cudaFree(primes_d);~~ ~~getchar();~~ } ~~</pre>~~

Mohamed Baig

1

edit

Changes

DPS915/M-N-M

Navigation menu

Personal tools

Namespaces

Variants

Views

More

Search

Navigation

get involved with CDOT

courses

course projects

links

Tools