1
edit
Changes
→Assignment 3
=== Assignment 3 ===
<h2>Cuda Version:First Attempt</h2>
<pre>
# include <cmath> // This library enable the use of sqrt.
# include <iostream>
# include <ctime>
#include<iomanip>
#include<cstdlib>
# include <cuda_runtime.h>
//#include <times.h>
using namespace std;
inline clock_t getMilliSecs() {
return clock() / (CLOCKS_PER_SEC / 1000);
}
__global__ void primegen(bool prime, int number2,int x,int *primes_d)
{
int c = 0;
int idx = blockIdx.x * blockDim.x + threadIdx.x;
for ( int i=1; i <= x; i++)
{
if( i!= idx && i%idx == 0 )
{
prime = false;
break;
}
if(prime)
{
primes_d[c]=i;
c += 1;
}
prime = true;
}
}
/*for (int i = 1; i <= x; i++)
{
for ( int j = 2; j <= number2; j++)
{
if ( i!=j && i % j == 0 )
{
prime = false;
break;
}
}
if (prime)
{
primes_d[c]=i;
c += 1;
}
prime = true;
} */
void primenum(long double); // Prototype...
int main()
{
long double x = 0;
cout<<"\n This program will generate all prime numbers up to the"<<"\n number you have entered below...\n";
cout<<"\n Please enter a number: ";
cin>> x;
cout<<"\n Here are all the prime numbers up to "<<x<<".\n";
primenum(x); //function invocation...
//cout<<endl<<"\nThere are "<<c
//<<" prime numbers less than or equal to "<<x<<".\n\n";
return 0;
}
// This function will determine the primenumbers up to num.
void primenum(long double x)
{
int n = x;
int d;
bool prime = true;
//struct tms start_time, stop_time;
int number2;
number2 =(int) floor (sqrt (x));
clock_t start = getMilliSecs();
cudaDeviceProp prop;
cudaGetDevice(&d);
cudaGetDeviceProperties(&prop, d);
int nThreads = prop.maxThreadsDim[0];
int n_max = nThreads * prop.maxGridSize[0];
if ( n> n_max) {
n = n_max;
cout << "n reduced to " << n << endl;
}
//Array to hold generated primes on host
int *primes_h = new int[(int)x];
//Device array to hold the primes on the device
int *primes_d = new int[(int)x];
//allocate device memory and initialize device memory
cudaMalloc((void**)&primes_d, (int)x * sizeof(int));
// cudaMalloc((void**)&c_d, sizeof(int));
cudaMemset(&primes_d,0,x * sizeof(int));
//error checking
cudaError_t error ;
//Kernal goes here
primegen<<<(n + nThreads - 1) / nThreads, nThreads>>>(prime,number2,(int)x,primes_d);
// extract error code from the kernel's execution
error = cudaGetLastError();
if (error != cudaSuccess) {
cout << cudaGetErrorString(error) << endl;
}
//copy the array holding primes from device to host
error =cudaMemcpy(primes_h, primes_d, ((int)x) * sizeof(int), cudaMemcpyDeviceToHost);
if (error != cudaSuccess) {
cout << cudaGetErrorString(error) << endl;
}
// cudaMemcpy(c_h, c_d, sizeof(int), cudaMemcpyDeviceToHost);
//display the primes
for(int i=0; i<(int)x ; i++){
if(primes_h[i]>=2 && primes_h[i]<=(int)x){
cout<<primes_h[i]<<endl;
}
}
cout << "Elapsed time: " << (getMilliSecs() - start) << "ms" << endl;
// cout<< "time: "<< (stop_s-start_s)/double(CLOCKS_PER_SEC)<<endl;
//free allocated memory
delete [] primes_h;
cudaFree(primes_d);
getchar();
}
</pre>
<h2>Conclusion: Logical Error</h2>
<h2>Cuda Version: Attempt Two</h2>