26
edits
Changes
Group 6
Assignment 2 - Parallelize
----
Serial Code:
/*
 * Serial Monte Carlo estimate of pi.
 *
 * Produces n estimates and stores them in h_a[0] .. h_a[n-1]. Every estimate
 * draws n random points (x, y) with coordinates in [0, 1] and counts the
 * points that satisfy y <= sqrt(1 - x^2); four times the hit ratio is the
 * estimate.
 *
 * n    number of estimates, and also the number of samples per estimate
 * h_a  output array; elements 0 .. n-1 are written
 *
 * The C library generator is reseeded from the current time on every call.
 */
void calculatePI(int n, float* h_a) {
    srand(time(NULL));

    int estimate = 0;
    while (estimate < n) {
        int inside = 0;

        for (int remaining = n; remaining > 0; --remaining) {
            // x is drawn before y so the rand() sequence is consumed in the
            // same order for every sample.
            const float px = float(rand()) / float(RAND_MAX);
            const float py = float(rand()) / float(RAND_MAX);

            // The comparison yields 0 or 1, which is added to the hit count.
            inside += (py <= sqrt(1 - (px * px)));
        }

        h_a[estimate] = 4 * float(inside) / float(n);
        ++estimate;
    }
}
Parallel code:
/*
 * Initialises one cuRAND generator state per thread.
 *
 * Thread t (flat 1D index over blockIdx.x / blockDim.x / threadIdx.x) calls
 * curand_init on rng[t] with the fixed seed 123456, t as the sequence
 * argument and 0 as the offset argument.
 *
 * rng  array of generator states; element t is written by thread t.
 *
 * NOTE(review): there is no bounds check, so the launch presumably must not
 * create more threads than rng has elements -- confirm at the launch site.
 */
__global__ void setRng(curandState *rng) {
    const int tid = threadIdx.x + blockDim.x * blockIdx.x;
    curandState *slot = rng + tid;
    curand_init(123456, tid, 0, slot);
}
/*
 * Monte Carlo estimate of pi, one estimate per thread.
 *
 * Each thread draws n points (x, y) from its own cuRAND state, counts the
 * points with y <= sqrt(1 - x^2) and stores 4 * hits / n in d_a[idx], where
 * idx is the flat 1D thread index (blockIdx.x / blockDim.x / threadIdx.x).
 *
 * d_a  output; element idx is overwritten with this thread's estimate. Its
 *      previous contents are ignored, so the buffer does not have to be
 *      zeroed before the launch.
 * n    samples per thread. Expected to be positive: no samples are drawn
 *      for n <= 0, and n == 0 produces 0/0 (NaN).
 * rng  per-thread generator states; rng[idx] is read on entry and the
 *      advanced state is stored back on exit. Presumably initialised by
 *      setRng beforehand -- confirm at the launch site.
 *
 * NOTE(review): there is no bounds check, so the launch presumably must not
 * create more threads than d_a and rng have elements -- confirm at the
 * launch site.
 */
__global__ void calPI(float* d_a, int n, curandState *rng) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // Work on a local copy of the generator state so the loop does not read
    // and write rng[idx] through the pointer on every draw.
    curandState state = rng[idx];

    // Integer hit counter local to the thread: exact for any int n, and
    // independent of whatever d_a[idx] held before the launch.
    int hits = 0;
    for (int i = 0; i < n; ++i) {
        float x = curand_uniform(&state);
        float y = curand_uniform(&state);
        if (y <= sqrtf(1.0f - x * x)) {
            ++hits;
        }
    }

    // Persist the advanced state so a later launch continues the sequence
    // instead of replaying the same samples.
    rng[idx] = state;

    // Float literal keeps the arithmetic in single precision.
    d_a[idx] = 4.0f * float(hits) / float(n);
}
=== Assignment 3 - Optimize ===