57
edits
Changes
→Kernel Version 2
int i = blockIdx.y * blockDim.y + threadIdx.y;
if(i<n && j<n){
double value = cos ( angle ) * a[j];
b[i] = atomicAdd(&b[i], value);
}
}
int main (int argc, char* argv[] ){
if (argc != 2) {
//copy randomly generated values from host to device
for(int i=0; i<n; i++)
cudaMemcpy(d_a,r,sizeof(double)*n,cudaMemcpyHostToDevice);
cudaMemcpy(d_b,s,sizeof(double)*n,cudaMemcpyHostToDevice);