Open main menu

CDOT Wiki β

Changes

Hu3Team

32 bytes added, 00:07, 11 November 2014
Assignment 2
/*
 * copyMat - element-wise copy of the M x N matrix w into u.
 *
 * Launch layout: 2D. The global x index selects the row i and the global
 * y index selects the column j; threads that fall outside M x N do nothing.
 * M and N are not declared in this block and must be visible at file scope.
 *
 * w : source matrix (assumed to hold at least M * N doubles - confirm at the call site)
 * u : destination matrix (same assumption)
 */
__global__ void copyMat(const double *w, double *u){
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    int j = blockIdx.y * blockDim.y + threadIdx.y;

    if (i < M && j < N) {
        // The row stride is the column count N. The former i * M + j touched
        // every element exactly once only when M == N; for M > N it ran past
        // the end of the buffers and for M < N it skipped elements.
        size_t idx = (size_t)i * N + j;
        u[idx] = w[idx];
    }
    // No barrier is needed: the threads do not exchange any data.
}
 
/*
 * calcHeat - one relaxation sweep over an m x n grid stored row by row.
 *
 * For every interior cell (0 < i < m-1, 0 < j < n-1):
 *   w[i][j]       = mean of the four direct neighbours taken from u
 *   d_array[i][j] = |w[i][j] - u[i][j]|
 * Edge cells of w are left untouched, and their d_array entry is set to 0 so
 * that a later maximum over all m * n entries of d_array only sees defined,
 * non-negative values.
 * *d is reset to -1 so that a following maximum search starts below every
 * possible absolute difference.
 *
 * Launch layout: 2D. The global x index selects the row i and the global
 * y index selects the column j; surplus threads do nothing.
 *
 * w       : output grid, m * n doubles
 * u       : input grid, m * n doubles (read only)
 * d       : single double, reset to -1
 * m, n    : number of rows / columns
 * d_array : per-cell absolute change, m * n doubles
 */
__global__ void calcHeat(double *w, const double *u, double *d, int m, int n, double* d_array){
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    int j = blockIdx.y * blockDim.y + threadIdx.y;

    // A single writer is enough; the kernel never reads *d.
    if (i == 0 && j == 0) {
        *d = -1;
    }

    if (i >= m || j >= n) {
        return;
    }

    // Row stride is the column count n (the former i * m + j was only
    // correct for square grids).
    size_t idx = (size_t)i * n + j;

    // Only interior cells have all four neighbours. The former guard
    // (i < m && j < n) let the last row/column read past the grid and
    // overwrite the fixed edge values of w.
    bool interior = (i > 0) && (i < m - 1) && (j > 0) && (j < n - 1);

    if (interior) {
        double avg = (u[idx - n] + u[idx + n] + u[idx - 1] + u[idx + 1]) / 4.0;
        w[idx] = avg;
        d_array[idx] = fabs(avg - u[idx]);
    } else {
        d_array[idx] = 0.0;
    }
}
 
/*
 * Helper for bigDiff: atomically raise *addr to val when val is larger.
 * Implemented as a compare-and-swap loop on the 64-bit pattern of the double,
 * so concurrent callers can never replace a larger value with a smaller one.
 */
static __device__ __forceinline__ void bigDiffRaiseTo(double *addr, double val){
    unsigned long long *slot = reinterpret_cast<unsigned long long *>(addr);
    unsigned long long seen = *slot;

    while (val > __longlong_as_double((long long)seen)) {
        unsigned long long prev =
            atomicCAS(slot, seen, (unsigned long long)__double_as_longlong(val));
        if (prev == seen) {
            break;          // our value was stored
        }
        seen = prev;        // another thread got in first; re-check against its value
    }
}

/*
 * bigDiff - raise *d to the largest of the m * n entries of d_array.
 *
 * *d is only ever increased, so the caller has to seed it with a value below
 * every entry before the launch.
 *
 * Launch layout: 1D in x, any grid/block size. Each thread strides over the
 * array by the total number of x-threads, keeps a private maximum and then
 * merges it into *d atomically.
 *
 * The former version compared against *d and overwrote it without atomics
 * (a smaller value could win the race) and called __syncthreads() inside a
 * loop whose trip count differed between threads of a block.
 *
 * d_array : m * n doubles (read only here)
 * d       : single double holding the running maximum
 * m, n    : grid dimensions; only their product is used
 */
__global__ void bigDiff(double* d_array, double* d, int m, int n){
    if (m <= 0 || n <= 0) {
        return;
    }

    const size_t total  = (size_t)m * (size_t)n;
    const size_t stride = (size_t)gridDim.x * blockDim.x;
    size_t k = (size_t)blockIdx.x * blockDim.x + threadIdx.x;

    if (k >= total) {
        return;             // this thread has no element to look at
    }

    double best = d_array[k];
    for (k += stride; k < total; k += stride) {
        double v = d_array[k];
        if (v > best) {
            best = v;
        }
    }

    bigDiffRaiseTo(d, best);
}
 
Moreover, we changed the error tolerance (epsilon) input so that it is set directly in the code. After working through many difficulties while coding, we finally obtained good results compared with the code from Assignment 1. The runtime decreased, which showed us the power that CUDA can provide for optimizing processing.
 
 
=== Assignment 3 ===