212
edits
Changes
BetaT
,→Solution to Windows Display Driver Crashing
{
int j = blockIdx.x * blockDim.x + threadIdx.x;
int i = blockIdx.y * blockDim.y + threadIdx.y;
if (i < nx && j < nx)
{
u[it] = un[1 * nx + it - 1];
__syncthreads();
u[i * nx + it ] = un[i * nx + it- 1] - total c * dt/dx * (un[i * nx + it - 1] - un[(i - 1) * nx + it - 1]);
__syncthreads();
}
}
}
The code below has been altered to remove the (j) variable and to combine the two (if) statements into one, so that we can reduce (Thread Divergence). The recurring (c*dt/dx) expression has also been moved out of the loop and stored in a variable called total, so that each thread does NOT recompute the same value on every iteration, which would decrease performance.
// kernel
/**
 * Fills columns 1 .. nx-1 of the nx-by-nx row-major array `u`, one column per
 * loop iteration, using `un` as a scratch copy of the previous column.
 *
 * Thread mapping: one thread per row, i = blockIdx.x * blockDim.x + threadIdx.x.
 * Rows 1 .. nx-1 are updated with
 *     u[i][it] = un[i][it-1] - (c*dt/dx) * (un[i][it-1] - un[i-1][it-1])
 * and row 0 receives u[0][it] = un[1][it-1].
 *
 * Preconditions:
 *  - `u` and `un` each hold at least nx*nx floats.
 *  - Column 0 of `u` and row 0 of `un` must be initialised by the caller; this
 *    kernel reads them but never writes them.
 *  - Row i reads the value row i-1 copied into `un` during the same iteration.
 *    That dependency is ordered with __syncthreads(), which only covers a
 *    single block, so results are only well-defined when all nx rows live in
 *    one block (gridDim.x == 1, blockDim.x >= nx).
 *    NOTE(review): confirm the launch configuration used by the caller.
 *
 * @param u   in/out: solution array, row-major, nx*nx elements
 * @param un  in/out: scratch array, row-major, nx*nx elements
 * @param nx  number of rows and of columns
 * @param c   coefficient multiplied into dt/dx
 * @param dx  divisor of the update coefficient
 * @param dt  multiplier of the update coefficient
 */
__global__ void Calculate(float* u, float* un, int nx, int c, float dx, float dt)
{
    const int i = blockIdx.x * blockDim.x + threadIdx.x;

    // Loop-invariant coefficient, computed once per thread.
    const float total = c * dt / dx;

    // Row 0 and out-of-range threads do no row update, but they must still
    // execute the loop so that every thread of the block reaches the barrier.
    const bool active = (i > 0 && i < nx);

    for (int it = 1; it <= nx - 1; it++)
    {
        const int prev = it - 1;

        if (active)
        {
            // Snapshot this row's previous-column value into the scratch array.
            un[i * nx + prev] = u[i * nx + prev];
        }

        // Block-uniform barrier: makes the neighbour's snapshot un[i-1][prev]
        // visible before it is read below. This is the only cross-thread
        // dependency in an iteration, so one barrier is sufficient.
        __syncthreads();

        if (active)
        {
            if (i == 1)
            {
                // Row 0 takes row 1's previous-column value. Only the thread
                // that wrote that value stores it, instead of every thread
                // issuing the same redundant write.
                u[it] = un[1 * nx + prev];
            }

            const float here  = un[i * nx + prev];
            const float below = un[(i - 1) * nx + prev];
            u[i * nx + it] = here - total * (here - below);
        }
    }
}