212
edits
Changes
BetaT
,→Optimizing Problems
== Optimizing Problems ==
__global__ void Calculate (float* u, float* un,int nx, int c, float dx, float dt) {
int j = blockIdx.x * blockDim.x + threadIdx.x;
int i = blockIdx.y * blockDim.y + threadIdx.y;
// Hoisted out of the loop below: c*dt/dx does not change between iterations, so it is computed once rather than on every pass.
float total = c*dt / dx;
if (i < nx && j < nx)
{
// format for coalesced memory access
for (int it = 1; it <= nx- 1; it++)
{
if (i != 0 || i < nx )
{
un[i * nx + it-1] = u[i * nx + it-1];
__syncthreads();
u[it] = un[1 * nx + it - 1];
__syncthreads();
u[i * nx + it ] = un[i * nx + it- 1] - total * (un[i * nx + it - 1] - un[(i - 1) * nx + it - 1]);
__syncthreads();
}
}
}