Open main menu

CDOT Wiki β

Changes

GPUSquad

4,064 bytes added, 13:12, 27 March 2018
Assignment 2
=== Assignment 2 ===
<source>
 
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
 
#include <stdio.h>
 
// Load standard libraries
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <fstream>
#include <cmath>
#include <chrono>
using namespace std;
 
__global__ void matrixKernel(float* a, float* b, float dcent, int n, int m, int xmin, int xmax,
int ymin, int ymax, float dx, float dy, float dxxinv, float dyyinv) {//MODIFY to suit algorithm
 
int j = blockIdx.x * blockDim.x + threadIdx.x + 1;
//above: we are using block and thread indexes to replace some of the iteration logic
if (j < n - 1) {
for (int i = 1; i < m - 1; i++) {
int ij = i + m*j;
 
float x = xmin + i*dx, y = ymin + j*dy;
 
float input = abs(x) > 0.5 || abs(y) > 0.5 ? 0 : 1;
 
a[ij] = (input + dxxinv*(b[ij - 1] + b[ij + 1])
+ dyyinv*(b[ij - m] + b[ij + m]))*dcent;
}
}
 
}
// Set grid size and number of iterations
const int save_iters = 20;
const int total_iters = 5000;
const int error_every = 2;
const int m = 500, n = 500;
const float xmin = -1, xmax = 1;
const float ymin = -1, ymax = 1;
 
// Compute useful constants
const float pi = 3.1415926535897932384626433832795;
const float omega = 2 / (1 + sin(2 * pi / n));
const float dx = (xmax - xmin) / (m - 1);
const float dy = (ymax - ymin) / (n - 1);
const float dxxinv = 1 / (dx*dx);
const float dyyinv = 1 / (dy*dy);
const float dcent = 1 / (2 * (dxxinv + dyyinv));
 
// Input function
inline float f(int i, int j) {
float x = xmin + i*dx, y = ymin + j*dy;
return abs(x) > 0.5 || abs(y) > 0.5 ? 0 : 1;
}
 
// Common output and error routine
void outputImage(char* filename, float *a) {
// Computes the error if sn%error every==0
 
 
// Saves the matrix if sn<=save iters
int i, j, ij = 0, ds = sizeof(float);
float x, y, data_float; const char *pfloat;
pfloat = (const char*)&data_float;
 
ofstream outfile;
static char fname[256];
sprintf(fname, "%s.%d", filename, 101);
outfile.open(fname, fstream::out
| fstream::trunc | fstream::binary);
 
data_float = m; outfile.write(pfloat, ds);
 
for (i = 0; i < m; i++) {
x = xmin + i*dx;
data_float = x; outfile.write(pfloat, ds);
}
 
for (j = 0; j < n; j++) {
y = ymin + j*dy;
data_float = y;
outfile.write(pfloat, ds);
 
for (i = 0; i < m; i++) {
data_float = a[ij++];
outfile.write(pfloat, ds);
}
}
 
outfile.close();
}
 
 
void dojacobi() {
int i, j, ij, k;
float error, z;
float *a, *b;
float *u;
float *v;
u = new float[m*n];
v = new float[m*n];
 
// Set initial guess to be identically zero
for (ij = 0; ij < m*n; ij++) u[ij] = v[ij] = 0;
 
a = v; b = u;
float* d_a;
float* d_b;
 
//malloc
cudaMalloc((void**)&d_a, m*n * sizeof(float));
cudaMalloc((void**)&d_b, m*n * sizeof(float));
 
cudaMemcpy(d_a, a, n* m * sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(d_b, b, n* m * sizeof(float), cudaMemcpyHostToDevice);
 
dim3 dGrid(m);
dim3 dBlock(m);
 
// Carry out Jacobi iterations
for (k = 1; k <= total_iters; k++) {
if (k % 2 == 0) {
cudaError_t error = cudaGetLastError();
 
matrixKernel << <dGrid, dBlock >> > (d_a, d_b, dcent, n, m, xmin, xmax, ymin, ymax, dx, dy, dxxinv, dyyinv);
 
cudaDeviceSynchronize();
if (cudaGetLastError()) {
std::cout << "error";
}
}
 
else {
cudaError_t error = cudaGetLastError();
 
matrixKernel << <dGrid, dBlock >> > (d_b, d_a, dcent, n, m, xmin, xmax, ymin, ymax, dx, dy, dxxinv, dyyinv);
 
cudaDeviceSynchronize();
if (cudaGetLastError()) {
std::cout << "error";
}
}
}
cudaMemcpy(a, d_a, n* m * sizeof(float), cudaMemcpyDeviceToHost);
outputImage("jacobi out", a);
cudaFree(d_a);
cudaFree(d_b);
delete[] u;
delete[] v;
}
 
int main() {
std::chrono::steady_clock::time_point ts, te;
ts = std::chrono::steady_clock::now();
dojacobi();
te = std::chrono::steady_clock::now();
std::chrono::steady_clock::duration duration = te - ts;
auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(duration);
std::cout << "Parallel Code Time: " << ms.count() << " ms" << std::endl;
cudaDeviceReset();
 
return 0;
}
 
</source>
 
=== Assignment 3 ===
93
edits