Changes

Jump to: navigation, search

UnknownX

2,352 bytes added, 14:40, 11 April 2017
Assignment 2 - Parallelization
== Assignment 2 - Parallelization==
[[File:Pygpu2.PNG]]
 
CPU code:
// CPU reference renderer: visits every pixel of the N x N image, casts a ray
// from (col, row, 0) along +z, and stores the resulting colour as three
// consecutive ints (x, y, z components of pix_col) in pixs.
for (int row = 0; row < N; ++row) {
    for (int col = 0; col < N; ++col) {
        // Start from the background colour; only overwritten on a hit.
        pix_col = black;
        const Ray ray(Vec3(col, row, 0), Vec3(0, 0, 1));
        if (sphere.intersect(ray, t)) {
            // Point on the sphere reached after travelling t along the ray.
            const Vec3 hit = ray.o + ray.d*t;
            const Vec3 to_light = light.c - hit;
            const Vec3 normal = sphere.getNormal(hit);
            // Dot product of the normalized light direction and normal.
            const double dt = dot(to_light.normalize(), normal.normalize());
            pix_col = (red + white*dt) * 0.5;
            clamp255(pix_col);
        }
        // Interleaved layout: three ints per pixel, rows stored one after another.
        const int base = 3 * (row * N + col);
        pixs[base] = (int)pix_col.x;
        pixs[base + 1] = (int)pix_col.y;
        pixs[base + 2] = (int)pix_col.z;
    }
}
 
GPU host code:
// Launch geometry: one thread per pixel, block count rounded up so that at
// least N * N threads are launched.
int size = N * N;
int nblocks = (size + ntpb - 1) / ntpb;
// Size in bytes of one colour-channel buffer.
const size_t nbytes = size * sizeof(int);

// Host and device buffers, one pair per colour channel.
// NOTE(review): none of the CUDA return codes below are checked, and nothing
// here releases these buffers -- confirm that is handled elsewhere.
int* h_pixs_x = new int[size];
int* d_pixs_x;
cudaMalloc((void**)&d_pixs_x, nbytes);

int* h_pixs_y = new int[size];
int* d_pixs_y;
cudaMalloc((void**)&d_pixs_y, nbytes);

int* h_pixs_z = new int[size];
int* d_pixs_z;
cudaMalloc((void**)&d_pixs_z, nbytes);

// Render on the device; each channel is written to its own array.
kernel_tray<<<nblocks, ntpb>>>(pix_col, N, d_pixs_x, d_pixs_y, d_pixs_z);

// Copy each channel back to the host.
cudaMemcpy(h_pixs_x, d_pixs_x, nbytes, cudaMemcpyDeviceToHost);
cudaMemcpy(h_pixs_y, d_pixs_y, nbytes, cudaMemcpyDeviceToHost);
cudaMemcpy(h_pixs_z, d_pixs_z, nbytes, cudaMemcpyDeviceToHost);
 
 
GPU kernel:
 
// Ray-traces one pixel of an N x N image per thread and stores the colour
// components in three separate per-channel arrays.
//
// Launch layout: 1D grid of 1D blocks providing at least N * N threads in
// total. Threads whose global index is >= N * N return without writing.
//
// Parameters:
//   pix_col - passed by value and used only as per-thread scratch; it is
//             overwritten before it is read.
//   N       - image width and height in pixels.
//   pixs_x, pixs_y, pixs_z - output arrays of N * N ints each; element
//             y * N + x receives pix_col.x / pix_col.y / pix_col.z for
//             pixel (x, y).
__global__ void kernel_tray(Vec3 pix_col, int N, int* pixs_x, int* pixs_y, int* pixs_z) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    // A grid rounded up to whole blocks has surplus threads. Without this
    // guard they compute x >= N and write to y * N + x, which either
    // overwrites a valid pixel of a later row or runs past the buffers.
    if (idx >= N * N) {
        return;
    }
    // Row-major mapping: neighbouring threads take neighbouring x, so
    // y * N + x == idx and neighbouring threads write neighbouring elements.
    int x = idx % N;
    int y = idx / N;

    const Vec3 white(255, 255, 255);
    const Vec3 black(0, 0, 0);
    const Vec3 red(255, 0, 0);
    const Sphere sphere(Vec3(N*0.5, N*0.5, 50), 50);
    // Only the centre of this sphere is read below, as the light position.
    const Sphere light(Vec3(0, 0, 50), 1);
    double t;

    // Background colour unless the ray hits the sphere.
    pix_col = black;
    const Ray ray(Vec3(x, y, 0), Vec3(0, 0, 1));
    if (sphere.intersect(ray, t)) {
        const Vec3 hit = ray.o + ray.d*t;
        const Vec3 to_light = light.c - hit;
        // Named `normal` rather than `N` so the image-size parameter is not shadowed.
        const Vec3 normal = sphere.getNormal(hit);
        const double dt = dot(to_light.normalize(), normal.normalize());
        pix_col = (red + white*dt) * 0.5;
        clamp255(pix_col);
    }

    pixs_x[idx] = (int)pix_col.x;
    pixs_y[idx] = (int)pix_col.y;
    pixs_z[idx] = (int)pix_col.z;
}
== Assignment 3 - Optimization ==
122
edits

Navigation menu