Open main menu

CDOT Wiki β

Changes

GPU610 Team Tsubame

5,764 bytes added, 15:30, 4 April 2017
Maze
6. In VS, open MazePng.cpp and replace the entire contents of the file with the following parallelized code:
/*
 * MazePng.cpp
 *
 *  Created on: 12 Jul 2013
 *      Author: yac
 */

#include "MazePng.h"
// Include the parallel code...
#include "MazePng.cuh"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

MazePng::MazePng(const unsigned int width, const unsigned int height) :
        AbstractMaze(width, height) {
}

MazePng::~MazePng() {
}

/*
 * Renders the maze into "out.png" as an 8-bit RGB image.
 *
 * The image is (2 * width + 1) x (2 * height + 1) pixels. The pixel rows are
 * allocated and filled by kw_drawWalls() and released here once the file has
 * been written.
 *
 * scale is currently unused.
 *
 * If the output file cannot be opened, or libpng cannot create its write
 * structures, a message is printed on stderr and nothing is written; the
 * pixel rows are released in every case.
 */
void MazePng::toPng(unsigned int scale) {
    (void) scale; // kept for interface compatibility

    const int pixWidth = (this->width * 2) + 1;
    const int pixHeight = (this->height * 2) + 1;
    const png_byte color_type = PNG_COLOR_TYPE_RGB;
    const png_byte bit_depth = 8;

    png_bytep *row_pointers = new png_bytep[pixHeight];

    // Call the kernel wrapper (allocates and fills every row)...
    kw_drawWalls(row_pointers, this->cells, this->width, this->height,
            pixWidth, pixHeight, WALL, start);
    // createImage(row_pointers, 0);

    FILE *fp = fopen("out.png", "wb");
    if (fp == NULL) {
        perror("MazePng::toPng: cannot open out.png");
    } else {
        png_structp png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING,
                NULL, NULL, NULL);
        png_infop info_ptr = NULL;
        if (png_ptr != NULL) {
            info_ptr = png_create_info_struct(png_ptr);
        }

        if (png_ptr == NULL || info_ptr == NULL) {
            fprintf(stderr, "MazePng::toPng: cannot initialise libpng\n");
        } else {
            png_init_io(png_ptr, fp);
            png_set_IHDR(png_ptr, info_ptr, pixWidth, pixHeight, bit_depth,
                    color_type, PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_BASE,
                    PNG_FILTER_TYPE_BASE);
            png_write_info(png_ptr, info_ptr);
            png_write_image(png_ptr, row_pointers);
            png_write_end(png_ptr, NULL);
        }

        if (png_ptr != NULL) {
            // Pass a null info pointer when the info struct was never created.
            png_destroy_write_struct(&png_ptr,
                    info_ptr != NULL ? &info_ptr : (png_infopp) NULL);
        }
        fclose(fp);
    }

    // The rows were allocated with new[] inside kw_drawWalls().
    for (int i = 0; i < pixHeight; ++i) {
        delete[] row_pointers[i];
    }
    delete[] row_pointers;
}

/*
 * Sets the three colour bytes of pixel (x, y) to the same value `type`.
 * Each row stores 3 bytes per pixel.
 */
void inline MazePng::setPixel(png_bytep *row_pointers, unsigned int x,
        unsigned int y, png_byte type) {
    png_bytep pixel = row_pointers[y] + 3 * x;
    pixel[0] = pixel[1] = pixel[2] = type;
}

/*
 * Serial (CPU) version of the image fill: marks the PATH pixels of the maze.
 * Only PATH pixels are written here; every other pixel keeps whatever value
 * the rows already hold.
 *
 * scale is currently unused.
 */
void MazePng::createImage(png_bytep *row_pointers, unsigned int scale) {
    (void) scale;

    // Opening in the top row, above cell `start`.
    if (start < width) {
        setPixel(row_pointers, start * 2 + 1, 0, PATH);
    }

    for (unsigned int y = 0; y < height; ++y) { // for each cell row in [0, height)...
        for (unsigned int x = 0; x < width; ++x) { // for each cell column in [0, width)...
            const unsigned int cx = 1 + (x * 2); // pixel column of the cell centre (odd)
            const unsigned int cy = 1 + (y * 2); // pixel row of the cell centre (odd)

            // Bits 6-7 of the cell select which neighbouring pixels are opened.
            switch ((cells[(y * width) + x] & 0xC0) >> 6) {
            case 2:
                setPixel(row_pointers, cx + 1, cy, PATH); // even x, odd y
                break;
            case 1:
                setPixel(row_pointers, cx, cy + 1, PATH); // odd x, even y
                break;
            case 0:
                setPixel(row_pointers, cx + 1, cy, PATH); // even x, odd y
                setPixel(row_pointers, cx, cy + 1, PATH); // odd x, even y
                break;
            }

            setPixel(row_pointers, cx, cy, PATH); // the cell centre itself (odd x, odd y)
        }
    }
}
7. In VS, right click on the project in the Solution explorer and Add -> '''New Item'''; Add a new '''Cuda c++ header''' file, name it '''"MazePng.cuh"'''
8. Copy and paste the following code into '''"MazePng.cuh"''':

// MazePng.cuh
#ifndef MAZEPNG_CUH_
#define MAZEPNG_CUH_

#include "MazePng.h"
#include <cuda_runtime.h>

// Threads per block used when launching the drawing kernel.
const int ntpb = 1024;

// Allocates every row of row_pointers (pixHeight rows of pixWidth * 3 bytes,
// each with new[]) and fills them with the maze image computed on the GPU.
// The caller owns the rows and must delete[] each one.
void kw_drawWalls(png_bytep*& row_pointers, const short* cells, const int width, const int height, const int pixWidth, const int pixHeight, const png_byte WALL, const int start);

#endif

9. In VS, right click on the project in the Solution explorer and Add -> '''New Item'''; Add a new '''Cuda c++''' file, name it '''"MazePng.cu"'''
10. Copy and paste the following code into '''"MazePng.cu"''':

// MazePng.cu
#include "MazePng.cuh"
#include <cstdio>
#include <cstring>

namespace {

// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess.
bool cudaOk(cudaError_t status, const char* what)
{
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "kw_drawWalls: %s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

} // namespace

// Colours one pixel of the maze image per thread.
//
// Launch layout: 1-D grid of 1-D blocks with at least pixWidth * pixHeight
// threads in total; surplus threads return immediately. No shared memory.
//
//   rows       device buffer of pixHeight * pixWidth * 3 bytes (RGB, row-major)
//   cells      device copy of the width * height maze cells
//   wallColor  value written to all three bytes of a wall pixel
//   pathColor  value written to all three bytes of a path pixel
//
// Every thread decides the colour of its own pixel and writes only that
// pixel's three bytes, so no two threads touch the same byte and no barrier
// is needed.
__global__ void k_drawWalls(png_byte* rows, const short* cells,
                            const int width, const int height,
                            const int pixWidth, const int pixHeight,
                            const png_byte wallColor, const png_byte pathColor)
{
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i >= pixWidth * pixHeight) {
        return;
    }

    const int px = i % pixWidth;
    const int py = i / pixWidth;

    // Cell whose centre is at, or immediately left of / above, this pixel.
    const int x = (px - 1) / 2;
    const int y = (py - 1) / 2;

    png_byte color = wallColor;
    if (px > 0 && py > 0 && x < width && y < height) {
        const bool oddX = (px % 2) != 0;
        const bool oddY = (py % 2) != 0;
        if (oddX && oddY) {
            // Cell centre: always part of the path.
            color = pathColor;
        } else if (oddX != oddY) {
            // Bits 6-7 of the cell select which neighbouring pixels are open.
            const int c = (cells[y * width + x] & 0xC0) >> 6;
            const bool rightOpen = (c == 2 || c == 0); // pixel right of the centre
            const bool belowOpen = (c == 1 || c == 0); // pixel below the centre
            if ((oddY && rightOpen) || (oddX && belowOpen)) {
                color = pathColor;
            }
        }
    }

    const int idx = py * (pixWidth * 3) + px * 3;
    rows[idx] = rows[idx + 1] = rows[idx + 2] = color;
}

// Host wrapper: allocates the image rows, draws the maze on the GPU and
// copies the result into the rows.
//
//   row_pointers  array of pixHeight row pointers; every entry is allocated
//                 here with new[] (pixWidth * 3 bytes) and owned by the caller
//   cells         host array of width * height maze cells
//   WALL          value used for wall pixels
//   start         cell column of the opening in the top row (ignored when it
//                 is outside [0, width))
//
// Every row is pre-filled with WALL, so if any CUDA call fails the error is
// reported on stderr and the rows are left as a fully defined all-wall image.
void kw_drawWalls(png_bytep*& row_pointers, const short* cells, const int width, const int height, const int pixWidth, const int pixHeight, const png_byte WALL, const int start)
{
    const int rowLen = pixWidth * 3;
    for (int i = 0; i < pixHeight; ++i) {
        row_pointers[i] = new png_byte[rowLen];
        memset(row_pointers[i], WALL, rowLen);
    }

    const int nPixels = pixWidth * pixHeight;
    if (nPixels <= 0) {
        return;
    }

    const int nblks = (nPixels + ntpb - 1) / ntpb;
    const size_t szRows = (size_t)pixHeight * rowLen * sizeof(png_byte);
    const size_t szCells = (size_t)height * width * sizeof(short);

    png_byte* d_rows = NULL;
    short* d_cells = NULL;

    bool ok = cudaOk(cudaMalloc((void**)&d_rows, szRows), "cudaMalloc(rows)")
           && cudaOk(cudaMalloc((void**)&d_cells, szCells), "cudaMalloc(cells)")
           && cudaOk(cudaMemcpy(d_cells, cells, szCells, cudaMemcpyHostToDevice), "cudaMemcpy(cells)");

    if (ok) {
        k_drawWalls<<<nblks, ntpb>>>(d_rows, d_cells, width, height, pixWidth, pixHeight, WALL, PATH);
        ok = cudaOk(cudaGetLastError(), "k_drawWalls launch");
    }

    if (ok) {
        // One blocking device-to-host copy into a contiguous staging buffer,
        // then scatter into the separately allocated rows. The blocking copy
        // also reports any error raised while the kernel was running.
        png_byte* staging = new png_byte[szRows];
        ok = cudaOk(cudaMemcpy(staging, d_rows, szRows, cudaMemcpyDeviceToHost), "cudaMemcpy(rows)");
        if (ok) {
            for (int i = 0; i < pixHeight; ++i) {
                memcpy(row_pointers[i], staging + (size_t)i * rowLen, rowLen);
            }
        }
        delete[] staging;
    }

    // cudaFree(NULL) is a no-op, so this is safe after a partial failure.
    cudaFree(d_cells);
    cudaFree(d_rows);

    // Opening in the top row, above cell `start`.
    if (ok && start >= 0 && start < width) {
        const int x = start * 2 + 1;
        row_pointers[0][3 * x] = row_pointers[0][3 * x + 1] = row_pointers[0][3 * x + 2] = PATH;
    }
}

11. In VS, change the Configuration from '''Debug to Release''' and Platform from '''x86 to x64'''
12. In VS, go to menu Build -> Build Solution
46
edits