Changes

Jump to: navigation, search

A-Team

2,705 bytes removed, 05:23, 1 April 2019
Initial implementation
During assignment 2, we tried a simple kernel in the shape of a dot product. It achieved nothing special: as predicted at the end of assignment 1, calling cudaMalloc and cudaMemcpy on every invocation had a severe cost in run time.
====Initial implementation====
// Reports a failed CUDA call in this file's "Failed @ <stage> <error>!" format
// and terminates the process; does nothing when status is cudaSuccess.
static void ddotRequire(cudaError_t status, const char* prefix, const char* what) {
    if (status != cudaSuccess) {
        cerr << "Failed @ " << prefix << what << " " << cudaGetErrorName(status) << "!";
        exit(EXIT_FAILURE);
    }
}

// Allocates a device buffer holding max(host.size(), minCount) floats and
// copies all of `host` into its front. Any elements beyond host.size() are
// zero-filled so the kernel never reads uninitialised device memory.
static float* ddotUpload(const vector <float>& host, size_t minCount, const char* what) {
    const size_t count = host.size() > minCount ? host.size() : minCount;
    float* device = NULL;
    ddotRequire(cudaMalloc((void**)&device, count * sizeof(float)), "", what);
    if (host.size() < count) {
        ddotRequire(cudaMemset(device, 0, count * sizeof(float)), "Memset ", what);
    }
    if (!host.empty()) {
        // Copy straight from the vector's storage: the element count comes from
        // the vector itself, so a mismatch with the dimension arguments can no
        // longer overrun a separately sized staging buffer.
        ddotRequire(cudaMemcpy(device, host.data(), host.size() * sizeof(float), cudaMemcpyHostToDevice), "Memcpy ", what);
    }
    return device;
}

// Runs the kdot kernel on m1 and m2 and returns the result as a host vector.
//
// Parameters:
//   m1, m2      flattened input matrices, uploaded to the device in full.
//   m1_rows, m1_columns, m2_columns
//               dimensions forwarded unchanged to kdot.
// Returns:
//   A vector of m1_rows * m1_columns floats copied back from the device
//   (empty when m1_rows or m1_columns is not positive).
// Errors:
//   Any failing CUDA call prints "Failed @ ..." to cerr and exits the process.
//   The cudaFree results are handed to cudaCheck, which is defined outside this
//   block; its failure behaviour is not visible here.
//
// NOTE(review): the output length (m1_rows * m1_columns) and the launch
// geometry (m1_rows x m1_columns threads) are kept exactly as they were. If
// kdot computes a matrix product, the result would presumably be
// m1_rows x m2_columns instead -- confirm against the kdot kernel before
// relying on non-square inputs.
vector <float> ddot(const vector <float>& m1, const vector <float>& m2, const int m1_rows, const int m1_columns, const int m2_columns) {
    vector<float> product;

    // Nothing to compute, and a zero-sized grid is an invalid launch configuration.
    if (m1_rows <= 0 || m1_columns <= 0) {
        return product;
    }

    // Minimum element counts: the sizes the device buffers have always had.
    const size_t productCount = static_cast<size_t>(m1_rows) * m1_columns;
    const size_t m2MinCount = m2_columns > 0 ? static_cast<size_t>(m1_rows) * m2_columns : 0;

    // Device inputs are never smaller than before and never truncate the vectors.
    float* d_m1 = ddotUpload(m1, productCount, "d_m1");
    float* d_m2 = ddotUpload(m2, m2MinCount, "d_m2");

    float* d_p = NULL;
    ddotRequire(cudaMalloc((void**)&d_p, productCount * sizeof(float)), "", "d_p");

    // 32x32 thread blocks; the grid is rounded up so every element is covered.
    const dim3 dBlock(32, 32);
    const dim3 dGrid((m1_rows + dBlock.x - 1) / dBlock.x, (m1_columns + dBlock.y - 1) / dBlock.y);

    kdot<<<dGrid, dBlock>>>(d_m1, d_m2, d_p, m1_rows, m1_columns, m2_columns);
    // A kernel launch returns no status; launch failures are only visible
    // through cudaGetLastError().
    ddotRequire(cudaGetLastError(), "", "kdot function call");

    // The blocking copy on the default stream waits for the kernel to finish
    // and writes directly into the returned vector.
    product.resize(productCount);
    ddotRequire(cudaMemcpy(product.data(), d_p, productCount * sizeof(float), cudaMemcpyDeviceToHost), "", "cudaMemcpy from d_p to h_p");

    cudaError_t Error = cudaFree(d_m1);
    cudaCheck(Error);
    Error = cudaFree(d_m2);
    cudaCheck(Error);
    Error = cudaFree(d_p);
    cudaCheck(Error);

    // cudaDeviceReset() is intentionally no longer called here: it destroys the
    // whole device context, including allocations owned by the caller, and
    // forces the context to be rebuilt on the next call.
    return product;
}
=== Assignment 3 ===
113
edits

Navigation menu