Changes

A-Team

2,705 bytes removed, 06:23, 1 April 2019

→‎Initial implementation

During assignment 2, we tried a simple kernel in the form of a dot product. It achieved nothing special: as predicted at the end of assignment 1, repeatedly calling cudaMalloc and cudaMemcpy had severe consequences for run time.

====Initial implementation====

~~vector <float> ddot(const vector <float>& m1, const vector <float>& m2, const int m1_rows, const int m1_columns, const int m2_columns) {~~ ~~cudaError_t Error = cudaSuccess;~~ ~~vector<float> product;~~ ~~float* h_p = new float~~[~~m1.size()];~~ ~~float* h_m1 = new float~~[~~m1_rows * m1_columns];~~ ~~for (int i = 0; i < m1.size; i++) {~~ ~~h_m1[i] = m1[i];~~ } ~~float* h_m2 = new float[m1_rows * m2_columns];~~ ~~for (int i = 0; i != m2.size; i++) {~~ ~~h_m2[i] = m2[i];~~ } ~~//declare device variables~~ ~~float* d_m1;~~ ~~float* d_m2;~~ ~~float* d_p;~~ ~~Error = cudaMalloc((void**)&d_m1, m1_rows * m1_columns * sizeof(float));~~ ~~if (Error != cudaSuccess) {~~ ~~cerr << "Failed @ d_m1 " << cudaGetErrorName(Error) << "!";~~ ~~exit(EXIT_FAILURE);~~ } ~~Error = cudaMalloc((void**)&d_m2, m1_rows * m2_columns * sizeof(float));~~ ~~if (Error != cudaSuccess) {~~ ~~cerr << "Failed @ d_m2 " << cudaGetErrorName(Error) << "!";~~ ~~exit(EXIT_FAILURE);~~ } ~~Error = cudaMalloc((void**)&d_p, m1_rows * m1_columns * sizeof(float));~~ ~~if (Error != cudaSuccess) {~~ ~~cerr << "Failed @ d_p " << cudaGetErrorName(Error) << "!";~~ ~~exit(EXIT_FAILURE);~~ } ~~Error = cudaMemcpy(d_m1, h_m1, m1_rows * m1_columns * sizeof(float), cudaMemcpyHostToDevice);~~ ~~if (Error != cudaSuccess) {~~ ~~cerr << "Failed @ Memcpy d_m1 " << cudaGetErrorName(Error) << "!";~~ ~~exit(EXIT_FAILURE);~~ } ~~Error = cudaMemcpy(d_m2, h_m2, m1_rows * m2_columns * sizeof(float), cudaMemcpyHostToDevice);~~ ~~if (Error != cudaSuccess) {~~ ~~cerr << "Failed @ Memcpy d_m2 " << cudaGetErrorName(Error) << "!";~~ ~~exit(EXIT_FAILURE);~~ } ~~//set blocks and call kernel~~ ~~int width = m1_rows;~~ ~~int height = m1_columns;~~ ~~dim3 dBlock(32, 32);~~ ~~dim3 dGrid((width + dBlock.x - 1) / dBlock.x, (height + dBlock.y - 1) / dBlock.y);~~ ~~kdot << < dGrid, dBlock >> > (d_m1, d_m2, d_p, m1_rows, m1_columns, m2_columns);~~ ~~if (Error != cudaSuccess) {~~ ~~cerr << "Failed @ kdot function call " << cudaGetErrorName(Error) << 
"!";~~ ~~exit(EXIT_FAILURE);~~ } ~~//copy device matrix to host matrix~~ ~~Error = cudaMemcpy(h_p, d_p, m1_rows * m1_columns * sizeof(float), cudaMemcpyDeviceToHost);~~ ~~if (Error != cudaSuccess) {~~ ~~cerr << "Failed @ cudaMemcpy from d_p to h_p " << cudaGetErrorName(Error) << "!";~~ ~~exit(EXIT_FAILURE);~~ } ~~//freeCuda & delete~~ ~~//display("C = A B~~ File:~~", h_p, m1_rows, m1_columns);~~ ~~Error = cudaFree(d_m1);~~ ~~cudaCheck(Error);~~ ~~Error = cudaFree(d_m2);~~ ~~cudaCheck(Error);~~ ~~Error = cudaFree(d_p);~~ ~~cudaCheck(Error);~~ ~~delete[] h_m1;~~ ~~delete[] h_m2;~~ ~~cudaDeviceReset();~~ ~~//h_p to vector~~ ~~for (int i = 0; i < (m1_rows * m1_columns); i++) {~~ ~~product~~Example.~~push_back(h_p[i~~jpg]); } ~~delete[~~] ~~h_p;~~ ~~return product;~~ }

=== Assignment 3 ===

Spdjurovic

113

edits

Changes

A-Team

Navigation menu

Personal tools

Namespaces

Variants

Views

More

Search

Navigation

get involved with CDOT

courses

course projects

links

Tools