113
edits
Changes
A-Team
→Initial implementation
====Initial implementation====
[[File:kernel_ms1_call.jpg]]
// Version 1: naive matrix product kernel. Each thread produces one element of
// the result as the dot product of one row of d_a with one column of d_b.
//
// Computes d_p = d_a * d_b, indexing all three buffers as flat row-major
// arrays:
//   d_a : ni x nk, element (i, k) read from  d_a[i * nk + k]
//   d_b : nk x nj, element (k, j) read from  d_b[k * nj + j]
//   d_p : ni x nj, element (i, j) written to d_p[i * nj + j]
//
// Launch layout: 2D grid of 2D blocks. The x dimension spans rows (i) and the
// y dimension spans columns (j). The grid must cover at least ni threads in x
// and nj threads in y; surplus threads are masked off by the bounds check, so
// the matrix sizes need not be multiples of the block size.
//
// No shared memory is used and the kernel contains no synchronization.
__global__ void kdot(const float* d_a, const float* d_b, float* d_p,
                     int ni, int nj, int nk) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;  // output row
    int j = blockIdx.y * blockDim.y + threadIdx.y;  // output column

    // Guard the grid tail: threads past the matrix edge do nothing.
    if (i < ni && j < nj) {
        // Build flat offsets in size_t so i * nk and i * nj cannot overflow
        // a 32-bit int when the matrices are large.
        const size_t row = static_cast<size_t>(i);
        const size_t col = static_cast<size_t>(j);
        const size_t a_row_base = row * static_cast<size_t>(nk);

        float sum = 0.0f;
        for (int k = 0; k < nk; k++) {
            const size_t b_index = static_cast<size_t>(k) * static_cast<size_t>(nj) + col;
            sum += d_a[a_row_base + static_cast<size_t>(k)] * d_b[b_index];
        }
        d_p[row * static_cast<size_t>(nj) + col] = sum;
    }
}
// [[File:kernel_ms1.png]]  (wiki image reference that accompanied this listing)
=== Assignment 3 ===