Changes

GPU621/Intel oneMKL - Math Kernel Library

18 bytes removed, 00:55, 1 December 2021

→ Serial

#include <stdlib.h>

#include <time.h>

/* Consider adjusting LOOP_COUNT based on the performance of your computer */

/* to make sure that total run time is at least 1 second */

#define LOOP_COUNT 220 //220 for more accurate statistics

int main()

{

double sum;

double s_initial, s_elapsed;

printf("\n This example demonstrates threading impact on computing real matrix product \n"

" C=alpha*A*B+beta*C using Intel(R) MKL function dgemm, where A, B, and C are \n"

" matrices and alpha and beta are double precision scalars \n\n");

m = 2000, p = 200, n = 1000;

printf(" Initializing data for matrix multiplication C=A*B for matrix \n"

" A(%ix%i) and matrix B(%ix%i)\n\n", m, p, p, n);

alpha = 1.0; beta = 0.0;

printf(" Allocating memory for matrices aligned on 64-byte boundary for better \n"

" performance \n\n");

return 1;

}

printf(" Intializing matrix data \n\n");

for (i = 0; i < (m * p); i++) {

A[i] = (double)(i + 1);

}

for (i = 0; i < (p * n); i++) {

B[i] = (double)(-i - 1);

}

for (i = 0; i < (m * n); i++) {

C[i] = 0.0;

} clock_t startTime = clock();

for (i = 0; i < m; i++) {

for (j = 0; j < n; j++) {

}

clock_t endTime = clock();

s_elapsed = (endTime - startTime) / LOOP_COUNT;

printf(" == Matrix multiplication using triple nested loop completed == \n"

" == at %.5f milliseconds == \n\n", (s_elapsed * 1000));

printf(" Deallocating memory \n\n");

free(A);

free(B);

free(C);

if (s_elapsed < 0.9 / LOOP_COUNT) {

s_elapsed = 1.0 / LOOP_COUNT / s_elapsed;

" of measurements\n\n", i);

}

printf(" Example completed. \n\n");

return 0;

Menglinwu

37

edits

CDOT Wiki β

Changes

GPU621/Intel oneMKL - Math Kernel Library

CDOT Wiki β