Changes

Jump to: navigation, search

DPS921/Intel Advisor

1,637 bytes added, 17:11, 2 December 2020
Intel Roof-line Analysis
Every function or loop has a specific arithmetic intensity (AI), and when it is run we can record its GFLOPS. Because its AI does not change, any optimization we make changes only the performance. This is useful when we want to measure the effect of a given change or optimization.
 
 
<source>#include <iostream>
#include <iomanip>
#include <cstdlib>
#include <chrono>
#include <omp.h>
using namespace std::chrono;
#define NUM_THREADS 1
// Prints the elapsed time of a labelled phase to standard output.
//
// msg  - label printed in front of the timing
// span - elapsed duration; truncated to whole milliseconds for display
void reportTime(const char* msg, steady_clock::duration span) {
    const auto elapsedMs = duration_cast<milliseconds>(span).count();
    std::cout << msg << " - took - ";
    std::cout << elapsedMs << " milliseconds" << std::endl;
}
 
// Approximates pi by midpoint-rule integration of 4 / (1 + x^2) over [0, 1],
// splitting the n slices across an OpenMP thread team, then prints the result
// next to a reference value together with the elapsed time.
//
// argv[1] - number of slices; must parse to a positive integer
// Returns 0 on success, 1 on a bad argument count or a non-positive slice count.
int main(int argc, char** argv) {
    if (argc != 2) {
        std::cerr << argv[0] << ": invalid number of arguments\n";
        std::cerr << "Usage: " << argv[0] << " no_of_slices\n";
        return 1;
    }
    const int n = std::atoi(argv[1]);
    if (n <= 0) {
        // A zero or negative slice count would give an infinite or negative
        // step size and a meaningless result, so reject it up front.
        std::cerr << argv[0] << ": no_of_slices must be a positive integer\n";
        std::cerr << "Usage: " << argv[0] << " no_of_slices\n";
        return 1;
    }

    // calculate pi by integrating the area under 1/(1 + x^2) in n steps
    const steady_clock::time_point ts = steady_clock::now();
    int nthreads = 0;
    // Must start at zero: every thread adds its partial result into it.
    double pi = 0.0;
    const double stepSize = 1.0 / static_cast<double>(n);
    omp_set_num_threads(NUM_THREADS);
#pragma omp parallel
    {
        const int tid = omp_get_thread_num();
        const int nt = omp_get_num_threads();
        // Only thread 0 records the team size, so there is a single writer.
        if (tid == 0) nthreads = nt;

        // Thread tid handles slices tid, tid + nt, tid + 2*nt, ...
        double sum = 0.0;
        for (int i = tid; i < n; i += nt) {
            const double x = (static_cast<double>(i) + 0.5) * stepSize;
            sum += 1.0 / (1.0 + x * x);
        }

        // Serialize the update of the shared accumulator.
#pragma omp critical
        pi += 4.0 * sum * stepSize;
    }

    const steady_clock::time_point te = steady_clock::now();

    std::cout << "n = " << n << " " << nthreads <<
        std::fixed << std::setprecision(15) <<
        "\n pi(exact) = " << 3.141592653589793 <<
        "\n pi(calcd) = " << pi << std::endl;
    reportTime("Integration", te - ts);
}
 
 
</source>
= Intel Memory Access Pattern Analysis =
62
edits

Navigation menu