Difference between revisions of "GPU621/Intel Advisor"
(Created page with "= Intel Parallel Studio Advisor = == Group Members == # [mailto:jespiritu@myseneca.ca?subject=GPU621 Jeffrey Espiritu] # [mailto:tahmed36@myseneca.ca?subject=GPU621 Thaharim...") |
(→Examples) |
||
Line 17: | Line 17: | ||
== Examples == | == Examples == | ||
+ | |||
+ | <source lang="cpp"> | ||
+ | // Workshop 2 - Calculate PI by integrating 1/(1+x^2) | ||
+ | // w2.serial.cpp | ||
+ | |||
+ | #include <iostream> | ||
+ | #include <iomanip> | ||
+ | #include <cstdlib> | ||
+ | #include <chrono> | ||
+ | #include <omp.h> | ||
+ | using namespace std::chrono; | ||
+ | |||
+ | // report system time | ||
+ | // | ||
+ | void reportTime(const char* msg, steady_clock::duration span) { | ||
+ | auto ms = duration_cast<milliseconds>(span); | ||
+ | std::cout << msg << " - took - " << | ||
+ | ms.count() << " milliseconds" << std::endl; | ||
+ | } | ||
+ | |||
+ | int main(int argc, char** argv) { | ||
+ | if (argc != 2) { | ||
+ | std::cerr << argv[0] << ": invalid number of arguments\n"; | ||
+ | std::cerr << "Usage: " << argv[0] << " no_of_slices\n"; | ||
+ | return 1; | ||
+ | } | ||
+ | int i; | ||
+ | int nthreads; | ||
+ | int n = std::atoi(argv[1]); | ||
+ | int mnt = omp_get_max_threads(); | ||
+ | steady_clock::time_point ts, te; | ||
+ | |||
+ | double sum = 0.0; // scalar accumulator | ||
+ | // calculate pi by integrating the area under 1/(1 + x^2) in n steps | ||
+ | double pi = 0.0; | ||
+ | double stepSize = 1.0 / (double)n; | ||
+ | |||
+ | ts = steady_clock::now(); | ||
+ | |||
+ | #pragma omp parallel | ||
+ | { | ||
+ | int i, tid, nt; | ||
+ | double x, psum; | ||
+ | tid = omp_get_thread_num(); | ||
+ | nt = omp_get_num_threads(); | ||
+ | if (tid == 0) nthreads = nt; | ||
+ | |||
+ | for (i = tid, psum = 0.0; i < n; i += nt) { | ||
+ | x = ((double)i + 0.5) * stepSize; | ||
+ | psum += 1.0 / (1.0 + x * x); | ||
+ | } | ||
+ | |||
+ | #pragma omp critical | ||
+ | sum += psum; | ||
+ | } | ||
+ | |||
+ | pi = 4.0 * sum * stepSize; | ||
+ | |||
+ | te = steady_clock::now(); | ||
+ | |||
+ | std::cout << "n = " << n << "\n" << | ||
+ | mnt << " threads available\n" << | ||
+ | nthreads << " threads used.\nTime = " << | ||
+ | std::fixed << std::setprecision(15) << | ||
+ | "\n pi(exact) = " << 3.141592653589793 << | ||
+ | "\n pi(calcd) = " << pi << std::endl; | ||
+ | reportTime("Integration", te - ts); | ||
+ | } | ||
+ | </source> | ||
= Intel Advisor Tutorial Example = | = Intel Advisor Tutorial Example = |
Revision as of 08:17, 22 November 2018
Contents
Intel Parallel Studio Advisor
Group Members
Introduction
Vectorization
Register
Vector Register
Instruction Set Architecture
Examples
// Workshop 2 - Calculate PI by integrating 1/(1+x^2)
// w2.serial.cpp
#include <iostream>
#include <iomanip>
#include <cstdlib>
#include <chrono>
#include <omp.h>
using namespace std::chrono;
// report system time
//
void reportTime(const char* msg, steady_clock::duration span) {
auto ms = duration_cast<milliseconds>(span);
std::cout << msg << " - took - " <<
ms.count() << " milliseconds" << std::endl;
}
int main(int argc, char** argv) {
if (argc != 2) {
std::cerr << argv[0] << ": invalid number of arguments\n";
std::cerr << "Usage: " << argv[0] << " no_of_slices\n";
return 1;
}
int i;
int nthreads;
int n = std::atoi(argv[1]);
int mnt = omp_get_max_threads();
steady_clock::time_point ts, te;
double sum = 0.0; // scalar accumulator
// calculate pi by integrating the area under 1/(1 + x^2) in n steps
double pi = 0.0;
double stepSize = 1.0 / (double)n;
ts = steady_clock::now();
#pragma omp parallel
{
int i, tid, nt;
double x, psum;
tid = omp_get_thread_num();
nt = omp_get_num_threads();
if (tid == 0) nthreads = nt;
for (i = tid, psum = 0.0; i < n; i += nt) {
x = ((double)i + 0.5) * stepSize;
psum += 1.0 / (1.0 + x * x);
}
#pragma omp critical
sum += psum;
}
pi = 4.0 * sum * stepSize;
te = steady_clock::now();
std::cout << "n = " << n << "\n" <<
mnt << " threads available\n" <<
nthreads << " threads used.\nTime = " <<
std::fixed << std::setprecision(15) <<
"\n pi(exact) = " << 3.141592653589793 <<
"\n pi(calcd) = " << pi << std::endl;
reportTime("Integration", te - ts);
}