Difference between revisions of "GPU621/Intel Advisor"

From CDOT Wiki
Jump to: navigation, search
(Created page with "= Intel Parallel Studio Advisor = == Group Members == # [mailto:jespiritu@myseneca.ca?subject=GPU621 Jeffrey Espiritu] # [mailto:tahmed36@myseneca.ca?subject=GPU621 Thaharim...")
 
(Examples)
Line 17: Line 17:
  
 
== Examples ==
 
== Examples ==
 +
 +
<source lang="cpp">
 +
// Workshop 2 - Calculate PI by integrating 1/(1+x^2)
 +
// w2.serial.cpp
 +
 +
#include <iostream>
 +
#include <iomanip>
 +
#include <cstdlib>
 +
#include <chrono>
 +
#include <omp.h>
 +
using namespace std::chrono;
 +
 +
// reportTime - print msg followed by the elapsed time span in milliseconds
 +
//
 +
void reportTime(const char* msg, steady_clock::duration span) {
 +
    auto ms = duration_cast<milliseconds>(span);  // convert the clock's duration to whole milliseconds (truncating)
 +
    std::cout << msg << " - took - " <<
 +
        ms.count() << " milliseconds" << std::endl;  // std::endl ends the line and flushes the stream
 +
}
 +
 +
int main(int argc, char** argv) {  // expects exactly one argument: the number of slices
 +
    if (argc != 2) {
 +
        std::cerr << argv[0] << ": invalid number of arguments\n";
 +
        std::cerr << "Usage: " << argv[0] << "  no_of_slices\n";
 +
        return 1;
 +
    }
 +
    int i;  // NOTE(review): unused here - the parallel region declares its own i
 +
    int nthreads;  // team size, written by thread 0 inside the parallel region
 +
    int n = std::atoi(argv[1]);  // NOTE(review): not validated; n == 0 divides by zero in stepSize below
 +
    int mnt = omp_get_max_threads();  // reported below as "threads available"
 +
    steady_clock::time_point ts, te;  // start and end of the timed section
 +
 +
    double sum = 0.0;  // scalar accumulator
 +
    // calculate pi by integrating the area under 1/(1 + x^2) in n steps
 +
    double pi = 0.0;
 +
    double stepSize = 1.0 / (double)n;  // width of one slice
 +
 +
    ts = steady_clock::now();
 +
 +
    #pragma omp parallel
 +
    {
 +
        int i, tid, nt;  // declared inside the region, so each thread has its own copies
 +
        double x, psum;  // psum is this thread's partial sum
 +
        tid = omp_get_thread_num();
 +
        nt = omp_get_num_threads();
 +
        if (tid == 0) nthreads = nt;  // only thread 0 records the team size
 +
 +
        for (i = tid, psum = 0.0; i < n; i += nt) {  // this thread takes slices tid, tid + nt, tid + 2*nt, ...
 +
            x = ((double)i + 0.5) * stepSize;  // midpoint of slice i
 +
            psum += 1.0 / (1.0 + x * x);
 +
        }
 +
       
 +
        #pragma omp critical
 +
        sum += psum;  // guarded by the critical pragma above: one thread at a time updates sum
 +
    }
 +
   
 +
    pi = 4.0 * sum * stepSize;  // scale the summed heights by 4 and by the slice width
 +
 +
    te = steady_clock::now();
 +
 +
    std::cout << "n = " << n << "\n" <<
 +
        mnt << " threads available\n" <<
 +
        nthreads << " threads used.\nTime = " <<  // NOTE(review): no value is printed after "Time = "
 +
        std::fixed << std::setprecision(15) <<
 +
        "\n pi(exact) = " << 3.141592653589793 <<
 +
        "\n pi(calcd) = " << pi << std::endl;
 +
    reportTime("Integration", te - ts);  // elapsed time of the section between ts and te
 +
}
 +
</source>
  
 
= Intel Advisor Tutorial Example =
 
= Intel Advisor Tutorial Example =

Revision as of 08:17, 22 November 2018

Intel Parallel Studio Advisor

Group Members

  1. Jeffrey Espiritu
  2. Thaharim Ahmed
  3. eMail All

Introduction

Vectorization

Register

Vector Register

Instruction Set Architecture

Examples

// Workshop 2 - Calculate PI by integrating 1/(1+x^2)
// w2.serial.cpp

#include <iostream>
#include <iomanip>
#include <cstdlib>
#include <chrono>
#include <omp.h>
using namespace std::chrono;

// reportTime - print a label followed by an elapsed time span
//   msg  - label written in front of the timing
//   span - elapsed interval; shown in whole milliseconds (truncated)
void reportTime(const char* msg, steady_clock::duration span) {
    const milliseconds elapsed = duration_cast<milliseconds>(span);
    std::cout << msg << " - took - " << elapsed.count() << " milliseconds";
    // std::endl both terminates the line and flushes the stream
    std::cout << std::endl;
}

// main - approximate pi by integrating 1/(1 + x^2) over [0, 1] in n slices
//
// argv[1] is the number of slices and must be a positive integer. The slices
// are dealt out round-robin to the threads of an OpenMP parallel region; each
// thread accumulates a private partial sum and then adds it to the shared
// total inside a critical section.
//
// returns 0 on success, 1 on a wrong argument count, 2 on a non-positive
// slice count
int main(int argc, char** argv) {
    if (argc != 2) {
        std::cerr << argv[0] << ": invalid number of arguments\n";
        std::cerr << "Usage: " << argv[0] << "  no_of_slices\n";
        return 1;
    }

    // atoi yields 0 for non-numeric text, so this one check also rejects
    // garbage; n == 0 would make stepSize infinite and pi come out as NaN
    const int n = std::atoi(argv[1]);
    if (n <= 0) {
        std::cerr << argv[0] << ": no_of_slices must be a positive integer\n";
        return 2;
    }

    const int mnt = omp_get_max_threads();
    int nthreads = 1;  // overwritten by thread 0 with the actual team size

    double sum = 0.0;  // scalar accumulator shared by all threads
    const double stepSize = 1.0 / static_cast<double>(n);  // width of one slice

    const steady_clock::time_point ts = steady_clock::now();

    #pragma omp parallel
    {
        const int tid = omp_get_thread_num();
        const int nt = omp_get_num_threads();
        if (tid == 0) nthreads = nt;

        // thread tid handles slices tid, tid + nt, tid + 2 * nt, ...
        double psum = 0.0;
        for (int i = tid; i < n; i += nt) {
            const double x = (static_cast<double>(i) + 0.5) * stepSize;  // slice midpoint
            psum += 1.0 / (1.0 + x * x);
        }

        // one thread at a time folds its partial sum into the shared total
        #pragma omp critical
        sum += psum;
    }

    const double pi = 4.0 * sum * stepSize;

    const steady_clock::time_point te = steady_clock::now();

    std::cout << "n = " << n << "\n" <<
        mnt << " threads available\n" <<
        nthreads << " threads used.\nTime = " <<
        std::fixed << std::setprecision(15) <<
        "\n pi(exact) = " << 3.141592653589793 <<
        "\n pi(calcd) = " << pi << std::endl;
    reportTime("Integration", te - ts);
}

Intel Advisor Tutorial Example

Loop Unrolling

Pointer Alias

Memory Alignment

Dependencies

Summary