GPU621/False Sharing
Contents
Analyzing False Sharing
Group Members
- Kevin Chou
Introduction
The Cache
What is a Cache?
Cache Coherence and Cache Line
False Sharing
Analyzing Workshop Example
#include <iostream> #include <iomanip> #include <cstdlib> #include <chrono> #include <omp.h> #define NUM_THREADS 8 using namespace std::chrono; // report system time void reportTime(const char* msg, steady_clock::duration span) { auto ms = duration_cast<milliseconds>(span); std::cout << msg << " - took - " << ms.count() << " milliseconds" << std::endl; } int main(int argc, char** argv) { if (argc != 2) { std::cerr << argv[0] << ": invalid number of arguments\n"; std::cerr << "Usage: " << argv[0] << " no_of_slices\n"; return 1; } int n = std::atoi(argv[1]); steady_clock::time_point ts, te; // calculate pi by integrating the area under 1/(1 + x^2) in n steps ts = steady_clock::now(); int actual_thread_count; double pi = 0.0f; double sum[NUM_THREADS] = { 0.0f }; double step = 1.0 / (double)n; omp_set_num_threads(NUM_THREADS); #pragma omp parallel { int id, num_threads; double x; id = omp_get_thread_num(); num_threads = omp_get_num_threads(); // get master thread to return how many threads were actually created if (id == 0) { actual_thread_count = num_threads; } // each thread is responsible for calculating the area of a specific set of sections underneath the curve for (int i = id; i < n; i = i + num_threads) { x = ((double)i + 0.5f) * step; sum[id] += 1.0f / (1.0f + x * x); } } // sum up each calculation to get approximation of pi for (int i = 0; i < actual_thread_count; i++) { pi += 4 * sum[i] * step; } te = steady_clock::now(); std::cout << "n = " << n << std::fixed << std::setprecision(15) << "\n pi(exact) = " << 3.141592653589793 << "\n pi(calcd) = " << pi << std::endl; reportTime("Integration", te - ts); }