18
edits
Changes
no edit summary
== Case Studies between STL & TBB ==
=== <u>Comparison of TBB parallel sort, STL sort</u>===
<pre>
#include <stddef.h>
STL parallel sort performed similarly to TBB parallel sort<br>
<br>
=== <u>Comparison of TBB inclusive scan, STL inclusive scan</u>===
<pre>
#include <functional>
#include <iostream>
#include <iterator>
#include <numeric>
#include <vector>
#include <random>
#include <chrono>
#include <execution>
#include <tbb/tbb.h>
const size_t testSize = 10'000'000;
const int iterationCount = 5;
// Prints one benchmark line of the form "<tag>: <elapsed>ms".
//   tag       - label identifying the measured algorithm
//   result    - output of the measured algorithm; accepted but not inspected here
//   startTime - time point captured immediately before the measured call
//   endTime   - time point captured immediately after the measured call
void print_results(const char* const tag, const std::vector<int>& result,
                   std::chrono::high_resolution_clock::time_point startTime,
                   std::chrono::high_resolution_clock::time_point endTime) {
    // A floating-point millisecond duration keeps the sub-millisecond part.
    using FractionalMs = std::chrono::duration<double, std::milli>;
    const FractionalMs elapsed = endTime - startTime;
    printf("%s: %fms\n", tag, elapsed.count());
}
int main()
{
std::uniform_int_distribution<int> dis(1, 10);
std::random_device rd;
std::vector<int> data(testSize);
//generating data
for (auto& d : data) {
d = dis(rd);
}
//Serial inclusive sum
for (int i = 0; i < iterationCount; ++i)
{
std::vector<int> result(data);
const auto startTime = std::chrono::high_resolution_clock::now();
std::inclusive_scan(std::execution::seq, data.begin(), data.end(), result.begin());
const auto endTime = std::chrono::high_resolution_clock::now();
print_results("STL Serial Inclusive sum", result, startTime, endTime);
std::cout << "Scan result: " << result[testSize - 1] << "\n";
}
//Inclusive sum parallel unseq
for (int i = 0; i < iterationCount; ++i)
{
std::vector<int> result(data);
const auto startTime = std::chrono::high_resolution_clock::now();
std::inclusive_scan(std::execution::par_unseq, data.begin(), data.end(), result.begin());
const auto endTime = std::chrono::high_resolution_clock::now();
print_results("STL Inclusive sum parallel unseq", result, startTime, endTime);
std::cout << "Scan result: " << result[testSize - 1] << "\n";
}
//Inclusive sum parallel
for (int i = 0; i < iterationCount; ++i)
{
std::vector<int> result(data);
const auto startTime = std::chrono::high_resolution_clock::now();
std::inclusive_scan(std::execution::par, data.begin(), data.end(), result.begin());
const auto endTime = std::chrono::high_resolution_clock::now();
print_results("STL Inclusive sum parallel", result, startTime, endTime);
std::cout << "Scan result: " << result[testSize - 1] << "\n";
}
for (int i = 0; i < iterationCount; ++i)
{
std::vector<int> result(data);
auto body = [&](const tbb::blocked_range<int>& r, int sum, bool is_final_scan)->int {
int temp = sum;
for (int i = r.begin(); i < r.end(); ++i) {
temp = temp + data[i];
if (is_final_scan)
result[i] = temp;
}
return temp;
};
const auto startTime = std::chrono::high_resolution_clock::now();
tbb::parallel_scan(tbb::blocked_range<int>(0, testSize),0, body, [](int left, int right) {
return left + right;
}
);
const auto endTime = std::chrono::high_resolution_clock::now();
print_results("TBB Inclusive sum parallel", result, startTime, endTime);
std::cout << "Scan result: " << result[testSize - 1] << "\n";
}
}
</pre>
Results on an Intel i7-3770K, average of 5 runs:
[[File:scancomparison.PNG |thumb|center|600px| Results of STL vs TBB scan algorithms]]
<br>
Raw Results:
<pre>
STL Serial Inclusive sum: 9.695900ms
Scan result: 55000150
STL Serial Inclusive sum: 13.188200ms
Scan result: 55000150
STL Serial Inclusive sum: 9.139700ms
Scan result: 55000150
STL Serial Inclusive sum: 10.686900ms
Scan result: 55000150
STL Serial Inclusive sum: 7.812900ms
Scan result: 55000150
STL Inclusive sum parallel unseq: 39.005100ms
Scan result: 55000150
STL Inclusive sum parallel unseq: 29.428300ms
Scan result: 55000150
STL Inclusive sum parallel unseq: 30.756500ms
Scan result: 55000150
STL Inclusive sum parallel unseq: 26.180600ms
Scan result: 55000150
STL Inclusive sum parallel unseq: 28.135300ms
Scan result: 55000150
STL Inclusive sum parallel: 28.015000ms
Scan result: 55000150
STL Inclusive sum parallel: 30.922700ms
Scan result: 55000150
STL Inclusive sum parallel: 38.238000ms
Scan result: 55000150
STL Inclusive sum parallel: 29.686100ms
Scan result: 55000150
STL Inclusive sum parallel: 28.986200ms
Scan result: 55000150
TBB Inclusive sum parallel: 59.180100ms
Scan result: 55000150
TBB Inclusive sum parallel: 13.341900ms
Scan result: 55000150
TBB Inclusive sum parallel: 13.508600ms
Scan result: 55000150
TBB Inclusive sum parallel: 10.201700ms
Scan result: 55000150
TBB Inclusive sum parallel: 9.710400ms
Scan result: 55000150
</pre>