70
edits
Changes
→Analysis
==== Tony ====
Subject: Jacobi's method for Poisson's equation
===== Source Code =====
{| class="wikitable mw-collapsible mw-collapsed"
! poissan.h
|-
|
<source>
#ifndef POISSON_H
#define POISSON_H
#include <fstream>
namespace DPS{
class Poisson {
size_t nRowsTotal;
size_t nColumns;
float* data;
int bufferSide;
void update (size_t startRow, size_t endRow, const float wx, const float wy);
void bufferSwitch(){ bufferSide = 1 - bufferSide; };
public:
Poisson(std::ifstream& ifs);
Poisson(const size_t r, const size_t c, float* d);
~Poisson(){ delete[] data; };
float* operator()(const size_t iteration, const float wx, const float wy);
float* operator()(const size_t iteration){
return operator()(iteration,0.1,0.1);
}
void show(std::ostream& ofs) const;
};
}
#endif
</source>
|}
{| class="wikitable mw-collapsible mw-collapsed"
! poissan.cpp
|-
|
<source>
#include <cstring>
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <string>
#include "poisson.h"
namespace DPS{
Poisson::Poisson(std::ifstream& ifs){
std::string line;
bufferSide = 0;
/* find number of columns */
std::getline(ifs,line);
for (size_t i = 0 ; i < line.size() ; i++){
if(line[i]==' ') nColumns++;
}
nColumns++;
/* find number of rows */
nRowsTotal++; /* already fetched one */
while(std::getline(ifs,line))
nRowsTotal++;
ifs.clear();
try{
data = new float[nColumns * nRowsTotal * 2];
}
catch (...){
throw std::runtime_error("Failed to Allocate Memory");
}
/* readin data */
ifs.seekg(0,ifs.beg);
std::cout << ifs.tellg() << std::endl;
for (size_t i = 0 ; i < nRowsTotal * nColumns ; i++) {
ifs >> data[i];
}
std::memset(data+nRowsTotal*nColumns,0,nRowsTotal*nColumns*sizeof(float));
}
Poisson::Poisson(const size_t r, const size_t c, float* d){
bufferSide = 0;
nRowsTotal = r;
nColumns = c;
try{
data = new float[r*c*2];
}
catch (...){
throw std::runtime_error("Failed to Allocate Memory");
}
std::memcpy(data,d,r*c*sizeof(float));
std::memset(data+r*c,0,r*c*sizeof(float));
}
void Poisson::update (size_t startRow, size_t endRow, const float wx, const float wy){
float* x_new = data + (1-bufferSide)*nRowsTotal*nColumns;
float* x_old = data + bufferSide*nRowsTotal*nColumns;
for (size_t i = startRow; i <= endRow; i++)
for (size_t j = 1; j < nColumns - 1; j++)
x_new[i * nColumns + j] = x_old[i * nColumns + j]
+ wx * (x_old[(i + 1) * nColumns + j] + x_old[(i - 1) * nColumns + j]
- 2.0f * x_old[i * nColumns + j])
+ wy * (x_old[i * nColumns + j + 1] + x_old[i * nColumns + j - 1]
- 2.0f * x_old[i * nColumns + j]);
}
float* Poisson::operator()(const size_t nIterations, const float wx, const float wy){
for (size_t i = 0; i < nIterations; i++) {
update(0, nRowsTotal-1, wx, wy);
bufferSwitch();
}
return data;
}
void Poisson::show(std::ostream& ofs) const{
ofs << std::fixed << std::setprecision(1);
for (size_t j = 0; j < nColumns ; j++) {
for (size_t i = 0 ; i < nRowsTotal ; i++)
ofs << std::setw(8) << data[ bufferSide*nColumns*nRowsTotal + i * nColumns + j];
ofs << std::endl;
}
}
}
</source>
|}
{| class="wikitable mw-collapsible mw-collapsed"
! main.cpp
|-
|
<source>
// based on code from LLNL tutorial mpi_heat2d.c
// Master-Worker Programming Model
// Chris Szalwinski - 2018/11/13
// Adopted by Tony Sim - 2019/02/16
#include <iostream>
#include <fstream>
#include <iomanip>
#include <cstdlib>
#include <stdexcept>
#include "poisson.h"
// solution constants
const size_t NONE = 0;
const size_t MINPARTITIONS = 1;
const size_t MAXPARTITIONS = 7;
// weights
const float wx = 0.1f;
const float wy = 0.1f;
int main(int argc, char** argv) {
if (argc != 4) {
std::cerr << "*** Incorrect number of arguments ***\n";
std::cerr << "Usage: " << argv[0]
<< " input_file output_file no_of_iterations\n";
return 1;
}
std::ifstream input(argv[1]);
std::ofstream output(argv[2]);
std::ofstream temp("init.csv");
if(!input.is_open()){
std::cerr << "Invalid Input File" << std::endl;
return 2;
}
if(!output.is_open()){
std::cerr << "Invalid Output File" << std::endl;
return 2;
}
DPS::Poisson* p = nullptr;
try{
p = new DPS::Poisson(input);
}
catch(std::exception& e){
std::cerr << "Error: " << e.what() << std::endl;
}
p->show(temp);
size_t nIterations = std::atoi(argv[3]);
(*p)(nIterations);
// write results to file
p->show(output);
delete p;
}
</source>
|}
===== Introduction =====
The presented code simulates heat map using Jacobi's method for Poisson's equation. It is represented in a 2D array, and each element updates its value based on the adjacent elements at a given moment. Each iteration represent one instance in time. By repeating the calculation over the entire array through multiple iterations, we can estimate the state of the heat transfer after a given time interval.
===== Profiling =====
The profiling was conducted using a data set of 79 rows and 205 columns over 150000 iterations.
{| class="wikitable mw-collapsible mw-collapsed"
! Flat profile
|-
|
Flat profile:
Each sample counts as 0.01 seconds.
% cumulative self self total
time seconds seconds calls us/call us/call name
98.57 2.75 2.75 150000 18.33 18.33 DPS::Poisson::update(unsigned long, unsigned long, float, float)
0.00 2.75 0.00 1 0.00 0.00 _GLOBAL__sub_I__ZN3DPS7PoissonC2ERSt14basic_ifstreamIcSt11char_traitsIcEE
0.00 2.75 0.00 1 0.00 0.00 _GLOBAL__sub_I_main
|}
{| class="wikitable mw-collapsible mw-collapsed"
! Call graph
|-
|
Call graph
granularity: each sample hit covers 2 byte(s) for 0.36% of 2.75 seconds
index % time self children called name
2.75 0.00 150000/150000 DPS::Poisson::operator()(unsigned long, float, float) [2]
[1] 100.0 2.75 0.00 150000 DPS::Poisson::update(unsigned long, unsigned long, float, float) [1]
-----------------------------------------------
<spontaneous>
[2] 100.0 0.00 2.75 DPS::Poisson::operator()(unsigned long, float, float) [2]
2.75 0.00 150000/150000 DPS::Poisson::update(unsigned long, unsigned long, float, float) [1]
-----------------------------------------------
0.00 0.00 1/1 __libc_csu_init [21]
[10] 0.0 0.00 0.00 1 _GLOBAL__sub_I__ZN3DPS7PoissonC2ERSt14basic_ifstreamIcSt11char_traitsIcEE [10]
-----------------------------------------------
0.00 0.00 1/1 __libc_csu_init [21]
[11] 0.0 0.00 0.00 1 _GLOBAL__sub_I_main [11]
-----------------------------------------------
Index by function name
[10] _GLOBAL__sub_I__ZN3DPS7PoissonC2ERSt14basic_ifstreamIcSt11char_traitsIcEE (poisson.cpp) [11] _GLOBAL__sub_I_main (main.cpp) [1] DPS::Poisson::update(unsigned long, unsigned long, float, float)
|}
=====Analysis=====
given 98.57 percent of time is spent on the update() function, it is considered the hotspot.
Total time taken was 2.75.
If we consider a GPU environment with 1000 cores, we can estimate the following speedup:
S1000 = 1/(1-.9857 + .9857/1000) = 65.00
In fact, the speed will decrease from 2.75 seconds to 0.0450 seconds.
As each iteration depends on the product of the previous iteration, there is a dependency resolution that might hamper the parallel process.
Consideration may also be extended to resolving ghost cells across different SMX while using the device global memory as the transfer pipeline.
==== Robert ====
==== Inna ====
===== Subject:Data compression - LWZ algorithm. ===== '''Source''': http://www.cplusplus.com/articles/iL18T05o/#Version1 I tested the following source code for a compression and decompression of .txt files and a gif.{| class="wikitable mw-collapsible mw-collapsed"! lwz.cpp( ... )|-|<syntaxhighlight lang="cpp">////// @file/// @author Julius Pettersson/// @copyright MIT/Expat License./// @brief LZW file compressor/// @version 1////// This is the C++11 implementation of a Lempel-Ziv-Welch single-file command-line compressor./// It uses the simpler fixed-width code compression method./// It was written with Doxygen comments.////// @see http://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch/// @see http://marknelson.us/2011/11/08/lzw-revisited//// @see http://www.cs.duke.edu/csed/curious/compression/lzw.html/// @see http://warp.povusers.org/EfficientLZW/index.html/// @see http://en.cppreference.com//// @see http://www.doxygen.org//// #include <cstdint>#include <cstdlib>#include <exception>#include <fstream>#include <ios>#include <iostream>#include <istream>#include <limits>#include <map>#include <ostream>#include <stdexcept>#include <string>#include <vector> /// Type used to store and retrieve codes.using CodeType = std::uint16_t; namespace globals { /// Dictionary Maximum Size (when reached, the dictionary will be reset)const CodeType dms {std::numeric_limits<CodeType>::max()}; } // namespace globals ////// @brief Helper operator intended to simplify code./// @param vc original vector/// @param c element to be appended/// @returns vector resulting from appending `c` to `vc`///std::vector<char> operator + (std::vector<char> vc, char c){ vc.push_back(c); return vc;} ////// @brief Compresses the contents of `is` and writes the result to `os`./// @param [in] is input stream/// @param [out] os output stream///void compress(std::istream &is, std::ostream &os){ std::map<std::vector<char>, CodeType> dictionary; // "named" lambda function, used to reset the dictionary to its initial contents const auto reset_dictionary = [&dictionary] { dictionary.clear(); const long int minc = std::numeric_limits<char>::min(); const long int maxc = std::numeric_limits<char>::max(); for (long int c = minc; c <= maxc; ++c) { // to prevent Undefined Behavior, resulting from reading and modifying // the dictionary object at the same time const CodeType dictionary_size = dictionary.size(); dictionary[{static_cast<char> (c)}] = dictionary_size; } }; reset_dictionary(); std::vector<char> s; // String char c; while (is.get(c)) { // dictionary's maximum size was reached if (dictionary.size() == globals::dms) reset_dictionary(); s.push_back(c); if (dictionary.count(s) == 0) { // to prevent Undefined Behavior, resulting from reading and modifying // the dictionary object at the same time const CodeType dictionary_size = dictionary.size(); dictionary[s] = dictionary_size; s.pop_back(); os.write(reinterpret_cast<const char *> (&dictionary.at(s)), sizeof (CodeType)); s = {c}; } } if (!s.empty()) os.write(reinterpret_cast<const char *> (&dictionary.at(s)), sizeof (CodeType));} ////// @brief Decompresses the contents of `is` and writes the result to `os`./// @param [in] is input stream/// @param [out] os output stream///void decompress(std::istream &is, std::ostream &os){ std::vector<std::vector<char>> dictionary; // "named" lambda function, used to reset the dictionary to its initial contents const auto reset_dictionary = [&dictionary] { dictionary.clear(); dictionary.reserve(globals::dms); const long int minc = std::numeric_limits<char>::min(); const long int maxc = std::numeric_limits<char>::max(); for (long int c = minc; c <= maxc; ++c) dictionary.push_back({static_cast<char> (c)}); }; reset_dictionary(); std::vector<char> s; // String CodeType k; // Key while (is.read(reinterpret_cast<char *> (&k), sizeof (CodeType))) { // dictionary's maximum size was reached if (dictionary.size() == globals::dms) reset_dictionary(); if (k > dictionary.size()) throw std::runtime_error("invalid compressed code"); if (k == dictionary.size()) dictionary.push_back(s + s.front()); else if (!s.empty()) dictionary.push_back(s + dictionary.at(k).front()); os.write(&dictionary.at(k).front(), dictionary.at(k).size()); s = dictionary.at(k); } if (!is.eof() || is.gcount() != 0) throw std::runtime_error("corrupted compressed file");} ////// @brief Prints usage information and a custom error message./// @param s custom error message to be printed/// @param su Show Usage information///void print_usage(const std::string &s = "", bool su = true){ if (!s.empty()) std::cerr << "\nERROR: " << s << '\n'; if (su) { std::cerr << "\nUsage:\n"; std::cerr << "\tprogram -flag input_file output_file\n\n"; std::cerr << "Where `flag' is either `c' for compressing, or `d' for decompressing, and\n"; std::cerr << "`input_file' and `output_file' are distinct files.\n\n"; std::cerr << "Examples:\n"; std::cerr << "\tlzw_v1.exe -c license.txt license.lzw\n"; std::cerr << "\tlzw_v1.exe -d license.lzw new_license.txt\n"; } std::cerr << std::endl;} ////// @brief Actual program entry point./// @param argc number of command line arguments/// @param [in] argv array of command line arguments/// @retval EXIT_FAILURE for failed operation/// @retval EXIT_SUCCESS for successful operation///int main(int argc, char *argv[]){ if (argc != 4) { print_usage("Wrong number of arguments."); return EXIT_FAILURE; } enum class Mode { Compress, Decompress }; Mode m; if (std::string(argv[1]) == "-c") m = Mode::Compress; else if (std::string(argv[1]) == "-d") m = Mode::Decompress; else { print_usage(std::string("flag `") + argv[1] + "' is not recognized."); return EXIT_FAILURE; } std::ifstream input_file(argv[2], std::ios_base::binary); if (!input_file.is_open()) { print_usage(std::string("input_file `") + argv[2] + "' could not be opened."); return EXIT_FAILURE; } std::ofstream output_file(argv[3], std::ios_base::binary); if (!output_file.is_open()) { print_usage(std::string("output_file `") + argv[3] + "' could not be opened."); return EXIT_FAILURE; } try { input_file.exceptions(std::ios_base::badbit); output_file.exceptions(std::ios_base::badbit | std::ios_base::failbit); if (m == Mode::Compress) compress(input_file, output_file); else if (m == Mode::Decompress) decompress(input_file, output_file); } catch (const std::ios_base::failure &f) { print_usage(std::string("File input/output failure: ") + f.what() + '.', false); return EXIT_FAILURE; } catch (const std::exception &e) { print_usage(std::string("Caught exception: ") + e.what() + '.', false); return EXIT_FAILURE; } return EXIT_SUCCESS;} </syntaxhighlight>|} ===== Tested data ===== 1. book.txt - a 343 kilobyte text file. 2. words.txt - a 4.7 megabyte text file. 3. fire.gif - a 309 kilobyte graphical image. ===== Flat Profiles ===== ====== Book ====== Flat profile for compression: [[File:BookComp.jpg|1300px]] Flat profile for decompression: [[File:BookDecomp.jpg|700px]] ====== Text ====== Flat profile for compression: [[File:textCompression.jpg|1300px]] Flat profile for decompression: [[File:textDecomp.jpg|1300px]] ====== GIF ====== Flat profile for compression: [[File:FireComp.jpg|1300px]] Flat profile for decompression: [[File:FireDecomp.jpg|900px]]
=== Assignment 2 ===
=== Assignment 3 ===