Ghost Cells
GPU610/DPS915 | Student List | Group and Project Index | Student Resources | Glossary
Contents
Ghost Cells
Team Members
- Tony Sim, Issue Dumper
- Robert Dittrich, Issue Collector
- Inna Zhogova, Issue Resolver
Progress
Assignment 1
Tony
Subject: Jacobi's method for Poisson's equation
Robert Subject: Multi Sampling Anti Aliasing
Source Files
main.cpp |
---|
#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <vector>
#define STB_IMAGE_IMPLEMENTATION
#include "stb_image.h"
#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "stb_image_write.h"
#include "vec3.h"
/// A pair of signed integer coordinates (or offsets) on a 2-D grid.
struct Point
{
    int x;  ///< Horizontal component.
    int y;  ///< Vertical component.
};
/// @brief Smooths an interleaved 8-bit image by averaging each pixel with its neighbours.
///
/// For every pixel, the square window of (2 * samples + 1) x (2 * samples + 1)
/// pixels centred on it is averaged. Neighbours that fall outside the image are
/// replaced by the nearest edge pixel, so the divisor is always the full window
/// size. Only the first three channels are averaged; any further channels are
/// copied through unchanged.
///
/// @param input    source pixels: width * height * channels bytes, row-major,
///                 channels interleaved
/// @param output   destination buffer of the same size; it should not alias
///                 `input`, otherwise later pixels are averaged from values this
///                 call has already overwritten
/// @param width    image width in pixels
/// @param height   image height in pixels
/// @param channels bytes per pixel; must be at least 3
/// @param samples  window radius in pixels; negative values are treated as 0
/// @return `output`. The buffer is left untouched when a pointer is null, a
///         dimension is not positive, or `channels` is smaller than 3.
uint8_t* msaa(const uint8_t* input, uint8_t* output, int width, int height, int channels, int samples) {
    if (input == nullptr || output == nullptr || width <= 0 || height <= 0 || channels < 3) {
        return output;
    }

    const int radius = std::max(samples, 0);
    // 64-bit arithmetic keeps the window size, the sums and the byte offsets
    // from overflowing on large images or large radii.
    const long long side = 2LL * radius + 1;
    const long long totalPoints = side * side;
    const long long rowStride = static_cast<long long>(width) * channels;

    for (int row = 0; row < height; ++row) {
        for (int col = 0; col < width; ++col) {
            long long sumR = 0;
            long long sumG = 0;
            long long sumB = 0;

            for (int dy = -radius; dy <= radius; ++dy) {
                // Clamp to the last valid row/column (size - 1), not one past it.
                const int ny = std::clamp(row + dy, 0, height - 1);
                const uint8_t* line = input + ny * rowStride;
                for (int dx = -radius; dx <= radius; ++dx) {
                    const int nx = std::clamp(col + dx, 0, width - 1);
                    const uint8_t* pixel = line + static_cast<long long>(nx) * channels;
                    sumR += pixel[0];
                    sumG += pixel[1];
                    sumB += pixel[2];
                }
            }

            const long long base = row * rowStride + static_cast<long long>(col) * channels;
            output[base] = static_cast<uint8_t>(sumR / totalPoints);
            output[base + 1] = static_cast<uint8_t>(sumG / totalPoints);
            output[base + 2] = static_cast<uint8_t>(sumB / totalPoints);
            // Pass extra channels (e.g. alpha) through so the output is fully defined.
            for (int c = 3; c < channels; ++c) {
                output[base + c] = input[base + c];
            }
        }
    }
    return output;
}
/// @brief Command-line entry point: loads an image, smooths it and writes a PNG.
///
/// Usage: `program input output sample_size passes`
///
/// @param argc number of command-line arguments; must be 5
/// @param argv argv[1] input image path, argv[2] output PNG path,
///             argv[3] sample radius, argv[4] number of passes
/// @retval 0 success
/// @retval 1 wrong number of arguments
/// @retval 2 the image could not be loaded or does not report 3 channels
/// @retval 3 the output image could not be written
int main(int argc, char* argv[]) {
    if (argc != 5) {
        std::cerr << argv[0] << ": invalid number of arguments\n";
        std::cerr << "Usage: " << argv[0] << " input output sample_size passes \n";
        system("pause");
        return 1;
    }

    int width = 0;
    int height = 0;
    int channels = 0;
    uint8_t* rgb_read = stbi_load(argv[1], &width, &height, &channels, STBI_rgb);
    // A failed load would otherwise leave width/height/channels meaningless.
    if (rgb_read == nullptr) {
        std::cerr << argv[0] << ": could not load image " << argv[1] << "\n";
        system("pause");
        return 2;
    }
    // NOTE(review): with STBI_rgb the decoded buffer should always hold 3 bytes
    // per pixel while `channels` reports the file's own component count, so this
    // check rejects e.g. RGBA files — confirm against the stb_image documentation
    // before relaxing it.
    if (channels != 3) {
        std::cout << "Incorrect channels" << std::endl;
        stbi_image_free(rgb_read);  // do not leak the decoded image on this path
        system("pause");
        return 2;
    }

    const int samples = std::atoi(argv[3]);
    const int passes = std::atoi(argv[4]);

    const std::size_t byteCount = static_cast<std::size_t>(width) * height * channels;
    std::vector<uint8_t> current(byteCount);

    // The first pass always runs, reading straight from the decoded image.
    msaa(rgb_read, current.data(), width, height, channels, samples);
    stbi_image_free(rgb_read);

    if (passes > 1) {
        // Each further pass reads the previous result and writes into a separate
        // buffer; filtering in place would average pixels that the same pass has
        // already overwritten.
        std::vector<uint8_t> scratch(byteCount);
        for (int i = 1; i < passes; i++) {
            msaa(current.data(), scratch.data(), width, height, channels, samples);
            current.swap(scratch);
        }
    }

    if (stbi_write_png(argv[2], width, height, channels, current.data(), width * channels) == 0) {
        std::cerr << argv[0] << ": could not write image " << argv[2] << "\n";
        system("pause");
        return 3;
    }

    std::cout << "AA Done using " << samples << " sample size" << " over " << passes << " passes" << std::endl;
    system("pause");
    return 0;
}
vec3.h |
---|
#ifndef VEC3_H
#define VEC3_H
#include <iostream>
/// @brief Minimal three-component vector with x, y and z members of type T.
/// @tparam T component type; it must be constructible from 0 and support
///           `+=`, `*`, `+` and streaming with `<<`
template <class T>
class Vec3 {
private:
    T x;
    T y;
    T z;

public:
    /// Creates a vector with all three components set to 0.
    Vec3() : x(0), y(0), z(0) {}

    /// Creates a vector from the three given components.
    Vec3(T x_, T y_, T z_) : x(x_), y(y_), z(z_) {}

    /// Overwrites all three components.
    void set(const T &x_, const T &y_, const T &z_) {
        x = x_;
        y = y_;
        z = z_;
    }

    /// Adds the given values component-wise to this vector.
    void add(const T &x_, const T &y_, const T &z_) {
        x += x_;
        y += y_;
        z += z_;
    }

    T getX() const { return x; }
    T getY() const { return y; }
    T getZ() const { return z; }

    void setX(const T &x_) { x = x_; }
    void setY(const T &y_) { y = y_; }
    void setZ(const T &z_) { z = z_; }

    /// @return the dot product of `vec1` and `vec2`.
    static T dot(const Vec3& vec1, const Vec3& vec2) {
        return vec1.x * vec2.x + vec1.y * vec2.y + vec1.z * vec2.z;
    }

    /// @return the dot product of this vector and `vec`.
    T dot(const Vec3 &vec) const {
        return x * vec.x + y * vec.y + z * vec.z;
    }

    /// Writes the components to `os` as "x: <x>, y: <y>, z: <z>" followed by a
    /// newline. Const so that it can be called on const vectors as well.
    void display(std::ostream& os) const {
        os << "x: " << x << ", y: " << y << ", z: " << z << "\n";
    }
};
#endif // !VEC3_H |
Introduction
For my selection I chose anti-aliasing, since I see it a lot in video games but never really knew how it worked. There are other anti-aliasing methods, such as FXAA (fast approximate anti-aliasing), but it seemed a lot more complicated than MSAA. The way I approached this problem is by getting the color of the pixels around a pixel. You can specify the distance it will search through the application's command-line arguments. In my implementation you specify an input file, an output file, the radius of pixels to sample, and how many passes to take on the image. In my tests, the command-line options I used were an image I made in Paint, a sample size of 4, and 4 passes.
Before |
---|
After |
---|
Profiling
Profiling |
---|
Flat profile:
Each sample counts as 0.01 seconds.
% cumulative self self total
time seconds seconds calls ms/call ms/call name
85.72 0.18 0.18 msaa(unsigned char const*, unsigned char*, int, int, int, int)
14.29 0.21 0.03 1 30.00 30.00 stbi_zlib_compress
0.00 0.21 0.00 127820 0.00 0.00 stbiw__zlib_flushf(unsigned char*, unsigned int*, int*)
0.00 0.21 0.00 96904 0.00 0.00 stbiw__zhash(unsigned char*)
0.00 0.21 0.00 5189 0.00 0.00 stbi__fill_bits(stbi__zbuf*)
0.00 0.21 0.00 2100 0.00 0.00 stbiw__encode_png_line(unsigned char*, int, int, int, int, int, int, signed char*)
0.00 0.21 0.00 2014 0.00 0.00 stbiw__sbgrowf(void**, int, int) [clone .constprop.58]
0.00 0.21 0.00 38 0.00 0.00 stbi__get16be(stbi__context*)
0.00 0.21 0.00 19 0.00 0.00 stbi__get32be(stbi__context*)
0.00 0.21 0.00 3 0.00 0.00 stbi__skip(stbi__context*, int)
0.00 0.21 0.00 3 0.00 0.00 stbiw__wpcrc(unsigned char**, int)
0.00 0.21 0.00 3 0.00 0.00 stbi__stdio_read(void*, char*, int)
0.00 0.21 0.00 3 0.00 0.00 stbi__zbuild_huffman(stbi__zhuffman*, unsigned char const*, int)
0.00 0.21 0.00 2 0.00 0.00 stbi__mad3sizes_valid(int, int, int, int)
0.00 0.21 0.00 1 0.00 0.00 _GLOBAL__sub_I_stbi_failure_reason
0.00 0.21 0.00 1 0.00 0.00 stbi__getn(stbi__context*, unsigned char*, int)
0.00 0.21 0.00 1 0.00 0.00 stbi__readval(stbi__context*, int, unsigned char*)
0.00 0.21 0.00 1 0.00 0.00 stbi__load_main(stbi__context*, int*, int*, int*, int, stbi__result_info*, int)
0.00 0.21 0.00 1 0.00 0.00 stbi__parse_zlib(stbi__zbuf*, int)
0.00 0.21 0.00 1 0.00 0.00 stbi__malloc_mad3(int, int, int, int)
0.00 0.21 0.00 1 0.00 0.00 stbi__parse_png_file(stbi__png*, int, int)
0.00 0.21 0.00 1 0.00 0.00 stbi__start_callbacks(stbi__context*, stbi_io_callbacks*, void*)
0.00 0.21 0.00 1 0.00 0.00 stbi__decode_jpeg_header(stbi__jpeg*, int)
0.00 0.21 0.00 1 0.00 0.00 stbi__compute_huffman_codes(stbi__zbuf*)
0.00 0.21 0.00 1 0.00 0.00 stbi__load_and_postprocess_8bit(stbi__context*, int*, int*, int*, int)
0.00 0.21 0.00 1 0.00 0.00 stbi_load_from_file
0.00 0.21 0.00 1 0.00 30.00 stbi_write_png_to_mem
0.00 0.21 0.00 1 0.00 0.00 stbi_zlib_decode_malloc_guesssize_headerflag |
Conclusion
Since the msaa
function I wrote is a hotspot of the program I would suggest offloading part of it to a GPU, more specifically the part that finds the average of colors of the nearby pixels. That part also does not depend on previous iterations to finish so it is a prime candidate for parallelization.
Inna
Subject: Data compression - LZW algorithm.
Source: http://www.cplusplus.com/articles/iL18T05o/#Version1
I tested the following source code for a compression and decompression of .txt files and a gif.
lwz.cpp( ... ) |
---|
///
/// @file
/// @author Julius Pettersson
/// @copyright MIT/Expat License.
/// @brief LZW file compressor
/// @version 1
///
/// This is the C++11 implementation of a Lempel-Ziv-Welch single-file command-line compressor.
/// It uses the simpler fixed-width code compression method.
/// It was written with Doxygen comments.
///
/// @see http://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch
/// @see http://marknelson.us/2011/11/08/lzw-revisited/
/// @see http://www.cs.duke.edu/csed/curious/compression/lzw.html
/// @see http://warp.povusers.org/EfficientLZW/index.html
/// @see http://en.cppreference.com/
/// @see http://www.doxygen.org/
///
#include <cstdint>
#include <cstdlib>
#include <exception>
#include <fstream>
#include <ios>
#include <iostream>
#include <istream>
#include <limits>
#include <map>
#include <ostream>
#include <stdexcept>
#include <string>
#include <vector>
/// Unsigned 16-bit integer type in which dictionary codes are held.
using CodeType = std::uint16_t;

namespace globals {

/// Dictionary Maximum Size: the largest value a CodeType can represent.
/// The dictionary is reset once it has grown to this many entries.
const CodeType dms = std::numeric_limits<CodeType>::max();

} // namespace globals
///
/// @brief Appends one character to a copy of a byte string.
/// @param vc   vector to extend (taken by value, so the caller's vector is untouched)
/// @param c    character placed after the last element of `vc`
/// @returns    the extended vector
///
std::vector<char> operator + (std::vector<char> vc, char c)
{
    vc.insert(vc.end(), c);
    return vc;
}
///
/// @brief Compresses the contents of `is` and writes the result to `os`.
/// @param [in] is input stream
/// @param [out] os output stream
///
void compress(std::istream &is, std::ostream &os)
{
std::map<std::vector<char>, CodeType> dictionary;
// "named" lambda function, used to reset the dictionary to its initial contents
const auto reset_dictionary = [&dictionary] {
dictionary.clear();
const long int minc = std::numeric_limits<char>::min();
const long int maxc = std::numeric_limits<char>::max();
for (long int c = minc; c <= maxc; ++c)
{
// to prevent Undefined Behavior, resulting from reading and modifying
// the dictionary object at the same time
const CodeType dictionary_size = dictionary.size();
dictionary[{static_cast<char> (c)}] = dictionary_size;
}
};
reset_dictionary();
std::vector<char> s; // String
char c;
while (is.get(c))
{
// dictionary's maximum size was reached
if (dictionary.size() == globals::dms)
reset_dictionary();
s.push_back(c);
if (dictionary.count(s) == 0)
{
// to prevent Undefined Behavior, resulting from reading and modifying
// the dictionary object at the same time
const CodeType dictionary_size = dictionary.size();
dictionary[s] = dictionary_size;
s.pop_back();
os.write(reinterpret_cast<const char *> (&dictionary.at(s)), sizeof (CodeType));
s = {c};
}
}
if (!s.empty())
os.write(reinterpret_cast<const char *> (&dictionary.at(s)), sizeof (CodeType));
}
///
/// @brief Decompresses the contents of `is` and writes the result to `os`.
///
/// The input is read as a sequence of raw `CodeType` values.
///
/// @param [in] is      input stream
/// @param [out] os     output stream
/// @throws std::runtime_error  if a code cannot be resolved against the
///                             dictionary ("invalid compressed code") or the
///                             input does not end on a whole code
///                             ("corrupted compressed file")
///
void decompress(std::istream &is, std::ostream &os)
{
    std::vector<std::vector<char>> dictionary;

    // "named" lambda function, used to reset the dictionary to its initial contents
    const auto reset_dictionary = [&dictionary] {
        dictionary.clear();
        dictionary.reserve(globals::dms);

        const long int minc = std::numeric_limits<char>::min();
        const long int maxc = std::numeric_limits<char>::max();

        for (long int c = minc; c <= maxc; ++c)
            dictionary.push_back({static_cast<char> (c)});
    };

    reset_dictionary();

    std::vector<char> s; // String decoded from the previous code
    CodeType k; // Key

    while (is.read(reinterpret_cast<char *> (&k), sizeof (CodeType)))
    {
        // dictionary's maximum size was reached
        if (dictionary.size() == globals::dms)
            reset_dictionary();

        if (k > dictionary.size())
            throw std::runtime_error("invalid compressed code");

        if (k == dictionary.size())
        {
            // This code refers to the entry about to be created, which is built
            // from the previous string. Without a previous string (a corrupt
            // stream whose first code is out of range) `s.front()` would be
            // undefined behaviour, so reject the input instead.
            if (s.empty())
                throw std::runtime_error("invalid compressed code");

            dictionary.push_back(s + s.front());
        }
        else
        if (!s.empty())
            dictionary.push_back(s + dictionary.at(k).front());

        const std::vector<char> &entry = dictionary.at(k);

        os.write(entry.data(), entry.size());
        s = entry;
    }

    // A clean end of input is EOF with no partially read code left over.
    if (!is.eof() || is.gcount() != 0)
        throw std::runtime_error("corrupted compressed file");
}
///
/// @brief Writes an optional error line and optional usage help to `std::cerr`.
/// @param s    error text; nothing is printed for it when empty
/// @param su   when true, the usage and examples text is printed as well
///
/// The output always ends with a newline and a flush of `std::cerr`.
///
void print_usage(const std::string &s = "", bool su = true)
{
    const bool has_message = !s.empty();

    if (has_message)
    {
        std::cerr << "\nERROR: " << s << '\n';
    }

    if (su)
    {
        // Adjacent literals are joined by the compiler into one string.
        std::cerr <<
            "\nUsage:\n"
            "\tprogram -flag input_file output_file\n\n"
            "Where `flag' is either `c' for compressing, or `d' for decompressing, and\n"
            "`input_file' and `output_file' are distinct files.\n\n"
            "Examples:\n"
            "\tlzw_v1.exe -c license.txt license.lzw\n"
            "\tlzw_v1.exe -d license.lzw new_license.txt\n";
    }

    std::cerr << std::endl;
}
///
/// @brief Program entry point: parses the command line and runs the requested operation.
/// @param argc         number of command line arguments; exactly 4 are expected
/// @param [in] argv    argv[1] is the flag (`-c` or `-d`), argv[2] the input
///                     file and argv[3] the output file
/// @retval EXIT_FAILURE    bad arguments, a file that could not be opened, or
///                         an exception while processing
/// @retval EXIT_SUCCESS    the operation completed
///
int main(int argc, char *argv[])
{
    enum class Mode {
        Compress,
        Decompress
    };

    if (argc != 4)
    {
        print_usage("Wrong number of arguments.");
        return EXIT_FAILURE;
    }

    // Work out which operation was requested before touching any file.
    const std::string flag(argv[1]);
    Mode m;

    if (flag == "-c")
    {
        m = Mode::Compress;
    }
    else if (flag == "-d")
    {
        m = Mode::Decompress;
    }
    else
    {
        print_usage(std::string("flag `") + argv[1] + "' is not recognized.");
        return EXIT_FAILURE;
    }

    std::ifstream input_file(argv[2], std::ios_base::binary);

    if (!input_file.is_open())
    {
        print_usage(std::string("input_file `") + argv[2] + "' could not be opened.");
        return EXIT_FAILURE;
    }

    std::ofstream output_file(argv[3], std::ios_base::binary);

    if (!output_file.is_open())
    {
        print_usage(std::string("output_file `") + argv[3] + "' could not be opened.");
        return EXIT_FAILURE;
    }

    try
    {
        // Let stream errors surface as exceptions. failbit is left off for the
        // input stream, so a read that fails only sets the stream state.
        input_file.exceptions(std::ios_base::badbit);
        output_file.exceptions(std::ios_base::badbit | std::ios_base::failbit);

        switch (m)
        {
        case Mode::Compress:
            compress(input_file, output_file);
            break;
        case Mode::Decompress:
            decompress(input_file, output_file);
            break;
        }
    }
    catch (const std::ios_base::failure &f)
    {
        print_usage(std::string("File input/output failure: ") + f.what() + '.', false);
        return EXIT_FAILURE;
    }
    catch (const std::exception &e)
    {
        print_usage(std::string("Caught exception: ") + e.what() + '.', false);
        return EXIT_FAILURE;
    }

    return EXIT_SUCCESS;
}
Tested data
1. book.txt - a 343 kilobyte text file.
2. words.txt - a 4.7 megabyte text file.
3. fire.gif - a 309 kilobyte graphical image.
Flat Profiles
Book
Flat profile for compression:
Flat profile for decompression:
Text
Flat profile for compression:
Flat profile for decompression:
GIF
Flat profile for compression:
Flat profile for decompression: