Difference between revisions of "GPU610 Team Tsubame"

From CDOT Wiki
Jump to: navigation, search
(Pi)
(Assignment 1)
Line 13: Line 13:
 
This is a comparison between two programs that calculate Pi.
 
This is a comparison between two programs that calculate Pi.
  
Leibniz formula implementation:
+
'''Leibniz formula implementation:'''
  #include <iostream>
+
  01 #include <iostream>
  #include <iomanip>
+
  02 #include <iomanip>
   
+
  03
  #include <chrono>
+
  04 #include <chrono>
   
+
  05
  // Function duplicated from [https://scs.senecac.on.ca/~gpu610/pages/workshops/w2.html Workshop 2 - BLAS]
+
  06 // Function duplicated from [https://scs.senecac.on.ca/~gpu610/pages/workshops/w2.html Workshop 2 - BLAS]
  void reportTime(const char* msg, std::chrono::steady_clock::duration span) {
+
  07 void reportTime(const char* msg, std::chrono::steady_clock::duration span) {
    auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(span);
+
08    auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(span);
    std::cout << msg << " - took - "  
+
09    std::cout << msg << " - took - "  
    << ms.count() << " millisecs" << std::endl;
+
10    << ms.count() << " millisecs" << std::endl;
  }
+
  11 }
   
+
  12
  // Arctangent of 1; execute for ~n iterations to refine the result
+
  13 // Arctangent of 1; execute for ~n iterations to refine the result
  long double arctan1( unsigned long long int it ) {  
+
  14 long double arctan1( unsigned long long int it ) {  
    long double r = 0.0;
+
15    long double r = 0.0;
   
+
  16
    // v0.25; due to performing the operations in a different order, there are rounding issues...
+
17    // v0.25; due to performing the operations in a different order, there are rounding issues...
    for ( long double d = 0.0; d < it; d++ ) {  
+
18    for ( long double d = 0.0; d < it; d++ ) {  
        long double de = d * 4.0 + 1.0;
+
19        long double de = d * 4.0 + 1.0;
        r += (1.0 / de) - (1.0 / (de + 2.0));
+
20        r += (1.0 / de) - (1.0 / (de + 2.0));
    }   
+
21    }   
   
+
  22
    return r;
+
23    return r;
  }
+
  24 }
   
+
  25
  int main( int argc, char* argv[] ) {  
+
  26 int main( int argc, char* argv[] ) {
    unsigned long long int n = std::stoull(argv[1], 0);  
+
27    unsigned long long int n = std::stoull(argv[1], 0);  
   
+
  28
    std::chrono::steady_clock::time_point ts, te;  
+
29    std::chrono::steady_clock::time_point ts, te;  
    ts = std::chrono::steady_clock::now();
+
30    ts = std::chrono::steady_clock::now();
    long double pi = 4.0 * arctan1( n );
+
31    long double pi = 4.0 * arctan1( n );
    te = std::chrono::steady_clock::now();
+
32    te = std::chrono::steady_clock::now();
    reportTime("Arctangent(1) ", te - ts);
+
33    reportTime("Arctangent(1) ", te - ts);
   
+
  34
    // Maximum length of a long double is 64 digits; minus "3." gives 62 digits.
+
35    // Maximum length of a long double is 64 digits; minus "3." gives 62 digits.
    std::cout.precision(62);
+
36    std::cout.precision(62);
    std::cout << "Pi: " << std::fixed << pi << std::endl;
+
37    std::cout << "Pi: " << std::fixed << pi << std::endl;
  }
+
  40 }
 +
 
 +
Stage 1 - Big-O
 +
 
  
 
4 digits are correct at 10K iterations:
 
4 digits are correct at 10K iterations:
Line 80: Line 83:
 
  sys 0m0.008s
 
  sys 0m0.008s
  
Monte-Carlo algorithm implementation:
+
'''Monte-Carlo algorithm implementation:'''
  #include <iostream>
+
  01 #include <iostream>
  #include <random>
+
  02 #include <random>
   
+
  03
  #include <chrono>
+
  04 #include <chrono>
   
+
  05
  // Function duplicated from [https://scs.senecac.on.ca/~gpu610/pages/workshops/w2.html Workshop 2 - BLAS]
+
  06 // Duplicated from https://scs.senecac.on.ca/~gpu610/pages/workshops/w2.html
  void reportTime(const char* msg, std::chrono::steady_clock::duration span) {
+
  07 void reportTime(const char* msg, std::chrono::steady_clock::duration span) {
    auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(span);
+
08    auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(span);
    std::cout << msg << " - took - " <<
+
09    std::cout << msg << " - took - " <<
    ms.count() << " millisecs" << std::endl;
+
10    ms.count() << " millisecs" << std::endl;
  }
+
  11 }
   
+
  12
  int main(int argc, char* argv[]) {
+
  13 int main(int argc, char* argv[]) {
    std::chrono::steady_clock::time_point ts, te;
+
14    std::chrono::steady_clock::time_point ts, te;
   
+
  15
    ts = std::chrono::steady_clock::now();
+
16    ts = std::chrono::steady_clock::now();
    unsigned long long int n = std::stoull(argv[1], 0),
+
17    unsigned long long int n = std::stoull(argv[1], 0),
                            totalCircle = 0;
+
18                            totalCircle = 0;
   
+
  19
    int stride = 1000,
+
20    int stride = 1000,
        circleSize = n / stride;
+
21        circleSize = n / stride;
   
+
  22
    unsigned int* circle = new unsigned int[circleSize];
+
23    unsigned int* circle = new unsigned int[circleSize];
   
+
  24
    for (int i = 0; i < circleSize; i++)
+
25    for (int i = 0; i < circleSize; i++)
        circle[i] = 0;
+
26        circle[i] = 0;
   
+
  27
    std::random_device rd;
+
28    std::random_device rd;
    std::mt19937 mt(rd());
+
29    std::mt19937 mt(rd());
    std::uniform_real_distribution<long double> dist(0.0, 1.0);
+
30    std::uniform_real_distribution<long double> dist(0.0, 1.0);
    te = std::chrono::steady_clock::now();
+
31    te = std::chrono::steady_clock::now();
    reportTime("Init. ", te - ts);
+
32    reportTime("Init. ", te - ts);
   
+
  33
    ts = std::chrono::steady_clock::now();
+
34    ts = std::chrono::steady_clock::now();
    for (unsigned long long int i = 0; i < circleSize; i++) {
+
35    for (unsigned long long int i = 0; i < circleSize; i++) {
        for (int j = 0; j < stride; j++) {
+
36        for (int j = 0; j < stride; j++) {
            long double x = dist(mt),
+
37            long double x = dist(mt),
                        y = dist(mt);
+
38                        y = dist(mt);
            // if position is inside the circle...
+
39            // if position is inside the circle...
            if (x * x + y * y < 1.0) {
+
40            if (x * x + y * y < 1.0) {
                circle[i]++;
+
41                circle[i]++;
            }
+
42            }
        }
+
43        }
    }
+
44    }
   
+
  45
    for (int i = 0; i < circleSize; i++)
+
46    for (int i = 0; i < circleSize; i++)
        totalCircle += circle[i];
+
47        totalCircle += circle[i];
   
+
  48
    long double pi = 4.0 * ((long double) totalCircle) / ((long double) n);
+
49    long double pi = 4.0 * ((long double) totalCircle) / ((long double) n);
    te = std::chrono::steady_clock::now();
+
50    te = std::chrono::steady_clock::now();
    reportTime("Drop points ", te - ts);
+
51    reportTime("Drop points ", te - ts);
   
+
  52
    std::cout.precision(62);
+
53    std::cout.precision(62);
    std::cout << "Pi: " << std::fixed << pi << std::endl;
+
54    std::cout << "Pi: " << std::fixed << pi << std::endl;
   
+
  55
    delete [] circle;
+
56    delete [] circle;
  }
+
  57 }
  
 
At around 10K iterations, the first decimal is stable.
 
At around 10K iterations, the first decimal is stable.

Revision as of 17:14, 9 February 2017

TBD...

Team Member

  1. Mark Anthony Villaflor (Leader)
  2. Huachen Li
  3. Yanhao Lei
eMail All

Progress

Assignment 1

Pi

This is a comparison between two programs that calculate Pi.

Leibniz formula implementation:

01 #include <iostream>
02 #include <iomanip>
03 
04 #include <chrono>
05
06 // Function duplicated from Workshop 2 - BLAS
07 void reportTime(const char* msg, std::chrono::steady_clock::duration span) {
08     auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(span);
09     std::cout << msg << " - took - " 
10     << ms.count() << " millisecs" << std::endl;
11 }
12 
13 // Arctangent of 1; execute for ~n iterations to refine the result
14 long double arctan1( unsigned long long int it ) { 
15     long double r = 0.0;
16 
17     // v0.25; due to performing the operations in a different order, there are rounding issues...
18     for ( long double d = 0.0; d < it; d++ ) { 
19         long double de = d * 4.0 + 1.0;
20         r += (1.0 / de) - (1.0 / (de + 2.0));
21     }   
22 
23     return r;
24 }
25 
26 int main( int argc, char* argv[] ) {
27     unsigned long long int n = std::stoull(argv[1], 0); 
28 
29     std::chrono::steady_clock::time_point ts, te; 
30     ts = std::chrono::steady_clock::now();
31    long double pi = 4.0 * arctan1( n );
32    te = std::chrono::steady_clock::now();
33    reportTime("Arctangent(1) ", te - ts);
34 
35     // Maximum length of a long double is 64 digits; minus "3." gives 62 digits.
36     std::cout.precision(62);
37     std::cout << "Pi: " << std::fixed << pi << std::endl;
40 }

Stage 1 - Big-O


4 digits are correct at 10K iterations:

> time ./leibniz 10000
Arctangent(1)  - took - 0 millisecs
Pi: 3.14154265358982449354505184224706226814305409789085388183593750

real 	0m0.016s
user	0m0.004s
sys	0m0.008s

7 digits are correct at 10M iterations:

> time ./leibniz 10000000
Arctangent(1)  - took - 171 millisecs
Pi: 3.14159260358979321929411010483335076060029678046703338623046875

real	0m0.187s
user	0m0.172s
sys	0m0.012s

No difference at 100M iterations:

> time ./leibniz 100000000
Arctangent(1)  - took - 1708 millisecs
Pi: 3.14159264858979533105269588144636827564681880176067352294921875

real	0m1.725s
user	0m1.704s
sys	0m0.008s

Monte-Carlo algorithm implementation:

01 #include <iostream>
02 #include <random>
03 
04 #include <chrono>
05 
06 // Duplicated from https://scs.senecac.on.ca/~gpu610/pages/workshops/w2.html
07 void reportTime(const char* msg, std::chrono::steady_clock::duration span) {
08     auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(span);
09     std::cout << msg << " - took - " <<
10     ms.count() << " millisecs" << std::endl;
11 }
12 
13 int main(int argc, char* argv[]) {
14     std::chrono::steady_clock::time_point ts, te;
15 
16     ts = std::chrono::steady_clock::now();
17     unsigned long long int n = std::stoull(argv[1], 0),
18                            totalCircle = 0;
19 
20     int stride = 1000,
21         circleSize = n / stride;
22 
23     unsigned int* circle = new unsigned int[circleSize];
24 
25     for (int i = 0; i < circleSize; i++)
26         circle[i] = 0;
27 
28     std::random_device rd;
29     std::mt19937 mt(rd());
30     std::uniform_real_distribution<long double> dist(0.0, 1.0);
31     te = std::chrono::steady_clock::now();
32     reportTime("Init. ", te - ts);
33 
34     ts = std::chrono::steady_clock::now();
35     for (unsigned long long int i = 0; i < circleSize; i++) {
36         for (int j = 0; j < stride; j++) {
37             long double x = dist(mt),
38                         y = dist(mt);
39             // if position is inside the circle...
40             if (x * x + y * y < 1.0) {
41                 circle[i]++;
42             }
43         }
44     }
45 
46     for (int i = 0; i < circleSize; i++)
47         totalCircle += circle[i];
48 
49     long double pi = 4.0 * ((long double) totalCircle) / ((long double) n);
50     te = std::chrono::steady_clock::now();
51     reportTime("Drop points ", te - ts);
52 
53     std::cout.precision(62);
54     std::cout << "Pi: " << std::fixed << pi << std::endl;
55 
56     delete [] circle;
57 }

At around 10K iterations, the first decimal is stable.

> time ./monte-carlo 10000
Init.  - took - 0 millisecs
Drop points  - took - 1 millisecs
Pi: 3.11679999999999999995940747066214271399076096713542938232421875

real	0m0.019s
user	0m0.004s
sys	0m0.008s
> time ./monte-carlo 10000
Init.  - took - 0 millisecs
Drop points  - took - 1 millisecs
Pi: 3.16480000000000000009124645483638005316606722772121429443359375

real	0m0.018s
user	0m0.008s
sys	0m0.004s
> time ./monte-carlo 10000
Init.  - took - 0 millisecs
Drop points  - took - 1 millisecs
Pi: 3.16639999999999999995108079797745403993758372962474822998046875

real	0m0.018s
user	0m0.004s
sys	0m0.008s

The next digit is stable at around 10M iterations

> time ./monte-carlo 10000000
Init.  - took - 0 millisecs
Drop points  - took - 1096 millisecs
Pi: 3.14150879999999999990685506379151092914980836212635040283203125

real	0m1.114s
user	0m1.092s
sys	0m0.008s
> time ./monte-carlo 10000000
Init.  - took - 0 millisecs
Drop points  - took - 1097 millisecs
Pi: 3.14219679999999999993332000514101309818215668201446533203125000

real	0m1.114s
user	0m1.092s
sys	0m0.016s
> time ./monte-carlo 10000000
Init.  - took - 0 millisecs
Drop points  - took - 1097 millisecs
Pi: 3.14158840000000000010696443730751070688711479306221008300781250

real	0m1.115s
user	0m1.088s
sys	0m0.012s

By 100M, the third digit appears to be stable.

> time ./monte-carlo 100000000
Init.  - took - 1 millisecs
Drop points  - took - 10910 millisecs
Pi: 3.14138611999999999989559296142971334120375104248523712158203125

real	0m10.930s
user	0m10.881s
sys	0m0.012s
> time ./monte-carlo 100000000
Init.  - took - 1 millisecs
Drop points  - took - 10847 millisecs
Pi: 3.14185203999999999998835042980260823242133483290672302246093750

real	0m10.868s
user	0m10.833s
sys	0m0.016s
> time ./monte-carlo 100000000
Init.  - took - 1 millisecs
Drop points  - took - 10883 millisecs
Pi: 3.14160056000000000009896028441147564080893062055110931396484375

real	0m10.903s
user	0m10.865s
sys	0m0.016s

Assignment 2

Assignment 3