Difference between revisions of "GPU610/TeamKappa"

From CDOT Wiki
Jump to: navigation, search
(Team Members)
(Assignment 1)
Line 8: Line 8:
 
== Progress ==
 
== Progress ==
 
=== Assignment 1 ===
 
=== Assignment 1 ===
 +
 +
== Program 1: RSA Encryption ==
 +
 +
'''Profiling'''
 +
 +
* gprof: Encryption 10MB
 +
 +
  %  cumulative  self              self    total         
 +
  time  seconds  seconds    calls  s/call  s/call  name   
 +
  60.48      1.50    1.50  3495254    0.00    0.00  fastModularExp(unsigned long long, unsigned long long, unsigned long long)
 +
  32.66      2.31    0.81  3495254    0.00    0.00  findHighestSetBit(unsigned long long)
 +
  4.44      2.42    0.11        1    0.11    2.48  encryption(std::string, std::string, std::string)
 +
  2.42      2.48    0.06  3495254    0.00    0.00  intToByteArray(int, char*)
 +
  0.00      2.48    0.00        1    0.00    0.00  _GLOBAL__sub_I__Z10encryptionSsSsSs
 +
  0.00      2.48    0.00        1    0.00    0.00  _GLOBAL__sub_I_main
 +
  0.00      2.48    0.00        1    0.00    0.00  __static_initialization_and_destruction_0(int, int)
 +
  0.00      2.48    0.00        1    0.00    0.00  __static_initialization_and_destruction_0(int, int)
 +
 +
* gprof: Encryption 20MB
 +
 +
  %  cumulative  self              self    total         
 +
  time  seconds  seconds    calls  s/call  s/call  name   
 +
  63.65      3.27    3.27  6990507    0.00    0.00  fastModularExp(unsigned long long, unsigned long long, unsigned long long)
 +
  29.04      4.75    1.49  6990507    0.00    0.00  findHighestSetBit(unsigned long long)
 +
  3.90      4.96    0.20        1    0.20    5.13  encryption(std::string, std::string, std::string)
 +
  3.41      5.13    0.17  6990507    0.00    0.00  intToByteArray(int, char*)
 +
  0.00      5.13    0.00        1    0.00    0.00  _GLOBAL__sub_I__Z10encryptionSsSsSs
 +
  0.00      5.13    0.00        1    0.00    0.00  _GLOBAL__sub_I_main
 +
  0.00      5.13    0.00        1    0.00    0.00  __static_initialization_and_destruction_0(int, int)
 +
  0.00      5.13    0.00        1    0.00    0.00  __static_initialization_and_destruction_0(int, int)
 +
 +
* gprof: Encryption 30MB
 +
 +
  %  cumulative  self              self    total         
 +
  time  seconds  seconds    calls  s/call  s/call  name   
 +
  68.04      5.11    5.11 10485760    0.00    0.00  fastModularExp(unsigned long long, unsigned long long, unsigned long long)
 +
  25.23      7.00    1.90 10485760    0.00    0.00  findHighestSetBit(unsigned long long)
 +
  3.99      7.30    0.30        1    0.30    7.51  encryption(std::string, std::string, std::string)
 +
  2.73      7.51    0.20 10485760    0.00    0.00  intToByteArray(int, char*)
 +
  0.00      7.51    0.00        1    0.00    0.00  _GLOBAL__sub_I__Z10encryptionSsSsSs
 +
  0.00      7.51    0.00        1    0.00    0.00  _GLOBAL__sub_I_main
 +
  0.00      7.51    0.00        1    0.00    0.00  __static_initialization_and_destruction_0(int, int)
 +
  0.00      7.51    0.00        1    0.00    0.00  __static_initialization_and_destruction_0(int, int)
 +
 +
* gprof: Decryption 10MB
 +
 +
  %  cumulative  self              self    total         
 +
  time  seconds  seconds    calls  s/call  s/call  name   
 +
  69.80      1.71    1.71  3495254    0.00    0.00  fastModularExp(unsigned long long, unsigned long long, unsigned long long)
 +
  21.63      2.24    0.53  3495254    0.00    0.00  findHighestSetBit(unsigned long long)
 +
  6.12      2.39    0.15        1    0.15    2.45  decryption(std::string, std::string, std::string)
 +
  2.45      2.45    0.06  3495254    0.00    0.00  intToByteArray(int, char*)
 +
  0.00      2.45    0.00        1    0.00    0.00  _GLOBAL__sub_I__Z10encryptionSsSsSs
 +
  0.00      2.45    0.00        1    0.00    0.00  _GLOBAL__sub_I_main
 +
  0.00      2.45    0.00        1    0.00    0.00  __static_initialization_and_destruction_0(int, int)
 +
  0.00      2.45    0.00        1    0.00    0.00  __static_initialization_and_destruction_0(int, int)
 +
 +
* gprof: Decryption 20MB
 +
 +
  %  cumulative  self              self    total         
 +
  time  seconds  seconds    calls  s/call  s/call  name   
 +
  64.11      3.38    3.38  6990507    0.00    0.00  fastModularExp(unsigned long long, unsigned long long, unsigned long long)
 +
  27.18      4.82    1.44  6990507    0.00    0.00  findHighestSetBit(unsigned long long)
 +
  4.73      5.07    0.25        1    0.25    5.28  decryption(std::string, std::string, std::string)
 +
  3.98      5.28    0.21  6990507    0.00    0.00  intToByteArray(int, char*)
 +
  0.00      5.28    0.00        1    0.00    0.00  _GLOBAL__sub_I__Z10encryptionSsSsSs
 +
  0.00      5.28    0.00        1    0.00    0.00  _GLOBAL__sub_I_main
 +
  0.00      5.28    0.00        1    0.00    0.00  __static_initialization_and_destruction_0(int, int)
 +
  0.00      5.28    0.00        1    0.00    0.00  __static_initialization_and_destruction_0(int, int)
 +
 +
* gprof: Decryption 30MB
 +
 +
  %  cumulative  self              self    total         
 +
  time  seconds  seconds    calls  s/call  s/call  name   
 +
  66.88      5.25    5.25 10485760    0.00    0.00  fastModularExp(unsigned long long, unsigned long long, unsigned long long)
 +
  24.59      7.18    1.93 10485760    0.00    0.00  findHighestSetBit(unsigned long long)
 +
  5.73      7.63    0.45        1    0.45    7.85  decryption(std::string, std::string, std::string)
 +
  2.80      7.85    0.22 10485760    0.00    0.00  intToByteArray(int, char*)
 +
  0.00      7.85    0.00        1    0.00    0.00  _GLOBAL__sub_I__Z10encryptionSsSsSs
 +
  0.00      7.85    0.00        1    0.00    0.00  _GLOBAL__sub_I_main
 +
  0.00      7.85    0.00        1    0.00    0.00  __static_initialization_and_destruction_0(int, int)
 +
  0.00      7.85    0.00        1    0.00    0.00  __static_initialization_and_destruction_0(int, int)
 +
 +
 +
'''Bottleneck Code'''
 +
 +
The two slowest parts of this encryption method are fastModularExp and findHighestSetBit. A third function, intToByteArray, takes up a relatively small amount of time but may still be able to be optimized.
 +
 +
unsigned int fastModularExp(ULong a, ULong b, ULong c) {
 +
    ULong result = 1;
 +
    ULong leadingbit = findHighestSetBit(b); // Heighest set bit
 +
    while(leadingbit > 0){ //while there are bits left
 +
        result = ((result*result) % c); //case 1: bit is a 0
 +
        if((b & leadingbit) > 0){
 +
            result = ((result * a) % c); //case 2: if bit is a 1
 +
        }
 +
        leadingbit = leadingbit >> 1;
 +
    }
 +
    return (unsigned int)result;
 +
}
 +
 +
ULong findHighestSetBit(ULong num){
 +
    ULong result = 0;
 +
    for(int i = 63; i >= 0; i--){
 +
        if(num & (1ULL << i)){
 +
            result = 1ULL << i;
 +
            return result;
 +
        }
 +
    }
 +
    return result;
 +
}
 +
 +
byte* intToByteArray(int num, byte *result){
 +
    for(int i = 0; i < 4; i++){
 +
        result[i] = (num & (0xFF << (8 *(3-i)))) >> (8 *(3-i));
 +
    }
 +
    return result;
 +
}
 +
 +
At first glance, the fastest function is also the function that appears like it would be the easiest to run on a GPU. The other code does not look like it can be optimized with a GPU easily as it does not use large arrays of N size. However, after some short internet research, several documents turned up on the topic of using a GPU to make RSA faster (example: [http://research.ijcaonline.org/volume87/number6/pxc3893704.pdf here] and [http://ijssst.info/Vol-15/No-3/data/3857a529.pdf here]). This leaves me hopeful that in the further stages of this assignment that this could be an interesting program to work with.
 +
 
=== Assignment 2 ===
 
=== Assignment 2 ===
 
=== Assignment 3 ===
 
=== Assignment 3 ===

Revision as of 15:57, 14 October 2015

Team Kappa

Team Members

  1. Ryan Mullings, Team Member
  2. Andy Cooc, Team Member
  3. Matt Jang, Team Member

Email All

Progress

Assignment 1

Program 1: RSA Encryption

Profiling

  • gprof: Encryption 10MB
  %   cumulative   self              self     total           
 time   seconds   seconds    calls   s/call   s/call  name    
 60.48      1.50     1.50  3495254     0.00     0.00  fastModularExp(unsigned long long, unsigned long long, unsigned long long)
 32.66      2.31     0.81  3495254     0.00     0.00  findHighestSetBit(unsigned long long)
  4.44      2.42     0.11        1     0.11     2.48  encryption(std::string, std::string, std::string)
  2.42      2.48     0.06  3495254     0.00     0.00  intToByteArray(int, char*)
  0.00      2.48     0.00        1     0.00     0.00  _GLOBAL__sub_I__Z10encryptionSsSsSs
  0.00      2.48     0.00        1     0.00     0.00  _GLOBAL__sub_I_main
  0.00      2.48     0.00        1     0.00     0.00  __static_initialization_and_destruction_0(int, int)
  0.00      2.48     0.00        1     0.00     0.00  __static_initialization_and_destruction_0(int, int)
  • gprof: Encryption 20MB
  %   cumulative   self              self     total           
 time   seconds   seconds    calls   s/call   s/call  name    
 63.65      3.27     3.27  6990507     0.00     0.00  fastModularExp(unsigned long long, unsigned long long, unsigned long long)
 29.04      4.75     1.49  6990507     0.00     0.00  findHighestSetBit(unsigned long long)
  3.90      4.96     0.20        1     0.20     5.13  encryption(std::string, std::string, std::string)
  3.41      5.13     0.17  6990507     0.00     0.00  intToByteArray(int, char*)
  0.00      5.13     0.00        1     0.00     0.00  _GLOBAL__sub_I__Z10encryptionSsSsSs
  0.00      5.13     0.00        1     0.00     0.00  _GLOBAL__sub_I_main
  0.00      5.13     0.00        1     0.00     0.00  __static_initialization_and_destruction_0(int, int)
  0.00      5.13     0.00        1     0.00     0.00  __static_initialization_and_destruction_0(int, int)
  • gprof: Encryption 30MB
  %   cumulative   self              self     total           
 time   seconds   seconds    calls   s/call   s/call  name    
 68.04      5.11     5.11 10485760     0.00     0.00  fastModularExp(unsigned long long, unsigned long long, unsigned long long)
 25.23      7.00     1.90 10485760     0.00     0.00  findHighestSetBit(unsigned long long)
  3.99      7.30     0.30        1     0.30     7.51  encryption(std::string, std::string, std::string)
  2.73      7.51     0.20 10485760     0.00     0.00  intToByteArray(int, char*)
  0.00      7.51     0.00        1     0.00     0.00  _GLOBAL__sub_I__Z10encryptionSsSsSs
  0.00      7.51     0.00        1     0.00     0.00  _GLOBAL__sub_I_main
  0.00      7.51     0.00        1     0.00     0.00  __static_initialization_and_destruction_0(int, int)
  0.00      7.51     0.00        1     0.00     0.00  __static_initialization_and_destruction_0(int, int)
  • gprof: Decryption 10MB
  %   cumulative   self              self     total           
 time   seconds   seconds    calls   s/call   s/call  name    
 69.80      1.71     1.71  3495254     0.00     0.00  fastModularExp(unsigned long long, unsigned long long, unsigned long long)
 21.63      2.24     0.53  3495254     0.00     0.00  findHighestSetBit(unsigned long long)
  6.12      2.39     0.15        1     0.15     2.45  decryption(std::string, std::string, std::string)
  2.45      2.45     0.06  3495254     0.00     0.00  intToByteArray(int, char*)
  0.00      2.45     0.00        1     0.00     0.00  _GLOBAL__sub_I__Z10encryptionSsSsSs
  0.00      2.45     0.00        1     0.00     0.00  _GLOBAL__sub_I_main
  0.00      2.45     0.00        1     0.00     0.00  __static_initialization_and_destruction_0(int, int)
  0.00      2.45     0.00        1     0.00     0.00  __static_initialization_and_destruction_0(int, int)
  • gprof: Decryption 20MB
  %   cumulative   self              self     total           
 time   seconds   seconds    calls   s/call   s/call  name    
 64.11      3.38     3.38  6990507     0.00     0.00  fastModularExp(unsigned long long, unsigned long long, unsigned long long)
 27.18      4.82     1.44  6990507     0.00     0.00  findHighestSetBit(unsigned long long)
  4.73      5.07     0.25        1     0.25     5.28  decryption(std::string, std::string, std::string)
  3.98      5.28     0.21  6990507     0.00     0.00  intToByteArray(int, char*)
  0.00      5.28     0.00        1     0.00     0.00  _GLOBAL__sub_I__Z10encryptionSsSsSs
  0.00      5.28     0.00        1     0.00     0.00  _GLOBAL__sub_I_main
  0.00      5.28     0.00        1     0.00     0.00  __static_initialization_and_destruction_0(int, int)
  0.00      5.28     0.00        1     0.00     0.00  __static_initialization_and_destruction_0(int, int)
  • gprof: Decryption 30MB
  %   cumulative   self              self     total           
 time   seconds   seconds    calls   s/call   s/call  name    
 66.88      5.25     5.25 10485760     0.00     0.00  fastModularExp(unsigned long long, unsigned long long, unsigned long long)
 24.59      7.18     1.93 10485760     0.00     0.00  findHighestSetBit(unsigned long long)
  5.73      7.63     0.45        1     0.45     7.85  decryption(std::string, std::string, std::string)
  2.80      7.85     0.22 10485760     0.00     0.00  intToByteArray(int, char*)
  0.00      7.85     0.00        1     0.00     0.00  _GLOBAL__sub_I__Z10encryptionSsSsSs
  0.00      7.85     0.00        1     0.00     0.00  _GLOBAL__sub_I_main
  0.00      7.85     0.00        1     0.00     0.00  __static_initialization_and_destruction_0(int, int)
  0.00      7.85     0.00        1     0.00     0.00  __static_initialization_and_destruction_0(int, int)


Bottleneck Code

The two slowest parts of this encryption method are fastModularExp and findHighestSetBit. A third function, intToByteArray, takes up a relatively small amount of time but may still be able to be optimized.

unsigned int fastModularExp(ULong a, ULong b, ULong c) {
    ULong result = 1;
    ULong leadingbit = findHighestSetBit(b); // Heighest set bit
    while(leadingbit > 0){ //while there are bits left
        result = ((result*result) % c); //case 1: bit is a 0
        if((b & leadingbit) > 0){
            result = ((result * a) % c); //case 2: if bit is a 1
        }
        leadingbit = leadingbit >> 1;
    }
    return (unsigned int)result;
}
ULong findHighestSetBit(ULong num){
    ULong result = 0;
    for(int i = 63; i >= 0; i--){
        if(num & (1ULL << i)){
            result = 1ULL << i;
            return result;
        }
    }
    return result;
}
byte* intToByteArray(int num, byte *result){
    for(int i = 0; i < 4; i++){
        result[i] = (num & (0xFF << (8 *(3-i)))) >> (8 *(3-i));
    }
    return result;
}

At first glance, the fastest function is also the function that appears like it would be the easiest to run on a GPU. The other code does not look like it can be optimized with a GPU easily as it does not use large arrays of N size. However, after some short internet research, several documents turned up on the topic of using a GPU to make RSA faster (example: here and here). This leaves me hopeful that in the further stages of this assignment that this could be an interesting program to work with.

Assignment 2

Assignment 3