GPU610/DPS915 MCM Decrypt
Contents
MySQL Decrypt
Team Members
Progress
Assignment 1
Topic
I have always been interested in computer security, and especially interested in the algorithms used encrypt the passwords that are stored in various databases. After browsing around multiple open source code repositories I found a relatively simplistic C program (which I am planning on converting to C++ at a later date) that is used to decrypt MYSQL passwords given the MYSQL hash of the password.
Required Files
https://drive.google.com/#folders/0B63ZcmSNsUsqX3MyTGNSeFBuSlU
Compilation And Running
Below you will see a compilation and example run of the application with the hash 29bad1457ee5e49e (which equals the password pass (a small password so I could test the application in a short period of time).
Output
[ 500000 ] 0x4e 0x42 0x5c (NB\)
[ 1000000 ] 0x21 0x65 0x3d 0x21 (!e=!)
[ 1500000 ] 0x4f 0x2c 0x7a 0x21 (O,z!)
[ 2000000 ] 0x22 0x4f 0x5b 0x22 ("O[")
[ 2500000 ] 0x50 0x71 0x3c 0x23 (Pq<#)
[ 3000000 ] 0x23 0x39 0x79 0x23 (#9y#)
[ 3500000 ] 0x51 0x5b 0x5a 0x24 (Q[Z$)
[ 4000000 ] 0x24 0x23 0x3c 0x25 ($#<%)
[ 4500000 ] 0x52 0x45 0x78 0x25 (REx%)
[ 5000000 ] 0x25 0x68 0x59 0x26 (%hY&)
[ 5500000 ] 0x53 0x2f 0x3b 0x27 (S/;')
[ 6000000 ] 0x26 0x52 0x77 0x27 (&Rw')
[ 6500000 ] 0x54 0x74 0x58 0x28 (TtX()
[ 7000000 ] 0x27 0x3c 0x3a 0x29 ('<:))
[ 7500000 ] 0x55 0x5e 0x76 0x29 (U^v))
[ 8000000 ] 0x28 0x26 0x58 0x2a ((&X*)
[ 8500000 ] 0x56 0x48 0x39 0x2b (VH9+)
[ 9000000 ] 0x29 0x6b 0x75 0x2b ()ku+)
[ 9500000 ] 0x57 0x32 0x57 0x2c (W2W,)
[ 10000000 ] 0x2a 0x55 0x38 0x2d (*U8-)
[ 10500000 ] 0x58 0x77 0x74 0x2d (Xwt-)
[ 11000000 ] 0x2b 0x3f 0x56 0x2e (+?V.)
[ 11500000 ] 0x59 0x61 0x37 0x2f (Ya7/)
[ 12000000 ] 0x2c 0x29 0x74 0x2f (,)t/)
[ 12500000 ] 0x5a 0x4b 0x55 0x30 (ZKU0)
[ 13000000 ] 0x2d 0x6e 0x36 0x31 (-n61)
[ 13500000 ] 0x5b 0x35 0x73 0x31 ([5s1)
[ 14000000 ] 0x2e 0x58 0x54 0x32 (.XT2)
[ 14500000 ] 0x5c 0x7a 0x35 0x33 (\z53)
[ 15000000 ] 0x2f 0x42 0x72 0x33 (/Br3)
[ 15500000 ] 0x5d 0x64 0x53 0x34 (]dS4)
[ 16000000 ] 0x30 0x2c 0x35 0x35 (0,55)
[ 16500000 ] 0x5e 0x4e 0x71 0x35 (^Nq5)
[ 17000000 ] 0x31 0x71 0x52 0x36 (1qR6)
[ 17500000 ] 0x5f 0x38 0x34 0x37 (_847)
[ 18000000 ] 0x32 0x5b 0x70 0x37 (2[p7)
[ 18500000 ] 0x60 0x22 0x52 0x38 (`"R8)
[ 19000000 ] 0x33 0x45 0x33 0x39 (3E39)
[ 19500000 ] 0x61 0x67 0x6f 0x39 (ago9)
[ 20000000 ] 0x34 0x2f 0x51 0x3a (4/Q:)
[ 20500000 ] 0x62 0x51 0x32 0x3b (bQ2;)
[ 21000000 ] 0x35 0x74 0x6e 0x3b (5tn;)
[ 21500000 ] 0x63 0x3b 0x50 0x3c (c;P<)
[ 22000000 ] 0x36 0x5e 0x31 0x3d (6^1=)
[ 22500000 ] 0x64 0x25 0x6e 0x3d (d%n=)
[ 23000000 ] 0x37 0x48 0x4f 0x3e (7HO>)
[ 23500000 ] 0x65 0x6a 0x30 0x3f (ej0?)
[ 24000000 ] 0x38 0x32 0x6d 0x3f (82m?)
[ 24500000 ] 0x66 0x54 0x4e 0x40 (fTN@)
[ 25000000 ] 0x39 0x77 0x2f 0x41 (9w/A)
[ 25500000 ] 0x67 0x3e 0x6c 0x41 (g>lA)
[ 26000000 ] 0x3a 0x61 0x4d 0x42 (:aMB)
[ 26500000 ] 0x68 0x28 0x2f 0x43 (h(/C)
[ 27000000 ] 0x3b 0x4b 0x6b 0x43 (;KkC)
[ 27500000 ] 0x69 0x6d 0x4c 0x44 (imLD)
[ 28000000 ] 0x3c 0x35 0x2e 0x45 (<5.E)
[ 28500000 ] 0x6a 0x57 0x6a 0x45 (jWjE)
[ 29000000 ] 0x3d 0x7a 0x4b 0x46 (=zKF)
[ 29500000 ] 0x6b 0x41 0x2d 0x47 (kA-G)
[ 30000000 ] 0x3e 0x64 0x69 0x47 (>diG)
[ 30500000 ] 0x6c 0x2b 0x4b 0x48 (l+KH)
[ 31000000 ] 0x3f 0x4e 0x2c 0x49 (?N,I)
[ 31500000 ] 0x6d 0x70 0x68 0x49 (mphI)
[ 32000000 ] 0x40 0x38 0x4a 0x4a (@8JJ)
[ 32500000 ] 0x6e 0x5a 0x2b 0x4b (nZ+K)
[ 33000000 ] 0x41 0x22 0x68 0x4b (A"hK)
[ 33500000 ] 0x6f 0x44 0x49 0x4c (oDIL)
[ 34000000 ] 0x42 0x67 0x2a 0x4d (Bg*M)
[ 34500000 ] 0x70 0x2e 0x67 0x4d (p.gM)
[ 35000000 ] 0x43 0x51 0x48 0x4e (CQHN)
[ 35500000 ] 0x71 0x73 0x29 0x4f (qs)O)
[ 36000000 ] 0x44 0x3b 0x66 0x4f (D;fO)
[ 36500000 ] 0x72 0x5d 0x47 0x50 (r]GP)
[ 37000000 ] 0x45 0x25 0x29 0x51 (E%)Q)
[ 37500000 ] 0x73 0x47 0x65 0x51 (sGeQ)
[ 38000000 ] 0x46 0x6a 0x46 0x52 (FjFR)
[ 38500000 ] 0x74 0x31 0x28 0x53 (t1(S)
[ 39000000 ] 0x47 0x54 0x64 0x53 (GTdS)
[ 39500000 ] 0x75 0x76 0x45 0x54 (uvET)
[ 40000000 ] 0x48 0x3e 0x27 0x55 (H>'U)
[ 40500000 ] 0x76 0x60 0x63 0x55 (v`cU)
[ 41000000 ] 0x49 0x28 0x45 0x56 (I(EV)
[ 41500000 ] 0x77 0x4a 0x26 0x57 (wJ&W)
[ 42000000 ] 0x4a 0x6d 0x62 0x57 (JmbW)
[ 42500000 ] 0x78 0x34 0x44 0x58 (x4DX)
[ 43000000 ] 0x4b 0x57 0x25 0x59 (KW%Y)
[ 43500000 ] 0x79 0x79 0x61 0x59 (yyaY)
[ 44000000 ] 0x4c 0x41 0x43 0x5a (LACZ)
[ 44500000 ] 0x7a 0x63 0x24 0x5b (zc$[)
[ 45000000 ] 0x4d 0x2b 0x61 0x5b (M+a[)
[ 45500000 ] 0x20 0x4e 0x42 0x5c ( NB\)
[ 46000000 ] 0x4e 0x70 0x23 0x5d (Np#])
[ 46500000 ] 0x21 0x38 0x60 0x5d (!8`])
[ 47000000 ] 0x4f 0x5a 0x41 0x5e (OZA^)
[ 47500000 ] 0x22 0x22 0x23 0x5f (""#_)
[ 48000000 ] 0x50 0x44 0x5f 0x5f (PD__)
[ 48500000 ] 0x23 0x67 0x40 0x60 (#g@`)
[ 49000000 ] 0x51 0x2e 0x22 0x61 (Q."a)
[ 49500000 ] 0x24 0x51 0x5e 0x61 ($Q^a)
[ 50000000 ] 0x52 0x73 0x3f 0x62 (Rs?b)
[ 50500000 ] 0x25 0x3b 0x21 0x63 (%;!c)
[ 51000000 ] 0x53 0x5d 0x5d 0x63 (S]]c)
[ 51500000 ] 0x26 0x25 0x3f 0x64 (&%?d)
[ 52000000 ] 0x54 0x47 0x20 0x65 (TG e)
[ 52500000 ] 0x27 0x6a 0x5c 0x65 ('j\e)
[ 53000000 ] 0x55 0x31 0x3e 0x66 (U1>f)
[ 53500000 ] 0x28 0x54 0x7a 0x66 ((Tzf)
[ 54000000 ] 0x56 0x76 0x5b 0x67 (Vv[g)
[ 54500000 ] 0x29 0x3e 0x3d 0x68 ()>=h)
[ 55000000 ] 0x57 0x60 0x79 0x68 (W`yh)
[ 55500000 ] 0x2a 0x28 0x5b 0x69 (*([i)
[ 56000000 ] 0x58 0x4a 0x3c 0x6a (XJ<j)
[ 56500000 ] 0x2b 0x6d 0x78 0x6a (+mxj)
[ 57000000 ] 0x59 0x34 0x5a 0x6b (Y4Zk)
[ 57500000 ] 0x2c 0x57 0x3b 0x6c (,W;l)
[ 58000000 ] 0x5a 0x79 0x77 0x6c (Zywl)
[ 58500000 ] 0x2d 0x41 0x59 0x6d (-AYm)
[ 59000000 ] 0x5b 0x63 0x3a 0x6e ([c:n)
[ 59500000 ] 0x2e 0x2b 0x77 0x6e (.+wn)
[ 60000000 ] 0x5c 0x4d 0x58 0x6f (\MXo)
[ 60500000 ] 0x2f 0x70 0x39 0x70 (/p9p)
[ 61000000 ] 0x5d 0x37 0x76 0x70 (]7vp)
[ 61500000 ] 0x30 0x5a 0x57 0x71 (0ZWq)
[ 62000000 ] 0x5e 0x21 0x39 0x72 (^!9r)
[ 62500000 ] 0x31 0x44 0x75 0x72 (1Dur)
[ 63000000 ] 0x5f 0x66 0x56 0x73 (_fVs)
[ 63239711 ] 0x70 0x61 0x73 0x73 (pass)
MATCH [pass] [29bad1457ee5e49e]==[29bad1457ee5e49e]
Profile
After looking at the code and profiling it, I believe that there are multiple areas which could be parallelized and optimised to reduce the execution time significantly. Most of the execution time takes place in the brute(char const*) and hash_password(unsigned long*, char const*) functions, so I will focus most of my effort and time into those areas when the time comes to optimise them.
Assignment 2
Kernel Attempts
So far I have made attempts to convert different pieces of compute heavy code into kernels in order to obtain the same results as the original code but none have been successful.
Original Code 1:
unsigned long nr=1345345333L;
unsigned long add=7;
unsigned long nr2=0x12345671L;
unsigned long tmp;
int passLen = strlen(password);
for (int i = 0; i < passLen; i++) {
if (*password == ' ' || *password == '\t')
continue;
tmp= (unsigned long) (unsigned char) password[i];
nr^= (((nr & 63)+add)*tmp)+ (nr << 8);
nr2+=(nr2 << 8) ^ nr;
add+=tmp;
}
Kernel Attempt 1(Not working):
__global__ void hash_password_kernel(unsigned long* nr, unsigned long* nr2, unsigned long* add, const char *password) {
int idx = blockIdx.x * blockDim.x + threadIdx.x;
unsigned long tmp;
tmp= (unsigned long) (unsigned char) password[idx];
*nr^= (((*nr & 63)+*add)*tmp)+ (*nr << 8);
*nr2+=(*nr2 << 8) ^ *nr;
*add+=tmp;
}
Original Code 2:
for(int i=pos; i<max; i++) {
if(data[i] != max) {
data[i]++;
pos=i;
break;
}
}
Kernel Attempt 2(Not working):
__global__ void testKernel(unsigned char *data, unsigned int max, unsigned int* pos) {
if(data[threadIdx.x] != max) {
data[threadIdx.x]++;
*pos=threadIdx.x;
}
}
Original Code 3:
memset(encrypted_password, 0, 255);
memset((char*)&data, min, 255);
Kernal Attempt 3(Working):
__global__ void initialise(unsigned char* data, char* encrypted_password) {
data[threadIdx.x] = 32;
encrypted_password[threadIdx.x] = 0;
}
-Currently I am trying to get the above kernels to work before handing in the assignment as I feel that having just the initialization kernel would not be nearly sufficient for the purpose of this assignment.
-At the moment I am working on various simplified versions of a prefix sum algorithm that I am hoping will lead me on the right path to completing my assignment. These algorithms have been gathered from various sources such as MIT, NVIDIA, as well as CUDA documentation.
-Below is a sequential prescan used to perform a prescan on an array.
void scan( float* arr1, float* input, int n) {
output[0] = 0; // since this is a prescan, not a scan
for(int i = 1; i < length; ++i) {
arr1[i] = input[i-1] + arr1[i-1];
}
}