Difference between revisions of "DPS915/M-N-M"
(→Nitin Prakash Panicker: File Compression) |
(→Assignment 1) |
||
Line 182: | Line 182: | ||
0.21 48.45 0.10 7095561 14.09 14.09 CLZWCompressFile::putc_comp(int) | 0.21 48.45 0.10 7095561 14.09 14.09 CLZWCompressFile::putc_comp(int) | ||
+ | </pre> | ||
+ | |||
+ | === lzw.cpp === | ||
+ | |||
+ | <pre> | ||
+ | |||
+ | #include <time.h> | ||
+ | |||
+ | #include "lzw.h" | ||
+ | |||
+ | |||
+ | |||
+ | /******************************************************************** | ||
+ | |||
+ | ** | ||
+ | |||
+ | ** This program gets a file name from the command line. It compresses the | ||
+ | |||
+ | ** file, placing its output in a file named test.lzw. It then expands | ||
+ | |||
+ | ** test.lzw into test.out. Test.out should then be an exact duplicate of | ||
+ | |||
+ | ** the input file. | ||
+ | |||
+ | ** | ||
+ | |||
+ | *************************************************************************/ | ||
+ | |||
+ | |||
+ | |||
+ | main(int argc, char *argv[]) | ||
+ | |||
+ | { | ||
+ | |||
+ | clock_t timer; | ||
+ | |||
+ | CLZWCompressFile lzw; | ||
+ | |||
+ | |||
+ | |||
+ | /* | ||
+ | |||
+ | ** Get the file name, open it up, and open up the lzw output file. | ||
+ | |||
+ | */ | ||
+ | |||
+ | if (argc==1) | ||
+ | |||
+ | { | ||
+ | |||
+ | printf("Input file name to compress?\n"); | ||
+ | |||
+ | return 0; | ||
+ | |||
+ | } | ||
+ | |||
+ | |||
+ | |||
+ | printf("testing %s...\n", argv[1]); | ||
+ | |||
+ | /* | ||
+ | |||
+ | ** Compress the file. | ||
+ | |||
+ | */ | ||
+ | |||
+ | timer = clock(); | ||
+ | |||
+ | int crunch = lzw.Compress(argv[1], "test.lzw"); | ||
+ | |||
+ | timer = clock() - timer; //CLOCKS_PER_SEC | ||
+ | |||
+ | printf("compress time=%d ms, encoding=%d, size=%u", timer, lzw.get_bits(), crunch); | ||
+ | |||
+ | int filesize = lzw.u_io; | ||
+ | |||
+ | printf(" (ratio=%d%%)\n", filesize ? (filesize-crunch)*100/filesize : 0); | ||
+ | |||
+ | if(lzw.AnyIOErrors()) | ||
+ | |||
+ | printf("***I/O ERROR***\n"); | ||
+ | |||
+ | |||
+ | |||
+ | /* | ||
+ | |||
+ | ** Expand the file. | ||
+ | |||
+ | */ | ||
+ | |||
+ | timer = clock(); | ||
+ | |||
+ | int orig = lzw.Expand("test.lzw", "test.out"); | ||
+ | |||
+ | timer = clock() - timer; //CLOCKS_PER_SEC | ||
+ | |||
+ | printf("expand time=%d ms, encoding=%d\n", timer, lzw.get_bits()); | ||
+ | |||
+ | if(lzw.AnyIOErrors()) | ||
+ | |||
+ | printf("***I/O ERROR***\n"); | ||
+ | |||
+ | |||
+ | |||
+ | ATLASSERT(filesize == orig); // did we mangle the file? | ||
+ | |||
+ | return 0; | ||
+ | |||
+ | } | ||
+ | </pre> | ||
+ | |||
+ | === lzw.h === | ||
+ | |||
+ | <pre> | ||
+ | |||
+ | #ifndef UPRIGHT_LZW_H | ||
+ | |||
+ | #define UPRIGHT_LZW_H | ||
+ | |||
+ | |||
+ | |||
+ | |||
+ | |||
+ | /* LZW.h by N.A.Bozinis @ 19/01/2010 08:55:52 | ||
+ | |||
+ | * ---------------------------------------------------------------------------------- | ||
+ | |||
+ | * | ||
+ | |||
+ | * Plain C++ port of LZW compression algorithm and code originally (c) Mark R. Nelson | ||
+ | |||
+ | * http://marknelson.us/1989/10/01/lzw-data-compression | ||
+ | |||
+ | * Variable bit length encoding idea and code originally by Michael Dipperstein | ||
+ | |||
+ | * http://michael.dipperstein.com/lzw | ||
+ | |||
+ | * | ||
+ | |||
+ | * There are a lot of compression classes floating around but most are based on the | ||
+ | |||
+ | * zlib (zip/unzip) library, which is good but a bit of overkill for simple and small | ||
+ | |||
+ | * code. LZW combines decent compression ratios with very small code footprint. If | ||
+ | |||
+ | * you need something more powerful here are a few resources: | ||
+ | |||
+ | * | ||
+ | |||
+ | * http://www.codeproject.com/KB/files/zip_utils.aspx | ||
+ | |||
+ | * http://www.codeproject.com/KB/cpp/xzipunzip.aspx | ||
+ | |||
+ | * http://www.codeproject.com/KB/cpp/ChauMemzip.aspx | ||
+ | |||
+ | * | ||
+ | |||
+ | * Microsoft types can check the CAB protocol that is available in all windows: | ||
+ | |||
+ | * http://www.codeproject.com/KB/files/CABCompressExtract.aspx | ||
+ | |||
+ | * http://msdn.microsoft.com/en-us/library/bb417343.aspx | ||
+ | |||
+ | * | ||
+ | |||
+ | */ | ||
+ | |||
+ | |||
+ | |||
+ | #include <stdio.h> | ||
+ | |||
+ | #include <stdlib.h> | ||
+ | |||
+ | #include <limits.h> | ||
+ | |||
+ | #include <string.h> | ||
+ | |||
+ | #include <assert.h> | ||
+ | |||
+ | #define ATLASSERT assert | ||
+ | |||
+ | |||
+ | |||
+ | /* NOTE: function and variable names left as much as possible matching the original | ||
+ | |||
+ | LZW.c by Mark, naturally bundled in classes to get rid of static/globals etc | ||
+ | |||
+ | */ | ||
+ | |||
+ | |||
+ | |||
+ | #define MIN_CODE_LEN 9 /* min # bits in a code word */ | ||
+ | |||
+ | #define MAX_CODE_LEN 20 /* max # bits in a code word */ | ||
+ | |||
+ | #define CURRENT_MAX_CODES(x) (1UL << (x)) | ||
+ | |||
+ | |||
+ | |||
+ | #define FIRST_CODE (1 << CHAR_BIT) /* value of 1st string code */ | ||
+ | |||
+ | |||
+ | |||
+ | #if (MIN_CODE_LEN <= CHAR_BIT) | ||
+ | |||
+ | #error Code words must be larger than 1 character | ||
+ | |||
+ | #endif | ||
+ | |||
+ | |||
+ | |||
+ | #if (MAX_CODE_LEN >= 25) | ||
+ | |||
+ | #error Code words must fit in an integer | ||
+ | |||
+ | #endif | ||
+ | |||
+ | |||
+ | |||
+ | |||
+ | |||
+ | /* VARIABLE BIT LENGTH ENCODING | ||
+ | |||
+ | * Instead of using a fixed number of bits for code words, we start at 9 (=MIN_CODE_LEN) | ||
+ | |||
+ | * and go up to BITS (<=MAX_CODE_LEN) so that small files are tightly packed and larger | ||
+ | |||
+ | * files are fine too. The BITS constant determines the maximum hash table size. For 18 | ||
+ | |||
+ | * this means 250KB runtime table size which is enough for files ~4MB. | ||
+ | |||
+ | * There is no problem for files larger than that; if we run out of table space for new | ||
+ | |||
+ | * codes then the same codes are emitted (uncompressed obviously) | ||
+ | |||
+ | */ | ||
+ | |||
+ | |||
+ | |||
+ | #define BITS 17 /* Setting the number of bits to 12, 13*/ | ||
+ | |||
+ | #define HASHING_SHIFT (BITS-8) /* or 14 affects several constants. */ | ||
+ | |||
+ | #define MAX_VALUE (1 << BITS) - 1 /* Note that MS-DOS machines need to */ | ||
+ | |||
+ | #define MAX_CODE MAX_VALUE - 1 /* compile their code in large model if*/ | ||
+ | |||
+ | /* 14 bits are selected. */ | ||
+ | |||
+ | |||
+ | |||
+ | #if BITS == 20 | ||
+ | |||
+ | #define TABLE_SIZE 1048583 | ||
+ | |||
+ | #elif BITS == 19 | ||
+ | |||
+ | #define TABLE_SIZE 524309 | ||
+ | |||
+ | #elif BITS == 18 | ||
+ | |||
+ | #define TABLE_SIZE 262147 | ||
+ | |||
+ | #elif BITS == 17 | ||
+ | |||
+ | #define TABLE_SIZE 131101 | ||
+ | |||
+ | #elif BITS == 16 | ||
+ | |||
+ | #define TABLE_SIZE 65543 | ||
+ | |||
+ | #elif BITS == 15 | ||
+ | |||
+ | #define TABLE_SIZE 32797 | ||
+ | |||
+ | #elif BITS == 14 | ||
+ | |||
+ | #define TABLE_SIZE 18041 /* The string table size needs to be a */ | ||
+ | |||
+ | /* prime number that is somewhat larger*/ | ||
+ | |||
+ | #elif BITS == 13 /* than 2**BITS. */ | ||
+ | |||
+ | #define TABLE_SIZE 9029 | ||
+ | |||
+ | #elif BITS == 12 | ||
+ | |||
+ | #define TABLE_SIZE 5021 | ||
+ | |||
+ | #else | ||
+ | |||
+ | #error define smaller or bigger table sizes | ||
+ | |||
+ | #endif | ||
+ | |||
+ | |||
+ | |||
+ | #if (TABLE_SIZE <= MAX_VALUE) | ||
+ | |||
+ | #error your prime numbers need attention | ||
+ | |||
+ | #endif | ||
+ | |||
+ | |||
+ | |||
+ | #if (BITS > MAX_CODE_LEN) | ||
+ | |||
+ | #error BITS can only go up to a maximum | ||
+ | |||
+ | #endif | ||
+ | |||
+ | |||
+ | |||
+ | |||
+ | |||
+ | /* | ||
+ | |||
+ | This class does most of the job, except reading source and writing the compressed data | ||
+ | |||
+ | A derived class does that so that there's flexibility to read either from files or memory | ||
+ | |||
+ | */ | ||
+ | |||
+ | |||
+ | |||
+ | class CLZWImpl { | ||
+ | |||
+ | protected: | ||
+ | |||
+ | int *code_value; /* This is the code value array */ | ||
+ | |||
+ | unsigned int *prefix_code; /* This array holds the prefix codes */ | ||
+ | |||
+ | unsigned char *append_character; /* This array holds the appended chars */ | ||
+ | |||
+ | unsigned char decode_stack[4000]; /* This array holds the decoded string */ | ||
+ | |||
+ | unsigned char CUR_BITS; /* ~nab: added for variable bit size */ | ||
+ | |||
+ | /* we are processing bits but in the end of the day we do I/O in bytes */ | ||
+ | |||
+ | int input_bit_count, output_bit_count; | ||
+ | |||
+ | unsigned long input_bit_buffer, output_bit_buffer; | ||
+ | |||
+ | |||
+ | |||
+ | public: | ||
+ | |||
+ | CLZWImpl() { | ||
+ | |||
+ | code_value = 0; | ||
+ | |||
+ | prefix_code = 0; | ||
+ | |||
+ | append_character = 0; | ||
+ | |||
+ | } | ||
+ | |||
+ | |||
+ | |||
+ | ~CLZWImpl() { | ||
+ | |||
+ | if(code_value) | ||
+ | |||
+ | free(code_value); | ||
+ | |||
+ | if(prefix_code) | ||
+ | |||
+ | free(prefix_code); | ||
+ | |||
+ | if(append_character) | ||
+ | |||
+ | free(append_character); | ||
+ | |||
+ | } | ||
+ | |||
+ | |||
+ | |||
+ | int get_bits() { return CUR_BITS; } | ||
+ | |||
+ | |||
+ | |||
+ | protected: | ||
+ | |||
+ | int Init() { | ||
+ | |||
+ | ATLASSERT(!code_value); /* call just once */ | ||
+ | |||
+ | |||
+ | |||
+ | code_value=(int*)malloc(TABLE_SIZE*sizeof(int)); | ||
+ | |||
+ | prefix_code=(unsigned int*)malloc(TABLE_SIZE*sizeof(unsigned int)); | ||
+ | |||
+ | append_character=(unsigned char*)malloc(TABLE_SIZE*sizeof(unsigned char)); | ||
+ | |||
+ | |||
+ | |||
+ | return code_value != 0 && prefix_code != 0 && append_character != 0; | ||
+ | |||
+ | } | ||
+ | |||
+ | |||
+ | |||
+ | /* override these 4: read a byte from source */ | ||
+ | |||
+ | virtual int getc_src() = 0; | ||
+ | |||
+ | /* read a byte from compressed source (during expansion) and write to compressed output */ | ||
+ | |||
+ | virtual int getc_comp() = 0; | ||
+ | |||
+ | /* write a byte to compressed output */ | ||
+ | |||
+ | virtual int putc_comp(int ch) = 0; | ||
+ | |||
+ | /* write a byte to expanded output */ | ||
+ | |||
+ | virtual int putc_out(int ch) = 0; | ||
+ | |||
+ | |||
+ | |||
+ | /* | ||
+ | |||
+ | ** This is the compression routine. The code should be a fairly close | ||
+ | |||
+ | ** match to the algorithm accompanying the article. | ||
+ | |||
+ | ** | ||
+ | |||
+ | */ | ||
+ | |||
+ | |||
+ | |||
+ | void compress() | ||
+ | |||
+ | { | ||
+ | |||
+ | unsigned int next_code; | ||
+ | |||
+ | unsigned int character; | ||
+ | |||
+ | unsigned int string_code; | ||
+ | |||
+ | unsigned int index; | ||
+ | |||
+ | unsigned int bit_limit; | ||
+ | |||
+ | int i; | ||
+ | |||
+ | |||
+ | |||
+ | ATLASSERT(code_value); /* initialized? */ | ||
+ | |||
+ | |||
+ | |||
+ | CUR_BITS = MIN_CODE_LEN; | ||
+ | |||
+ | bit_limit = CURRENT_MAX_CODES(CUR_BITS) - 1; | ||
+ | |||
+ | output_bit_count=0; | ||
+ | |||
+ | output_bit_buffer=0L; | ||
+ | |||
+ | |||
+ | |||
+ | ATLASSERT(256==FIRST_CODE); | ||
+ | |||
+ | next_code=FIRST_CODE; /* Next code is the next available string code*/ | ||
+ | |||
+ | for (i=0;i<TABLE_SIZE;i++) /* Clear out the string table before starting */ | ||
+ | |||
+ | code_value[i]=-1; | ||
+ | |||
+ | |||
+ | |||
+ | string_code=getc_src(); /* Get the first code */ | ||
+ | |||
+ | if(-1 == string_code) | ||
+ | |||
+ | return; /* empty file or error */ | ||
+ | |||
+ | |||
+ | |||
+ | /* | ||
+ | |||
+ | ** This is the main loop where it all happens. This loop runs util all of | ||
+ | |||
+ | ** the input has been exhausted. Note that it stops adding codes to the | ||
+ | |||
+ | ** table after all of the possible codes have been defined. | ||
+ | |||
+ | */ | ||
+ | |||
+ | while ((character=getc_src()) != -1) | ||
+ | |||
+ | { | ||
+ | |||
+ | index=find_match(string_code,character);/* See if the string is in */ | ||
+ | |||
+ | if (code_value[index] != -1) /* the table. If it is, */ | ||
+ | |||
+ | string_code=code_value[index]; /* get the code value. If */ | ||
+ | |||
+ | else /* the string is not in the*/ | ||
+ | |||
+ | { /* table, try to add it. */ | ||
+ | |||
+ | if (next_code <= MAX_CODE) | ||
+ | |||
+ | { | ||
+ | |||
+ | code_value[index]=next_code++; | ||
+ | |||
+ | prefix_code[index]=string_code; | ||
+ | |||
+ | append_character[index]=character; | ||
+ | |||
+ | } | ||
+ | |||
+ | |||
+ | |||
+ | /* are we using enough bits to write out this code word? */ | ||
+ | |||
+ | if(string_code >= bit_limit && CUR_BITS < BITS) | ||
+ | |||
+ | { | ||
+ | |||
+ | /* mark need for bigger code word with all ones */ | ||
+ | |||
+ | output_code(bit_limit); | ||
+ | |||
+ | CUR_BITS++; | ||
+ | |||
+ | bit_limit = (CURRENT_MAX_CODES(CUR_BITS) - 1); | ||
+ | |||
+ | } | ||
+ | |||
+ | |||
+ | |||
+ | ATLASSERT(string_code < bit_limit); | ||
+ | |||
+ | |||
+ | |||
+ | output_code(string_code); /* When a string is found */ | ||
+ | |||
+ | string_code=character; /* that is not in the table*/ | ||
+ | |||
+ | } /* I output the last string*/ | ||
+ | |||
+ | } /* after adding the new one*/ | ||
+ | |||
+ | /* | ||
+ | |||
+ | ** End of the main loop. | ||
+ | |||
+ | */ | ||
+ | |||
+ | |||
+ | |||
+ | output_code(string_code); /* Output the last code */ | ||
+ | |||
+ | output_code(-1); /* This code flushes the output buffer*/ | ||
+ | |||
+ | } | ||
+ | |||
+ | |||
+ | |||
+ | /* | ||
+ | |||
+ | ** This is the hashing routine. It tries to find a match for the prefix+char | ||
+ | |||
+ | ** string in the string table. If it finds it, the index is returned. If | ||
+ | |||
+ | ** the string is not found, the first available index in the string table is | ||
+ | |||
+ | ** returned instead. | ||
+ | |||
+ | */ | ||
+ | |||
+ | int find_match(unsigned int hash_prefix,unsigned int hash_character) | ||
+ | |||
+ | { | ||
+ | |||
+ | int index; | ||
+ | |||
+ | int offset; | ||
+ | |||
+ | |||
+ | |||
+ | index = (hash_character << HASHING_SHIFT) ^ hash_prefix; | ||
+ | |||
+ | if (index == 0) | ||
+ | |||
+ | offset = 1; | ||
+ | |||
+ | else | ||
+ | |||
+ | offset = TABLE_SIZE - index; | ||
+ | |||
+ | while (1) | ||
+ | |||
+ | { | ||
+ | |||
+ | if (code_value[index] == -1) | ||
+ | |||
+ | return(index); | ||
+ | |||
+ | if (prefix_code[index] == hash_prefix && | ||
+ | |||
+ | append_character[index] == hash_character) | ||
+ | |||
+ | return(index); | ||
+ | |||
+ | index -= offset; | ||
+ | |||
+ | if (index < 0) | ||
+ | |||
+ | index += TABLE_SIZE; | ||
+ | |||
+ | } | ||
+ | |||
+ | } | ||
+ | |||
+ | |||
+ | |||
+ | /* | ||
+ | |||
+ | ** This is the expansion routine. It takes an LZW format file, and expands | ||
+ | |||
+ | ** it to an output file. The code here should be a fairly close match to | ||
+ | |||
+ | ** the algorithm in the accompanying article. | ||
+ | |||
+ | */ | ||
+ | |||
+ | |||
+ | |||
+ | void expand() | ||
+ | |||
+ | { | ||
+ | |||
+ | unsigned int next_code; | ||
+ | |||
+ | unsigned int new_code; | ||
+ | |||
+ | unsigned int old_code; | ||
+ | |||
+ | int character; | ||
+ | |||
+ | unsigned char *string; | ||
+ | |||
+ | unsigned int bit_limit; | ||
+ | |||
+ | |||
+ | |||
+ | ATLASSERT(code_value); /* initialized? */ | ||
+ | |||
+ | |||
+ | |||
+ | CUR_BITS = MIN_CODE_LEN; | ||
+ | |||
+ | bit_limit = CURRENT_MAX_CODES(CUR_BITS) - 1; | ||
+ | |||
+ | input_bit_count=0; | ||
+ | |||
+ | input_bit_buffer=0L; | ||
+ | |||
+ | |||
+ | |||
+ | // @@@ what if we pass uncompressed file to decode? | ||
+ | |||
+ | |||
+ | |||
+ | next_code=FIRST_CODE; /* This is the next available code to define */ | ||
+ | |||
+ | |||
+ | |||
+ | old_code=input_code(); /* Read in the first code, initialize the */ | ||
+ | |||
+ | if(-1 == old_code) | ||
+ | |||
+ | return; /* read error? */ | ||
+ | |||
+ | character=old_code; /* character variable, and send the first */ | ||
+ | |||
+ | if(putc_out(old_code)==-1) /* code to the output file */ | ||
+ | |||
+ | return; /* write error */ | ||
+ | |||
+ | /* | ||
+ | |||
+ | ** This is the main expansion loop. It reads in characters from the LZW file | ||
+ | |||
+ | ** until it sees the special code used to inidicate the end of the data. | ||
+ | |||
+ | */ | ||
+ | |||
+ | while ((new_code=input_code()) != (-1)) | ||
+ | |||
+ | { | ||
+ | |||
+ | /* look for code length increase marker */ | ||
+ | |||
+ | if(bit_limit == new_code && CUR_BITS < BITS) | ||
+ | |||
+ | { | ||
+ | |||
+ | CUR_BITS++; | ||
+ | |||
+ | bit_limit = CURRENT_MAX_CODES(CUR_BITS) - 1; | ||
+ | |||
+ | |||
+ | |||
+ | new_code=input_code(); | ||
+ | |||
+ | ATLASSERT(new_code != -1); /* must be read error? */ | ||
+ | |||
+ | if(new_code == -1) | ||
+ | |||
+ | break; | ||
+ | |||
+ | } | ||
+ | |||
+ | |||
+ | |||
+ | ATLASSERT(new_code < bit_limit); | ||
+ | |||
+ | |||
+ | |||
+ | /* | ||
+ | |||
+ | ** This code checks for the special STRING+CHARACTER+STRING+CHARACTER+STRING | ||
+ | |||
+ | ** case which generates an undefined code. It handles it by decoding | ||
+ | |||
+ | ** the last code, and adding a single character to the end of the decode string. | ||
+ | |||
+ | */ | ||
+ | |||
+ | if (new_code>=next_code) | ||
+ | |||
+ | { | ||
+ | |||
+ | *decode_stack=character; | ||
+ | |||
+ | string=decode_string(decode_stack+1,old_code); | ||
+ | |||
+ | } | ||
+ | |||
+ | /* | ||
+ | |||
+ | ** Otherwise we do a straight decode of the new code. | ||
+ | |||
+ | */ | ||
+ | |||
+ | else | ||
+ | |||
+ | string=decode_string(decode_stack,new_code); | ||
+ | |||
+ | /* | ||
+ | |||
+ | ** Now we output the decoded string in reverse order. | ||
+ | |||
+ | */ | ||
+ | |||
+ | character=*string; | ||
+ | |||
+ | while (string >= decode_stack) | ||
+ | |||
+ | putc_out(*string--); | ||
+ | |||
+ | /* | ||
+ | |||
+ | ** Finally, if possible, add a new code to the string table. | ||
+ | |||
+ | */ | ||
+ | |||
+ | if (next_code <= MAX_CODE) | ||
+ | |||
+ | { | ||
+ | |||
+ | prefix_code[next_code]=old_code; | ||
+ | |||
+ | append_character[next_code]=character; | ||
+ | |||
+ | next_code++; | ||
+ | |||
+ | } | ||
+ | |||
+ | old_code=new_code; | ||
+ | |||
+ | } | ||
+ | |||
+ | } | ||
+ | |||
+ | |||
+ | |||
+ | /* | ||
+ | |||
+ | ** This routine simply decodes a string from the string table, storing | ||
+ | |||
+ | ** it in a buffer. The buffer can then be output in reverse order by | ||
+ | |||
+ | ** the expansion program. | ||
+ | |||
+ | */ | ||
+ | |||
+ | /* ~nab: these char* aren't a risk for unicode; we are reading bytes */ | ||
+ | |||
+ | unsigned char *decode_string(unsigned char *buffer,unsigned int code) | ||
+ | |||
+ | { | ||
+ | |||
+ | int i; | ||
+ | |||
+ | |||
+ | |||
+ | i=0; | ||
+ | |||
+ | while (code >= FIRST_CODE) | ||
+ | |||
+ | { | ||
+ | |||
+ | *buffer++ = append_character[code]; | ||
+ | |||
+ | code=prefix_code[code]; | ||
+ | |||
+ | i++; | ||
+ | |||
+ | ATLASSERT(i < sizeof(decode_stack)); /* buffer overrun if it blows, increase stack size! */ | ||
+ | |||
+ | } | ||
+ | |||
+ | *buffer=code; | ||
+ | |||
+ | return(buffer); | ||
+ | |||
+ | } | ||
+ | |||
+ | |||
+ | |||
+ | /* | ||
+ | |||
+ | ** The following two routines are used to output variable length | ||
+ | |||
+ | ** codes. They are written strictly for clarity, and are not | ||
+ | |||
+ | ** particularyl efficient. | ||
+ | |||
+ | |||
+ | |||
+ | ~nab: there's room for improvement in these I/O functions eg work in DWORDS instead of bytes | ||
+ | |||
+ | */ | ||
+ | |||
+ | |||
+ | |||
+ | unsigned int input_code() | ||
+ | |||
+ | { | ||
+ | |||
+ | int c; | ||
+ | |||
+ | unsigned int return_value; | ||
+ | |||
+ | //static int input_bit_count=0; | ||
+ | |||
+ | //static unsigned long input_bit_buffer=0L; | ||
+ | |||
+ | |||
+ | |||
+ | while (input_bit_count <= 24) | ||
+ | |||
+ | { | ||
+ | |||
+ | if ((c = getc_comp()) == -1) | ||
+ | |||
+ | break; | ||
+ | |||
+ | |||
+ | |||
+ | input_bit_buffer |= | ||
+ | |||
+ | (unsigned long) c << (24-input_bit_count); | ||
+ | |||
+ | input_bit_count += 8; | ||
+ | |||
+ | } | ||
+ | |||
+ | |||
+ | |||
+ | if(input_bit_count < CUR_BITS) { | ||
+ | |||
+ | ATLASSERT(!input_bit_buffer); | ||
+ | |||
+ | return -1; /* EOF */ | ||
+ | |||
+ | } | ||
+ | |||
+ | |||
+ | |||
+ | return_value=input_bit_buffer >> (32-CUR_BITS); | ||
+ | |||
+ | input_bit_buffer <<= CUR_BITS; | ||
+ | |||
+ | input_bit_count -= CUR_BITS; | ||
+ | |||
+ | |||
+ | |||
+ | ATLASSERT(return_value < (1UL << CUR_BITS)); | ||
+ | |||
+ | return(return_value); | ||
+ | |||
+ | } | ||
+ | |||
+ | |||
+ | |||
+ | /* bits are written outside normal byte boundaries, hence the need for keeping old values */ | ||
+ | |||
+ | void output_code(unsigned int code) | ||
+ | |||
+ | { | ||
+ | |||
+ | //static int output_bit_count=0; | ||
+ | |||
+ | //static unsigned long output_bit_buffer=0L; | ||
+ | |||
+ | |||
+ | |||
+ | ATLASSERT(output_bit_count < 8); /* leftovers */ | ||
+ | |||
+ | ATLASSERT(CUR_BITS + output_bit_count <= 32); | ||
+ | |||
+ | /*codes <256 are possible for single characters, zero bytes etc*/ | ||
+ | |||
+ | |||
+ | |||
+ | if(-1 == code) { | ||
+ | |||
+ | /* pad remaining zeros and flush the last byte */ | ||
+ | |||
+ | if(output_bit_count) { | ||
+ | |||
+ | output_bit_buffer >>= 24; | ||
+ | |||
+ | ATLASSERT((output_bit_buffer & 0xFF) == output_bit_buffer); | ||
+ | |||
+ | putc_comp(output_bit_buffer); | ||
+ | |||
+ | |||
+ | |||
+ | output_bit_count = 0; | ||
+ | |||
+ | output_bit_buffer = 0; /* in case some eejit calls us again */ | ||
+ | |||
+ | } | ||
+ | |||
+ | |||
+ | |||
+ | return; | ||
+ | |||
+ | } | ||
+ | |||
+ | |||
+ | |||
+ | ATLASSERT(code < (1UL << CUR_BITS)); | ||
+ | |||
+ | |||
+ | |||
+ | /* sends new bytes near the top (MSB) */ | ||
+ | |||
+ | output_bit_buffer |= (unsigned long) code << (32-CUR_BITS-output_bit_count); | ||
+ | |||
+ | output_bit_count += CUR_BITS; | ||
+ | |||
+ | while (output_bit_count >= 8) | ||
+ | |||
+ | { | ||
+ | |||
+ | /* no check for error but if there was a problem we'd know from the time we wrote the identifier */ | ||
+ | |||
+ | putc_comp(output_bit_buffer >> 24); | ||
+ | |||
+ | output_bit_buffer <<= 8; | ||
+ | |||
+ | output_bit_count -= 8; | ||
+ | |||
+ | } | ||
+ | |||
+ | } | ||
+ | |||
+ | }; /* CLZWImpl */ | ||
+ | |||
+ | |||
+ | |||
+ | /* example derived class using C buffered I/O functions */ | ||
+ | |||
+ | class CLZWCompressFile : public CLZWImpl { | ||
+ | |||
+ | public: | ||
+ | |||
+ | CLZWCompressFile() { | ||
+ | |||
+ | io_file = 0; | ||
+ | |||
+ | lzw_file = 0; | ||
+ | |||
+ | }; | ||
+ | |||
+ | |||
+ | |||
+ | ~CLZWCompressFile() { | ||
+ | |||
+ | ATLASSERT(!io_file); | ||
+ | |||
+ | ATLASSERT(!lzw_file); | ||
+ | |||
+ | }; | ||
+ | |||
+ | |||
+ | |||
+ | int AnyIOErrors() {return io_error; } | ||
+ | |||
+ | |||
+ | |||
+ | // @@@ these char* should be changed for unicode builds | ||
+ | |||
+ | unsigned int Compress(char* input_file_name, char* to_name) | ||
+ | |||
+ | { | ||
+ | |||
+ | ATLASSERT(input_file_name && *input_file_name); | ||
+ | |||
+ | ATLASSERT(to_name && *to_name); | ||
+ | |||
+ | ATLASSERT(strcmp(to_name, input_file_name)); | ||
+ | |||
+ | |||
+ | |||
+ | io_error = 1; | ||
+ | |||
+ | |||
+ | |||
+ | if(!code_value) | ||
+ | |||
+ | if(!Init()) | ||
+ | |||
+ | return 0; /* rare memory error */ | ||
+ | |||
+ | |||
+ | |||
+ | u_comp = 0; | ||
+ | |||
+ | u_io = 0; | ||
+ | |||
+ | io_file=fopen(input_file_name,"rb"); | ||
+ | |||
+ | if(io_file) { | ||
+ | |||
+ | lzw_file=fopen(to_name,"wb"); | ||
+ | |||
+ | if(lzw_file) { | ||
+ | |||
+ | /* write LZW identifier L+starting bytes */ | ||
+ | |||
+ | putc('L', lzw_file); | ||
+ | |||
+ | if(putc(MIN_CODE_LEN, lzw_file) == MIN_CODE_LEN) { | ||
+ | |||
+ | compress(); | ||
+ | |||
+ | io_error = ferror(lzw_file) || ferror(io_file); | ||
+ | |||
+ | if(!io_error) | ||
+ | |||
+ | ATLASSERT(u_comp <= u_io); /* this is bound to bomb every now and then, no compression! */ | ||
+ | |||
+ | } | ||
+ | |||
+ | fclose(lzw_file); | ||
+ | |||
+ | lzw_file = 0; | ||
+ | |||
+ | } | ||
+ | |||
+ | |||
+ | |||
+ | fclose(io_file); | ||
+ | |||
+ | io_file = 0; | ||
+ | |||
+ | } | ||
+ | |||
+ | |||
+ | |||
+ | return u_comp; | ||
+ | |||
+ | } | ||
+ | |||
+ | |||
+ | |||
+ | unsigned int Expand(char* lzw_name, char* to_name) | ||
+ | |||
+ | { | ||
+ | |||
+ | ATLASSERT(lzw_name && *lzw_name); | ||
+ | |||
+ | ATLASSERT(to_name && *to_name); | ||
+ | |||
+ | ATLASSERT(strcmp(to_name, lzw_name)); | ||
+ | |||
+ | |||
+ | |||
+ | io_error = 1; | ||
+ | |||
+ | |||
+ | |||
+ | if(!code_value) | ||
+ | |||
+ | if(!Init()) | ||
+ | |||
+ | return 0; /* rare memory error */ | ||
+ | |||
+ | |||
+ | |||
+ | u_comp = 0; | ||
+ | |||
+ | u_io = 0; | ||
+ | |||
+ | lzw_file=fopen(lzw_name,"rb"); | ||
+ | |||
+ | if(lzw_file) { | ||
+ | |||
+ | /* check LZW identifier L+starting bytes */ | ||
+ | |||
+ | int ch1 = getc(lzw_file); | ||
+ | |||
+ | int ch2 = getc(lzw_file); | ||
+ | |||
+ | if('L' == ch1 && MIN_CODE_LEN==ch2) { | ||
+ | |||
+ | io_file=fopen(to_name,"wb"); | ||
+ | |||
+ | if(io_file) { | ||
+ | |||
+ | expand(); | ||
+ | |||
+ | io_error = ferror(lzw_file) || ferror(io_file); | ||
+ | |||
+ | |||
+ | |||
+ | fclose(io_file); | ||
+ | |||
+ | io_file = 0; | ||
+ | |||
+ | } | ||
+ | |||
+ | } | ||
+ | |||
+ | |||
+ | |||
+ | fclose(lzw_file); | ||
+ | |||
+ | lzw_file = 0; | ||
+ | |||
+ | } | ||
+ | |||
+ | |||
+ | |||
+ | return u_io; | ||
+ | |||
+ | } | ||
+ | |||
+ | |||
+ | |||
+ | protected: | ||
+ | |||
+ | /* -1 return indicates either EOF or some IO error */ | ||
+ | |||
+ | virtual int getc_src() { | ||
+ | |||
+ | ATLASSERT(io_file); | ||
+ | |||
+ | int ch = getc(io_file); | ||
+ | |||
+ | if(EOF == ch) | ||
+ | |||
+ | return -1; | ||
+ | |||
+ | |||
+ | |||
+ | u_io++; | ||
+ | |||
+ | return ch; | ||
+ | |||
+ | } | ||
+ | |||
+ | virtual int getc_comp() { | ||
+ | |||
+ | ATLASSERT(lzw_file); | ||
+ | |||
+ | int ch = getc(lzw_file); | ||
+ | |||
+ | if(EOF == ch) | ||
+ | |||
+ | return -1; | ||
+ | |||
+ | |||
+ | |||
+ | u_comp++; | ||
+ | |||
+ | return ch; | ||
+ | |||
+ | } | ||
+ | |||
+ | virtual int putc_comp(int ch) { | ||
+ | |||
+ | ATLASSERT(lzw_file); | ||
+ | |||
+ | ATLASSERT(ch >= 0 && ch < 256); | ||
+ | |||
+ | int ret = putc(ch, lzw_file); | ||
+ | |||
+ | |||
+ | |||
+ | if(ret != EOF) { | ||
+ | |||
+ | ATLASSERT(ret == ch); | ||
+ | |||
+ | u_comp++; | ||
+ | |||
+ | } | ||
+ | |||
+ | else | ||
+ | |||
+ | ret = -1; | ||
+ | |||
+ | |||
+ | |||
+ | return ret; | ||
+ | |||
+ | } | ||
+ | |||
+ | virtual int putc_out(int ch) { | ||
+ | |||
+ | ATLASSERT(io_file); | ||
+ | |||
+ | ATLASSERT(ch >= 0 && ch < 256); | ||
+ | |||
+ | int ret = putc(ch, io_file); | ||
+ | |||
+ | |||
+ | |||
+ | if(ret != EOF) | ||
+ | |||
+ | u_io++; | ||
+ | |||
+ | else | ||
+ | |||
+ | ret = -1; | ||
+ | |||
+ | |||
+ | |||
+ | return ret; | ||
+ | |||
+ | } | ||
+ | |||
+ | |||
+ | |||
+ | FILE* io_file; | ||
+ | |||
+ | FILE *lzw_file; | ||
+ | |||
+ | int io_error; | ||
+ | |||
+ | public: | ||
+ | |||
+ | unsigned long u_io, u_comp; /* bytes read and written */ | ||
+ | |||
+ | }; | ||
+ | |||
+ | |||
+ | |||
+ | // @@@ could have a generic one on IStream, CreateStreamOnHGlobal/SHCreateStreamOnFile | ||
+ | |||
+ | |||
+ | |||
+ | #endif /* UPRIGHT_LZW_H */ | ||
</pre> | </pre> | ||
---- | ---- |
Revision as of 15:18, 20 February 2013
GPU610/DPS915 | Student List | Group and Project Index | Student Resources | Glossary
Team: M-N-M
Team Members
Potential Projects
Project Name | Project Description | Status |
---|---|---|
C Compiler | Take the compilation process and transfer it to the GPU | failed |
Galactic Collision Simulation | A 2D simulation of two galaxies colliding, with fully simulated gravity effects. | cannot benefit from parallel computing,already fast enough |
Isomorphism of graphs | Check if two graphs are isomorphic in nature. This is a very straight forward program. | failed |
Image Processing | Mathematical operations applied to images for colour negation, rotation, blurring effects, etc. | scrapped |
Facial Recognition system | Speed up time taken to match face with database | already CUDA enabled |
Steganography | Speed up the process of encrypting one type of file into another | Profiled |
Prime number generator | Generate prime numbers | Profiled |
LZW File compression | Compress files using Lempel-Ziv-Welch algorithm | Profiled |
Progress
Assignment 1
Mohamed Baig: Steganography using Steghide
- Steghide has certain dependencies that it uses to complete its function.
- Dependencies:
- libmash
- libcrypt
- libjpeg
- zlib
- Ran the Configure file to see if I have all the Dependencies
- Installed the all the dependencies
- Ran Configure again to generate Makefile in the src folder
- Altered the Makefile to enable profiling
- Altered some source files to avoid errors
AuSampleValues.cc
#include "AuSampleValues.h" // AuMuLawSampleValue template <> //My change const BYTE AuMuLawSampleValue::MinValue = 0 ; template <> //My change const BYTE AuMuLawSampleValue::MaxValue = BYTE_MAX ; // AuPCM8SampleValue template <> //My change const SBYTE AuPCM8SampleValue::MinValue = SBYTE_MIN ; template <> //My change const SBYTE AuPCM8SampleValue::MaxValue = SBYTE_MAX ; // AuPCM16SampleValue template <> //My change const SWORD16 AuPCM16SampleValue::MinValue = SWORD16_MIN ; template <> //My change const SWORD16 AuPCM16SampleValue::MaxValue = SWORD16_MAX ; // AuPCM32SampleValue template <> //My change const SWORD32 AuPCM32SampleValue::MinValue = SWORD32_MIN ; template <> //My change const SWORD32 AuPCM32SampleValue::MaxValue = SWORD32_MAX ;
AuData.h
#ifndef SH_AUDATA_H #define SH_AUDATA_H #include "BinaryIO.h" #include "AudioData.h" // AuMuLawAudioData typedef AudioDataImpl<AuMuLaw,BYTE> AuMuLawAudioData ; template <> //My change inline BYTE AuMuLawAudioData::readValue (BinaryIO* io) const { return (io->read8()) ; } template <> //My change inline void AuMuLawAudioData::writeValue (BinaryIO* io, BYTE v) const { io->write8(v) ; } // AuPCM8AudioData typedef AudioDataImpl<AuPCM8,SBYTE> AuPCM8AudioData ; template <> //My change inline SBYTE AuPCM8AudioData::readValue (BinaryIO* io) const { return ((SBYTE) io->read8()) ; } template <> //My change inline void AuPCM8AudioData::writeValue (BinaryIO* io, SBYTE v) const { io->write8((BYTE) v) ; } // AuPCM16AudioData typedef AudioDataImpl<AuPCM16,SWORD16> AuPCM16AudioData ; template <> //My change inline SWORD16 AuPCM16AudioData::readValue (BinaryIO* io) const { return ((SWORD16) io->read16_be()) ; } template <> //My change inline void AuPCM16AudioData::writeValue (BinaryIO* io, SWORD16 v) const { io->write16_be((UWORD16) v) ; } // AuPCM32AudioData typedef AudioDataImpl<AuPCM32,SWORD32> AuPCM32AudioData ; template <> //My change inline SWORD32 AuPCM32AudioData::readValue (BinaryIO* io) const { return ((SWORD32) io->read32_be()) ; } template <> //My change inline void AuPCM32AudioData::writeValue (BinaryIO* io, SWORD32 v) const { io->write32_be((UWORD32) v) ; } #endif // ndef SH_AUDATA_H
- The result of embedding kilobytes of text data into an image
Flat profile: Each sample counts as 0.01 seconds. % cumulative self self total time seconds seconds calls s/call s/call name 13.38 0.21 0.21 1318276 0.00 0.00 Selector::idxX(unsigned int, unsigned int, unsigned int*) const 11.47 0.39 0.18 4054789 0.00 0.00 Vertex::getDegree() const 9.55 0.54 0.15 659139 0.00 0.00 Selector::calculate(unsigned int) 7.64 0.66 0.12 659141 0.00 0.00 JpegFile::getEmbeddedValue(unsigned int) const 7.01 0.77 0.11 659139 0.00 0.00 __gnu_cxx::hashtable<std::pair<unsigned int const, unsigned int>, unsigned int, __gnu_cxx::hash<unsigned int>, std::_Select1st<std::pair<unsigned int const, unsigned int> >, std::equal_to<unsigned int>, std::allocator<unsigned int> >::resize(unsigned long) 6.37 0.87 0.10 328293 0.00 0.00 JpegFile::getSampleValue(unsigned int) const 5.73 0.96 0.09 1 0.09 0.09 Selector::~Selector() 3.82 1.02 0.06 1 0.06 0.09 JpegFile::read(BinaryIO*)
- The result of attempting to embed 1.7 GB of data into an image
Flat profile: Each sample counts as 0.01 seconds. % cumulative self self total time seconds seconds calls s/call s/call name 54.43 21.52 21.52 1192388169 0.00 0.00 BitString::_append(bool) 16.79 28.15 6.64 593200712 0.00 0.00 BitString::operator[](unsigned long) const 9.30 31.83 3.68 74898412 0.00 0.00 BitString::append(unsigned char, unsigned short) 4.78 33.72 1.89 3 0.63 6.41 BitString::append(BitString const&) 3.50 35.10 1.39 BitString::BitString(unsigned long) 1.91 35.86 0.76 BitString::clear() 1.29 36.37 0.51 1 0.51 37.26 Embedder::Embedder() 1.20 36.84 0.48 37823336 0.00 0.00 BinaryIO::eof() const
Assignment 1
Muhammad Ahsan: Prime Number Generator( 1,000,000,000 primes)
Flat profile Each sample counts as 0.01 seconds. % cumulative self self total time seconds seconds calls Ts/call Ts/call name 100.00 53.83 53.83 get_primes(unsigned long) 0.00 53.83 0.00 27 0.00 0.00 void std::vector<unsigned long, std::allocator<unsigned long> >::_M_emplace_back_aux<unsigned long const&>(unsigned long const&&&) 0.00 53.83 0.00 1 0.00 0.00 _GLOBAL__sub_I__Z10get_primesm
Assignment 1
Nitin Prakash Panicker: LZW File Compression
Flat profile: Each sample counts as 0.01 seconds. % cumulative self self total time seconds seconds calls ns/call ns/call name 99.46 48.19 48.19 CLZWCompressFile::Compress(char*, char*) 0.33 48.35 0.16 17122488 9.34 9.34 CLZWCompressFile::getc_src() 0.21 48.45 0.10 7095561 14.09 14.09 CLZWCompressFile::putc_comp(int)
lzw.cpp
#include <time.h> #include "lzw.h" /******************************************************************** ** ** This program gets a file name from the command line. It compresses the ** file, placing its output in a file named test.lzw. It then expands ** test.lzw into test.out. Test.out should then be an exact duplicate of ** the input file. ** *************************************************************************/ main(int argc, char *argv[]) { clock_t timer; CLZWCompressFile lzw; /* ** Get the file name, open it up, and open up the lzw output file. */ if (argc==1) { printf("Input file name to compress?\n"); return 0; } printf("testing %s...\n", argv[1]); /* ** Compress the file. */ timer = clock(); int crunch = lzw.Compress(argv[1], "test.lzw"); timer = clock() - timer; //CLOCKS_PER_SEC printf("compress time=%d ms, encoding=%d, size=%u", timer, lzw.get_bits(), crunch); int filesize = lzw.u_io; printf(" (ratio=%d%%)\n", filesize ? (filesize-crunch)*100/filesize : 0); if(lzw.AnyIOErrors()) printf("***I/O ERROR***\n"); /* ** Expand the file. */ timer = clock(); int orig = lzw.Expand("test.lzw", "test.out"); timer = clock() - timer; //CLOCKS_PER_SEC printf("expand time=%d ms, encoding=%d\n", timer, lzw.get_bits()); if(lzw.AnyIOErrors()) printf("***I/O ERROR***\n"); ATLASSERT(filesize == orig); // did we mangle the file? return 0; }
lzw.h
#ifndef UPRIGHT_LZW_H #define UPRIGHT_LZW_H /* LZW.h by N.A.Bozinis @ 19/01/2010 08:55:52 * ---------------------------------------------------------------------------------- * * Plain C++ port of LZW compression algorithm and code originally (c) Mark R. Nelson * http://marknelson.us/1989/10/01/lzw-data-compression * Variable bit length encoding idea and code originally by Michael Dipperstein * http://michael.dipperstein.com/lzw * * There are a lot of compression classes floating around but most are based on the * zlib (zip/unzip) library, which is good but a bit of overkill for simple and small * code. LZW combines decent compression ratios with very small code footprint. If * you need something more powerful here are a few resources: * * http://www.codeproject.com/KB/files/zip_utils.aspx * http://www.codeproject.com/KB/cpp/xzipunzip.aspx * http://www.codeproject.com/KB/cpp/ChauMemzip.aspx * * Microsoft types can check the CAB protocol that is available in all windows: * http://www.codeproject.com/KB/files/CABCompressExtract.aspx * http://msdn.microsoft.com/en-us/library/bb417343.aspx * */ #include <stdio.h> #include <stdlib.h> #include <limits.h> #include <string.h> #include <assert.h> #define ATLASSERT assert /* NOTE: function and variable names left as much as possible matching the original LZW.c by Mark, naturally bundled in classes to get rid of static/globals etc */ #define MIN_CODE_LEN 9 /* min # bits in a code word */ #define MAX_CODE_LEN 20 /* max # bits in a code word */ #define CURRENT_MAX_CODES(x) (1UL << (x)) #define FIRST_CODE (1 << CHAR_BIT) /* value of 1st string code */ #if (MIN_CODE_LEN <= CHAR_BIT) #error Code words must be larger than 1 character #endif #if (MAX_CODE_LEN >= 25) #error Code words must fit in an integer #endif /* VARIABLE BIT LENGTH ENCODING * Instead of using a fixed number of bits for code words, we start at 9 (=MIN_CODE_LEN) * and go up to BITS (<=MAX_CODE_LEN) so that small files are tightly packed and larger * files are fine too. The BITS constant determines the maximum hash table size. For 18 * this means 250KB runtime table size which is enough for files ~4MB. * There is no problem for files larger than that; if we run out of table space for new * codes then the same codes are emitted (uncompressed obviously) */ #define BITS 17 /* Setting the number of bits to 12, 13*/ #define HASHING_SHIFT (BITS-8) /* or 14 affects several constants. */ #define MAX_VALUE (1 << BITS) - 1 /* Note that MS-DOS machines need to */ #define MAX_CODE MAX_VALUE - 1 /* compile their code in large model if*/ /* 14 bits are selected. */ #if BITS == 20 #define TABLE_SIZE 1048583 #elif BITS == 19 #define TABLE_SIZE 524309 #elif BITS == 18 #define TABLE_SIZE 262147 #elif BITS == 17 #define TABLE_SIZE 131101 #elif BITS == 16 #define TABLE_SIZE 65543 #elif BITS == 15 #define TABLE_SIZE 32797 #elif BITS == 14 #define TABLE_SIZE 18041 /* The string table size needs to be a */ /* prime number that is somewhat larger*/ #elif BITS == 13 /* than 2**BITS. */ #define TABLE_SIZE 9029 #elif BITS == 12 #define TABLE_SIZE 5021 #else #error define smaller or bigger table sizes #endif #if (TABLE_SIZE <= MAX_VALUE) #error your prime numbers need attention #endif #if (BITS > MAX_CODE_LEN) #error BITS can only go up to a maximum #endif /* This class does most of the job, except reading source and writing the compressed data A derived class does that so that there's flexibility to read either from files or memory */ class CLZWImpl { protected: int *code_value; /* This is the code value array */ unsigned int *prefix_code; /* This array holds the prefix codes */ unsigned char *append_character; /* This array holds the appended chars */ unsigned char decode_stack[4000]; /* This array holds the decoded string */ unsigned char CUR_BITS; /* ~nab: added for variable bit size */ /* we are processing bits but in the end of the day we do I/O in bytes */ int input_bit_count, output_bit_count; unsigned long input_bit_buffer, output_bit_buffer; public: CLZWImpl() { code_value = 0; prefix_code = 0; append_character = 0; } ~CLZWImpl() { if(code_value) free(code_value); if(prefix_code) free(prefix_code); if(append_character) free(append_character); } int get_bits() { return CUR_BITS; } protected: int Init() { ATLASSERT(!code_value); /* call just once */ code_value=(int*)malloc(TABLE_SIZE*sizeof(int)); prefix_code=(unsigned int*)malloc(TABLE_SIZE*sizeof(unsigned int)); append_character=(unsigned char*)malloc(TABLE_SIZE*sizeof(unsigned char)); return code_value != 0 && prefix_code != 0 && append_character != 0; } /* override these 4: read a byte from source */ virtual int getc_src() = 0; /* read a byte from compressed source (during expansion) and write to compressed output */ virtual int getc_comp() = 0; /* write a byte to compressed output */ virtual int putc_comp(int ch) = 0; /* write a byte to expanded output */ virtual int putc_out(int ch) = 0; /* ** This is the compression routine. The code should be a fairly close ** match to the algorithm accompanying the article. ** */ void compress() { unsigned int next_code; unsigned int character; unsigned int string_code; unsigned int index; unsigned int bit_limit; int i; ATLASSERT(code_value); /* initialized? */ CUR_BITS = MIN_CODE_LEN; bit_limit = CURRENT_MAX_CODES(CUR_BITS) - 1; output_bit_count=0; output_bit_buffer=0L; ATLASSERT(256==FIRST_CODE); next_code=FIRST_CODE; /* Next code is the next available string code*/ for (i=0;i<TABLE_SIZE;i++) /* Clear out the string table before starting */ code_value[i]=-1; string_code=getc_src(); /* Get the first code */ if(-1 == string_code) return; /* empty file or error */ /* ** This is the main loop where it all happens. This loop runs util all of ** the input has been exhausted. Note that it stops adding codes to the ** table after all of the possible codes have been defined. */ while ((character=getc_src()) != -1) { index=find_match(string_code,character);/* See if the string is in */ if (code_value[index] != -1) /* the table. If it is, */ string_code=code_value[index]; /* get the code value. If */ else /* the string is not in the*/ { /* table, try to add it. */ if (next_code <= MAX_CODE) { code_value[index]=next_code++; prefix_code[index]=string_code; append_character[index]=character; } /* are we using enough bits to write out this code word? */ if(string_code >= bit_limit && CUR_BITS < BITS) { /* mark need for bigger code word with all ones */ output_code(bit_limit); CUR_BITS++; bit_limit = (CURRENT_MAX_CODES(CUR_BITS) - 1); } ATLASSERT(string_code < bit_limit); output_code(string_code); /* When a string is found */ string_code=character; /* that is not in the table*/ } /* I output the last string*/ } /* after adding the new one*/ /* ** End of the main loop. */ output_code(string_code); /* Output the last code */ output_code(-1); /* This code flushes the output buffer*/ } /* ** This is the hashing routine. It tries to find a match for the prefix+char ** string in the string table. If it finds it, the index is returned. If ** the string is not found, the first available index in the string table is ** returned instead. */ int find_match(unsigned int hash_prefix,unsigned int hash_character) { int index; int offset; index = (hash_character << HASHING_SHIFT) ^ hash_prefix; if (index == 0) offset = 1; else offset = TABLE_SIZE - index; while (1) { if (code_value[index] == -1) return(index); if (prefix_code[index] == hash_prefix && append_character[index] == hash_character) return(index); index -= offset; if (index < 0) index += TABLE_SIZE; } } /* ** This is the expansion routine. It takes an LZW format file, and expands ** it to an output file. The code here should be a fairly close match to ** the algorithm in the accompanying article. */ void expand() { unsigned int next_code; unsigned int new_code; unsigned int old_code; int character; unsigned char *string; unsigned int bit_limit; ATLASSERT(code_value); /* initialized? */ CUR_BITS = MIN_CODE_LEN; bit_limit = CURRENT_MAX_CODES(CUR_BITS) - 1; input_bit_count=0; input_bit_buffer=0L; // @@@ what if we pass uncompressed file to decode? next_code=FIRST_CODE; /* This is the next available code to define */ old_code=input_code(); /* Read in the first code, initialize the */ if(-1 == old_code) return; /* read error? */ character=old_code; /* character variable, and send the first */ if(putc_out(old_code)==-1) /* code to the output file */ return; /* write error */ /* ** This is the main expansion loop. It reads in characters from the LZW file ** until it sees the special code used to inidicate the end of the data. */ while ((new_code=input_code()) != (-1)) { /* look for code length increase marker */ if(bit_limit == new_code && CUR_BITS < BITS) { CUR_BITS++; bit_limit = CURRENT_MAX_CODES(CUR_BITS) - 1; new_code=input_code(); ATLASSERT(new_code != -1); /* must be read error? */ if(new_code == -1) break; } ATLASSERT(new_code < bit_limit); /* ** This code checks for the special STRING+CHARACTER+STRING+CHARACTER+STRING ** case which generates an undefined code. It handles it by decoding ** the last code, and adding a single character to the end of the decode string. */ if (new_code>=next_code) { *decode_stack=character; string=decode_string(decode_stack+1,old_code); } /* ** Otherwise we do a straight decode of the new code. */ else string=decode_string(decode_stack,new_code); /* ** Now we output the decoded string in reverse order. */ character=*string; while (string >= decode_stack) putc_out(*string--); /* ** Finally, if possible, add a new code to the string table. */ if (next_code <= MAX_CODE) { prefix_code[next_code]=old_code; append_character[next_code]=character; next_code++; } old_code=new_code; } } /* ** This routine simply decodes a string from the string table, storing ** it in a buffer. The buffer can then be output in reverse order by ** the expansion program. */ /* ~nab: these char* aren't a risk for unicode; we are reading bytes */ unsigned char *decode_string(unsigned char *buffer,unsigned int code) { int i; i=0; while (code >= FIRST_CODE) { *buffer++ = append_character[code]; code=prefix_code[code]; i++; ATLASSERT(i < sizeof(decode_stack)); /* buffer overrun if it blows, increase stack size! */ } *buffer=code; return(buffer); } /* ** The following two routines are used to output variable length ** codes. They are written strictly for clarity, and are not ** particularyl efficient. ~nab: there's room for improvement in these I/O functions eg work in DWORDS instead of bytes */ unsigned int input_code() { int c; unsigned int return_value; //static int input_bit_count=0; //static unsigned long input_bit_buffer=0L; while (input_bit_count <= 24) { if ((c = getc_comp()) == -1) break; input_bit_buffer |= (unsigned long) c << (24-input_bit_count); input_bit_count += 8; } if(input_bit_count < CUR_BITS) { ATLASSERT(!input_bit_buffer); return -1; /* EOF */ } return_value=input_bit_buffer >> (32-CUR_BITS); input_bit_buffer <<= CUR_BITS; input_bit_count -= CUR_BITS; ATLASSERT(return_value < (1UL << CUR_BITS)); return(return_value); } /* bits are written outside normal byte boundaries, hence the need for keeping old values */ void output_code(unsigned int code) { //static int output_bit_count=0; //static unsigned long output_bit_buffer=0L; ATLASSERT(output_bit_count < 8); /* leftovers */ ATLASSERT(CUR_BITS + output_bit_count <= 32); /*codes <256 are possible for single characters, zero bytes etc*/ if(-1 == code) { /* pad remaining zeros and flush the last byte */ if(output_bit_count) { output_bit_buffer >>= 24; ATLASSERT((output_bit_buffer & 0xFF) == output_bit_buffer); putc_comp(output_bit_buffer); output_bit_count = 0; output_bit_buffer = 0; /* in case some eejit calls us again */ } return; } ATLASSERT(code < (1UL << CUR_BITS)); /* sends new bytes near the top (MSB) */ output_bit_buffer |= (unsigned long) code << (32-CUR_BITS-output_bit_count); output_bit_count += CUR_BITS; while (output_bit_count >= 8) { /* no check for error but if there was a problem we'd know from the time we wrote the identifier */ putc_comp(output_bit_buffer >> 24); output_bit_buffer <<= 8; output_bit_count -= 8; } } }; /* CLZWImpl */ /* example derived class using C buffered I/O functions */ class CLZWCompressFile : public CLZWImpl { public: CLZWCompressFile() { io_file = 0; lzw_file = 0; }; ~CLZWCompressFile() { ATLASSERT(!io_file); ATLASSERT(!lzw_file); }; int AnyIOErrors() {return io_error; } // @@@ these char* should be changed for unicode builds unsigned int Compress(char* input_file_name, char* to_name) { ATLASSERT(input_file_name && *input_file_name); ATLASSERT(to_name && *to_name); ATLASSERT(strcmp(to_name, input_file_name)); io_error = 1; if(!code_value) if(!Init()) return 0; /* rare memory error */ u_comp = 0; u_io = 0; io_file=fopen(input_file_name,"rb"); if(io_file) { lzw_file=fopen(to_name,"wb"); if(lzw_file) { /* write LZW identifier L+starting bytes */ putc('L', lzw_file); if(putc(MIN_CODE_LEN, lzw_file) == MIN_CODE_LEN) { compress(); io_error = ferror(lzw_file) || ferror(io_file); if(!io_error) ATLASSERT(u_comp <= u_io); /* this is bound to bomb every now and then, no compression! */ } fclose(lzw_file); lzw_file = 0; } fclose(io_file); io_file = 0; } return u_comp; } unsigned int Expand(char* lzw_name, char* to_name) { ATLASSERT(lzw_name && *lzw_name); ATLASSERT(to_name && *to_name); ATLASSERT(strcmp(to_name, lzw_name)); io_error = 1; if(!code_value) if(!Init()) return 0; /* rare memory error */ u_comp = 0; u_io = 0; lzw_file=fopen(lzw_name,"rb"); if(lzw_file) { /* check LZW identifier L+starting bytes */ int ch1 = getc(lzw_file); int ch2 = getc(lzw_file); if('L' == ch1 && MIN_CODE_LEN==ch2) { io_file=fopen(to_name,"wb"); if(io_file) { expand(); io_error = ferror(lzw_file) || ferror(io_file); fclose(io_file); io_file = 0; } } fclose(lzw_file); lzw_file = 0; } return u_io; } protected: /* -1 return indicates either EOF or some IO error */ virtual int getc_src() { ATLASSERT(io_file); int ch = getc(io_file); if(EOF == ch) return -1; u_io++; return ch; } virtual int getc_comp() { ATLASSERT(lzw_file); int ch = getc(lzw_file); if(EOF == ch) return -1; u_comp++; return ch; } virtual int putc_comp(int ch) { ATLASSERT(lzw_file); ATLASSERT(ch >= 0 && ch < 256); int ret = putc(ch, lzw_file); if(ret != EOF) { ATLASSERT(ret == ch); u_comp++; } else ret = -1; return ret; } virtual int putc_out(int ch) { ATLASSERT(io_file); ATLASSERT(ch >= 0 && ch < 256); int ret = putc(ch, io_file); if(ret != EOF) u_io++; else ret = -1; return ret; } FILE* io_file; FILE *lzw_file; int io_error; public: unsigned long u_io, u_comp; /* bytes read and written */ }; // @@@ could have a generic one on IStream, CreateStreamOnHGlobal/SHCreateStreamOnFile #endif /* UPRIGHT_LZW_H */