#include "huffman.h"
// Roger Boerdijk 30.06.2002
// email: kitt3n@home.nl
// Use the code any way you wish; you assume all responsibility for its
// correctness and suitability should you use it.
//-------------------------------------------------------------------------------------------------
int cHuffmanNode::operator () (const cHuffmanNode& a, const cHuffmanNode& b) const
{ return a.frequency>b.frequency;
}
int cHuffmanNode::operator () (const cHuffmanNode* a, const cHuffmanNode* b) const
{ return a->frequency>b->frequency;
}
//-------------------------------------------------------------------------------------------------
cHuffmanTree::cHuffmanTree (U32 frequency_table[256]) : root(NULL)
{
// Constructor for a byte-frequency table - usefull for compressing files
ft.resize (256);
for (int i=0; i<256; i++)
{
ft[i].first = i;
ft[i].second=frequency_table[i];
}
buildTreeDo (ft);
}
cHuffmanTree::cHuffmanTree (std::vector<pair<U8, U32> > frequency_table) : root (NULL)
{
// more generic frequency table, can contain more/less ch's with
// respective frequencies than the byte-frequency-table.
// Nice if you have a limited character-set, for example only
// A-Z and 0-9, instead of the entire ascii-table
ft = frequency_table;
buildTreeDo (ft);
}
cHuffmanTree::cHuffmanTree (U8* data, U32 size) : root (NULL)
{
// very simular to cHuffmanTree::buildTree (U32 frequency_table[256])
// only this function will build the frequency-table itself
pair<U8, U32> thePair;
ft.resize (256);
for (int z=0; z<ft.size(); z++)
{
thePair.first = z;
thePair.second = 0;
ft[z]=thePair;
}
// count frequencies
for (int i=0; i<size; i++)
ft[data[i]].second++;
buildTreeDo (ft);
}
cHuffmanTree ::~cHuffmanTree ()
{
freeTree ();
}
//-------------------------------------------------------------------------------------------------
bool cHuffmanTree::buildTreeDo (const std::vector<pair<U8, U32> > frequency_table)
{
// If there was already a tree, cleanup and build the new one
if (root) { freeTree (); }
ft = frequency_table;
std::priority_queue<cHuffmanNode*, std::vector<cHuffmanNode*>, cHuffmanNode> h;
cHuffmanNode* t = NULL;
// 1. Fill a heap with characters sorted on frequency
// First in heap character with highest frequency
for (int i=0; i<frequency_table.size(); i++)
{
// We push if the symbol has at least 1 occurance,
// if the symbol has 0 occurances it's not usefull
// to put it in the tree.
if (frequency_table[i].second!=0)
{
cHuffmanNode* tmp = new cHuffmanNode;
assert (tmp && "cHuffman::buildTree() Failed!");
tmp->ch = frequency_table[i].first;
tmp->frequency = frequency_table[i].second;
h.push (tmp);
}
}
// 2. Build the tree from the heap
while (!h.empty())
{
if (h.size()==1)
{
// If h contains only one character X, make X the root of T
t = h.top();
h.pop();
}
else
{
// Pick two characters X and Y with the lowest frequencies
// and delete them from H
cHuffmanNode* X=h.top(); h.pop();
cHuffmanNode* Y=h.top(); h.pop();
// replace X and Y with a new character Z, whose frequency
// is the sum of the frequencies of X and Y
cHuffmanNode* Z = new cHuffmanNode;
Z->frequency = X->frequency+Y->frequency;
Z->left_child = X;
Z->right_child = Y;
h.push (Z);
}
}
root = t;
//buildCode (frequency_table, t)
return true;
}
//-------------------------------------------------------------------------------------------------
void cHuffmanTree::freeTree (void)
{
if (root)
{
freeTreeR (root);
root=NULL;
}
}
void cHuffmanTree::freeTreeR (cHuffmanNode* node)
{
if (!node) return;
if (!(node->right_child && node->left_child))
{
delete node;
return;
}
if (node->left_child)
freeTreeR (node->left_child);
if (node->right_child)
freeTreeR (node->right_child);
// we deleted all of this node's children, now delete
// the node itself
delete node;
}
//-------------------------------------------------------------------------------------------------
U32 cHuffmanTree::getTree (std::string& theTree)
{
return getTreeR (theTree, root);
}
U32 cHuffmanTree::getTreeR (std::string& theTree, cHuffmanNode* node)
{
// This function stores reconstruction-data for our tree
// Note that the "1" and "0" do NOT represent huffman-codes
// but it tells if a node is a branch or a leaf
// Note that we don't store frequency data for reconstruction!
if (!node) return 0;
if (!(node->left_child && node->right_child))
{ // leaf
char ch = node->ch;
theTree.append ("1");
// note: will give problems with >256 frequency-tables!
theTree+=ch;
return 2;
}
else
{ // Branch
theTree.append ("0");
return 1+
getTreeR (theTree, node->left_child)+
getTreeR (theTree, node->right_child);
}
std::vector<std::string> bs;
getStringTable (bs);
}
void cHuffmanTree::setTree (const std::string& theTree)
{
if (root)
freeTree ();
U32 index=0;
root = setTreeR (theTree, index);
}
cHuffmanNode* cHuffmanTree::setTreeR (const std::string& theTree, U32& idx)
{
// This function reconstructs our tree
// Note that we don't reconstruct frequency data!
if (idx>=theTree.size())
return NULL;
cHuffmanNode* n = new cHuffmanNode;
U8 b = theTree[idx];
if (b == '0')
{ // branch
n->ch = 0;
n->left_child = setTreeR (theTree, ++idx);
n->right_child = setTreeR (theTree, ++idx);
}
else
{ // leaf
idx++;
n->ch = theTree[idx];
n->left_child = NULL;
n->right_child = NULL;
}
return n;
}
//-------------------------------------------------------------------------------------------------
bool cHuffmanTree::gcp(char* s, cHuffmanNode* n, int c)
{
// get-character-path worker function.
if (!n) return 0;
if (!(n->left_child && n->right_child))
{
if (c == n->ch)
return true;
else
return false;
}
else
{
char _s[256];
if (gcp(s, n->left_child, c))
{
strcpy(_s, s);
strcpy(s, "0");
strcat(s, _s);
return true;
}
else if (gcp(s, n->right_child, c))
{
strcpy(_s, s);
strcpy(s, "1");
strcat(s, _s);
return true;
}
else
return false;
}
}
char* cHuffmanTree::getCharacterPath (cHuffmanNode* n, int c)
{
// Get the path through the tree to find the bit-string
// we need to encode / decode a character. Note that
// we are using a static-member ie NOT an example of
// good programming!
static char string[256];
strcpy(string, "");
if (gcp(string, n, c))
return string;
else
return "";
}
void cHuffmanTree::getStringTable (std::vector<std::string>& bitString)
{
// Example function which will get all string encodings of each character.
// Note that '0' is a 0-bit and '1' is a 1-bit
bitString.resize (ft.size());
for (int i=0; i<ft.size(); i++)
{
char* tmp = getCharacterPath (root, ft[i].first);
bitString[i]=tmp;
}
}
//-------------------------------------------------------------------------------------------------
bool cHuffmanTree::write_bit (bool bit, U8& theByte, bool reset)
{
// grow a byte, bit by bit, and then flush it theByte reset will
// initialize the function to default-state. will return true as
// soon as 8 bits were written and it is up to the called to save
// the value in theByte.
// note that if the function returns false, theByte does not
// contain any usefull value
static long bit_index = 7;
static U8 byte = 0x00;
bool flush = false;
if (reset)
{ bit_index=7;
byte = 0x00;
return false;
}
if(bit_index == -1)
{
// flush byte
bit_index = 7;
byte = 0x00;
}
//byte |= (bit ? 1 : 0) << bit_index;
byte |= bit << bit_index;
bit_index--;
if(bit_index == -1)
{
theByte = byte;
flush=true;
}
return flush;
}
bool cHuffmanTree::read_bit(bool& bit, U8 byte, bool reset)
{
// return one bit at a time, returns true when the
// byte finished and it wants the next byte
static long bit_index = 7;
bool requestNextByte=false;
int bit_value;
if (reset)
{ bit_index = 7;
requestNextByte=false;
return true;
}
// bit_value = (byte & (1 << bit_index)) ? 1 : 0;
bit_value = byte & (1 << bit_index);
bit = bit_value!=0;
bit_index--;
if(bit_index == -1)
{
requestNextByte=true;
bit_index = 7;
}
return requestNextByte;
}
U32 cHuffmanTree::getCompressedSize (U8* udata, U32 len)
{
assert (root && udata);
// get stringtable with compression info
int i;
std::vector<std::string> bs;
getStringTable (bs);
// calculate datasize
U32 bitsEstimated=0;
U32 byteEstimated=0;
for (i=0; i<len; i++)
{
bitsEstimated+=bs[udata[i]].length();
assert (bs[udata[i]].length()!=0 && "cHuffmanTree::compress() Unknown ch");
}
// 4 bytes (U32) extra for uncompressed size
byteEstimated=bitsEstimated/8; //+bitsEstimated%8==0?0:1+4;
byteEstimated+=bitsEstimated%8==0?0:1;
byteEstimated+=4;
return byteEstimated;
}
//-------------------------------------------------------------------------------------------------
U32 cHuffmanTree::compress (U8* udata, U32 len, U8** cdata)
{
// NOTE: Client has to deallocate memory which we allocate here!
// (maybe we can somehow change this)
assert (root);
// get stringtable with compression info
int i;
std::vector<std::string> bs;
getStringTable (bs);
U32 byteEstimated = getCompressedSize (udata, len);
U8* data = new U8[byteEstimated];
if (data)
{
*cdata = data;
*((U32*)data)=len;
data+=4;
// compress data
U8 cbyte;
write_bit (false, cbyte, true); // reset
for (i=0; i<len; i++)
{
U32 bitlen = bs[udata[i]].length();
const I8* str = bs[udata[i]].c_str();
for (int j=0; j<bitlen; j++)
{
// write the correct bit into our temp-buffer
if (write_bit(((str[j] - '0')!=0), cbyte))
{
// we wrote 8 bits, copy to our compressed buffer
// and prepare for next byte
*data=cbyte;
data++;
}
}
}
// fill last byte to end with 0
for (int fillbit=0; fillbit<8; fillbit++)
{
if (write_bit(0, cbyte))
{ *data=cbyte;
data++;
break;
}
}
return byteEstimated;
}
return 0;
}
U32 cHuffmanTree::uncompress (U8* cdata, U32 len, U8** udata)
{
// NOTE: Client has to deallocate memory which we allocate here!
// (maybe we can somehow change this)
U32 ulen = *((U32*)cdata);
cdata+=4;
U8* data = new U8[ulen];
if (data)
{
*udata=data;
bool bit=0;
cHuffmanNode* curnode = root;
U32 decodedU8s=0;
// make sure our readbit is in initialized state
read_bit (bit, cdata[0], true);
while (decodedU8s<ulen)
{
if (cHuffmanTree::read_bit(bit, cdata[0]))
{ // next byte!
cdata++;
}
if (bit==0)
{ curnode = curnode->left_child;
}
else
{ curnode = curnode->right_child;
}
// if we're at a leaf, output the character and start over
if (!(curnode->left_child && curnode->right_child))
{
data[decodedU8s]=(curnode->ch);
decodedU8s++;
curnode = root;
}
}
return ulen;
}
return 0;
}
//-------------------------------------------------------------------------------------------------
U32 cHuffmanTree::compress (const I8* inname, const I8* outfile)
{
bool success=false;
FILE* fp, *fpout;
if ((fp=fopen((char*)inname, "rb"))!=NULL)
{
if ((fpout=fopen((char*)outfile, "wb"))!=NULL)
{
// determine filesize
fseek (fp, 0, SEEK_END);
U32 len = ftell (fp);
fseek (fp, 0, SEEK_SET);
// read entire file into ram
U8* ram = new U8[len];
U8* zip;
if (ram)
{ fread (ram, len, 1, fp);
}
// build huffman tree & compress
cHuffmanTree tree2 (ram, len);
U32 csize = tree2.compress (ram , len, &zip);
std::string htR;
tree2.getTree (htR);
U16 htS = htR.size();
U8 htC;
fwrite (&htS, sizeof (htS), 1, fpout);
for (int i=0; i<htR.size(); i++)
{ htC = htR[i];
fwrite (&htC, 1, 1, fpout);
}
fwrite (zip, csize, 1, fpout);
delete zip;
delete ram;
fclose (fpout);
success=true;
}
fclose (fp);
}
return success;
}
U32 cHuffmanTree::uncompress (const I8* inname, const I8* outfile)
{
bool success=false;
FILE* fp, *fpout;
if ((fp=fopen((char*)inname, "rb"))!=NULL)
{
if ((fpout=fopen((char*)outfile, "wb"))!=NULL)
{
// determine filesize
fseek (fp, 0, SEEK_END);
U32 len = ftell (fp);
fseek (fp, 0, SEEK_SET);
// read entire file into ram
U8* zip = new U8[len];
if (zip)
{ fread (zip, len, 1, fp);
}
// build huffman tree & decompress
U16 htS = *(U16*)(zip);
std::string htR;
htR.resize (htS);
for (int i=0; i<htS; i++)
{ htR[i]=*(zip+i+2);
}
U32 htSize = sizeof(htS)+htR.size()*sizeof(U8);
U8* ram;
cHuffmanTree tree2;
tree2.setTree (htR);
U32 usize = tree2.uncompress (zip+htSize, len-htSize, &ram);
fwrite (ram, usize, 1, fpout);
delete zip;
delete ram;
fclose (fpout);
success=true;
}
fclose (fp);
}
return success;
}
|