📄 huf.c
字号:
/*******************************************************************************
* *
* HUF.C by Shaun Case April 1991 *
* *
* Written in Borland C++ 2.0 under MS-DOS 3.3 *
* *
* Uses Huffman encoding to compress a single file. *
* This program is in the public domain. *
* *
* atman%ecst.csuchico.edu@RELAY.CS.NET *
* *
* *
*******************************************************************************/
#include <stdio.h>
#include <math.h>
#include <dir.h> /* for storing filename in output file */
#define FALSE 0 /* i'm still deciding on a style... */
#define TRUE !FALSE
/*
* for lots of interesting (?) diagnostics, uncomment the following:
#define VERBOSE
*
*/
typedef struct chardata { /* template for leaves and nodes in tree */
short charnum; /* which character to be encoded */
/* don't want it lost in the sort */
unsigned long total; /* total occurances in the file */
short seq_length; /* length of this char's huffman code (bits)*/
short bit_sequence; /* the huffman code sequence for this char */
double frequency; /* frequency of occurance, < 1.0 */
struct chardata *up, *left, *right; /* pointers to rest of tree */
};
typedef struct decode_table_element { /* template for decode table element (wow) */
unsigned char letter; /* which character to decode to */
char spare; /* force 16-bit word alignment */
short left; /* index of lower left element from tree */
short right; /* index of lower right element from tree */
};
int compare(const void*, const void*); /* prototype for compare() for qsort() */
struct chardata *huftable[256]; /* array that points to leaves in tree */
struct chardata *root=NULL; /* future root of tree */
struct decode_table_element *decode_table; /* pointer to decode table */
short array_max_index=0; /* max number of elements in array (to be */
/* determined in create_decode_table() */
long real_bit_total=0L;
double avg_bits_per_symbol=0; /* averages num of bits needed to represent */
unsigned long total; /* total number of unencoded bytes */
long real_bits_total = 0L;
FILE *infile; /* file ptr to original file (uncompressed) */
FILE *outfile; /* file ptr to output fiel (compressed) */
char *infile_name; /* ptr to name of input file */
char *outfile_name; /* ptr to name of output file */
int main(int argc, char **argv)
{
#ifdef VERBOSE
void show_encode_info(void); /* prototype */
void show_encoded_file_size(void); /* prototype */
void show_decode_table(void); /* prototype */
#endif
void dumptable(void); /* prototype */
short create_tree(void); /* prototype */
void gen_bit_sequences(struct chardata *node); /* prototype */
short create_decode_table(void); /* prototype */
short compress(void); /* prototype */
register short c; /* a character */
if (argc != 3) { /* check command line arguments */
puts("'huf file1 file2' encodes file1 into file2.");
return 1;
}
puts("Huf by Shaun Case, 1991, Public Domain");
infile_name = argv[1];
outfile_name = argv[2];
puts("Analyzing data.");
for (c=0; c < 256; c++) /* initialize decode table */
{
if ((huftable[c] = (struct chardata *)malloc(sizeof (struct chardata)))== NULL)
{
printf("Unable to allocate space for %dth huftable node.",c);
return 1;
}
huftable[c]->charnum = c; /* need to know who we are after qsort() gets done with us */
huftable[c]->total = 0L;
huftable[c]->frequency = 0;
huftable[c]->up = NULL;
huftable[c]->left = NULL;
huftable[c]->right = NULL;
}
if ((infile=fopen(infile_name, "rb")) == NULL) /* open the input file */
{
printf("Unable to open %s.\n", infile_name);
return 1;
}
while (!feof(infile)) /* get character distribution data */
{
c = fgetc(infile);
if (feof(infile))
continue;
++total;
++huftable[c]->total; /* increment total for char c */
}
if (total == 0L) /* handle empty file */
{
puts("Input file is empty, aborting.");
return 0;
}
fclose(infile);
/* sort according to frequency of occurance */
qsort((void *)huftable, (size_t)256, (size_t)sizeof(struct chardata *),
compare);
dumptable(); /* fill huftable with distribution freqs */
if (create_tree() != TRUE) /* make the huffman tree (bit sequences) */
return 1;
#ifdef VERBOSE
printf("\nFreq of root is %f.\n",root->frequency); /* this should be 1.0 if all went well */
puts("\nBit sequences:\n\n");
#endif
avg_bits_per_symbol = 0;
gen_bit_sequences(root); /* build bit sequences, stuff seqs & */
/* lengths into associated leaves */
#ifdef VERBOSE
printf("\n\nActual bits per symbol = %f", avg_bits_per_symbol);
printf("\nActual final data size w/o table should be %f\n",
avg_bits_per_symbol * (double)total / ((double) 8));
show_encoded_file_size();
#endif
puts("Building decode table...");
if (create_decode_table() != TRUE) /* create decode array from tree */
{
puts("Unable to allocate space for decode table, exiting.");
return 1;
}
printf("Decode table built, contains %d elements.\n",array_max_index + 1);
#ifdef VERBOSE
show_decode_table();
show_encode_info();
#endif
if (compress() != 0) /* now that we have all necessary info, */
return 1; /* perform the compression. */
puts("Done.");
return 0;
}
/*****************************************************************************
* this code is going to be a little strange -- we have an array of items *
* that are going to be the leaves of the tree, and we have to go upward *
* linking them together to make the root. For algorithm, see my notebook, *
* or look up huffman compression in a good algorithms book, or see *
* huffman's paper -- "A Method for the Construction of Minimum-Redundancy *
* Codes" David A. Huffman, Proceedings of the IRE 40, 1098-- 1101 *
* *
* after the tree is built, the root of the tree is assigned to the global *
* variable root. *
* *
* -- Shaun Case *
*****************************************************************************/
struct chardata *ptr1, *ptr2; /* 1 = smallest freq, 2 = 2nd smallest */
short create_tree()
{
void find_lowest_freqs(void); /* prototype */
short only_one_up_ptr_left(void); /* prototype */
double maxfreq = 0 ; /* max combined freqency in the tree */
struct chardata *new_node = NULL; /* ptr to new node to be created */
puts("Creating tree from frequencies...");
while (maxfreq < 0.99999 ) /* while we haven't made the root yet */
{
find_lowest_freqs(); /* find the two lowest combined or */
/* unused single frequencies */
if ( /* create a new node */
(new_node = (struct chardata *)malloc(sizeof (struct chardata)) )
== NULL
)
{
puts("Insufficient memory, malloc() failed in create_tree().");
return FALSE;
}
/* initialize the new node */
new_node->up = NULL;
new_node->left = ptr2;
new_node->right = ptr1;
new_node->charnum = -1; /* if you get a lot of -1s when traversing the tree, */
/* you aren't looking at the leaves. */
ptr1->up = new_node;
ptr2->up = new_node;
new_node->frequency = ptr1->frequency + ptr2->frequency; /* combine 2 old freqs */
maxfreq = new_node->frequency; /* clever optimization */
#ifdef VERBOSE
printf("Newly created freq == %f\n", maxfreq);
#endif
}
root = new_node; /* update global variable */
if (only_one_up_ptr_left()) /* verify tree is correct */
{ /* algorirthm seems to work */
puts("Done creating tree."); /* fine, this is a saftey */
#ifdef verbose /* check. */
puts("Win: apparently only one remaining up-pointer.");
#endif
}
else
{
puts("Lose: apparently more than one remaining up-pointer.");
return FALSE;
}
return TRUE; /* everything is fine. */
}
/************************************************************
* verify there is only one root after the tree is created *
* by making sure only one node has an up pointer that has *
* the value NULL. *
* *
* Just a double-check. *
* *
************************************************************/
short only_one_up_ptr_left(void)
{
short bottom=255;
register struct chardata *tptr;
register struct chardata *first_null_up_ptr=NULL;
tptr=huftable[bottom];
while (bottom != -1)
{
while (tptr->up != NULL) /* find root for this leaf */
tptr = tptr->up;
if (first_null_up_ptr == NULL) /* assign uptr to root, 1st time */
first_null_up_ptr = tptr;
else
if (first_null_up_ptr != tptr) /* uh oh, found another root.. */
return FALSE;
--bottom;
}
return TRUE;
}
/****************************************************************
* *
* Find the two smallest combined or unused single frequencies *
* in the partially-constructed tree. *
* *
****************************************************************/
void find_lowest_freqs(void)
{
register struct chardata *tptr;
/* the following are local for speed (I think double indirection is slow */
register struct chardata *local_minptr; /* ptr to smallest unused freq */
register struct chardata *local_second_minptr; /* ptr to 2nd smallest unused freq */
struct chardata dummy;
short bottom = 255;
dummy.frequency = 2.0; /* initialize to bigger than 1.000 */
local_minptr=&dummy; /* initialize to bigger than 1.000 */
while(bottom != -1) /* find smallest "unused" frequency of all */
{
tptr = huftable[bottom];
if (tptr->total == 0L) /* skip unused byte values (not all files */
{ /* contain all 256 chars) */
--bottom;
continue;
}
while (tptr->up != NULL) /* find top of tree ending in current leaf */
tptr = tptr->up;
if (tptr->frequency < local_minptr->frequency) /* if current < min */
local_minptr = tptr; /* then min = current */
--bottom; /* keep going... */
}
/* now find second smallest "unused" frequency */
bottom = 255; /* start at the bottom again */
local_second_minptr=&dummy; /* initialize to bigger than 1.000 */
while(bottom != -1)
{
tptr = huftable[bottom];
if (tptr->total == 0L) /* skip unused byte values */
{
--bottom;
continue;
}
while (tptr->up != NULL) /* find top of tree ending in current leaf */
tptr = tptr->up;
if (
(tptr->frequency < local_second_minptr->frequency) /* if current < min2 */
&& (tptr != local_minptr) /* and curr <> min1 */
)
local_second_minptr = tptr; /* then min2 = current */
--bottom; /* keep going... */
}
ptr1 = local_minptr;
ptr2 = local_second_minptr;
}
/*******************************************************
* *
* Dump the contents of the huffman table. *
* also fills huftable with distribution frequencies *
* *
*******************************************************/
void dumptable(void)
{
register short i;
int bits_per_char;
double check_total = 0;
double percent = 0;
double frac;
#ifdef VERBOSE
puts("Totals:\n");
#endif
#ifdef VERBOSE
printf("\n\ntotal chars = %ld.\n", total );
#endif
if (total==0) /* otherwise get divide by zero error */
return;
avg_bits_per_symbol=0;
for (i=0; i< (256); i++)
{
huftable[i]->frequency = frac = (((double)huftable[i]->total))/(double)total;
percent= (double)(100)*frac;
if (frac >0)
bits_per_char = (int)ceil( -log10(frac)/log10((double)2));
#ifdef VERBOSE
printf("\n%3d = %6ld = %%%6.3f => %d bits",huftable[i]->charnum,
huftable[i]->total, percent,
bits_per_char );
#endif
check_total += percent;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -