⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 huffman-compress.c

📁 自适应哈弗曼(adaptive huffman)压缩和解压程序
💻 C
字号:
/*
 * File:      huffman-compress.c
 * Author:    JD, with a bit of code pirated from a 2103 student
 * Date:      2008-02-03
 * Version:   1.1
 *
 * Description:   This program reads in either stdin or the given file
 *	argument and compresses this data using Huffman compression.  The
 *	output is written to stdout (if the input was stdin) and is
 *	otherwise written to the same file name as the original with ".huf"
 *	appended.
 *
 *	If the first argument is "-a", the output codes (but not the
 *	overhead data) are written in ASCII, one code per line.
 */


#include    <stdio.h>
#include    <stdlib.h>
#include    <stdint.h>	    /* C99 */
#include    <ctype.h>
#include    <string.h>


#define	    NUM_SYMBOLS	    256
#define	    NUM_ITEMS       (2 * NUM_SYMBOLS - 1)
#define	    MAX_NUM_MERGES  (NUM_SYMBOLS - 1)
#define	    NO_MORE         (-1)
#define     NO_PARENT	    (-1)


typedef struct
{
    uint32_t count;
    short parent_index;
    short code_length;
    char code[MAX_NUM_MERGES];
    char merged;
    char left_or_right;		    /* left or right child of parent? */
} Item_t;


int get_smallest(Item_t items[], int length);
void write_bit(int bit);
void flush_bits();
int parse_args(int argc, char * argv[], int * ascii_output);
void write_header(Item_t items[], int ascii_output);



void
usage(const char * progname)
{
    fprintf(stderr, "Usage: %s [-a] [infile]\n", progname);
}



int
main(int argc, char * argv[])
{
    int input, i, j, next_item;
    int small1, small2;
    Item_t items[NUM_ITEMS];
    int ascii_output = 0;

    if (parse_args(argc, argv, &ascii_output) != 0)
	return EXIT_FAILURE;

    for (i = 0; i < NUM_ITEMS; i++)
    {
	items[i].count = 0;
	items[i].merged = 0;
	items[i].parent_index = NO_PARENT;      
    }

    /* Read the input */
    while ((input = getchar()) != EOF)
	items[input].count++;

    write_header(items, ascii_output);

    /* Merge the items */
    next_item = NUM_SYMBOLS;
    while (1)
    {
	small1 = get_smallest(items, NUM_ITEMS);
	if (small1 == NO_MORE)
	    break;
	items[small1].merged = 1;

	small2 = get_smallest(items, NUM_ITEMS);
	if (small2 == NO_MORE)
	    break;
	items[small2].merged = 1;

	items[small1].left_or_right = 0;
	items[small1].parent_index = next_item;

	items[small2].left_or_right = 1;
	items[small2].parent_index = next_item;

	items[next_item].count = items[small1].count + items[small2].count;
	next_item++;
    }

    /* Compute the codes */
    for (i = 0; i < NUM_SYMBOLS; i++)
    {
	char rev_code[MAX_NUM_MERGES];
	int item_index;

	if (items[i].count == 0)
	    continue;

	for (j = 0, item_index = i;
	     items[item_index].parent_index != NO_PARENT; j++)
	{
	    rev_code[j] = items[item_index].left_or_right;
	    item_index = items[item_index].parent_index;
	}

	items[i].code_length = 0;
	j--;
	while (j >= 0)
	    items[i].code[items[i].code_length++] = rev_code[j--];

#ifdef DEBUG
	fprintf(stderr, "Code for char %d is", i);
	for (j = 0; j < items[i].code_length; j++)
	    fprintf(stderr, " %d", items[i].code[j]);
	fprintf(stderr, "\n");
#endif
    }

    /* Output codes for each letter */
    rewind(stdin);
    if (ascii_output)
	while ((input = getchar()) != EOF)
	{
	    for (i = 0; i < items[input].code_length; i++)
		printf("%d", items[input].code[i]);
	    printf("\n");
	}
    else
    {
	while ((input = getchar()) != EOF)
	    for (i = 0; i < items[input].code_length; i++)
		write_bit(items[input].code[i]);
	flush_bits();
    }

    return EXIT_SUCCESS;
}



/*
 * Function:  parse_args
 * Purpose:   Examine the arguments and freopen() streams as necessary.
 * Inputs:    argc, argv and a pointer to an int
 * Returns:   0 on success, non-zero on failure
 *
 * Error checking: enough...
 * Sample usage: parse_args(argc, argv, &use_ascii_output);
 */

int
parse_args(int argc, char * argv[], int * ascii_output)
{
    char * progname = argv[0];

    if (argc > 1 && 0 == strcmp(argv[1], "-a"))
    {
	*ascii_output = 1;
	argc--;
	argv++;
    }
    if (argc > 2)
    {
	usage(argv[0]);
	return 1;
    }
    if (argc > 1)
    {
	char * outfile;

	if (freopen(argv[1], "r", stdin) == NULL)
	{
	    fprintf(stderr, "%s: unable to open input file '%s' for reading\n",
		    progname, argv[1]);
	    return 2;
	}
	outfile = malloc(strlen(argv[1] + 5));
	if (outfile == NULL)
	{
	    fprintf(stderr, "%s: unable to allocate memory; quitting\n",
		    progname);
	    return 3;
	}
	strcpy(outfile, argv[1]);
	strcat(outfile, ".huf");
	if (freopen(outfile, "w", stdout) == NULL)
	{
	    fprintf(stderr, "%s: unable to open output file '%s' for writing\n",
		    progname, outfile);
	    return 4;
	}
    }

    return 0;
}



/*
 * Function:  write_header
 * Purpose:   Write out the header, which precedes all the codes.
 * Inputs:    items[] and ascii_output
 * Returns:   nothing
 *
 * Error checking: none
 * Sample usage: write_header(items, ascii_output);
 *
 * The header starts with two ints which specify the min and max symbol
 * values.  Then it follows this with a 'b', 'h' or 'w' according to
 * whether the counts are output with unsigned bytes, halfwords or words.
 * Then the counts themselves are output in binary.
 */

void
write_header(Item_t items[], int ascii_output)
{
    int i, min_symb, max_symb, max_count;

    i = 0;
    for (i = 0; items[i].count == 0 && i < NUM_SYMBOLS; i++)
	;
    min_symb = i;
    max_symb = i;
    max_count = items[i].count;
    for (i++; i < NUM_SYMBOLS; i++)
    {
	if (items[i].count > 0)
	{
	    if (items[i].count > max_count)
		max_count = items[i].count;
	    max_symb = i;
	}
    }

    fwrite(&min_symb, sizeof(min_symb), 1, stdout);
    fwrite(&max_symb, sizeof(max_symb), 1, stdout);
    if (max_count < 256)
    {
	unsigned char byte;

	fputc((unsigned)'b', stdout);
	for (i = min_symb; i <= max_symb; i++)
	{
	    byte = items[i].count;
	    fwrite(&byte, sizeof(byte), 1, stdout);
	}
    }
    else if (max_count < 65536)
    {
	uint16_t halfword;

	fputc((unsigned)'h', stdout);
	for (i = min_symb; i <= max_symb; i++)
	{
	    halfword = items[i].count;
	    fwrite(&halfword, sizeof(halfword), 1, stdout);
	}
    }
    else
    {
	uint32_t word;

	fputc((unsigned)'w', stdout);
	for (i = min_symb; i <= max_symb; i++)
	{
	    word = items[i].count;
	    fwrite(&word, sizeof(word), 1, stdout);
	}
    }

    if (ascii_output)
	printf("\n");
}



/*
 * Function:  get_smallest
 * Purpose:   Gets the un-merged item with the smallest count.
 * Inputs:    items[] and length
 * Returns:   an int >= 0 or NO_MORE (NO_MORE if all items are merged)
 *
 * Error checking: none
 * Sample usage: get_smallest(items, MAX);
 */

int
get_smallest(Item_t items[], int length)
{
    int smallest = NO_MORE;      /* Smallest item index */
    int i;

    for (i = 0; i < length; i++)
    {
	if (items[i].merged || items[i].count == 0)
	    continue;

	if (smallest == NO_MORE)
	    smallest = i;
	else if (items[i].count < items[smallest].count)
	    smallest = i;
    }

    return smallest;
}



/*
 * Declare two global variables here, so they can only be used by the
 * functions below here.
 *
 * Note for 2103 students:
 * These two functions need to share the two variables.  We could pass
 * these as parameters to the functions, or combine the two functions into
 * one so that the global variables would be static inside the function.
 * To make that latter way work, we would need another parameter to the
 * write_bit() function to tell it to flush the last few bits, and I like
 * that design less than the way I did it.
 */

static unsigned char byte = 0;
static int bits_used = 0;


void
write_bit(int bit)
{
    byte <<= 1;
    byte |= bit;
    if (++bits_used == 8)
    {
	fwrite(&byte, sizeof(byte), 1, stdout);
	byte = 0;
	bits_used = 0;	
    }
}


void
flush_bits()
{
    if (bits_used > 0)
    {
	byte <<= 8 - bits_used;
	fwrite(&byte, sizeof(byte), 1, stdout);
	byte = 0;
	bits_used = 0;	
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -