📄 huffman-decompress.c

📁 自适应哈弗曼(adaptive huffman)压缩和解压程序
💻 C
字号:
/*
 * File:      huffman-decompress.c
 * Author:    JD, with a bit of code pirated from a 2103 student
 * Date:      2008-02-03
 * Version:   1.1
 *
 * Description: decompress the output of the huffman-compress program
 *	to obtain a copy of the original file.  This program reads in
 *	either stdin or the given file argument and decompresses this
 *	data.  The output is written to stdout (if the input was stdin) and
 *	is otherwise written to the same file name as the original with
 *	".dehuf" appended.
 *
 *	If the first argument is "-a", the input codes (but not the
 *	overhead data) are assumed to be written in ASCII, one code per
 *	line.
 */


#include    <stdio.h>
#include    <stdlib.h>
#include    <stdint.h>	    /* C99 */
#include    <ctype.h>
#include    <string.h>


#define	    NUM_SYMBOLS	    256
#define	    NUM_ITEMS       (2 * NUM_SYMBOLS - 1)
#define	    MAX_NUM_MERGES  (NUM_SYMBOLS - 1)
#define	    NO_MORE         (-1)
#define     NO_PARENT	    (-1)
#define     NO_CHILD	    (-1)


typedef struct
{
    unsigned int count;
    short parent_index;
    short left_child_index, right_child_index;
    short code_length;
    char code[MAX_NUM_MERGES];
    char merged;
    char left_or_right;		    /* left or right child of parent? */
} Item_t;


int read_bit(int * bit);
int get_smallest(Item_t items[], int length);
int parse_args(int argc, char * argv[], int * ascii_input);
int read_header(Item_t items[], unsigned long * total_chars, int ascii_input);



void
usage(const char * progname)
{
    fprintf(stderr, "Usage: %s [-a] [infile]\n", progname);
}



int
main(int argc, char * argv[])
{
    int i, next_item, root_index, this_node, next_bit;
    int small1, small2;
    unsigned long total_chars;
    Item_t items[NUM_ITEMS];
    int ascii_input = 0;

    if (parse_args(argc, argv, &ascii_input) != 0)
	return EXIT_FAILURE;

    for (i = 0; i < NUM_ITEMS; i++)
    {
	items[i].count = 0;
	items[i].merged = 0;
	items[i].parent_index = NO_PARENT;      
	items[i].left_child_index = NO_CHILD;      
	items[i].right_child_index = NO_CHILD;      
    }

    if (read_header(items, &total_chars, ascii_input) != 0)
    {
	fprintf(stderr, "%s: invalid header in compressed input\n", argv[0]);
	return EXIT_FAILURE;
    }

#ifdef DEBUG
    /* Output the counts */
    for (i = 0; i < NUM_SYMBOLS; i++)
	printf("count %d: %d\n", i, items[i].count);
#endif

    /* Merge the items */
    next_item = NUM_SYMBOLS;
    while (1)
    {
	small1 = get_smallest(items, NUM_ITEMS);
	if (small1 == NO_MORE)
	    break;
	items[small1].merged = 1;

	small2 = get_smallest(items, NUM_ITEMS);
	if (small2 == NO_MORE)
	    break;
	items[small2].merged = 1;

	items[small1].left_or_right = 0;
	items[small1].parent_index = next_item;

	items[small2].left_or_right = 1;
	items[small2].parent_index = next_item;

	items[next_item].count = items[small1].count + items[small2].count;
	items[next_item].left_child_index = small1;
	items[next_item].right_child_index = small2;
	next_item++;
    }

    if (next_item == NUM_SYMBOLS)
    {
	/* There was only one symbol in the input! Special case! */
	/* fprintf(stderr, "small1 is %d\n", small1); */
	for (i = 0; i < items[small1].count; i++)
	    putchar(small1);
	
	return EXIT_SUCCESS;
    }

    root_index = next_item - 1;
    this_node = root_index;
    if (ascii_input)
    {
	getchar();  /* Toss away '\n' which follows the counts */
	while ((next_bit = getchar()) != EOF && total_chars > 0)
	{
#ifdef DEBUG
	    printf("%d ", next_bit);
#endif
	    if (next_bit == '0')
		this_node = items[this_node].left_child_index;
	    else
		this_node = items[this_node].right_child_index;

	    if (items[this_node].left_child_index == NO_CHILD)
	    {
		putchar(this_node);
		this_node = root_index;
		total_chars--;
		getchar();  /* Toss away '\n' which follows the code */
	    }
	}
    }
    else
    {
	while (read_bit(&next_bit) == 0 && total_chars > 0)
	{
#ifdef DEBUG
	    printf("%d ", next_bit);
#endif
	    if (next_bit == 0)
		this_node = items[this_node].left_child_index;
	    else
		this_node = items[this_node].right_child_index;

	    if (items[this_node].left_child_index == NO_CHILD)
	    {
		putchar(this_node);
		this_node = root_index;
		total_chars--;
	    }
	}
    }

    return EXIT_SUCCESS;
}



/*
 * Function:  parse_args
 * Purpose:   Examine the arguments and freopen() streams as necessary.
 * Inputs:    argc, argv and a pointer to an int
 * Returns:   0 on success, non-zero on failure
 *
 * Error checking: enough...
 * Sample usage: parse_args(argc, argv, &use_ascii_input);
 */

int
parse_args(int argc, char * argv[], int * ascii_input)
{
    char * progname = argv[0];

    if (argc > 1 && 0 == strcmp(argv[1], "-a"))
    {
	*ascii_input = 1;
	argc--;
	argv++;
    }
    if (argc > 2)
    {
	usage(argv[0]);
	return 1;
    }
    if (argc > 1)
    {
	char * outfile;

	if (freopen(argv[1], "r", stdin) == NULL)
	{
	    fprintf(stderr, "%s: unable to open input file '%s' for reading\n",
		    progname, argv[1]);
	    return 2;
	}
	outfile = malloc(strlen(argv[1] + 7));
	if (outfile == NULL)
	{
	    fprintf(stderr, "%s: unable to allocate memory; quitting\n",
		    progname);
	    return 3;
	}
	strcpy(outfile, argv[1]);
	strcat(outfile, ".dehuf");
	if (freopen(outfile, "w", stdout) == NULL)
	{
	    fprintf(stderr, "%s: unable to open output file '%s' for writing\n",
		    progname, outfile);
	    return 4;
	}
    }

    return 0;
}



/*
 * Function:  get_smallest
 * Purpose:   Gets the un-merged item with the smallest count.
 * Inputs:    items[] and length
 * Returns:   0 on success, non-0 otherwise
 * Assumption: item counts are initialized to 0
 *
 * Error checking: none
 * Sample usage: get_smallest(items, MAX);
 */

int
read_header(Item_t items[], unsigned long * total_chars, int ascii_input)
{
    int i, min_symb, max_symb;
    unsigned char data_type;

    *total_chars = 0;
    fread(&min_symb, sizeof(min_symb), 1, stdin);
    fread(&max_symb, sizeof(max_symb), 1, stdin);
    data_type = fgetc(stdin);
    if (data_type == 'b')
    {
	unsigned char byte;

	for (i = min_symb; i <= max_symb; i++)
	{
	    fread(&byte, sizeof(byte), 1, stdin);
	    items[i].count = byte;
	    *total_chars += items[i].count;
	}
    }
    else if (data_type == 'h')
    {
	uint16_t halfword;

	for (i = min_symb; i <= max_symb; i++)
	{
	    fread(&halfword, sizeof(halfword), 1, stdin);
	    items[i].count = halfword;
	    *total_chars += items[i].count;
	}
    }
    else if (data_type == 'w')
    {
	uint32_t word;

	for (i = min_symb; i <= max_symb; i++)
	{
	    fread(&word, sizeof(word), 1, stdin);
	    items[i].count = word;
	    *total_chars += items[i].count;
	}
    }
    else
	return 1;

    return 0;
}



/*
 * Function:  get_smallest
 * Purpose:   Gets the un-merged item with the smallest count.
 * Inputs:    items[] and length
 * Returns:   an int >= 0 or NO_MORE (NO_MORE if all items are merged)
 *
 * Error checking: none
 * Sample usage: get_smallest(items, MAX);
 */

int
get_smallest(Item_t items[], int length)
{
    int smallest = NO_MORE;      /* Smallest item index */
    int i;

    for (i = 0; i < length; i++)
    {
	if (items[i].merged || items[i].count == 0)
	    continue;

	if (smallest == NO_MORE)
	    smallest = i;
	else if (items[i].count < items[smallest].count)
	    smallest = i;
    }

    return smallest;
}


static unsigned char byte = 0;
static int bits_available = 0;

int read_bit(int * bit)
{
    if (bits_available == 0)
    {
	if (fread(&byte, sizeof(byte), 1, stdin) != 1)
	    return 1;
	bits_available = 8;
    }

    *bit = byte >> 7;
    byte <<= 1;
    bits_available--;

    return 0;
}
💿 文件大小 7 K
👤 上传用户 a415834839
📂 所属分类压缩解压
📄 代码行数 350 行
💻 语言类型 C语言
🏷️ 相关标签

#adaptive #huffman #解压 #程序
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -