⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 word.c

📁 用c语言编写用于数据压缩的源程序
💻 C
📖 第 1 页 / 共 2 页
字号:
/******************************************************************************
File: 		word.c

Authors: 	John Carpinelli   (johnfc@ecr.mu.oz.au)
	 	Wayne Salamonsen  (wbs@mundil.cs.mu.oz.au)

Purpose:	Data compression using a word-based model and revised 
		arithmetic coding method.

Based on: 	A. Moffat, R. Neal, I.H. Witten, "Arithmetic Coding Revisited",
		Proc. IEEE Data Compression Conference, Snowbird, Utah, 
		March 1995.


Copyright 1995 John Carpinelli and Wayne Salamonsen, All Rights Reserved.

These programs are supplied free of charge for research purposes only,
and may not sold or incorporated into any commercial product.  There is
ABSOLUTELY NO WARRANTY of any sort, nor any undertaking that they are
fit for ANY PURPOSE WHATSOEVER.  Use them at your own risk.  If you do
happen to find a bug, or have modifications to suggest, please report
the same to Alistair Moffat, alistair@cs.mu.oz.au.  The copyright
notice above and this statement of conditions must remain an integral
part of each and every copy made of these files.

******************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "hashtable.h"
#include "stats.h"
#include "coder.h"
#ifdef SYSV
#include <sys/times.h>
#include <limits.h>
#endif

/* operating modes: main() starts in ENCODE and switches to DECODE when the
   input begins with the magic number */
#define ENCODE          0
#define DECODE          1
#define	WORD		0		/* flag to process a word */
#define NON_WORD	1		/* flag to process a non-word */

#define INIT_CONTEXT	1023		/* initial size of word contexts */
#define CHAR_CONTEXT	256		/* length of character contexts */

#define BUFFER_SIZE	512		/* size of file input buffer */
#define MEGABYTE	(1 << 20)	/* bytes in one megabyte; divisor used by
					   the memory-limit check */

#define DEFAULT_MEM	1		/* default memory limit, in megabytes */
#define MIN_MBYTES      1	       	/* minimum allowable memory size */
#define MAX_MBYTES      255		/* largest limit accepted: main() writes the
					   limit to the output as one byte (putc) */
#define MAGICNO         "123w"         	/* magic number written at the start of
					   encoded output and checked on input */
#define MAGICNO_LENGTH	4		/* length of magic number, in bytes
					   (terminating NUL not counted) */


/*
 * Macro to specify what a word is: yields non-zero when c is an ASCII
 * upper-case letter, lower-case letter or decimal digit, and zero otherwise.
 *
 * Every use of the argument is parenthesised so that an expression argument
 * (e.g. a conditional or bitwise expression) is compared as a whole.
 * NOTE: c is evaluated more than once, so do not pass an expression with
 * side effects (such as *p++ or getc(fp)).
 */
#define ISWORD(c) ((((c) >= 'A') && ((c) <= 'Z')) || \
                   (((c) >= 'a') && ((c) <= 'z')) || \
                   (((c) >= '0') && ((c) <= '9')))


/*
 * function prototypes
 *
 * decode_file is declared with (void) rather than an empty parameter list so
 * that the compiler checks calls against a real prototype; main() calls it
 * with no arguments.
 */
void init_word_model(hash_table *tables[], context *words[]);
void purge_word_model(hash_table *tables[], context *words[]);
void init_char_model(context *characters[], context *lengths[]);
/* tempstore/templength: the bytes main() already consumed from stdin while
   probing for the magic number, and how many of them there are */
void encode_file(unsigned char tempstore[], int templength);
void decode_file(void);
void read_word(char buffer[], int *buffer_length, int *curr_pos, 
	       string *pWord, int type);
/* operation is ENCODE or DECODE */
void print_results(int operation);


/* global variables */
int mbytes = DEFAULT_MEM; 	/* memory limit in megabytes: set by -m when
				   encoding, read from the input header when
				   decoding */
int total_memory;		/* total memory used by all models; do_realloc
				   adds every requested size to it */
int base_memory;	       	/* memory used by character model */
int verbose = 0;		/* set by -v: print statistics to stderr
				   when processing is finished */
int purge_counter=0;		/* counts number of memory purges; reported
				   by print_results when non-zero */
unsigned int nWords[2]; 	/* counts number of words; print_results shows
				   index 0 as "words", index 1 as "non-words" */
unsigned int nDistinctWords[2];	/* counts number of distinct words, indexed
				   the same way as nWords */


/*
 * print the command line synopsis on stderr and terminate with status 1
 */
static void
usage_exit(const char *progname)
{
    fprintf(stderr, 
	    "Usage: %s [-e [-m n] | -d] [-v] [-f n] [file]\n", progname);
    exit(1);
}


/* 
 * parse command line arguments. Decide whether to decode or encode
 * and optional memory size. Also sets filename to stdin if none specified 
 *
 * Options:
 *   -e      force encoding (the input is not probed for a magic number)
 *   -d      force decoding (the input must start with the magic number)
 *   -m n    memory limit in megabytes (MIN_MBYTES..MAX_MBYTES)
 *   -f n    number of F bits (MIN_F_BITS..MAX_F_BITS)
 *   -v      print statistics on stderr when done
 *   file    input file, re-opened as stdin
 *
 * With neither -e nor -d the first MAGICNO_LENGTH bytes of the input decide:
 * if they equal MAGICNO the input is decoded, otherwise it is encoded and
 * the bytes already read are handed on to encode_file.
 *
 * Returns 0 on success; every error path prints a message and exits with
 * status 1.
 */
int 
main(int argc, char *argv[])
{	
    int i;			/* loop counter */
    int what = ENCODE;		/* flag as to whether to encode or decode */
    unsigned char tempstore[MAGICNO_LENGTH];	/* stores magic no */
    int templength = 0;		/* number of bytes read for magic number */
    int	selected = -1;		/* stores if decode set at command line */
    int f_bits_given = 0;	/* set when -f appeared on the command line */
    
    /*
     * parse command line arguments. Sets up whether to decode or encode
     * and optional memory limit. Also replaces stdin with input file 
     */
    for (i = 1; i < argc; ) 
    {
	if (argv[i][0] == '-') 
	{
	    switch(argv[i][1]) 
	    {
	      case 'e':		/* do encode */
		selected = ENCODE;
		i++;
		break;
	      case 'd':		/* do decode */
		selected = DECODE;
		i++;
		break;
	      case 'm':		/* set memory size */
		/* the option needs a value; argv[argc] is NULL */
		if (i + 1 >= argc)
		    usage_exit(argv[0]);
		mbytes = atoi(argv[i + 1]);
		i += 2;
		break;
	      case 'v':		/* set verbose flag to print stats */
		verbose = 1;
		i++;
		break;
	      case 'f':		/* set number of F bits */
		if (i + 1 >= argc)
		    usage_exit(argv[0]);
		f_bits = atoi(argv[i + 1]);
		/* max_frequency is derived below, once f_bits is validated */
		f_bits_given = 1;
		i += 2;
		break;
	      default:		/* incorrect args */
		usage_exit(argv[0]);
	    }
	}
	else if (freopen(argv[i], "r", stdin) == (FILE *)NULL) 
	{
	    fprintf(stderr, "%s: cannot read %s\n",
		    argv[0], argv[i]);
	    exit(1);
	}
	else
	    i++;
    }
    
    /* check if memory limit is within allowable range */ 
    if (mbytes < MIN_MBYTES || mbytes > MAX_MBYTES)
    {
	fprintf(stderr, "memory limit must be between %d and %d\n", 
		MIN_MBYTES, MAX_MBYTES);
	exit(1);
    }
    
    /* check if f_bits is within allowable range */
    if (f_bits < MIN_F_BITS || f_bits > MAX_F_BITS)
    {
	fprintf(stderr, "number of f bits must be between %d and %d\n",
		MIN_F_BITS, MAX_F_BITS);
	exit(1);
    }

    /* only shift once f_bits is known to be in range */
    if (f_bits_given)
	max_frequency = 1<<f_bits;

    /* Check input file for Magic Number. */
    if (selected != ENCODE)
    {
	templength = (int) fread(tempstore, 1, MAGICNO_LENGTH, stdin);
        bytes_input += templength; 
	/* a short read cannot be a magic number; do not compare bytes
	   that fread never filled in */
	if (templength == MAGICNO_LENGTH &&
	    memcmp(tempstore, MAGICNO, MAGICNO_LENGTH) == 0)
	    what = DECODE;
	else if (selected == DECODE)
	{
	    fprintf(stderr, "Bad Magic Number\n");
	    exit(1);
	}
    }
	
    if (what == ENCODE)					/* do ENCODE */
    {
	/* write magic number to output file */
	fwrite(MAGICNO, 1, MAGICNO_LENGTH, stdout);
	bytes_output += MAGICNO_LENGTH;

	/* store memory limit being used in output */
	putc(mbytes, stdout);
	bytes_output += 1;

	/* store number of f_bits being used in output */
	putc(f_bits, stdout);
	bytes_output += 1;

	encode_file(tempstore, templength);
    }
    else						/* do DECODE */
    {
	/* the two header bytes written by the encoder: memory limit,
	   then number of f_bits */
	int header_mbytes = getc(stdin);
	int header_f_bits = getc(stdin);

	bytes_input += 2;

	/*
	 * The encoder only ever writes values inside these ranges, so
	 * anything else (including EOF on a truncated file) means the
	 * header is corrupt; rejecting it also keeps the shift below
	 * well defined.
	 */
	if (header_mbytes == EOF || header_f_bits == EOF ||
	    header_mbytes < MIN_MBYTES || header_mbytes > MAX_MBYTES ||
	    header_f_bits < MIN_F_BITS || header_f_bits > MAX_F_BITS)
	{
	    fprintf(stderr, "Bad file header\n");
	    exit(1);
	}

	mbytes = header_mbytes;
	f_bits = header_f_bits;
	max_frequency = 1<<f_bits;
	decode_file();
    }
    
    /* statistics section if using verbose flag */
    if (verbose)
	print_results(what);
    return 0;			/* exited cleanly */
}


/*
 *
 * report statistics for a finished run on stderr
 *
 * operation is ENCODE or DECODE. Word counts, input size and the
 * compression rate are only reported for ENCODE; the output size is
 * always reported, and the purge count only when at least one purge
 * happened. CPU time is added when compiled with SYSV defined.
 *
 */
void print_results(int operation)
{
    const int encoding = (operation == ENCODE);

    /* input side figures are only reported when compressing */
    if (encoding)
    {
	fprintf(stderr, 
		"                              words           non-words\n");
	fprintf(stderr, "Words read             : %10u          %10u\n", 
		nWords[0], nWords[1]);
	fprintf(stderr, "Distinct words         : %10u          %10u\n",
		nDistinctWords[0], nDistinctWords[1]);
	fprintf(stderr, "Input file size        : %10u bytes\n", bytes_input);
    }

    fprintf(stderr, "Output file size       : %10u bytes\n", bytes_output);

    if (purge_counter != 0)
    {
	fprintf(stderr, "Memory purges          : %10d times\n", purge_counter);
    }

    /* bits per input character, and output size as a percentage of input */
    if (encoding && (bytes_input > 0))
    {
	fprintf(stderr, "Compression rate       : %10.3f bpc (%0.2f%%) \n", 
		8.0 * bytes_output / bytes_input, 
		(float)bytes_output/bytes_input*100);
    }

    /* only provide timing details if "times" function is available */
#ifdef 	SYSV
{
    struct tms tick_counts;
    float seconds;
    float kb_per_sec = 0;	/* stays 0 when no cpu time was measured */

    times(&tick_counts);    	/* determine the cpu time used */
    seconds = ((float) tick_counts.tms_utime) / sysconf(_SC_CLK_TCK);

    /* throughput is based on the uncompressed side of the operation */
    if (seconds != 0)
    {
	if (encoding)
	    kb_per_sec = ((float) bytes_input) / (1024 * seconds);
	else
	    kb_per_sec = ((float) bytes_output) / (1024 * seconds);
    }

    fprintf(stderr, "Compression time       : %10.2f seconds (%0.2f Kb/s)\n",
	    seconds, kb_per_sec);
}
#endif
}

/*
 *
 * realloc wrapper that enforces the memory limit
 *
 * size is first added to total_memory; if the total, in whole megabytes,
 * has reached mbytes the request is refused and NULL is returned without
 * calling realloc. Otherwise the result of realloc(ptr, size) is returned.
 * Note that size stays counted in total_memory even when NULL is returned.
 *
 */
void *
do_realloc(void *ptr, size_t size)
{
    total_memory += size;

    return ((total_memory / MEGABYTE) < mbytes) ? realloc(ptr, size) : NULL;
}


/*
 *
 * call the standard C function malloc after checking against the memory
 * limit. If the limit is exceeded return NULL

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -