📄 arith-n.cpp

📁 Arith-N 是可以在命令行指定阶数的 N 阶上下文自适应算术编码通用压缩、解压缩程序
💻 CPP
📖 第 1 页 / 共 2 页
字号:
12 下一页

//------------------------------------------------------------
// arith-n.cpp - order-n arithmetic coding module

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "errhand.h"
#include "bitio.h"

// low_count 和 high_count 唯一地定义了在 0 到 1 范围中符号的所在位置
// scale 指 0 到 1 范围内的总量程，即有多少字符要记数
typedef struct
{
	unsigned short int low_count;
	unsigned short int high_count;
	unsigned short int scale;
}SYMBOL;

#define MAXIMUM_SCALE	16383		// maximum allowed frequency count 
#define ESCAPE			256			// the escape symbol
#define DONE			( -1 )		// the output stream empty symbol
#define FLUSH			( -2 )		// the symbol to flush the model

void initialize_options( int argc, char ** argv );
int check_compression( FILE* input, BIT_FILE* output );
void initialize_model( void );
void update_model( int symbol );
int convert_int_to_symbol( int symbol, SYMBOL* s );
void get_symbol_scale( SYMBOL* s);
int convert_symbol_to_int( int count, SYMBOL* s );
void add_character_to_model( int c );
void flush_model( void );
void initialize_arithmetic_decoder( BIT_FILE* stream );
void remove_symbol_from_stream( BIT_FILE* stream, SYMBOL* s );
void initialize_arithmetic_encoder( void );
void encode_symbol( BIT_FILE* stream, SYMBOL* s );
void flush_arithmetic_encoder( BIT_FILE* stream );
short int get_current_count( SYMBOL* s );

char* CompressionName	= "Adaptive order n moder with arithmetic coding";
char* Usage				= "in-file out-file [-o order]\n\n";
int max_order			= 3;

void CompressFile( FILE* input, BIT_FILE* output, int argc, char * argv[] )
{
	SYMBOL s;
	int c;
	int escaped;
	int flush = 0;
	long int text_count = 0;

	initialize_options( argc, argv );
	initialize_model();
	initialize_arithmetic_encoder();
	for(;;)
	{
		if (( ++ text_count & 0xff ) == 0 )
			flush = check_compression( input, output );
		if (!flush)
			c = getc( input );
		else
			c = FLUSH;
		if (c == EOF)
			c = DONE;
		do 
		{
			escaped = convert_int_to_symbol( c, &s );
			encode_symbol( output, &s );
		}while(escaped);
		if ( c == DONE )
			break;
		if ( c == FLUSH )
		{
			flush_model();
			flush = 0;
		}
		update_model( c );
		add_character_to_model( c );
	}
	flush_arithmetic_encoder( output );	
}

void ExpandFile( BIT_FILE* input, FILE* output, int argc, char* argv[])
{
	SYMBOL s;
	int c;
	int count;

	initialize_options( argc, argv );
	initialize_model();
	initialize_arithmetic_decoder( input );
	for (;;)
	{
		do 
		{
			get_symbol_scale( &s );
			count = get_current_count( &s );
			c = convert_symbol_to_int( count, &s );
			remove_symbol_from_stream( input, &s );
		}while( c == ESCAPE );
		if ( c == DONE )
			break;
		if ( c != FLUSH )
			putc( (char) c, output );
		else
			flush_model();
		update_model( c );
		add_character_to_model( c );
	}
}

void initialize_options( int argc, char* argv[] )
{
	while ( argc-- > 0)
	{
		if (strcmp( *argv, "-o" ) == 0)
		{
			argc--;
			max_order = atoi(*++argv);
		}
		else
			printf("Uknown argument on command line: %s\n", *argv);
		argc--;
		argv++;
	}
}

int check_compression( FILE* input, BIT_FILE* output )
{
	static long local_input_marker = 0L;
	static long local_output_marker = 0L;
	long total_input_bytes;
	long total_output_bytes;
	int local_ratio;

	total_input_bytes = ftell( input ) - local_input_marker;
	total_output_bytes = ftell( output->file );
	total_output_bytes -= local_output_marker;
	if (total_output_bytes == 0)
		total_output_bytes = 1;
	local_ratio = (int)(( total_output_bytes * 100 ) / total_input_bytes );
	local_input_marker = ftell( input );
	local_output_marker = ftell( output->file );

	return (local_ratio > 90);
}


//----------------------------------------------------------
// the next few routines contain all of the code and data used to
// perfoem modeling for thes program.

// a context table contains a list of the counts for all symbols
// that have been seen in the defined context.  for example, a 
// context of "Zor" might have only had 2 different characters 
// appear.   't' might have appeared 10 times, and 'l' might have 
// appeared once. these two counts are stored in and array of STATS.
// as new characters are added to a particular contexts, the STATS
// array will grow.  sometimes, the STATS array will shrink 
// after flushing the model.
typedef struct
{
	unsigned char symbol;
	unsigned char counts;
}STATS;


// each context has to have links to higher order contexts. These
// links are used to navigate through the context tables. For example, 
// to find the context table for "ABC", I start at the roder 0 table.
// then find the pointer to the "A" context table by looking through
// the LINKS array.  At that table, we find the "B" link  and go to 
// that table. The process continues until the destination table is 
// found.  The table pointed to by the LINKS array corresponds to the 
// symbol found at the same offset in the STATS table.  The reason that
// LINKS is in a separate structure instead of being combined with 
// STATS is to save space.  All of the leaf context nodes don't need
// next pointers, since they are in the highest order context.  In the 
// leaf nodes, the LINKS array is a NULL pointers.
typedef struct
{
	struct context* next;
}LINKS;


// The CONTEXT structure holds all of the known information about
// a particular context.  The links and stats pointers are discussed 
// immediately above here.  The max_index element gives the maximum
// index that can be applied to the stats or link array. When the 
// table is first created, and stats is set to NULL, max_index si set 
// to -1. As soon as single element is added to stats, max_index is 
// incremented to 0.
//
// The lesser context pointer is a navigational aid. It points to 
// the context that is one less than the current order. For example
// if the current is "ABC", the lesser_context pointer will 
// point to "BC". The reason for maintaining this pointer is that 
// this particular bit of table searching is done frequently, but
// the pointer only needs to be built once, when the context is 
// created.
//
typedef struct context
{
	int max_index;
	LINKS* links;
	STATS* stats;
	struct context* lesser_context;
}CONTEXT;

CONTEXT ** contexts;

// current_order contains the current order of the model. it starts 
// at max_order, and is decremented every time an ESCAPE is sent. it
// will only go down to -1 for normal symbols, but can go to -2 for
// EOF and FLUSH
int current_order;

// this table contains the cumulative totals for the current context.
// because this program is using exclusion, totals has to be calculated 
// every time a context is used. the scoreboard array keeps track of
// symbols that have appeared in higher order models, so that they
// can be excluded from lower order context total calculations.
short int totals[258];
char scoreboard[256];

void update_table( CONTEXT* table, int symbol );
void rescale_table( CONTEXT* table );
void totalize_table( CONTEXT* table );
CONTEXT* shift_to_next_context( CONTEXT* table, int c, int order );
CONTEXT* allocate_next_order_table( CONTEXT* table, int symbol, CONTEXT* lesser_context );
void recursive_flush( CONTEXT* table );

void initialize_model()
{
	int i;
	CONTEXT* null_table;
	CONTEXT* control_table;

	current_order = max_order;
	contexts = (CONTEXT**)calloc(sizeof(CONTEXT*), 10);
	if (contexts == NULL)
		fatal_error("Failure #1: allocating context table!");
	contexts += 2;
	null_table = (CONTEXT*)calloc(sizeof(CONTEXT), 1);
	if (null_table == NULL)
		fatal_error("Failure #2: allocating null table!");
	null_table->max_index = -1;
	contexts[-1] = null_table;
	for (i = 0; i <= max_order; i++)
		contexts[i] = allocate_next_order_table(contexts[i - 1], 0, contexts[i - 1]);
	free((char*)null_table->stats);
	null_table->stats = (STATS*)calloc(sizeof(STATS), 256);
	if(null_table->stats == NULL)
		fatal_error("Failure #3: allocating null table!");
	null_table->max_index = 255;
	for (i = 0; i < 256; i ++)
	{
		null_table->stats[i].symbol = (unsigned char)i;
		null_table->stats[i].counts = 1;
	}

	control_table = (CONTEXT*)calloc(sizeof(CONTEXT), 1);
	if (control_table == NULL)
		fatal_error("Failure #4: allocating control table!");
	control_table->stats = (STATS*)calloc(sizeof(STATS), 2);
	if ( control_table->stats == NULL)
		fatal_error("Failure #5: allocating control table!");
	contexts[-2] = control_table;
	control_table->max_index = 1;
	control_table->stats[0].symbol = -FLUSH;
	control_table->stats[0].counts = 1;
	control_table->stats[1].symbol = -DONE;
	control_table->stats[1].counts = 1;

	for (i = 0; i < 256; i++)
		scoreboard[i] = 0;
}

CONTEXT* allocate_next_order_table( CONTEXT* table, int symbol, CONTEXT* lesser_context )
{
	CONTEXT* new_table;
	int i;
	unsigned int new_size;

	for ( i = 0; i <= table->max_index; i++)
		if (table->stats[i].symbol == ( unsigned char )symbol)
			break;
	if ( i > table->max_index )
	{
		table->max_index++;
		new_size = sizeof(LINKS);
		new_size *= table->max_index + 1;
		if (table->links == NULL)
			table->links = (LINKS*)calloc(new_size, 1);
		else
			table->links = (LINKS*)realloc((char*)table->links, new_size);
		new_size = sizeof(STATS);
		new_size *= table->max_index + 1;
		if (table->stats == NULL)
			table->stats = (STATS*)calloc(new_size, 1);
		else
			table->stats = (STATS*)realloc((char*)table->stats, new_size);
		if (table->links == NULL)
			fatal_error("Failure #6: allocating new talbe");
		if (table->stats == NULL)
			fatal_error("Failure #7: allocating new table");
		table->stats[i].symbol = (unsigned char)symbol;
		table->stats[i].counts = 0;
	}
	new_table = (CONTEXT*)calloc(sizeof(CONTEXT), 1);
	if (new_table == NULL)
		fatal_error("Failure #8: allocating new table");
	new_table->max_index = -1;
	table->links[i].next = new_table;
	new_table->lesser_context = lesser_context;
	return (new_table);
}

// the routine is called to increment the counts for the current 
// contexts. It is called after a character has been encoded or 
// decoded. All it does is call update_table for each of the 
// current contexts, which does the work of incrementing the count.
// This particular version of update_model() practices update exclusion.
// which means that if lower order models weren't used to encode 
// or decode the character, they don't get their counts updated.
// this seems to improve compression performance quite a bit.
// to disable update exclusion, the loop would be changed to run 
// from 0 to max_order, instead of current_order to max_order
void update_model( int symbol )
{
	int i;
	int local_order;

	if (current_order < 0)
		local_order = 0;
	else
		local_order = current_order;
	if (symbol >= 0)
	{
		while ( local_order <= max_order )
		{
			if (symbol >= 0)
				update_table( contexts[local_order], symbol );
			local_order++;
		}
	}
	current_order = max_order;
	for( i = 0; i < 256; i++ )
		scoreboard[i] = 0;
}

void update_table( CONTEXT* table, int symbol )
{
	int i;
	int index;
	unsigned char temp;
	CONTEXT* temp_ptr;
	unsigned int new_size;

	// first, find the symbol in the apropriate context table. The first
	// symbol in the table is the most active. so start there.
	index = 0;
	while( index <= table->max_index && 
			table->stats[index].symbol != (unsigned char)symbol )
		index++;
	if ( index > table->max_index )
	{
		table->max_index++;
		new_size = sizeof(LINKS);
		new_size *= table->max_index + 1;
		if (current_order < max_order)
		{
			if (table->max_index == 0)
				table->links = (LINKS*)calloc(new_size, 1);
12 下一页
💿 文件大小 13 K
👤 上传用户 a195767236
📂 所属分类压缩解压
🏷️ 相关标签

#Arith-N #命令行 #算术编码 #程序
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -