📄 arith-n.cpp
字号:
//------------------------------------------------------------
// arith-n.cpp - order-n arithmetic coding module
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "errhand.h"
#include "bitio.h"
// low_count 和 high_count 唯一地定义了在 0 到 1 范围中符号的所在位置
// scale 指 0 到 1 范围内的总量程,即有多少字符要记数
typedef struct
{
unsigned short int low_count;
unsigned short int high_count;
unsigned short int scale;
}SYMBOL;
#define MAXIMUM_SCALE 16383 // maximum allowed frequency count
#define ESCAPE 256 // the escape symbol
#define DONE ( -1 ) // the output stream empty symbol
#define FLUSH ( -2 ) // the symbol to flush the model
void initialize_options( int argc, char ** argv );
int check_compression( FILE* input, BIT_FILE* output );
void initialize_model( void );
void update_model( int symbol );
int convert_int_to_symbol( int symbol, SYMBOL* s );
void get_symbol_scale( SYMBOL* s);
int convert_symbol_to_int( int count, SYMBOL* s );
void add_character_to_model( int c );
void flush_model( void );
void initialize_arithmetic_decoder( BIT_FILE* stream );
void remove_symbol_from_stream( BIT_FILE* stream, SYMBOL* s );
void initialize_arithmetic_encoder( void );
void encode_symbol( BIT_FILE* stream, SYMBOL* s );
void flush_arithmetic_encoder( BIT_FILE* stream );
short int get_current_count( SYMBOL* s );
char* CompressionName = "Adaptive order n moder with arithmetic coding";
char* Usage = "in-file out-file [-o order]\n\n";
int max_order = 3;
void CompressFile( FILE* input, BIT_FILE* output, int argc, char * argv[] )
{
SYMBOL s;
int c;
int escaped;
int flush = 0;
long int text_count = 0;
initialize_options( argc, argv );
initialize_model();
initialize_arithmetic_encoder();
for(;;)
{
if (( ++ text_count & 0xff ) == 0 )
flush = check_compression( input, output );
if (!flush)
c = getc( input );
else
c = FLUSH;
if (c == EOF)
c = DONE;
do
{
escaped = convert_int_to_symbol( c, &s );
encode_symbol( output, &s );
}while(escaped);
if ( c == DONE )
break;
if ( c == FLUSH )
{
flush_model();
flush = 0;
}
update_model( c );
add_character_to_model( c );
}
flush_arithmetic_encoder( output );
}
void ExpandFile( BIT_FILE* input, FILE* output, int argc, char* argv[])
{
SYMBOL s;
int c;
int count;
initialize_options( argc, argv );
initialize_model();
initialize_arithmetic_decoder( input );
for (;;)
{
do
{
get_symbol_scale( &s );
count = get_current_count( &s );
c = convert_symbol_to_int( count, &s );
remove_symbol_from_stream( input, &s );
}while( c == ESCAPE );
if ( c == DONE )
break;
if ( c != FLUSH )
putc( (char) c, output );
else
flush_model();
update_model( c );
add_character_to_model( c );
}
}
void initialize_options( int argc, char* argv[] )
{
while ( argc-- > 0)
{
if (strcmp( *argv, "-o" ) == 0)
{
argc--;
max_order = atoi(*++argv);
}
else
printf("Uknown argument on command line: %s\n", *argv);
argc--;
argv++;
}
}
int check_compression( FILE* input, BIT_FILE* output )
{
static long local_input_marker = 0L;
static long local_output_marker = 0L;
long total_input_bytes;
long total_output_bytes;
int local_ratio;
total_input_bytes = ftell( input ) - local_input_marker;
total_output_bytes = ftell( output->file );
total_output_bytes -= local_output_marker;
if (total_output_bytes == 0)
total_output_bytes = 1;
local_ratio = (int)(( total_output_bytes * 100 ) / total_input_bytes );
local_input_marker = ftell( input );
local_output_marker = ftell( output->file );
return (local_ratio > 90);
}
//----------------------------------------------------------
// the next few routines contain all of the code and data used to
// perfoem modeling for thes program.
// a context table contains a list of the counts for all symbols
// that have been seen in the defined context. for example, a
// context of "Zor" might have only had 2 different characters
// appear. 't' might have appeared 10 times, and 'l' might have
// appeared once. these two counts are stored in and array of STATS.
// as new characters are added to a particular contexts, the STATS
// array will grow. sometimes, the STATS array will shrink
// after flushing the model.
typedef struct
{
unsigned char symbol;
unsigned char counts;
}STATS;
// each context has to have links to higher order contexts. These
// links are used to navigate through the context tables. For example,
// to find the context table for "ABC", I start at the roder 0 table.
// then find the pointer to the "A" context table by looking through
// the LINKS array. At that table, we find the "B" link and go to
// that table. The process continues until the destination table is
// found. The table pointed to by the LINKS array corresponds to the
// symbol found at the same offset in the STATS table. The reason that
// LINKS is in a separate structure instead of being combined with
// STATS is to save space. All of the leaf context nodes don't need
// next pointers, since they are in the highest order context. In the
// leaf nodes, the LINKS array is a NULL pointers.
typedef struct
{
struct context* next;
}LINKS;
// The CONTEXT structure holds all of the known information about
// a particular context. The links and stats pointers are discussed
// immediately above here. The max_index element gives the maximum
// index that can be applied to the stats or link array. When the
// table is first created, and stats is set to NULL, max_index si set
// to -1. As soon as single element is added to stats, max_index is
// incremented to 0.
//
// The lesser context pointer is a navigational aid. It points to
// the context that is one less than the current order. For example
// if the current is "ABC", the lesser_context pointer will
// point to "BC". The reason for maintaining this pointer is that
// this particular bit of table searching is done frequently, but
// the pointer only needs to be built once, when the context is
// created.
//
typedef struct context
{
int max_index;
LINKS* links;
STATS* stats;
struct context* lesser_context;
}CONTEXT;
CONTEXT ** contexts;
// current_order contains the current order of the model. it starts
// at max_order, and is decremented every time an ESCAPE is sent. it
// will only go down to -1 for normal symbols, but can go to -2 for
// EOF and FLUSH
int current_order;
// this table contains the cumulative totals for the current context.
// because this program is using exclusion, totals has to be calculated
// every time a context is used. the scoreboard array keeps track of
// symbols that have appeared in higher order models, so that they
// can be excluded from lower order context total calculations.
short int totals[258];
char scoreboard[256];
void update_table( CONTEXT* table, int symbol );
void rescale_table( CONTEXT* table );
void totalize_table( CONTEXT* table );
CONTEXT* shift_to_next_context( CONTEXT* table, int c, int order );
CONTEXT* allocate_next_order_table( CONTEXT* table, int symbol, CONTEXT* lesser_context );
void recursive_flush( CONTEXT* table );
void initialize_model()
{
int i;
CONTEXT* null_table;
CONTEXT* control_table;
current_order = max_order;
contexts = (CONTEXT**)calloc(sizeof(CONTEXT*), 10);
if (contexts == NULL)
fatal_error("Failure #1: allocating context table!");
contexts += 2;
null_table = (CONTEXT*)calloc(sizeof(CONTEXT), 1);
if (null_table == NULL)
fatal_error("Failure #2: allocating null table!");
null_table->max_index = -1;
contexts[-1] = null_table;
for (i = 0; i <= max_order; i++)
contexts[i] = allocate_next_order_table(contexts[i - 1], 0, contexts[i - 1]);
free((char*)null_table->stats);
null_table->stats = (STATS*)calloc(sizeof(STATS), 256);
if(null_table->stats == NULL)
fatal_error("Failure #3: allocating null table!");
null_table->max_index = 255;
for (i = 0; i < 256; i ++)
{
null_table->stats[i].symbol = (unsigned char)i;
null_table->stats[i].counts = 1;
}
control_table = (CONTEXT*)calloc(sizeof(CONTEXT), 1);
if (control_table == NULL)
fatal_error("Failure #4: allocating control table!");
control_table->stats = (STATS*)calloc(sizeof(STATS), 2);
if ( control_table->stats == NULL)
fatal_error("Failure #5: allocating control table!");
contexts[-2] = control_table;
control_table->max_index = 1;
control_table->stats[0].symbol = -FLUSH;
control_table->stats[0].counts = 1;
control_table->stats[1].symbol = -DONE;
control_table->stats[1].counts = 1;
for (i = 0; i < 256; i++)
scoreboard[i] = 0;
}
CONTEXT* allocate_next_order_table( CONTEXT* table, int symbol, CONTEXT* lesser_context )
{
CONTEXT* new_table;
int i;
unsigned int new_size;
for ( i = 0; i <= table->max_index; i++)
if (table->stats[i].symbol == ( unsigned char )symbol)
break;
if ( i > table->max_index )
{
table->max_index++;
new_size = sizeof(LINKS);
new_size *= table->max_index + 1;
if (table->links == NULL)
table->links = (LINKS*)calloc(new_size, 1);
else
table->links = (LINKS*)realloc((char*)table->links, new_size);
new_size = sizeof(STATS);
new_size *= table->max_index + 1;
if (table->stats == NULL)
table->stats = (STATS*)calloc(new_size, 1);
else
table->stats = (STATS*)realloc((char*)table->stats, new_size);
if (table->links == NULL)
fatal_error("Failure #6: allocating new talbe");
if (table->stats == NULL)
fatal_error("Failure #7: allocating new table");
table->stats[i].symbol = (unsigned char)symbol;
table->stats[i].counts = 0;
}
new_table = (CONTEXT*)calloc(sizeof(CONTEXT), 1);
if (new_table == NULL)
fatal_error("Failure #8: allocating new table");
new_table->max_index = -1;
table->links[i].next = new_table;
new_table->lesser_context = lesser_context;
return (new_table);
}
// the routine is called to increment the counts for the current
// contexts. It is called after a character has been encoded or
// decoded. All it does is call update_table for each of the
// current contexts, which does the work of incrementing the count.
// This particular version of update_model() practices update exclusion.
// which means that if lower order models weren't used to encode
// or decode the character, they don't get their counts updated.
// this seems to improve compression performance quite a bit.
// to disable update exclusion, the loop would be changed to run
// from 0 to max_order, instead of current_order to max_order
void update_model( int symbol )
{
int i;
int local_order;
if (current_order < 0)
local_order = 0;
else
local_order = current_order;
if (symbol >= 0)
{
while ( local_order <= max_order )
{
if (symbol >= 0)
update_table( contexts[local_order], symbol );
local_order++;
}
}
current_order = max_order;
for( i = 0; i < 256; i++ )
scoreboard[i] = 0;
}
void update_table( CONTEXT* table, int symbol )
{
int i;
int index;
unsigned char temp;
CONTEXT* temp_ptr;
unsigned int new_size;
// first, find the symbol in the apropriate context table. The first
// symbol in the table is the most active. so start there.
index = 0;
while( index <= table->max_index &&
table->stats[index].symbol != (unsigned char)symbol )
index++;
if ( index > table->max_index )
{
table->max_index++;
new_size = sizeof(LINKS);
new_size *= table->max_index + 1;
if (current_order < max_order)
{
if (table->max_index == 0)
table->links = (LINKS*)calloc(new_size, 1);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -