📄 arith.pas
字号:
unit arith;
interface
{*******************************************************************************
arith压缩算法单元
源文件出自王咏刚先生的《笨笨数据压缩教程》中的arith.c
参见 http://www.contextfree.net/wangyg/
由崔东伟修改为pascal单元 Email:cuidongwei@yeah.net
由于该算法随着阶数的增加会动态分配(和释放)大量的内存,所以管理这些内存确是一
件令人头疼的事情。虽然可以找到一些方法,但这里和源文件都没有做清理内存的操作,
由操作系统在退出的时候来清理它。所以该算法每次运行最好只使用一次。编译为控制台
程序也许是使用该算法的做好结果。
如果需要原始的算法,请参照arith.c
*******************************************************************************}
uses
Windows, Messages, SysUtils, Classes,forms;
const
MAXIMUM_SCALE= 16383; // maximum allowed frequency count
ESCAPE = 256; // the escape symbol
DONE = -1 ; // the output stream empty symbol
CFLUSH = -2 ; // the symbol to flush the model
EOF = -1;
max_order :integer= 3;
procedure ArithCompress(InStr, OutStr: TStream);
procedure ArithExpand(InStr, OutStr: TStream);
implementation
type
// low_count 和 high_count 唯一地定义了在 0 到 1 范围中符号的所在位置
// scale 指 0 到 1 范围内的总量程,即有多少字符要记数
TSYMBOL= packed record
low_count:word;
high_count:word;
scale:word;
end;
Pbit_file=^bit_file;
bit_file =record
Afile:TStream;
mask:byte;
rack:integer;
pacifier_counter:integer; // 工作计数
// 初始值为0,每输出一位递增1,每2048位就
// 在标准输出写一个 '.' ,以表示工作正在进行
end;
PSTATS=^STATSARRAY;
STATS=packed record
symbol:byte;
counts:byte;
end;
STATSARRAY=array[0..255] of STATS;
PLINKS=^LINKSARRAY;
LINKS= packed record
next:pointer;
end;
LINKSARRAY=array[0..255] of LINKS;
pcontext=^context;
CONTEXT=packed record
max_index:integer;
links:PLINKS;
stats:PSTATS;
lesser_context:Pcontext;
end;
var
contexts:array[-2..7] of PCONTEXT;
// current_order contains the current order of the model. it starts
// at max_order, and is decremented every time an ESCAPE is sent. it
// will only go down to -1 for normal symbols, but can go to -2 for
// EOF and FLUSH
current_order:integer;
// this table contains the cumulative totals for the current context.
// because this program is using exclusion, totals has to be calculated
// every time a context is used. the scoreboard array keeps track of
// symbols that have appeared in higher order models, so that they
// can be excluded from lower order context total calculations.
totals:array[0..257] of smallint;
scoreboard:array[0..255] of char;
// local_input_marker:int64=0;
// local_output_marker:int64=0;
function check_compression(input:TStream; output:Pbit_file):integer;forward;
procedure initialize_model();forward;
function allocate_next_order_table( table:PCONTEXT; symbol:integer;lesser_context:PCONTEXT):PCONTEXT;forward;
procedure update_model(symbol:integer);forward;
procedure update_table(table:PCONTEXT;symbol:integer);forward;
function convert_int_to_symbol(c:integer;var s:TSYMBOL):integer;forward;
procedure get_symbol_scale( var s:TSYMBOL);forward;
function convert_symbol_to_int(count:integer;var s:TSYMBOL):integer;forward;
procedure add_character_to_model( c:integer );forward;
function shift_to_next_context(table:PCONTEXT; c, order:integer):PCONTEXT;forward;
procedure rescale_table(table:PCONTEXT);forward;
procedure totalize_table(table:PCONTEXT);forward;
procedure recursive_flush( table:PCONTEXT );forward;
procedure flush_model();forward;
procedure initialize_arithmetic_encoder();forward;
procedure flush_arithmetic_encoder(stream:Pbit_file);forward;
procedure encode_symbol(stream:Pbit_file;var s:TSYMBOL);forward;
function get_current_count(var s:TSYMBOL):smallint;forward;
procedure initialize_arithmetic_decoder( stream:Pbit_file );forward;
procedure remove_symbol_from_stream( stream:Pbit_file;var s:TSYMBOL);forward;
function getc(f:TStream):integer;
var
b:byte;
begin
if f.Read(b,1)<>1 then
result:=EOF
else
result:=b;
end;
function putc(c:byte;f:TStream):integer;
begin
if f.Write(c,1)=1 then
result:=c
else
result:=0;
end;
procedure CompressFile(input:TStream; output:Pbit_file);
var
s:TSYMBOL;
c,escaped,flush:integer;
text_count:int64;
begin
flush := 0;
text_count := 0;
// local_input_marker:=0;
// local_output_marker:=0;
//initialize_options( argc, argv );
initialize_model;
initialize_arithmetic_encoder;
while true do
begin
inc(text_count);
if (text_count and $ff ) = 0 then
flush := check_compression( input, output );
if (flush=0) then
c := getc(input )
else
c := CFLUSH;
if (c = EOF) then c := DONE;
repeat
escaped := convert_int_to_symbol( c, s );
encode_symbol( output, s );
until escaped=0;
if ( c = DONE ) then break;
if ( c = CFLUSH ) then
begin
flush_model();
flush := 0;
end;
update_model( c );
add_character_to_model( c );
end;
flush_arithmetic_encoder(output);
end;
procedure ExpandFile( input:Pbit_file; output:TStream);
var
s:TSYMBOL;
c,count:integer;
begin
//initialize_options( argc, argv );
initialize_model();
initialize_arithmetic_decoder( input );
while true do
begin
repeat
get_symbol_scale( s );
count := get_current_count( s );
c := convert_symbol_to_int( count, s );
remove_symbol_from_stream( input, s );
until c <> ESCAPE;
if ( c = DONE ) then break;
if ( c <> CFLUSH ) then
putc( c, output )
else
flush_model();
update_model( c );
add_character_to_model( c );
end;
end;
procedure ArithCompress(InStr, OutStr: TStream);
var
BF:bit_file;
begin
bf.Afile:=OutStr;
bf.rack:=0;
bf.mask:=$80;
bf.pacifier_counter:=0;
CompressFile(InStr,@bf);
if bf.mask<>$80 then
putc(bf.rack,bf.Afile);
end;
procedure ArithExpand(InStr, OutStr: TStream);
var
BF:bit_file;
begin
bf.Afile:=InStr;
bf.rack:=0;
bf.mask:=$80;
bf.pacifier_counter:=0;
ExpandFile(@bf,OutStr);
end;
function check_compression(input:TStream; output:Pbit_file):integer;
const
local_input_marker:int64=0;
local_output_marker:int64=0;
var
total_input_bytes, total_output_bytes:int64;
local_ratio:integer;
begin
total_input_bytes := input.Position - local_input_marker;
total_output_bytes := output.Afile.Position-local_output_marker;
if (total_output_bytes = 0) then total_output_bytes := 1;
local_ratio := ( total_output_bytes * 100 ) div total_input_bytes;
local_input_marker := input.Position;
local_output_marker := output.Afile.Position;
result:=ord(local_ratio > 90);
end;
function callocSTATS(cnt:integer):PSTATS;
begin
// GetMem(result, cnt*sizeof(STATS));
result:=AllocMem(cnt*sizeof(STATS));
end;
procedure initialize_model();
var
i:integer;
null_table ,control_table:PCONTEXT;
begin
current_order := max_order;
null_table:=AllocMem(sizeof(CONTEXT));
null_table^.max_index := -1;
contexts[-1] := null_table;
for i := 0 to max_order do
contexts[i] := allocate_next_order_table(contexts[i - 1], 0, contexts[i - 1]);
FreeMem(null_table^.stats);
null_table^.stats := callocSTATS(256);
null_table.max_index := 255;
for i := 0 to 255 do
begin
null_table^.stats[i].symbol := byte(i);
null_table^.stats[i].counts := 1;
end;
control_table:=AllocMem(sizeof(CONTEXT));
control_table^.stats := callocSTATS(2);
contexts[-2] := control_table;
control_table^.max_index := 1;
control_table^.stats[0].symbol := -CFLUSH;
control_table^.stats[0].counts := 1;
control_table^.stats[1].symbol := -DONE;
control_table^.stats[1].counts := 1;
for i:=0 to 255 do scoreboard[i] := #0;
end;
function allocate_next_order_table( table:PCONTEXT; symbol:integer;lesser_context:PCONTEXT):PCONTEXT;
var
new_table:PCONTEXT;
i:integer;
new_size:Dword;
begin
i:=0;
while i<=table^.max_index do
begin
if (table.stats[i].symbol = byte(symbol)) then break;
inc(i);
end;
if ( i > table^.max_index ) then
begin
inc(table^.max_index);
new_size := sizeof(LINKS) * ( table^.max_index + 1);
// if (table^.max_index = 0) then
if (table^.links = nil) then
table^.links := AllocMem(new_size)
else
ReallocMem(table^.links, new_size);
new_size := sizeof(STATS) * ( table^.max_index + 1);
if (table^.stats = nil) then
table^.stats := AllocMem(new_size)
else
ReallocMem(table^.stats, new_size);
table^.stats[i].symbol := byte(symbol);
table^.stats[i].counts := 0;
end;
new_table:=AllocMem(sizeof(CONTEXT));
new_table.max_index := -1;
table^.links[i].next := new_table;
new_table^.lesser_context := lesser_context;
result:=new_table;
end;
// the routine is called to increment the counts for the current
// contexts. It is called after a character has been encoded or
// decoded. All it does is call update_table for each of the
// current contexts, which does the work of incrementing the count.
// This particular version of update_model() practices update exclusion.
// which means that if lower order models weren't used to encode
// or decode the character, they don't get their counts updated.
// this seems to improve compression performance quite a bit.
// to disable update exclusion, the loop would be changed to run
// from 0 to max_order, instead of current_order to max_order
procedure update_model(symbol:integer);
var
i,local_order:integer;
begin
if (current_order < 0) then
local_order := 0
else
local_order := current_order;
if (symbol >= 0) then
begin
while ( local_order <= max_order ) do
begin
if (symbol >= 0) then
update_table( contexts[local_order], symbol );
inc(local_order);
end;
end;
current_order := max_order;
for i := 0 to 255 do
scoreboard[i] := #0;
end;
procedure update_table(table:PCONTEXT;symbol:integer);
var
i,index:integer;
temp:byte;
temp_ptr:PCONTEXT;
new_size:word;
begin
// first, find the symbol in the apropriate context table. The first
// symbol in the table is the most active. so start there.
index := 0;
while (index <= table^.max_index) and
(table.stats[index].symbol <> byte(symbol)) do
inc(index);
if ( index > table^.max_index ) then
begin
inc(table^.max_index);
new_size := sizeof(LINKS) * ( table^.max_index + 1);
if (current_order < max_order) then
begin
if (table^.max_index = 0) then
// if (table^links = nil) then
table^.links := AllocMem(new_size)
else
ReallocMem(table^.links, new_size);
table^.links[index].next := nil;
end;
new_size := sizeof(STATS) * ( table^.max_index + 1);
if (table^.max_index = 0) then
// if (table^.stats = nil) then
table^.stats := AllocMem(new_size)
else
ReallocMem(table^.stats, new_size);
table^.stats[index].symbol := byte(symbol);
table^.stats[index].counts := 0;
end;
// now I move the symbol to the front of its list
i := index;
while (i > 0) and (table^.stats[index].counts = table^.stats[i - 1].counts) do
dec(i);
if (i <> index) then
begin
temp := table^.stats[index].symbol;
table^.stats[index].symbol := table^.stats[i].symbol;
table^.stats[i].symbol := temp;
if (table^.links <> nil) then
begin
temp_ptr := table^.links[index].next;
table^.links[index].next := table^.links[i].next;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -