📄 trees.pas
字号:
Unit trees;
{$T-}
{$define ORG_DEBUG}
{
trees.c -- output deflated data using Huffman coding
Copyright (C) 1995-1998 Jean-loup Gailly
Pascal tranlastion
Copyright (C) 1998 by Jacques Nomssi Nzali
For conditions of distribution and use, see copyright notice in readme.txt
}
{
* ALGORITHM
*
* The "deflation" process uses several Huffman trees. The more
* common source values are represented by shorter bit sequences.
*
* Each code tree is stored in a compressed form which is itself
* a Huffman encoding of the lengths of all the code strings (in
* ascending order by source values). The actual code strings are
* reconstructed from the lengths in the inflate process, as described
* in the deflate specification.
*
* REFERENCES
*
* Deutsch, L.P.,"'Deflate' Compressed Data Format Specification".
* Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc
*
* Storer, James A.
* Data Compression: Methods and Theory, pp. 49-50.
* Computer Science Press, 1988. ISBN 0-7167-8156-5.
*
* Sedgewick, R.
* Algorithms, p290.
* Addison-Wesley, 1983. ISBN 0-201-06672-6.
}
interface
{$I zconf.inc}
uses
{$ifdef DEBUG}
strutils,
{$ENDIF}
zutil, zlib;
{ ===========================================================================
Internal compression state. }
const
LENGTH_CODES = 29;
{ number of length codes, not counting the special END_BLOCK code }
LITERALS = 256;
{ number of literal bytes 0..255 }
L_CODES = (LITERALS+1+LENGTH_CODES);
{ number of Literal or Length codes, including the END_BLOCK code }
D_CODES = 30;
{ number of distance codes }
BL_CODES = 19;
{ number of codes used to transfer the bit lengths }
HEAP_SIZE = (2*L_CODES+1);
{ maximum heap size }
MAX_BITS = 15;
{ All codes must not exceed MAX_BITS bits }
const
INIT_STATE = 42;
BUSY_STATE = 113;
FINISH_STATE = 666;
{ Stream status }
{ Data structure describing a single value and its code string. }
type
ct_data_ptr = ^ct_data;
ct_data = record
fc : record
case byte of
0:(freq : ush); { frequency count }
1:(code : ush); { bit string }
end;
dl : record
case byte of
0:(dad : ush); { father node in Huffman tree }
1:(len : ush); { length of bit string }
end;
end;
{ Freq = fc.freq
Code = fc.code
Dad = dl.dad
Len = dl.len }
type
ltree_type = array[0..HEAP_SIZE-1] of ct_data; { literal and length tree }
dtree_type = array[0..2*D_CODES+1-1] of ct_data; { distance tree }
htree_type = array[0..2*BL_CODES+1-1] of ct_data; { Huffman tree for bit lengths }
{ generic tree type }
tree_type = array[0..(MaxInt div SizeOf(ct_data))-1] of ct_data;
tree_ptr = ^tree_type;
ltree_ptr = ^ltree_type;
dtree_ptr = ^dtree_type;
htree_ptr = ^htree_type;
type
static_tree_desc_ptr = ^static_tree_desc;
static_tree_desc =
record
{const} static_tree : tree_ptr; { static tree or NIL }
{const} extra_bits : pzIntfArray; { extra bits for each code or NIL }
extra_base : int; { base index for extra_bits }
elems : int; { max number of elements in the tree }
max_length : int; { max bit length for the codes }
end;
tree_desc_ptr = ^tree_desc;
tree_desc = record
dyn_tree : tree_ptr; { the dynamic tree }
max_code : int; { largest code with non zero frequency }
stat_desc : static_tree_desc_ptr; { the corresponding static tree }
end;
type
Pos = ush;
Posf = Pos; {FAR}
IPos = uInt;
pPosf = ^Posf;
zPosfArray = array[0..(MaxInt div SizeOf(Posf))-1] of Posf;
pzPosfArray = ^zPosfArray;
{ A Pos is an index in the character window. We use short instead of int to
save space in the various tables. IPos is used only for parameter passing.}
type
deflate_state_ptr = ^deflate_state;
deflate_state = record
strm : z_streamp; { pointer back to this zlib stream }
status : int; { as the name implies }
pending_buf : pzByteArray; { output still pending }
pending_buf_size : ulg; { size of pending_buf }
pending_out : pBytef; { next pending byte to output to the stream }
pending : int; { nb of bytes in the pending buffer }
noheader : int; { suppress zlib header and adler32 }
data_type : Byte; { UNKNOWN, BINARY or ASCII }
method : Byte; { STORED (for zip only) or DEFLATED }
last_flush : int; { value of flush param for previous deflate call }
{ used by deflate.pas: }
w_size : uInt; { LZ77 window size (32K by default) }
w_bits : uInt; { log2(w_size) (8..16) }
w_mask : uInt; { w_size - 1 }
window : pzByteArray;
{ Sliding window. Input bytes are read into the second half of the window,
and move to the first half later to keep a dictionary of at least wSize
bytes. With this organization, matches are limited to a distance of
wSize-MAX_MATCH bytes, but this ensures that IO is always
performed with a length multiple of the block size. Also, it limits
the window size to 64K, which is quite useful on MSDOS.
To do: use the user input buffer as sliding window. }
window_size : ulg;
{ Actual size of window: 2*wSize, except when the user input buffer
is directly used as sliding window. }
prev : pzPosfArray;
{ Link to older string with same hash index. To limit the size of this
array to 64K, this link is maintained only for the last 32K strings.
An index in this array is thus a window index modulo 32K. }
head : pzPosfArray; { Heads of the hash chains or NIL. }
ins_h : uInt; { hash index of string to be inserted }
hash_size : uInt; { number of elements in hash table }
hash_bits : uInt; { log2(hash_size) }
hash_mask : uInt; { hash_size-1 }
hash_shift : uInt;
{ Number of bits by which ins_h must be shifted at each input
step. It must be such that after MIN_MATCH steps, the oldest
byte no longer takes part in the hash key, that is:
hash_shift * MIN_MATCH >= hash_bits }
block_start : long;
{ Window position at the beginning of the current output block. Gets
negative when the window is moved backwards. }
match_length : uInt; { length of best match }
prev_match : IPos; { previous match }
match_available : boolean; { set if previous match exists }
strstart : uInt; { start of string to insert }
match_start : uInt; { start of matching string }
lookahead : uInt; { number of valid bytes ahead in window }
prev_length : uInt;
{ Length of the best match at previous step. Matches not greater than this
are discarded. This is used in the lazy match evaluation. }
max_chain_length : uInt;
{ To speed up deflation, hash chains are never searched beyond this
length. A higher limit improves compression ratio but degrades the
speed. }
{ moved to the end because Borland Pascal won't accept the following:
max_lazy_match : uInt;
max_insert_length : uInt absolute max_lazy_match;
}
level : int; { compression level (1..9) }
strategy : int; { favor or force Huffman coding}
good_match : uInt;
{ Use a faster search when the previous match is longer than this }
nice_match : int; { Stop searching when current match exceeds this }
{ used by trees.pas: }
{ Didn't use ct_data typedef below to supress compiler warning }
dyn_ltree : ltree_type; { literal and length tree }
dyn_dtree : dtree_type; { distance tree }
bl_tree : htree_type; { Huffman tree for bit lengths }
l_desc : tree_desc; { desc. for literal tree }
d_desc : tree_desc; { desc. for distance tree }
bl_desc : tree_desc; { desc. for bit length tree }
bl_count : array[0..MAX_BITS+1-1] of ush;
{ number of codes at each bit length for an optimal tree }
heap : array[0..2*L_CODES+1-1] of int; { heap used to build the Huffman trees }
heap_len : int; { number of elements in the heap }
heap_max : int; { element of largest frequency }
{ The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
The same heap array is used to build all trees. }
depth : array[0..2*L_CODES+1-1] of uch;
{ Depth of each subtree used as tie breaker for trees of equal frequency }
l_buf : puchfArray; { buffer for literals or lengths }
lit_bufsize : uInt;
{ Size of match buffer for literals/lengths. There are 4 reasons for
limiting lit_bufsize to 64K:
- frequencies can be kept in 16 bit counters
- if compression is not successful for the first block, all input
data is still in the window so we can still emit a stored block even
when input comes from standard input. (This can also be done for
all blocks if lit_bufsize is not greater than 32K.)
- if compression is not successful for a file smaller than 64K, we can
even emit a stored file instead of a stored block (saving 5 bytes).
This is applicable only for zip (not gzip or zlib).
- creating new Huffman trees less frequently may not provide fast
adaptation to changes in the input data statistics. (Take for
example a binary file with poorly compressible code followed by
a highly compressible string table.) Smaller buffer sizes give
fast adaptation but have of course the overhead of transmitting
trees more frequently.
- I can't count above 4 }
last_lit : uInt; { running index in l_buf }
d_buf : pushfArray;
{ Buffer for distances. To simplify the code, d_buf and l_buf have
the same number of elements. To use different lengths, an extra flag
array would be necessary. }
opt_len : ulg; { bit length of current block with optimal trees }
static_len : ulg; { bit length of current block with static trees }
compressed_len : ulg; { total bit length of compressed file }
matches : uInt; { number of string matches in current block }
last_eob_len : int; { bit length of EOB code for last block }
{$ifdef DEBUG}
bits_sent : ulg; { bit length of the compressed data }
{$endif}
bi_buf : ush;
{ Output buffer. bits are inserted starting at the bottom (least
significant bits). }
bi_valid : int;
{ Number of valid bits in bi_buf. All bits above the last valid bit
are always zero. }
case byte of
0:(max_lazy_match : uInt);
{ Attempt to find a better match only when the current match is strictly
smaller than this value. This mechanism is used only for compression
levels >= 4. }
1:(max_insert_length : uInt);
{ Insert new strings in the hash table only if the match length is not
greater than this length. This saves time but degrades compression.
max_insert_length is used only for compression levels <= 3. }
end;
procedure _tr_init (var s : deflate_state);
function _tr_tally (var s : deflate_state;
dist : unsigned;
lc : unsigned) : boolean;
function _tr_flush_block (var s : deflate_state;
buf : pcharf;
stored_len : ulg;
eof : boolean) : ulg;
procedure _tr_align(var s : deflate_state);
procedure _tr_stored_block(var s : deflate_state;
buf : pcharf;
stored_len : ulg;
eof : boolean);
implementation
{ #define GEN_TREES_H }
{$ifndef GEN_TREES_H}
{ header created automatically with -DGEN_TREES_H }
const
DIST_CODE_LEN = 512; { see definition of array dist_code below }
{ The static literal tree. Since the bit lengths are imposed, there is no
need for the L_CODES extra codes used during heap construction. However
The codes 286 and 287 are needed to build a canonical tree (see _tr_init
below). }
const
static_ltree : array[0..L_CODES+2-1] of ct_data = (
{ fc:(freq, code) dl:(dad,len) }
(fc:(freq: 12);dl:(len: 8)), (fc:(freq:140);dl:(len: 8)), (fc:(freq: 76);dl:(len: 8)),
(fc:(freq:204);dl:(len: 8)), (fc:(freq: 44);dl:(len: 8)), (fc:(freq:172);dl:(len: 8)),
(fc:(freq:108);dl:(len: 8)), (fc:(freq:236);dl:(len: 8)), (fc:(freq: 28);dl:(len: 8)),
(fc:(freq:156);dl:(len: 8)), (fc:(freq: 92);dl:(len: 8)), (fc:(freq:220);dl:(len: 8)),
(fc:(freq: 60);dl:(len: 8)), (fc:(freq:188);dl:(len: 8)), (fc:(freq:124);dl:(len: 8)),
(fc:(freq:252);dl:(len: 8)), (fc:(freq: 2);dl:(len: 8)), (fc:(freq:130);dl:(len: 8)),
(fc:(freq: 66);dl:(len: 8)), (fc:(freq:194);dl:(len: 8)), (fc:(freq: 34);dl:(len: 8)),
(fc:(freq:162);dl:(len: 8)), (fc:(freq: 98);dl:(len: 8)), (fc:(freq:226);dl:(len: 8)),
(fc:(freq: 18);dl:(len: 8)), (fc:(freq:146);dl:(len: 8)), (fc:(freq: 82);dl:(len: 8)),
(fc:(freq:210);dl:(len: 8)), (fc:(freq: 50);dl:(len: 8)), (fc:(freq:178);dl:(len: 8)),
(fc:(freq:114);dl:(len: 8)), (fc:(freq:242);dl:(len: 8)), (fc:(freq: 10);dl:(len: 8)),
(fc:(freq:138);dl:(len: 8)), (fc:(freq: 74);dl:(len: 8)), (fc:(freq:202);dl:(len: 8)),
(fc:(freq: 42);dl:(len: 8)), (fc:(freq:170);dl:(len: 8)), (fc:(freq:106);dl:(len: 8)),
(fc:(freq:234);dl:(len: 8)), (fc:(freq: 26);dl:(len: 8)), (fc:(freq:154);dl:(len: 8)),
(fc:(freq: 90);dl:(len: 8)), (fc:(freq:218);dl:(len: 8)), (fc:(freq: 58);dl:(len: 8)),
(fc:(freq:186);dl:(len: 8)), (fc:(freq:122);dl:(len: 8)), (fc:(freq:250);dl:(len: 8)),
(fc:(freq: 6);dl:(len: 8)), (fc:(freq:134);dl:(len: 8)), (fc:(freq: 70);dl:(len: 8)),
(fc:(freq:198);dl:(len: 8)), (fc:(freq: 38);dl:(len: 8)), (fc:(freq:166);dl:(len: 8)),
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -