⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ari.cpp

📁 bwt压缩方式的源码
💻 CPP
字号:
//
//  ARI.CPP
//
//  Mark Nelson
//  March 8, 1996
//  http://web2.airmail.net/markn
//
// DESCRIPTION
// -----------
//
//  This program performs an order-0 adaptive arithmetic encoding
//  function on an input file/stream, and sends the result to an
//  output file or stream.
//
//  This program contains the source code from the 1987 CACM
//  article by Witten, Neal, and Cleary.  I have taken the
//  source modules and combined them into this single file for
//  ease of distribution and compilation.  Other than that,
//  the code is essentially unchanged.
//
//  This program takes two arguments: an input file and an output
//  file.  You can leave off one argument and send your output to
//  stdout.  Leave off two arguments and read your input from stdin
//  as well.
//
//  This program accompanies my article "Data Compression with the
//  Burrows-Wheeler Transform."
//
// Build Instructions
// ------------------
//
//  Define the constant unix for UNIX or UNIX-like systems.  The
//  use of this constant turns off the code used to force the MS-DOS
//  file system into binary mode.  g++ already does this, and your
//  UNIX C++ compiler might also.
//
//  Borland C++ 4.5 16 bit    : bcc -w ari.cpp
//  Borland C++ 4.5 32 bit    : bcc32 -w ari.cpp
//  Microsoft Visual C++ 1.52 : cl /W4 ari.cpp
//  Microsoft Visual C++ 2.1  : cl /W4 ari.cpp
//  g++                       : g++ -o ari ari.cpp
//
// Typical Use
// -----------
//
//  rle < raw-file | bwt | mtf | rle | ari > compressed-file
//
//

#include <stdio.h>
#include <stdlib.h>
#if !defined( unix )
#include <io.h>
#endif
#include <fcntl.h>

#define No_of_chars 256                 /* Number of character symbols      */
#define EOF_symbol (No_of_chars+1)      /* Index of EOF symbol              */

#define No_of_symbols (No_of_chars+1)   /* Total number of symbols          */

/* TRANSLATION TABLES BETWEEN CHARACTERS AND SYMBOL INDEXES. */

int char_to_index[No_of_chars];         /* To index from character          */
unsigned char index_to_char[No_of_symbols+1]; /* To character from index    */

/* ADAPTIVE SOURCE MODEL */

int freq[No_of_symbols+1];      /* Symbol frequencies                       */
int cum_freq[No_of_symbols+1];  /* Cumulative symbol frequencies            */

/* DECLARATIONS USED FOR ARITHMETIC ENCODING AND DECODING */

/* SIZE OF ARITHMETIC CODE VALUES. */

#define Code_value_bits 16              /* Number of bits in a code value   */
typedef long code_value;                /* Type of an arithmetic code value */

#define Top_value (((long)1<<Code_value_bits)-1)      /* Largest code value */


/* HALF AND QUARTER POINTS IN THE CODE VALUE RANGE. */

#define First_qtr (Top_value/4+1)       /* Point after first quarter        */
#define Half	  (2*First_qtr)            /* Point after first half           */
#define Third_qtr (3*First_qtr)         /* Point after third quarter        */


/* CUMULATIVE FREQUENCY TABLE. */

#define Max_frequency 16383             /* Maximum allowed frequency count  */

extern int cum_freq[No_of_symbols+1];   /* Cumulative symbol frequencies    */


void start_model( void );
void start_encoding( void );
void encode_symbol(int symbol,int cum_freq[] );
void update_model(int symbol);
void done_encoding( void );
void done_outputing_bits( void );

/* BIT OUTPUT ROUTINES. */

/* THE BIT BUFFER. */

int buffer;                     /* Bits buffered for output                 */
int bits_to_go;                 /* Number of bits free in buffer            */


/* INITIALIZE FOR BIT OUTPUT. */

void start_outputing_bits( void )
{   buffer = 0;                                 /* Buffer is empty to start */
    bits_to_go= 8;                              /* with.                    */
}

/* OUTPUT A BIT. */

inline void output_bit( int bit )
{   buffer >>= 1; if (bit) buffer |= 0x80;      /* Put bit in top of buffer.*/
    bits_to_go -= 1;
    if (bits_to_go==0) {                        /* Output buffer if it is   */
        putc( (char) buffer, stdout );          /* now full.                */
        bits_to_go = 8;
    }
}

/* FLUSH OUT THE LAST BITS. */

void done_outputing_bits( void )
{   putc( (char) ( buffer >> bits_to_go ),stdout);
}

/* CURRENT STATE OF THE ENCODING. */

code_value low, high;           /* Ends of the current code region          */
long bits_to_follow;            /* Number of opposite bits to output after  */
                                /* the next bit.                            */
/* OUTPUT BITS PLUS FOLLOWING OPPOSITE BITS. */

void bit_plus_follow( int bit )
{   output_bit(bit);                            /* Output the bit.          */
    while (bits_to_follow>0) {
        output_bit(!bit);                       /* Output bits_to_follow    */
        bits_to_follow -= 1;                    /* opposite bits. Set       */
    }                                           /* bits_to_follow to zero.  */
}

int main( int argc, char *argv[] )
{
    int ticker = 0;

    fprintf( stderr, "Arithmetic coding on " );
    if ( argc > 1 ) {
        freopen( argv[ 1 ], "rb", stdin );
        fprintf( stderr, "%s", argv[ 1 ] );
    } else
        fprintf( stderr, "stdin" );
    fprintf( stderr, " to " );
    if ( argc > 2 ) {
        freopen( argv[ 2 ], "wb", stdout );
        fprintf( stderr, "%s", argv[ 2 ] );
    } else
        fprintf( stderr, "stdout" );
    fprintf( stderr, "\n" );
#if !defined( unix )
    setmode( fileno( stdin ), O_BINARY );
    setmode( fileno( stdout ), O_BINARY );
#endif
    start_model();                              /* Set up other modules.    */
    start_outputing_bits();
    start_encoding();
    for (;;) {                                  /* Loop through characters. */
        int ch; int symbol;
        if ( ( ticker++ % 1024 ) == 0 )
            putc( '.', stderr );
        ch = getc(stdin);                       /* Read the next character. */
        if (ch==EOF) break;                     /* Exit loop on end-of-file.*/
        symbol = char_to_index[ch];             /* Translate to an index.   */
        encode_symbol(symbol,cum_freq);         /* Encode that symbol.      */
        update_model(symbol);                   /* Update the model.        */
    }
    encode_symbol(EOF_symbol,cum_freq);         /* Encode the EOF symbol.   */
    done_encoding();                            /* Send the last few bits.  */
    done_outputing_bits();
    putc( '\n', stderr );
    return 0;
}

/* ARITHMETIC ENCODING ALGORITHM. */


/* START ENCODING A STREAM OF SYMBOLS. */

void start_encoding( void )
{   low = 0;                                    /* Full code range.         */
    high = Top_value;
    bits_to_follow = 0;                         /* No bits to follow next.  */
}


/* ENCODE A SYMBOL. */

void encode_symbol(int symbol,int cum_freq[] )
{   long range;                 /* Size of the current code region          */
    range = (long)(high-low)+1;
    high = low +                                /* Narrow the code region   */
      (range*cum_freq[symbol-1])/cum_freq[0]-1; /* to that allotted to this */
    low = low +                                 /* symbol.                  */
      (range*cum_freq[symbol])/cum_freq[0];
    for (;;) {                                  /* Loop to output bits.     */
        if (high<Half) {
            bit_plus_follow(0);                 /* Output 0 if in low half. */
        }
        else if (low>=Half) {                   /* Output 1 if in high half.*/
            bit_plus_follow(1);
            low -= Half;
            high -= Half;                       /* Subtract offset to top.  */
        }
        else if (low>=First_qtr                 /* Output an opposite bit   */
              && high<Third_qtr) {              /* later if in middle half. */
            bits_to_follow += 1;
            low -= First_qtr;                   /* Subtract offset to middle*/
            high -= First_qtr;
        }
        else break;                             /* Otherwise exit loop.     */
        low = 2*low;
        high = 2*high+1;                        /* Scale up code range.     */
    }
}


/* FINISH ENCODING THE STREAM. */

void done_encoding( void )
{   bits_to_follow += 1;                        /* Output two bits that     */
    if (low<First_qtr) bit_plus_follow(0);      /* select the quarter that  */
    else bit_plus_follow(1);                    /* the current code range   */
}                                               /* contains.                */


/* THE ADAPTIVE SOURCE MODEL */

/* INITIALIZE THE MODEL. */

void start_model( void )
{   int i;
    for (i = 0; i<No_of_chars; i++) {           /* Set up tables that       */
        char_to_index[i] = i+1;                 /* translate between symbol */
        index_to_char[i+1] = (unsigned char) i; /* indexes and characters.  */
    }
    for (i = 0; i<=No_of_symbols; i++) {        /* Set up initial frequency */
        freq[i] = 1;                            /* counts to be one for all */
        cum_freq[i] = No_of_symbols-i;          /* symbols.                 */
    }
    freq[0] = 0;                                /* Freq[0] must not be the  */
}                                               /* same as freq[1].         */


/* UPDATE THE MODEL TO ACCOUNT FOR A NEW SYMBOL. */

void update_model( int symbol )
{   int i;                      /* New index for symbol                     */
    if (cum_freq[0]==Max_frequency) {           /* See if frequency counts  */
        int cum;                                /* are at their maximum.    */
        cum = 0;
        for (i = No_of_symbols; i>=0; i--) {    /* If so, halve all the     */
            freq[i] = (freq[i]+1)/2;            /* counts (keeping them     */
            cum_freq[i] = cum;                  /* non-zero).               */
            cum += freq[i];
        }
    }
    for (i = symbol; freq[i]==freq[i-1]; i--) ; /* Find symbol's new index. */
    if (i<symbol) {
        int ch_i, ch_symbol;
        ch_i = index_to_char[i];                /* Update the translation   */
        ch_symbol = index_to_char[symbol];      /* tables if the symbol has */
        index_to_char[i] = (unsigned char) ch_symbol;           /* moved.                   */
        index_to_char[symbol] = (unsigned char) ch_i;
        char_to_index[ch_i] = symbol;
        char_to_index[ch_symbol] = i;
    }
    freq[i] += 1;                               /* Increment the frequency  */
    while (i>0) {                               /* count for the symbol and */
        i -= 1;                                 /* update the cumulative    */
        cum_freq[i] += 1;                       /* frequencies.             */
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -