📄 arith_encode.c

📁 压缩包里包含算术码压缩和解压的源代码
💻 C
字号:
/* This is the arithmetic subroutine with shorter integer precision *//************************** Start of ARITH.C ************************* * */#include <stdio.h>#include <stdlib.h>#include "arith.h"/* * These four variables define the current state of the arithmetic * coder/decoder.  They are assumed to be 16 bits long.  Note that * by declaring them as ints, they will actually be 16 bits * on most 80X86 and 680X0 machines, as well as VAXen. */static unsigned short int code;  /* The present input code value       */static unsigned short int low;   /* Start of the current code range    */static unsigned short int high;  /* End of the current code range      */long underflow_bits;             /* Number of underflow bits pending   *//*unsigned int totals[NO_WORDS+1]; */#define PACIFIER_COUNT 2047
/*
 * Map an input byte to its model index: 'A'..'Z' -> 0..25, anything else
 * -> 26 (the slot used for the space character).  Both passes over the
 * input use this mapping so the model always matches the encoded symbols.
 */
static int symbol_index_for_char(int ch)
{
	return (ch >= 'A' && ch <= 'Z') ? ch - 'A' : 26;
}

/*
 * Encoder driver.
 *
 * Reads "test.txt", builds a static frequency model over 27 symbols
 * (A-Z plus one slot for everything else), asks for an output file name,
 * writes the model and the symbol count as a header, then arithmetic-codes
 * every input byte into the output bit file.
 *
 * Returns 0 on success and EXIT_FAILURE when a file cannot be opened, the
 * file name cannot be read, or the header cannot be written.
 */
int main(void)
{
	enum {
		NUM_SYMBOLS  = 27,
		/*
		 * The 16-bit coder can only resolve cumulative totals of at most
		 * 14 bits; a larger scale can give a symbol an empty code range.
		 */
		MAX_SCALE    = 16383,
		/* Leave room for bumping every truncated-to-zero frequency to 1. */
		SCALE_TARGET = MAX_SCALE - NUM_SYMBOLS
	};
	int count[NUM_SYMBOLS] = {0};            /* occurrences of each symbol   */
	unsigned short int esti_pro[NUM_SYMBOLS];/* scaled frequency table       */
	unsigned int sum = 0;                    /* total number of symbols      */
	char outputbitfilename[256];             /* name of the encoded file     */
	BIT_FILE *outputbitfile;
	FILE *fp;
	int ch;                                  /* int so EOF is representable  */
	int i;

	fp = fopen("test.txt", "r");             /* file to be encoded */
	if (fp == NULL) {
		perror("test.txt");
		return EXIT_FAILURE;
	}

	/* First pass: count the symbols.  Stop at EOF instead of counting it. */
	while ((ch = getc(fp)) != EOF)
		count[symbol_index_for_char(ch)]++;
	rewind(fp);                              /* back to the start for pass 2 */

	for (i = 0; i < NUM_SYMBOLS; i++)
		sum += (unsigned int) count[i];

	/* Scale the counts into the frequency table stored in the header. */
	for (i = 0; i < NUM_SYMBOLS; i++) {
		if (sum == 0) {
			esti_pro[i] = 0;                 /* empty input: avoid 0/0 */
			continue;
		}
		esti_pro[i] = (unsigned short int)((1.0 * count[i] / sum) * SCALE_TARGET);
		/* A symbol that occurs must keep a non-empty code range. */
		if (count[i] > 0 && esti_pro[i] == 0)
			esti_pro[i] = 1;
	}

	initialize_arithmetic_encoder();
	printf("输入编码后文件的名字(不超过十个字符)eg.\\bit.txt :");
	/* Bounded read: the field width keeps the name inside the buffer. */
	if (scanf("%255s", outputbitfilename) != 1) {
		fprintf(stderr, "no output file name given\n");
		fclose(fp);
		return EXIT_FAILURE;
	}

	outputbitfile = OpenOutputBitFile(outputbitfilename);
	if (outputbitfile == NULL || outputbitfile->file == NULL) {
		perror(outputbitfilename);
		free(outputbitfile);
		fclose(fp);
		return EXIT_FAILURE;
	}

	/*
	 * Header: each frequency as one int-sized word (the layout putw
	 * produces), followed by the symbol count, so the decoder can rebuild
	 * the model and knows how many symbols to decode.
	 */
	for (i = 0; i < NUM_SYMBOLS; i++) {
		int word = esti_pro[i];
		fwrite(&word, sizeof word, 1, outputbitfile->file);
	}
	fwrite(&sum, sizeof sum, 1, outputbitfile->file);
	if (ferror(outputbitfile->file)) {
		fprintf(stderr, "cannot write header to %s\n", outputbitfilename);
		CloseOutputBitFile(outputbitfile);
		fclose(fp);
		return EXIT_FAILURE;
	}

	/* Second pass: encode every input byte with the same mapping. */
	while ((ch = getc(fp)) != EOF)
		arith_encode(outputbitfile, esti_pro, NUM_SYMBOLS,
		             symbol_index_for_char(ch));

	flush_arithmetic_encoder(outputbitfile); /* emit the bits still pending */
	CloseOutputBitFile(outputbitfile);
	fclose(fp);
	printf("编码完毕\n");

	return 0;
}

BIT_FILE *OpenOutputBitFile(char *name ){    BIT_FILE *bit_file;    bit_file = (BIT_FILE *) calloc( 1, sizeof( BIT_FILE ) );    if ( bit_file == NULL )        return( bit_file );    bit_file->file = fopen( name, "wb" );    bit_file->rack = 0;    bit_file->mask = 0x80;    bit_file->pacifier_counter = 0;    return( bit_file );}void CloseOutputBitFile(BIT_FILE *bit_file ){    if ( bit_file->mask != 0x80 )        if ( putc( bit_file->rack, bit_file->file ) != bit_file->rack )           {            printf( "Fatal error in CloseBitFile!\n" );            exit(1);           }    fclose( bit_file->file );    free( (char *) bit_file );}void OutputBit(BIT_FILE *bit_file, int bit){    if ( bit )        bit_file->rack |= bit_file->mask;    bit_file->mask >>= 1;    if ( bit_file->mask == 0 ) {	if ( putc( bit_file->rack, bit_file->file ) != bit_file->rack )           {	    printf( "Fatal error in OutputBit!\n" );            exit(1);           }	else        if ( ( bit_file->pacifier_counter++ & PACIFIER_COUNT ) == 0 )           ;	/*	putc( '.', stdout ); */	bit_file->rack = 0;	bit_file->mask = 0x80;    }}/* * Everything from here down define the arithmetic coder section * of the program. *//* * This routine must be called to initialize the encoding process. * The high register is initialized to all 1s, and it is assumed that * it has an infinite string of 1s to be shifted into the lower bit * positions when needed. */void initialize_arithmetic_encoder(){    low = 0;    high = 0xffff;    underflow_bits = 0;}/* * At the end of the encoding process, there are still significant * bits left in the high and low registers.  We output two bits, * plus as many underflow bits as are necessary. */void flush_arithmetic_encoder(BIT_FILE *stream ){    OutputBit( stream, low & 0x4000 );    underflow_bits++;    while ( underflow_bits-- > 0 )        OutputBit( stream, ~low & 0x4000 );  /*  OutputBits( stream, 0L, 16 ); */}/* * This routine is called to encode a symbol.  
The symbol is passed * in the SYMBOL structure as a low count, a high count, and a range, * instead of the more conventional probability ranges.  The encoding * process takes two steps.  First, the values of high and low are * updated to take into account the range restriction created by the * new symbol.  Then, as many bits as possible are shifted out to * the output stream.  Finally, high and low are stable again and * the routine returns. The return value is the output bits by this call. */int encode_symbol(BIT_FILE *stream, SYMBOL *s){    long range;    int output_bits;/* * These three lines rescale high and low for the new symbol. */    range = (long) ( high-low ) + 1;    high = low + (unsigned short int)                 (( range * s->high_count ) / s->scale - 1 );    low = low + (unsigned short int)                 (( range * s->low_count ) / s->scale );/* * This loop turns out new bits until high and low are far enough * apart to have stabilized. */    output_bits=0;    for ( ; ; ) {/* * If this test passes, it means that the MSDigits match, and can * be sent to the output stream. */        if ( ( high & 0x8000 ) == ( low & 0x8000 ) ) {            OutputBit( stream, high & 0x8000 );            output_bits++;            while ( underflow_bits > 0 ) {                OutputBit( stream, ~high & 0x8000 );                output_bits++;                underflow_bits--;            }        }/* * If this test passes, the numbers are in danger of underflow, because * the MSDigits don't match, and the 2nd digits are just one apart. */        else if ( ( low & 0x4000 ) && !( high & 0x4000 )) {            underflow_bits += 1;            low &= 0x3fff;            high |= 0x4000;        } else            return(output_bits);        low <<= 1;        high <<= 1;        high |= 1;    }}/******************************************************************* following routines are related to decoding of arithmatic codec. 
********************************************************************/BIT_FILE *OpenInputBitFile(char *name){    BIT_FILE *bit_file;    bit_file = (BIT_FILE *) calloc( 1, sizeof( BIT_FILE ) );    if ( bit_file == NULL )	return( bit_file );    bit_file->file = fopen( name, "rb" );    bit_file->rack = 0;    bit_file->mask = 0x80;    bit_file->pacifier_counter = 0;    return( bit_file );}void CloseInputBitFile(BIT_FILE *bit_file){    fclose( bit_file->file );    free( (char *) bit_file );}int InputBit(BIT_FILE *bit_file){    int value;        if ( bit_file->mask == 0x80 ) {        bit_file->rack = getc( bit_file->file );        if ( bit_file->rack == EOF )           {/*            printf( "Fatal error in InputBit!\n" );             exit(1); */           }    if ( ( bit_file->pacifier_counter++ & PACIFIER_COUNT ) == 0 )          ;	  /*  putc( '.', stdout ); */    }    value = bit_file->rack & bit_file->mask;    bit_file->mask >>= 1;    if ( bit_file->mask == 0 )	bit_file->mask = 0x80;    return( value ? 1 : 0 );}/* * When decoding, this routine is called to figure out which symbol * is presently waiting to be decoded.  This routine expects to get * the current model scale in the s->scale parameter, and it returns * a count that corresponds to the present floating point code: * *  code = count / s->scale */short int get_current_count( SYMBOL *s ){    long range;    short int count;    range = (long) ( high - low ) + 1;    count = (short int)            ((((long) ( code - low ) + 1 ) * s->scale-1 ) / range );    return( count );}/* * During decompression, we have to search through the table until * we find the symbol that straddles the "count" parameter.  When * it is found, it is returned. The reason for also setting the * high count and low count is so that symbol can be properly removed * from the arithmetic coded input. 
*/int convert_symbol_to_int(short int count, SYMBOL *s, short int *totals, int num_codeword){    int c;    for ( c = num_codeword ; count < *(totals+c) ; c-- )	;    s->high_count = totals[ c + 1 ];    s->low_count = totals[ c ];    return( c );}/* * This routine is called to initialize the state of the arithmetic * decoder.  This involves initializing the high and low registers * to their conventional starting values, plus reading the first * 16 bits from the input stream into the code value. */int initialize_arithmetic_decoder( BIT_FILE *stream ){    int i;    int input_bits;    code = 0;    input_bits=0;    for ( i = 0 ; i < 16 ; i++ ) {        code <<= 1;        code += InputBit( stream );        input_bits++;    }    low = 0;    high = 0xffff;    return(input_bits);}/* * Just figuring out what the present symbol is doesn't remove * it from the input bit stream.  After the character has been * decoded, this routine has to be called to remove it from the * input stream. */int remove_symbol_from_stream( BIT_FILE *stream,SYMBOL *s){    long range;    int input_bits;/* * First, the range is expanded to account for the symbol removal. */    range = (long)( high - low ) + 1;    high = low + (unsigned short int)                 (( range * s->high_count ) / s->scale - 1 );    low = low + (unsigned short int)                 (( range * s->low_count ) / s->scale );/* * Next, any possible bits are shipped out. */    input_bits=0;    for ( ; ; ) {/* * If the MSDigits match, the bits will be shifted out. */        if ( ( high & 0x8000 ) == ( low & 0x8000 ) ) {        }/* * Else, if underflow is threatening, shift out the 2nd MSDigit. */        else if ((low & 0x4000) == 0x4000  && (high & 0x4000) == 0 ) {            code ^= 0x4000;            low   &= 0x3fff;            high  |= 0x4000;        } else /* * Otherwise, nothing can be shifted out, so I return. 
*/            return(input_bits);        low <<= 1;        high <<= 1;        high |= 1;        code <<= 1;        code += InputBit( stream );        input_bits++;    }}/*********************************************************************  arith_encode is the combination routine that will be easy to   use. The only thing you need to do is to get   the distribution of random variable X before calling this routine. 
  The discrete distribution array "esti_prob[]", the number of symbols
  in the array "num_codeword" and current encoding value "cur_index"
  must be transferred to this routine.   The return value is the output bits by this call**********************************************************************/ int arith_encode(BIT_FILE *OutBitp, unsigned short int *esti_prob, 
                       int num_codeword, int cur_index)
{
  unsigned short int cur_start, cur_end, cur_counts, total_range;
  SYMBOL	cs;
  int		k;
  
  cur_start=0;
  cur_end=0;
  total_range=0;
  
  for(k=0; k < num_codeword; k++)
  {
    cur_counts=*(esti_prob+k);
    cur_start=cur_end;
    cur_end+=cur_counts;
    if(k==cur_index)
    {
      cs.low_count=cur_start;
      cs.high_count=cur_end;
    }  
    total_range+=cur_counts;
  }
  
  cs.scale=total_range;
  return(encode_symbol(OutBitp, &cs));
}
/*********************************************************************
 * arith_decode is the combination routine that is easy to use.  The
 * only thing you need is the distribution of the random variable X
 * before calling this routine.
 *
 *   InBitp       - input bit file
 *   cs           - working SYMBOL; overwritten with the decoded
 *                  symbol's low count, high count and scale
 *   esti_prob    - discrete distribution (one count per symbol)
 *   num_codeword - number of symbols in esti_prob
 *   totals       - working array with room for num_codeword + 1
 *                  entries; filled with the cumulative counts
 *
 * Returns the decoded symbol index, or -1 without consuming any input
 * when the distribution is empty or sums to zero (there is nothing to
 * decode and the scale would be used as a zero divisor).
 *********************************************************************/
int arith_decode(BIT_FILE *InBitp, SYMBOL *cs, unsigned short int *esti_prob,
                 int num_codeword, short int *totals)
{
  unsigned short int total_range;
  short int cur_count;
  int recover_index, k;

  total_range = 0;
  totals[0] = 0;

  /* Build the cumulative count table and the overall scale. */
  for (k = 0; k < num_codeword; k++)
  {
    total_range += esti_prob[k];
    totals[k + 1] = totals[k] + esti_prob[k];
  }

  if (total_range == 0)
    return -1;

  cs->scale = total_range;
  cur_count = get_current_count(cs);
  recover_index = convert_symbol_to_int(cur_count, cs, totals, num_codeword);
  remove_symbol_from_stream(InBitp, cs);
  return recover_index;
}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -