📄 arith_encode.c
字号:
/* This is the arithmetic subroutine with shorter integer precision *//************************** Start of ARITH.C ************************* * */#include <stdio.h>#include <stdlib.h>#include "arith.h"/* * These four variables define the current state of the arithmetic * coder/decoder. They are assumed to be 16 bits long. Note that * by declaring them as ints, they will actually be 16 bits * on most 80X86 and 680X0 machines, as well as VAXen. */static unsigned short int code; /* The present input code value */static unsigned short int low; /* Start of the current code range */static unsigned short int high; /* End of the current code range */long underflow_bits; /* Number of underflow bits pending *//*unsigned int totals[NO_WORDS+1]; */#define PACIFIER_COUNT 2047
main()
{
int count[27]={0},i,cur_index; /*定义计算各个单词的个数的数组*/
unsigned short int esti_pro[27]; /*定义概率空间数组*/
unsigned int sum=0; /*定义单词总个数变量*/
char codeword[27],ch,outputbitfilename[10]; /*定义样本单词数组,比特文件名字数组*/
BIT_FILE *outputbitfile; /*定义结构体指针*/
FILE *fp;
for(i=0;i<26;i++) codeword[i]='A'+i; /*样本单词*/
codeword[26]=' ';
if((fp=fopen("test.txt","r"))==NULL) return 0;/*打开要编码的文件*/
while(!feof(fp)) /*统计各种单词的个数*/
{
ch=getc(fp);
for(i=0;i<27;i++)
if(ch==codeword[i]) count[i]++;
}
rewind(fp); /*把指针重新放到文件开头处*/
for(i=0;i<27;i++) sum+=count[i]; /*单词总数*/
for(i=0;i<27;i++) esti_pro[i]=(unsigned short int)((1.0*count[i]/sum)*20000);
/*概率空间*/
initialize_arithmetic_encoder();
printf("输入编码后文件的名字(不超过十个字符)eg.\\bit.txt :");
scanf("%s",outputbitfilename); /*输入编码后文件的名字*/
outputbitfile=OpenOutputBitFile(outputbitfilename);/*打开这个文件并返回头指针*/
for(i=0;i<27;i++) putw(esti_pro[i],outputbitfile->file);/*把各个字符出现的概率放进这个文件中,以便解码时读取*/
fwrite(&sum,4,1,outputbitfile->file); /*把统计字符的总个数放入到这个文件中,以便解码时读取*/
while(!feof(fp))
{ch=getc(fp);
if(ch>='A'&&ch<='Z') /*读取字符并编码*/
cur_index=ch-'A';
else cur_index=26;
arith_encode(outputbitfile,esti_pro,27,cur_index);
}
flush_arithmetic_encoder(outputbitfile);/*把编码后没有溢出的那些比特冲洗出来*/
CloseOutputBitFile(outputbitfile);/*关闭文件指针*/
fclose(fp); /*关闭文件指针*/
printf("编码完毕\n");
}
BIT_FILE *OpenOutputBitFile(char *name ){ BIT_FILE *bit_file; bit_file = (BIT_FILE *) calloc( 1, sizeof( BIT_FILE ) ); if ( bit_file == NULL ) return( bit_file ); bit_file->file = fopen( name, "wb" ); bit_file->rack = 0; bit_file->mask = 0x80; bit_file->pacifier_counter = 0; return( bit_file );}void CloseOutputBitFile(BIT_FILE *bit_file ){ if ( bit_file->mask != 0x80 ) if ( putc( bit_file->rack, bit_file->file ) != bit_file->rack ) { printf( "Fatal error in CloseBitFile!\n" ); exit(1); } fclose( bit_file->file ); free( (char *) bit_file );}void OutputBit(BIT_FILE *bit_file, int bit){ if ( bit ) bit_file->rack |= bit_file->mask; bit_file->mask >>= 1; if ( bit_file->mask == 0 ) { if ( putc( bit_file->rack, bit_file->file ) != bit_file->rack ) { printf( "Fatal error in OutputBit!\n" ); exit(1); } else if ( ( bit_file->pacifier_counter++ & PACIFIER_COUNT ) == 0 ) ; /* putc( '.', stdout ); */ bit_file->rack = 0; bit_file->mask = 0x80; }}/* * Everything from here down define the arithmetic coder section * of the program. *//* * This routine must be called to initialize the encoding process. * The high register is initialized to all 1s, and it is assumed that * it has an infinite string of 1s to be shifted into the lower bit * positions when needed. */void initialize_arithmetic_encoder(){ low = 0; high = 0xffff; underflow_bits = 0;}/* * At the end of the encoding process, there are still significant * bits left in the high and low registers. We output two bits, * plus as many underflow bits as are necessary. */void flush_arithmetic_encoder(BIT_FILE *stream ){ OutputBit( stream, low & 0x4000 ); underflow_bits++; while ( underflow_bits-- > 0 ) OutputBit( stream, ~low & 0x4000 ); /* OutputBits( stream, 0L, 16 ); */}/* * This routine is called to encode a symbol. The symbol is passed * in the SYMBOL structure as a low count, a high count, and a range, * instead of the more conventional probability ranges. The encoding * process takes two steps. First, the values of high and low are * updated to take into account the range restriction created by the * new symbol. Then, as many bits as possible are shifted out to * the output stream. Finally, high and low are stable again and * the routine returns. The return value is the output bits by this call. */int encode_symbol(BIT_FILE *stream, SYMBOL *s){ long range; int output_bits;/* * These three lines rescale high and low for the new symbol. */ range = (long) ( high-low ) + 1; high = low + (unsigned short int) (( range * s->high_count ) / s->scale - 1 ); low = low + (unsigned short int) (( range * s->low_count ) / s->scale );/* * This loop turns out new bits until high and low are far enough * apart to have stabilized. */ output_bits=0; for ( ; ; ) {/* * If this test passes, it means that the MSDigits match, and can * be sent to the output stream. */ if ( ( high & 0x8000 ) == ( low & 0x8000 ) ) { OutputBit( stream, high & 0x8000 ); output_bits++; while ( underflow_bits > 0 ) { OutputBit( stream, ~high & 0x8000 ); output_bits++; underflow_bits--; } }/* * If this test passes, the numbers are in danger of underflow, because * the MSDigits don't match, and the 2nd digits are just one apart. */ else if ( ( low & 0x4000 ) && !( high & 0x4000 )) { underflow_bits += 1; low &= 0x3fff; high |= 0x4000; } else return(output_bits); low <<= 1; high <<= 1; high |= 1; }}/******************************************************************* following routines are related to decoding of arithmatic codec. ********************************************************************/BIT_FILE *OpenInputBitFile(char *name){ BIT_FILE *bit_file; bit_file = (BIT_FILE *) calloc( 1, sizeof( BIT_FILE ) ); if ( bit_file == NULL ) return( bit_file ); bit_file->file = fopen( name, "rb" ); bit_file->rack = 0; bit_file->mask = 0x80; bit_file->pacifier_counter = 0; return( bit_file );}void CloseInputBitFile(BIT_FILE *bit_file){ fclose( bit_file->file ); free( (char *) bit_file );}int InputBit(BIT_FILE *bit_file){ int value; if ( bit_file->mask == 0x80 ) { bit_file->rack = getc( bit_file->file ); if ( bit_file->rack == EOF ) {/* printf( "Fatal error in InputBit!\n" ); exit(1); */ } if ( ( bit_file->pacifier_counter++ & PACIFIER_COUNT ) == 0 ) ; /* putc( '.', stdout ); */ } value = bit_file->rack & bit_file->mask; bit_file->mask >>= 1; if ( bit_file->mask == 0 ) bit_file->mask = 0x80; return( value ? 1 : 0 );}/* * When decoding, this routine is called to figure out which symbol * is presently waiting to be decoded. This routine expects to get * the current model scale in the s->scale parameter, and it returns * a count that corresponds to the present floating point code: * * code = count / s->scale */short int get_current_count( SYMBOL *s ){ long range; short int count; range = (long) ( high - low ) + 1; count = (short int) ((((long) ( code - low ) + 1 ) * s->scale-1 ) / range ); return( count );}/* * During decompression, we have to search through the table until * we find the symbol that straddles the "count" parameter. When * it is found, it is returned. The reason for also setting the * high count and low count is so that symbol can be properly removed * from the arithmetic coded input. */int convert_symbol_to_int(short int count, SYMBOL *s, short int *totals, int num_codeword){ int c; for ( c = num_codeword ; count < *(totals+c) ; c-- ) ; s->high_count = totals[ c + 1 ]; s->low_count = totals[ c ]; return( c );}/* * This routine is called to initialize the state of the arithmetic * decoder. This involves initializing the high and low registers * to their conventional starting values, plus reading the first * 16 bits from the input stream into the code value. */int initialize_arithmetic_decoder( BIT_FILE *stream ){ int i; int input_bits; code = 0; input_bits=0; for ( i = 0 ; i < 16 ; i++ ) { code <<= 1; code += InputBit( stream ); input_bits++; } low = 0; high = 0xffff; return(input_bits);}/* * Just figuring out what the present symbol is doesn't remove * it from the input bit stream. After the character has been * decoded, this routine has to be called to remove it from the * input stream. */int remove_symbol_from_stream( BIT_FILE *stream,SYMBOL *s){ long range; int input_bits;/* * First, the range is expanded to account for the symbol removal. */ range = (long)( high - low ) + 1; high = low + (unsigned short int) (( range * s->high_count ) / s->scale - 1 ); low = low + (unsigned short int) (( range * s->low_count ) / s->scale );/* * Next, any possible bits are shipped out. */ input_bits=0; for ( ; ; ) {/* * If the MSDigits match, the bits will be shifted out. */ if ( ( high & 0x8000 ) == ( low & 0x8000 ) ) { }/* * Else, if underflow is threatening, shift out the 2nd MSDigit. */ else if ((low & 0x4000) == 0x4000 && (high & 0x4000) == 0 ) { code ^= 0x4000; low &= 0x3fff; high |= 0x4000; } else /* * Otherwise, nothing can be shifted out, so I return. */ return(input_bits); low <<= 1; high <<= 1; high |= 1; code <<= 1; code += InputBit( stream ); input_bits++; }}/********************************************************************* arith_encode is the combination routine that will be easy to use. The only thing you need to do is to get the distribution of random variable X before calling this routine.
The discrete distribution array "esti_prob[]", the number of symbols
in the array "num_codeword" and current encoding value "cur_index"
must be transferred to this routine. The return value is the output bits by this call**********************************************************************/ int arith_encode(BIT_FILE *OutBitp, unsigned short int *esti_prob,
int num_codeword, int cur_index)
{
unsigned short int cur_start, cur_end, cur_counts, total_range;
SYMBOL cs;
int k;
cur_start=0;
cur_end=0;
total_range=0;
for(k=0; k < num_codeword; k++)
{
cur_counts=*(esti_prob+k);
cur_start=cur_end;
cur_end+=cur_counts;
if(k==cur_index)
{
cs.low_count=cur_start;
cs.high_count=cur_end;
}
total_range+=cur_counts;
}
cs.scale=total_range;
return(encode_symbol(OutBitp, &cs));
}
/********************************************************************* arith_decode is the combination routine that will be easy to use. The only thing you need to do is to get
the distribution of random variable X before calling this routine.
The following parameters should be transferred to this routine:
The discrete distribution array "esti_prob[]",
the number of symbols in the array "num_codeword",
a working variable "cs" and a working array
"totals[]" which is of the same size as "esti_prob[]".
The return value is the decoded value.**********************************************************************/ int arith_decode(BIT_FILE *InBitp, SYMBOL *cs, unsigned short int *esti_prob,
int num_codeword, short int *totals)
{
unsigned short int temp_counts, total_range;
short int cur_count;
int recover_index, k;
total_range=0;
*totals=0;
for(k=0; k < num_codeword; k++)
{
temp_counts=*(esti_prob+k);
total_range+=temp_counts;
*(totals+k+1)=*(totals+k)+temp_counts;
}
cs->scale=total_range;
cur_count=get_current_count(cs);
recover_index=convert_symbol_to_int(cur_count, cs, totals, num_codeword);
remove_symbol_from_stream(InBitp, cs);
return(recover_index);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -