📄 stats.c
字号:
/******************************************************************************File: stats.cAuthors: John Carpinelli (johnfc@ecr.mu.oz.au) Wayne Salamonsen (wbs@mundil.cs.mu.oz.au) Lang Stuiver (langs@cs.mu.oz.au) Andrew Turpin (aht@cs.mu.oz.au)Purpose: Data compression and revised arithmetic coding method. Including modified Fenwick structure so coding is on-line.Based on: A. Moffat, R. Neal, I.H. Witten, "Arithmetic Coding Revisited", Proc. IEEE Data Compression Conference, Snowbird, Utah, March 1995. A. Moffat, "An improved data structure for cummulative probability tables", Software-Practice and Experience, 1998. To appear.Copyright 1995 John Carpinelli and Wayne Salamonsen, All Rights Reserved.Copyright 1996, Lang Stuiver. All Rights Reserved.Copyright 1998, Andrew Turpin. All Rights Reserved.These programs are supplied free of charge for research purposes only,and may not sold or incorporated into any commercial product. There isABSOLUTELY NO WARRANTY of any sort, nor any undertaking that they arefit for ANY PURPOSE WHATSOEVER. Use them at your own risk. If you dohappen to find a bug, or have modifications to suggest, please reportthe same to Alistair Moffat, alistair@cs.mu.oz.au. The copyrightnotice above and this statement of conditions must remain an integralpart of each and every copy made of these files.******************************************************************************/#include <stdio.h>#include <stdlib.h>#include "arith.h"#include "stats.h"#ifdef RCSIDstatic char rcsid[] = "$Id: stats.new.c,v 1.1 1998/09/18 00:36:26 aht Exp aht $";#endif#ifdef VARY_NBITS static freq_value Max_frequency;#else# define Max_frequency ((freq_value) 1 << F_bits)#endif/* MOST_PROB_AT_END: * Put most probable symbol at end of range (for more accurate approximations) * This option influences the compressed data, but is not recorded in the * data stream. It is for experimental purposes and it is recommended * that it is left #defined. */#define MOST_PROB_AT_END#ifdef MOST_PROB_AT_END char *stats_desc = "Cumulative stats with Moffat tree (MPS at front)";#else char *stats_desc = "Cumulative stats with Moffat tree";#endifstatic void get_interval(context *pContext, freq_value *pLow, freq_value *pHigh, int symbol);static void halve_context(context *pContext);/* INCR_SYMBOL_PROB increments the specified symbol probability by the 'incr' * amount. If the most probable symbol is maintined at the end of the coding * range (MOST_PROB_AT_END #defined), then both INCR_SYMBOL_PROB_ACTUAL and * INCR_SYMBOL_PROB_MPS are used. Otherwise, just INCR_SYMBOL_PROB_ACTUAL * is used. *//* INCR_SYMBOL_PROB_ACTUAL: Increment 'symbol' in 'pContext' by inc1' */#define INCR_SYMBOL_PROB_ACTUAL(pContext, symbol, inc1) \ freq_value _inc = (inc1); \ freq_value *_tree = pContext->tree; \ int p=symbol; \ /* Increment stats */ \ while (p > 0) { \ _tree[p] += _inc; \ p = BACK(p); \ } \ pContext->total += _inc; \/* * INCR_SYMBOL_PROB_MPS: Update most frequent symbol, assuming 'symbol' * in 'pContext' was just incremented. *//* Assumes _inc already set by macro above *//* And _low and _high set before also */#define INCR_SYMBOL_PROB_MPS(pContext, symbol) \ { \ if (symbol == pContext->most_freq_symbol) \ pContext->most_freq_count += _inc; \ else if ((_high)-(_low)+(_inc) > pContext->most_freq_count) \ { pContext->most_freq_symbol = symbol; \ pContext->most_freq_count = (_high) - (_low) + (_inc); \ pContext->most_freq_pos = _low; \ } \ else if (symbol < pContext->most_freq_symbol) \ pContext->most_freq_pos += _inc; \ }/* * Define INCR_SYMBOL_PROB. Definition depends on whether most probable * symbol needs to be remembered */#ifdef MOST_PROB_AT_END# define INCR_SYMBOL_PROB(pContext, symbol, low1, high1, inc1) \ do { \ freq_value _low = low1; \ freq_value _high = high1; \ INCR_SYMBOL_PROB_ACTUAL(pContext, symbol, inc1) \ INCR_SYMBOL_PROB_MPS (pContext, symbol) \ } while (0)#else# define INCR_SYMBOL_PROB(pContext, symbol, low1, high1, inc1) \ do { \ INCR_SYMBOL_PROB_ACTUAL(pContext, symbol, inc1) \ } while (0)#endif#define MIN(a,b) ((a) < (b) ? (a) : (b))#define GET_COUNT(pContext, symbol, c) \ do { \ if ((symbol) & 1) c = pContext->tree[symbol]; \ else { \ int q = symbol + 1; \ int z = MIN(FORW(symbol), pContext->max_length + 1); \ c = pContext->tree[symbol]; \ while (q < z) { \ c -= pContext->tree[q]; \ q = FORW(q); \ } \ } \ } while (0)/* * Zero frequency probability is specified as a count out of the total * frequency count. It is stored as the first item in the tree (item * 1). Luckily, a Moffat tree is defined such that we can read the * value of item 1 directly (pContext->tree[1]), although it cannot be * updated directly. After each symbol is coded, adjust_zero_freq() is * called to ensure that the zero frequency probability stored in the * tree is still accurate (and changes it if it has changed). */#define adjust_zero_freq(pContext) \do { freq_value diff; \ diff = ZERO_FREQ_PROB(pContext) - pContext->tree[1]; \ if (diff != 0) \ INCR_SYMBOL_PROB(pContext, 1, 0, pContext->tree[1], diff); \ } while (0)/* * ZERO_FREQ_PROB defines the count for the escape symbol. We * implemented a variation of the XC method (which we call AX). We * create a special escape symbol, which we keep up to date with the * count of "number of singletons + 1". To achieve this, but still be * efficient with static contexts, we falsely increment the number of * singletons at the start of modelling for dynamic contexts, and keep * it at 0 for static contexts. This way, nSingletons is always our * zero frequency probability, without the need to check if the context * is static or dynamic (remember that this operation would be done for * each symbol coded). *//* Zero frequency symbol probability for context `ctx' */#define ZERO_FREQ_PROB(ctx) ((freq_value)ctx->nSingletons)void init_zero_freq(context *pContext){ /* nSingletons is now actually nSingletons + 1, but this means we can use nSingletons directly as zero_freq_prob (see above) */ if (pContext->type == DYNAMIC) pContext->nSingletons += pContext->incr; else pContext->nSingletons = 0;}/* * * Create a new frequency table using a binary index tree. * Table may be STATIC or DYNAMIC depending on the type parameter. * DYNAMIC tables may grow above their intial length as new symbols * are installed. * * max_length is set to 2^ceil(log_2 length). * Valid tree indicies are 1..max_length-1. * (max_length, refers to the maximum length of the structure before it * needs to expand) */context *create_context(int length, int type){ context *pContext; int i; int size = 1;#ifdef VARY_NBITS /* Ensure max frequency set up. */ Max_frequency = ((freq_value) 1 << F_bits);#endif /* * increment length to accommodate the fact * that symbol 0 is stored at pos 2 in the array. * (Escape symbol is at pos 1, pos 0 is not used). */ length+=2; /* round length up to next power of two */ while (size < length-1) size = size << 1; /* malloc context structure and array for frequencies */ if (((pContext = (context *) malloc(sizeof(context))) == NULL) || ((pContext->tree = (freq_value *) malloc((size+1)*sizeof(freq_value))) == NULL)) { fprintf(stderr, "stats: not enough memory to create context\n"); exit(1); } pContext->initial_size = size; /* save for purging later */ pContext->length = 1; /* current no. of symbols */ pContext->total = 0; /* total frequency */ pContext->nSymbols = 1; /* count of symbols */ pContext->type = type; /* is context DYNAMIC or STATIC */ pContext->max_length = size; /* no. symbols before growing */ pContext->most_freq_symbol = -1; /* Initially no most_freq_symbol */ pContext->most_freq_count = 0; pContext->most_freq_pos = 0; /* initialise contents of tree array to zero */ for (i = 0; i <= pContext->max_length; i++) pContext->tree[i] = 0; /* increment is initially 2 ^ f */ pContext->incr = (freq_value) 1 << F_bits; pContext->nSingletons = 0; init_zero_freq(pContext); adjust_zero_freq(pContext); return pContext; /* return a pointer to the context */}/* * * install a new symbol in a context's frequency table * returns 0 if successful, TOO_MANY_SYMBOLS or NO_MEMORY if install fails * */int install_symbol(context *pContext, int symbol){ int i; freq_value low, high; /* Increment because first user symbol (symbol 0) is stored at array position 2 */ symbol+=2; /* * if new symbol is greater than current array length then double length * of array */ while (symbol > pContext->max_length) { pContext->tree = (freq_value *) realloc(pContext->tree, (2*pContext->max_length+1) * sizeof(freq_value)); if (pContext->tree == NULL) { fprintf(stderr, "stats: not enough memory to expand context\n"); return NO_MEMORY; } /* clear new part of table to zero */ for (i = pContext->max_length+1; i <= 2*pContext->max_length; i++) pContext->tree[i] = 0; pContext->max_length <<= 1; } /* check that we are not installing too many symbols */ if (((pContext->nSymbols + 1) << 1) >= Max_frequency) /* * cannot install another symbol as all frequencies will * halve to one and an infinite loop will result */ return TOO_MANY_SYMBOLS; if (symbol > pContext->length) /* update length if necessary */ pContext->length = symbol; pContext->nSymbols++; /* increment count of symbols */ get_interval(pContext, &low, &high, symbol); /* update the number of singletons if context is DYNAMIC */ INCR_SYMBOL_PROB(pContext, symbol, low, high, pContext->incr); if (pContext->type == DYNAMIC) pContext->nSingletons += pContext->incr; adjust_zero_freq(pContext); /* halve frequency counts if total greater than Max_frequency */ while (pContext->total > Max_frequency) halve_context(pContext); return 0;}/* install_symbol() *//* * * encode a symbol given its context * the lower and upper bounds are determined using the frequency table, * and then passed on to the coder * if the symbol has zero frequency, code an escape symbol and * return NOT_KNOWN otherwise returns 0 * */int encode(context *pContext, int symbol){ freq_value low, high, low_w, high_w; symbol+=2; if ((symbol > 0) && (symbol <= pContext->max_length)) { if (pContext->most_freq_symbol == symbol) { low = pContext->most_freq_pos; high = low + pContext->most_freq_count; } else get_interval(pContext, &low, &high, symbol); } else low = high = 0; if (low == high) { if (ZERO_FREQ_PROB(pContext) == 0) { fprintf(stderr,"stats: cannot code zero-probability novel symbol"); abort(); exit(1); } /* encode the escape symbol if unknown symbol */ symbol = 1; if (pContext->most_freq_symbol == 1) { low = pContext->most_freq_pos; high = low + pContext->most_freq_count; } else get_interval(pContext, &low, &high, symbol); } /* Shift high and low if required so that most probable symbol * is at the end of the range * (Shifted values are low_w, and high_w, as original values * are needed when updating the stats) */#ifdef MOST_PROB_AT_END if (symbol > pContext->most_freq_symbol) { low_w = low - pContext->most_freq_count; high_w = high - pContext->most_freq_count; } else if (symbol == pContext->most_freq_symbol) { low_w = pContext->total - pContext->most_freq_count; high_w = pContext->total; } else { low_w = low; high_w = high; }#else low_w = low; high_w = high;#endif /* call the coder with the low, high and total for this symbol * (with low_w, high_w: Most probable symbol moved to end of range) */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -