phrase.cpp
来自「开放源码的编译器open watcom 1.6.0版的源代码」· C++ 代码 · 共 1,157 行 · 第 1/2 页
CPP
1,157 行
/****************************************************************************
*
* Open Watcom Project
*
* Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
*
* ========================================================================
*
* This file contains Original Code and/or Modifications of Original
* Code as defined in and that are subject to the Sybase Open Watcom
* Public License version 1.0 (the 'License'). You may not use this file
* except in compliance with the License. BY USING THIS FILE YOU AGREE TO
* ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is
* provided with the Original Code and Modifications, and is also
* available at www.sybase.com/developer/opensource.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM
* ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR
* NON-INFRINGEMENT. Please see the License for the specific language
* governing rights and limitations under the License.
*
* ========================================================================
*
* Description: WHEN YOU FIGURE OUT WHAT THIS FILE DOES, PLEASE
* DESCRIBE IT HERE!
*
****************************************************************************/
/*
PHRASE: phrase-replacement for compression purposes
*/
#include <stdio.h>
#include <ctype.h>
#include "phrase.h"
#include "compress.h"
#define NEW_USAGE "Usage: phrase filename"
#define HASH_SIZE 691
#define PTBL_SIZE 1720
#define MAX_DATA_SIZE 0xFFFF
char const PhExt[] = ".PH";
struct Edge; // Forward declaration.
//
// Phrase --specialized string class for storing
// candidate strings.
//
struct Phrase
{
char *_str;
int _len;
int _bufLen;
int _numUses;
Edge *_firstEdge;
Phrase *_next;
int _val;
static Pool *_pool;
static void initPool() { _pool = new Pool( sizeof(Phrase) , PTBL_SIZE ); };
static void freePool() { delete _pool; };
void *operator new( size_t ) { return _pool->get(); };
void operator delete( void *p, size_t ) { _pool->release(p); };
Phrase();
Phrase( Phrase const &p );
~Phrase();
Phrase & operator=( Phrase const &p );
int operator>( Phrase const &p );
};
static Pool *Phrase::_pool = NULL;
// Phrase::Phrase --default constructor.
Phrase::Phrase()
: _bufLen( 80 ) // Arbitrary large value
{
_str = new char[_bufLen];
}
// Phrase::Phrase --copy constructor.
Phrase::Phrase( Phrase const &p )
: _numUses( 1 ),
_firstEdge( NULL ),
_next( NULL )
{
_bufLen = p._len;
_len = p._len;
_str = new char[_len];
memcpy( _str, p._str, _len * sizeof( char ) );
}
// Phrase::operator= --Assignment operator.
Phrase & Phrase::operator=( Phrase const &p )
{
_bufLen = p._len;
_len = p._len;
_str = (char *) renew( _str, _len * sizeof( char ) );
memcpy( _str, p._str, _len* sizeof( char ) );
_numUses = 1;
_firstEdge = NULL;
_next = NULL;
return *this;
}
// Phrase::~Phrase --destructor.
Phrase::~Phrase()
{
delete[] _str;
}
// Phrase::operator> --comparison operator.
int Phrase::operator>( Phrase const &p )
{
return _val > p._val;
}
//
// P_String --Class to represent phrases AFTER the phrase table
// has been selected.
//
struct P_String
{
char *_str;
int _len;
int _index;
P_String *_next;
P_String( Phrase const &p );
~P_String();
private:
// Assignment of P_Strings is not permitted.
P_String( P_String const & ){};
P_String & operator=( P_String const & ) { return *this; };
};
// P_String::P_String --Constructor.
P_String::P_String( Phrase const &p )
: _next( NULL )
{
_len = p._len;
_str = new char[_len];
memcpy( _str, p._str, _len );
}
// P_String::~P_String --Destructor.
P_String::~P_String()
{
delete[] _str;
}
//
// Edge --Class to represent the notion "Phrase B follows Phrase
// A".
//
struct Edge
{
Phrase *_dest;
Edge *_next;
int _val;
};
//
// PTable --Class to store a working set of Phrases.
//
class PTable
{
Pool *_edges;
Phrase **_phrases;
int _size;
int _maxSize;
Phrase **_htable;
int _iterPos;
// Helper function for heapsort-ing.
void heapify( int start );
// Assignment of PTable's is not permitted, so it's private.
PTable( PTable const & ){};
PTable & operator=( PTable const & ) { return *this; };
public:
PTable();
~PTable();
Phrase *match( char * &start );
Phrase *find( Phrase *other );
int &follows( Phrase *first, Phrase *second );
void insert( Phrase *p );
// Access function.
int size() { return _size; };
// Iter functions.
void start();
Phrase *next();
void clear();
void prune();
};
// PTable::PTable --Constructor.
PTable::PTable()
: _maxSize( PTBL_SIZE ),
_size( 0 ),
_iterPos( -1 )
{
_edges = new Pool( sizeof(Edge) );
_phrases = new Phrase *[_maxSize];
// Set all of _phrases to NULL.
memset( _phrases, 0x00, _maxSize*sizeof(Phrase *) );
_htable = new Phrase *[HASH_SIZE];
// Set all of _phrases to NULL.
memset( _htable, 0x00, HASH_SIZE*sizeof(Phrase *) );
}
// PTable::~PTable --Destructor.
PTable::~PTable()
{
// Delete any phrases remaining in the Pool.
if( _phrases ){
for( int i=0; i<_size; i++ ){
delete _phrases[i];
}
delete[] _phrases;
}
if( _edges ) delete _edges;
if( _htable ) delete[] _htable;
}
// PTable::match --Find the best match for a character string.
#define PH_MIN_LEN 3
Phrase *PTable::match( char * &start )
{
int length;
uint_32 h_val;
Phrase *result;
length = strlen( start );
if( length == 0 ){
return NULL;
}
result = NULL;
if( length >= PH_MIN_LEN ){
// First, try to find a match based on the first three characters.
h_val = 0;
memcpy( &h_val, start, PH_MIN_LEN );
result = _htable[h_val%HASH_SIZE];
while( result != NULL && result->_len > length ){
result = result->_next;
}
while( result != NULL &&
memcmp( result->_str, start, result->_len ) != 0 ){
result = result->_next;
}
}
if( result != NULL ){
start += result->_len;
if( isspace( *start ) ){
start++;
}
} else {
// If necessary, check for a shorter match.
if( length >= PH_MIN_LEN-1 ){
length = 1;
while( length < PH_MIN_LEN-1 && !isspace( start[length] ) ){
length++;
}
}
h_val = 0;
memcpy( &h_val, start, length );
result = _htable[h_val%HASH_SIZE];
while( result != NULL && result->_len > length ){
result = result->_next;
}
while( result != NULL &&
memcmp( result->_str, start, result->_len ) != 0 ){
result = result->_next;
}
if( result != NULL ){
start += result->_len;
if( isspace( *start ) ){
start++;
}
} else {
int found_text = 0;
while( *start != '\0' ){
if( found_text && isspace(*start) ){
start++;
break;
} else if( !found_text && !isspace(*start) ){
found_text = 1;
}
start++;
}
}
}
return result;
}
// PTable::find --Find a given Phrase in the table.
Phrase *PTable::find( Phrase *other )
{
uint_32 h_val;
Phrase *result;
int len = other->_len;
char *str = other->_str;
h_val = 0;
if( len < PH_MIN_LEN ){
memcpy( &h_val, str, len );
} else {
memcpy( &h_val, str, PH_MIN_LEN );
}
result = _htable[h_val%HASH_SIZE];
while( result != NULL && result->_len > len ){
result = result->_next;
}
while( result != NULL && result->_len==len &&
memcmp( result->_str, str, len ) != 0 ){
result = result->_next;
}
if( result != NULL && result->_len != len ){
result = NULL;
}
return result;
}
// PTable::follows --Return a reference to the number of times
// "second" has followed "first".
int &PTable::follows( Phrase *first, Phrase *second )
{
Edge *current = first->_firstEdge;
while( current != NULL ){
if( current->_dest == second ){
break;
}
current = current->_next;
}
if( current == NULL ){
current = (Edge *) _edges->get();
current->_dest = second;
current->_val = 0;
current->_next = first->_firstEdge;
first->_firstEdge = current;
}
return *(& current->_val);
}
// PTable::insert --insert a Phrase in the table.
void PTable::insert( Phrase *p )
{
Phrase *current, *temp;
uint_32 h_val = 0;
if( p->_len < PH_MIN_LEN ){
memcpy( &h_val, p->_str, p->_len );
} else {
memcpy( &h_val, p->_str, PH_MIN_LEN );
}
current = _htable[h_val%HASH_SIZE];
temp = NULL;
while( current != NULL && current->_len > p->_len ){
temp = current;
current = current->_next;
}
p->_next = current;
if( temp != NULL ){
temp->_next = p;
} else {
_htable[h_val%HASH_SIZE] = p;
}
if( _size == _maxSize ){
_phrases = (Phrase **) renew( _phrases, 2*_maxSize*sizeof(Phrase*));
// Set the new part of _phrases to NULL.
memset( _phrases+_maxSize, 0x00, _maxSize*sizeof(Phrase *) );
_maxSize *= 2;
}
_phrases[_size++] = p;
}
// PTable::start --Start iterating through the table.
void PTable::start()
{
_iterPos = 0;
}
// PTable::next --Iterate through the phrase table.
Phrase *PTable::next()
{
if( _iterPos < 0 || _iterPos >= _size ) return NULL;
return _phrases[_iterPos++];
}
// PTable::clear --Clear the phrase table.
void PTable::clear()
{
int i;
Edge *current, *temp;
for( i=0; i<_size; i++ ){
current = _phrases[i]->_firstEdge;
while( current != NULL ){
temp = current;
current = current->_next;
_edges->release( temp );
}
delete _phrases[i];
}
for( i=0; i<HASH_SIZE; i++ ){
_htable[i] = NULL;
}
_iterPos = -1;
_size = 0;
}
// PTable::heapify --Helper function for heapsort.
void PTable::heapify( int start )
{
int left, right;
int max = start;
Phrase *temp;
for( ;; ){
left = 2*start+1;
right = left+1;
if( left < _size && (*_phrases[left]) > (*_phrases[start]) ){
max = left;
}
if( right < _size && (*_phrases[right]) > (*_phrases[max]) ){
max = right;
}
if( max == start ) break;
temp = _phrases[start];
_phrases[start] = _phrases[max];
_phrases[max] = temp;
start = max;
}
}
// PTable::prune --Eliminate all but the 'best' phrases.
void PTable::prune()
{
int old_size;
int i;
Phrase *temp;
char *firstc, *startc;
int totalsize;
// Toss out memory we no longer need.
delete _edges;
_edges = NULL;
delete[] _htable;
_htable = NULL;
// Heapsort the Phrases to get the top (PTBL_SIZE) Phrases.
old_size = _size;
for( i=0; i<_size; i++ ){
// Get rid of leading spaces for efficiency reasons.
firstc = _phrases[i]->_str;
startc = _phrases[i]->_str;
while( firstc-startc < _phrases[i]->_len ){
if( isspace(*firstc) ){
firstc++;
} else {
break;
}
}
if( firstc > startc ){
memmove( startc, firstc, _phrases[i]->_len-(firstc-startc));
_phrases[i]->_len -= firstc-startc;
}
_phrases[i]->_val = (_phrases[i]->_len-2)*(_phrases[i]->_numUses-1);
}
for( i=(_size-2)/2; i>=0; i-- ){
heapify( i );
}
totalsize = 2; // Size of first 2-byte phrase index.
for( i=0; i<PTBL_SIZE; i++ ){
if( _size == 0 ) break;
totalsize += _phrases[0]->_len+2; // Phrase length + index length
if( totalsize > MAX_DATA_SIZE ){
break;
}
temp = _phrases[_size-1];
_phrases[_size-1] = _phrases[0];
_phrases[0] = temp;
_size--;
heapify(0);
}
// Delete the remainder of the phrases.
for( i=0; i<_size; i++ ){
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?