phrase.cpp

来自「开放源码的编译器open watcom 1.6.0版的源代码」· C++ 代码 · 共 1,157 行 · 第 1/2 页

CPP
1,157
字号
/****************************************************************************
*
*                            Open Watcom Project
*
*    Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
*
*  ========================================================================
*
*    This file contains Original Code and/or Modifications of Original
*    Code as defined in and that are subject to the Sybase Open Watcom
*    Public License version 1.0 (the 'License'). You may not use this file
*    except in compliance with the License. BY USING THIS FILE YOU AGREE TO
*    ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is
*    provided with the Original Code and Modifications, and is also
*    available at www.sybase.com/developer/opensource.
*
*    The Original Code and all software distributed under the License are
*    distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
*    EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM
*    ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
*    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR
*    NON-INFRINGEMENT. Please see the License for the specific language
*    governing rights and limitations under the License.
*
*  ========================================================================
*
* Description:  WHEN YOU FIGURE OUT WHAT THIS FILE DOES, PLEASE
*               DESCRIBE IT HERE!
*
****************************************************************************/


/*
PHRASE:  phrase-replacement for compression purposes
*/

#include <stdio.h>
#include <ctype.h>
#include "phrase.h"
#include "compress.h"

#define NEW_USAGE   "Usage: phrase filename"

#define HASH_SIZE   691
#define PTBL_SIZE   1720
#define MAX_DATA_SIZE   0xFFFF


char const  PhExt[] = ".PH";


struct Edge;    // Forward declaration.


//
//  Phrase  --specialized string class for storing
//        candidate strings.
//

struct Phrase
{
    char    *_str;
    int     _len;
    int     _bufLen;

    int     _numUses;
    Edge    *_firstEdge;

    Phrase  *_next;

    int     _val;

    static Pool *_pool;
    static void initPool() { _pool = new Pool( sizeof(Phrase) , PTBL_SIZE ); };
    static void freePool() { delete _pool; };

    void *operator new( size_t ) { return _pool->get(); };
    void operator delete( void *p, size_t ) { _pool->release(p); };

    Phrase();
    Phrase( Phrase const &p );
    ~Phrase();

    Phrase &    operator=( Phrase const &p );
    int     operator>( Phrase const &p );
};

static Pool *Phrase::_pool = NULL;


//  Phrase::Phrase  --default constructor.

Phrase::Phrase()
    : _bufLen( 80 )     // Arbitrary large value
{
    _str = new char[_bufLen];
}


//  Phrase::Phrase  --copy constructor.

Phrase::Phrase( Phrase const &p )
    : _numUses( 1 ),
      _firstEdge( NULL ),
      _next( NULL )
{
    _bufLen = p._len;
    _len = p._len;
    _str = new char[_len];
    memcpy( _str, p._str, _len * sizeof( char ) );
}


//  Phrase::operator=   --Assignment operator.

Phrase & Phrase::operator=( Phrase const &p )
{
    _bufLen = p._len;
    _len = p._len;
    _str = (char *) renew( _str, _len * sizeof( char ) );
    memcpy( _str, p._str, _len* sizeof( char ) );
    _numUses = 1;
    _firstEdge = NULL;
    _next = NULL;

    return *this;
}


//  Phrase::~Phrase --destructor.

Phrase::~Phrase()
{
    delete[] _str;
}


//  Phrase::operator>   --comparison operator.

int Phrase::operator>( Phrase const &p )
{
    return _val > p._val;
}


//
//  P_String    --Class to represent phrases AFTER the phrase table
//        has been selected.
//

struct P_String
{
    char    *_str;
    int     _len;
    int     _index;
    P_String    *_next;

    P_String( Phrase const &p );
    ~P_String();

private:
    // Assignment of P_Strings is not permitted.
    P_String( P_String const & ){};
    P_String &  operator=( P_String const & ) { return *this; };
};


//  P_String::P_String  --Constructor.

P_String::P_String( Phrase const &p )
    : _next( NULL )
{
    _len = p._len;
    _str = new char[_len];
    memcpy( _str, p._str, _len );
}


//  P_String::~P_String --Destructor.

P_String::~P_String()
{
    delete[] _str;
}


//
//  Edge    --Class to represent the notion "Phrase B follows Phrase
//        A".
//

struct Edge
{
    Phrase  *_dest;
    Edge    *_next;
    int     _val;
};


//
//  PTable  --Class to store a working set of Phrases.
//

class PTable
{
    Pool    *_edges;

    Phrase  **_phrases;
    int     _size;
    int     _maxSize;

    Phrase  **_htable;

    int     _iterPos;

    // Helper function for heapsort-ing.
    void    heapify( int start );

    // Assignment of PTable's is not permitted, so it's private.
    PTable( PTable const & ){};
    PTable &    operator=( PTable const & ) { return *this; };

public:
    PTable();
    ~PTable();

    Phrase  *match( char * &start );
    Phrase  *find( Phrase *other );
    int     &follows( Phrase *first, Phrase *second );

    void    insert( Phrase *p );

    //  Access function.
    int     size() { return _size; };

    // Iter functions.
    void    start();
    Phrase  *next();

    void    clear();
    void    prune();
};


//  PTable::PTable  --Constructor.

PTable::PTable()
    : _maxSize( PTBL_SIZE ),
      _size( 0 ),
      _iterPos( -1 )
{
    _edges = new Pool( sizeof(Edge) );

    _phrases = new Phrase *[_maxSize];
    // Set all of _phrases to NULL.
    memset( _phrases, 0x00, _maxSize*sizeof(Phrase *) );

    _htable = new Phrase *[HASH_SIZE];
    // Set all of _phrases to NULL.
    memset( _htable, 0x00, HASH_SIZE*sizeof(Phrase *) );
}


//  PTable::~PTable --Destructor.

PTable::~PTable()
{
    // Delete any phrases remaining in the Pool.
    if( _phrases ){
    for( int i=0; i<_size; i++ ){
        delete _phrases[i];
    }
    delete[] _phrases;
    }

    if( _edges ) delete _edges;
    if( _htable ) delete[] _htable;
}


//  PTable::match   --Find the best match for a character string.

#define PH_MIN_LEN  3

Phrase *PTable::match( char * &start )
{
    int     length;
    uint_32 h_val;
    Phrase  *result;

    length = strlen( start );
    if( length == 0 ){
    return NULL;
    }

    result = NULL;

    if( length >= PH_MIN_LEN ){
    // First, try to find a match based on the first three characters.
    h_val = 0;
    memcpy( &h_val, start, PH_MIN_LEN );
    result = _htable[h_val%HASH_SIZE];

    while( result != NULL && result->_len > length ){
        result = result->_next;
    }
    while( result != NULL &&
           memcmp( result->_str, start, result->_len ) != 0 ){

           result = result->_next;
    }
    }
    if( result != NULL ){
    start += result->_len;
    if( isspace( *start ) ){
        start++;
    }
    } else {
    // If necessary, check for a shorter match.
    if( length >= PH_MIN_LEN-1 ){
        length = 1;
        while( length < PH_MIN_LEN-1 && !isspace( start[length] ) ){
        length++;
        }
    }
    h_val = 0;
    memcpy( &h_val, start, length );
    result = _htable[h_val%HASH_SIZE];

    while( result != NULL && result->_len > length ){
        result = result->_next;
    }
    while( result != NULL &&
           memcmp( result->_str, start, result->_len ) != 0 ){
        result = result->_next;
    }
    if( result != NULL ){
        start += result->_len;
        if( isspace( *start ) ){
        start++;
        }
    } else {
        int found_text = 0;
        while( *start != '\0' ){
        if( found_text && isspace(*start) ){
            start++;
            break;
        } else if( !found_text && !isspace(*start) ){
            found_text = 1;
        }
        start++;
        }
    }
    }
    return result;
}


//  PTable::find    --Find a given Phrase in the table.

Phrase *PTable::find( Phrase *other )
{
    uint_32 h_val;
    Phrase  *result;
    int     len = other->_len;
    char    *str = other->_str;

    h_val = 0;
    if( len < PH_MIN_LEN ){
    memcpy( &h_val, str, len );
    } else {
    memcpy( &h_val, str, PH_MIN_LEN );
    }

    result = _htable[h_val%HASH_SIZE];
    while( result != NULL && result->_len > len ){
    result = result->_next;
    }
    while( result != NULL && result->_len==len &&
           memcmp( result->_str, str, len ) != 0 ){
    result = result->_next;
    }
    if( result != NULL && result->_len != len ){
    result = NULL;
    }

    return result;
}


//  PTable::follows --Return a reference to the number of times
//            "second" has followed "first".

int &PTable::follows( Phrase *first, Phrase *second )
{
    Edge    *current = first->_firstEdge;
    while( current != NULL ){
    if( current->_dest == second ){
        break;
    }
    current = current->_next;
    }
    if( current == NULL ){
    current = (Edge *) _edges->get();
    current->_dest = second;
    current->_val = 0;
    current->_next = first->_firstEdge;
    first->_firstEdge = current;
    }

    return *(& current->_val);
}


//  PTable::insert  --insert a Phrase in the table.

void PTable::insert( Phrase *p )
{
    Phrase  *current, *temp;
    uint_32 h_val = 0;
    if( p->_len < PH_MIN_LEN ){
    memcpy( &h_val, p->_str, p->_len );
    } else {
    memcpy( &h_val, p->_str, PH_MIN_LEN );
    }

    current = _htable[h_val%HASH_SIZE];
    temp = NULL;

    while( current != NULL && current->_len > p->_len ){
    temp = current;
    current = current->_next;
    }
    p->_next = current;
    if( temp != NULL ){
    temp->_next = p;
    } else {
    _htable[h_val%HASH_SIZE] = p;
    }

    if( _size == _maxSize ){
    _phrases = (Phrase **) renew( _phrases, 2*_maxSize*sizeof(Phrase*));
    // Set the new part of _phrases to NULL.
    memset( _phrases+_maxSize, 0x00, _maxSize*sizeof(Phrase *) );
    _maxSize *= 2;
    }
    _phrases[_size++] = p;
}


//  PTable::start   --Start iterating through the table.

void PTable::start()
{
    _iterPos = 0;
}


//  PTable::next    --Iterate through the phrase table.

Phrase *PTable::next()
{
    if( _iterPos < 0 || _iterPos >= _size ) return NULL;

    return _phrases[_iterPos++];
}


//  PTable::clear   --Clear the phrase table.

void PTable::clear()
{
    int     i;
    Edge    *current, *temp;

    for( i=0; i<_size; i++ ){
    current = _phrases[i]->_firstEdge;
    while( current != NULL ){
        temp = current;
        current = current->_next;
        _edges->release( temp );
    }
    delete _phrases[i];
    }
    for( i=0; i<HASH_SIZE; i++ ){
    _htable[i] = NULL;
    }
    _iterPos = -1;
    _size = 0;
}


//  PTable::heapify --Helper function for heapsort.

void PTable::heapify( int start )
{
    int     left, right;
    int     max = start;
    Phrase  *temp;

    for( ;; ){
    left = 2*start+1;
    right = left+1;

    if( left < _size && (*_phrases[left]) > (*_phrases[start]) ){
        max = left;
    }
    if( right < _size && (*_phrases[right]) > (*_phrases[max]) ){
        max = right;
    }

    if( max == start ) break;

    temp = _phrases[start];
    _phrases[start] = _phrases[max];
    _phrases[max] = temp;

    start = max;
    }
}


//  PTable::prune   --Eliminate all but the 'best' phrases.

void PTable::prune()
{
    int     old_size;
    int     i;
    Phrase  *temp;
    char    *firstc, *startc;
    int     totalsize;

    // Toss out memory we no longer need.
    delete _edges;
    _edges = NULL;
    delete[] _htable;
    _htable = NULL;

    // Heapsort the Phrases to get the top (PTBL_SIZE) Phrases.
    old_size = _size;
    for( i=0; i<_size; i++ ){
    // Get rid of leading spaces for efficiency reasons.
    firstc = _phrases[i]->_str;
    startc = _phrases[i]->_str;
    while( firstc-startc < _phrases[i]->_len ){
        if( isspace(*firstc) ){
        firstc++;
        } else {
        break;
        }
    }
    if( firstc > startc ){
        memmove( startc, firstc, _phrases[i]->_len-(firstc-startc));
        _phrases[i]->_len -= firstc-startc;
    }
    _phrases[i]->_val = (_phrases[i]->_len-2)*(_phrases[i]->_numUses-1);
    }
    for( i=(_size-2)/2; i>=0; i-- ){
    heapify( i );
    }

    totalsize = 2;  // Size of first 2-byte phrase index.
    for( i=0; i<PTBL_SIZE; i++ ){
    if( _size == 0 ) break;
    totalsize += _phrases[0]->_len+2;  // Phrase length + index length

    if( totalsize > MAX_DATA_SIZE ){
        break;
    }

    temp = _phrases[_size-1];
    _phrases[_size-1] = _phrases[0];
    _phrases[0] = temp;
    _size--;
    heapify(0);
    }

    // Delete the remainder of the phrases.
    for( i=0; i<_size; i++ ){

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?