phrase.cpp

来自「开放源码的编译器open watcom 1.6.0版的源代码」· C++ 代码 · 共 1,157 行 · 第 1/2 页

CPP
1,157
字号
    delete _phrases[i];
    }
    while( _size < old_size ){
    if( _phrases[_size]->_val > 4 ) break;
    delete _phrases[_size];
    _size++;
    }
    memmove( _phrases, _phrases+_size, (old_size-_size)*sizeof(Phrase*) );
    _size = old_size-_size;
}



//  HFPhrases::HFPhrases()  --Constructor.
//
//  d_file  Directory object; this instance is handed to its
//          addFile() under the name "|Phrases".
//  firstf  Callback invoked by startInput() to obtain the first
//          input file (startInput() tolerates a NULL result).
//  nextf   Callback invoked by nextInput() to obtain each further
//          input file; a NULL result ends the input.
//
//  All tables start out empty; they are built later by
//  readPhrases(), createQueue() or oldTable().

HFPhrases::HFPhrases( HFSDirectory * d_file, InFile* (*firstf)(),
                      InFile* (*nextf)() )
    : _firstf( firstf ),
      _nextf( nextf ),
      _oldPtable( NULL ),
      _newPtable( NULL ),
      _result( NULL ),
      _htable( NULL ),
      _scanner( NULL ),
      _size( 0 ),
      _numPhrases( 0 )
{
    // Give the remaining counters a defined value as well: dump()
    // writes _phSize and the destructor loops up to _resultSize, so
    // neither should ever be read while indeterminate.  They are
    // assigned here rather than in the initializer list so that the
    // member declaration order (not visible in this file) is irrelevant.
    _resultSize = 0;
    _phSize = 0;

    d_file->addFile( this, "|Phrases" );
}



//  HFPhrases::~HFPhrases() --Destructor.
//
//  Releases both phrase tables, every P_String in the result array
//  (plus the array itself), the hash table and any scanner that is
//  still active.

HFPhrases::~HFPhrases()
{
    // 'delete' on a null pointer is a no-op, so no guards are needed.
    delete _oldPtable;
    delete _newPtable;

    if( _result != NULL ){
        for( int i=0; i<_resultSize; i++ ){
            delete _result[i];
        }
        delete[] _result;
    }

    // The hash table only links P_Strings owned by _result, so just
    // the bucket array itself is freed here.
    delete[] _htable;

    // startInput()/nextInput() create and delete the scanner; one is
    // still alive here if the input was started but never read to the
    // end, and it would otherwise leak.
    delete _scanner;
    _scanner = NULL;
}


//  HFPhrases::size --Overrides Dumpable::size.
//
//  Returns the size of the phrase table.  The value is computed on
//  the first call and cached in _size; later calls return the cached
//  figure.  If no result array exists yet, it is first built with
//  createQueue( "phrases.ph" ).  As a side effect _phSize is set to
//  the summed length of the first _numPhrases phrases.

uint_32 HFPhrases::size()
{
    // Already computed on an earlier call.
    if( _size > 0 ) return _size;

    if( _result == NULL ) createQueue( "phrases.ph" );

    // The phrases are run through the compressor only to learn how
    // much each one contributes (compress()'s return value is added
    // to the total).
    CompWriter  sink;
    CompReader  packer( &sink );

    // 10 bytes of header; dump() writes three header fields of
    // 2+2+4 bytes and one 16-bit offset more than there are phrases.
    _size = 10;
    _phSize = 0;

    for( int idx=0; idx<_numPhrases; idx++ ){
        P_String *entry = _result[idx];

        _phSize += entry->_len;
        // One 16-bit offset-table slot plus the phrase's contribution.
        _size += sizeof(uint_16) + packer.compress( entry->_str, entry->_len );
    }

    return _size;
}



//  HFPhrases::dump --Overrides Dumpable::dump.
//
//  Writes the phrase table to 'dest': the phrase count, a constant
//  0x0100 word, _phSize, an offset table with _numPhrases+1 entries,
//  and finally the phrase text pushed through the compressor.
//  Always returns 1.

int HFPhrases::dump( OutFile *dest )
{
    const uint_16 magic = 0x0100;

    // Fixed-size header fields.
    dest->writebuf( &_numPhrases, sizeof( uint_16 ), 1 );
    dest->writebuf( &magic, sizeof( uint_16 ), 1 );
    dest->writebuf( &_phSize, sizeof( uint_32 ), 1 );

    // Offset table.  The first offset equals the size of the table
    // itself; each following one adds the length of the previous
    // phrase, and the extra last entry marks the end of the text.
    uint_16 offset = (uint_16) ( (_numPhrases+1) * sizeof( uint_16 ) );
    int     slot = 0;
    while( slot < _numPhrases ){
        dest->writebuf( &offset, sizeof( uint_16 ), 1 );
        offset = (uint_16) (offset + _result[slot]->_len);
        slot++;
    }
    dest->writebuf( &offset, sizeof( uint_16 ), 1 );

    // Phrase text, sent to 'dest' through the compressor.
    CompOutFile sink( dest );
    CompReader  packer( &sink );

    for( int idx=0; idx<_numPhrases; idx++ ){
        P_String *entry = _result[idx];
        packer.compress( entry->_str, entry->_len );
    }
    packer.flush();

    return 1;
}



//  HFPhrases::startInput   --Prepare to read the first block
//                of input.

void HFPhrases::startInput()
{
    InFile  *input;

    if( _scanner ) delete _scanner;
    _scanner = NULL;
    input = (*_firstf)();
    if( input == NULL ) return;
    _scanner = new Scanner( input );
}


//  HFPhrases::nextInput    --Get the next block of input.
//
//  Returns the _text of the next TOK_TEXT token, moving on to the
//  next input file (via the _nextf callback) whenever the current
//  scanner reports TOK_END.  Returns NULL when no scanner is active
//  or when _nextf returns NULL, i.e. the input is exhausted; in the
//  latter case _scanner is left NULL.
//
//  Non-text tokens are skipped.  The groups introduced by the
//  commands "colortbl", "fonttbl", "footnote" and "stylesheet", and
//  the text following a "v" command that has no value or a non-zero
//  value, are skipped entirely.
//
//  NOTE(review): the returned pointer is the token's own _text
//  member; presumably it is only valid until the scanner produces
//  another token or is deleted -- confirm against Scanner.

char* HFPhrases::nextInput()
{
    InFile  *input;
    Token   *next;
    char    *result;

    // startInput() found no input, or the input has already run out.
    if( _scanner == NULL ) return NULL;

    for( ;; ){
    next = _scanner->next();

    if( next->_type == TOK_END ){
        // Current file is finished: switch to the next one, if any.
        delete _scanner;
        _scanner = NULL;
        input = (*_nextf)();
        if( input == NULL ){
        return NULL;
        }
        _scanner = new Scanner( input );

    } else if( next->_type != TOK_TEXT ){
        // Skip forward until a text token or the end of the file.
        int push_level, done = 0;

        for( ;; ){
        switch( next->_type ){
        case TOK_END:
        case TOK_TEXT:  // deliberate fall-through
            done = 1;
            break;

        case TOK_COMMAND:
            if( strcmp( next->_text, "colortbl" )==0 ||
            strcmp( next->_text, "fonttbl" )==0  ||
            strcmp( next->_text, "footnote" )==0 ||
            strcmp( next->_text, "stylesheet" )==0 ){
            // Swallow the rest of the enclosing group: count nested
            // pushes/pops and stop at the pop that takes the level
            // below zero, or at the end of the file.
            push_level = 0;
            do{
                next = _scanner->next();
                if( next->_type == TOK_PUSH_STATE ){
                push_level++;
                } else if( next->_type == TOK_POP_STATE ){
                push_level--;
                } else if( next->_type == TOK_END ){
                break;
                }
            }while( push_level >= 0 );
            } else if( strcmp( next->_text, "v" )==0 &&
                   (!next->_hasValue ||
                next->_value != 0 ) ){
            // "v" without a value, or with a non-zero value: skip
            // tokens until a "v" command with value 0, the close of
            // the enclosing group, or the end of the file.
            push_level = 0;
            do{
                next = _scanner->next();
                if( next->_type == TOK_PUSH_STATE ){
                push_level++;
                } else if( next->_type == TOK_POP_STATE ){
                push_level--;
                } else if( next->_type == TOK_COMMAND &&
                       strcmp( next->_text, "v" )==0 &&
                       next->_hasValue &&
                       next->_value == 0 ){
                break;
                } else if( next->_type == TOK_END ){
                break;
                }
            }while( push_level >= 0 );
            }
            break;
        }

        if( done ) break;

        // NOTE(review): if one of the skip loops above stopped on
        // TOK_END, 'done' is still 0 and that TOK_END token is
        // discarded by the call below.  This relies on the scanner
        // continuing to report TOK_END after the end of its input,
        // which is not visible here -- confirm against Scanner::next().
        next = _scanner->next();
        }
        if( next->_type == TOK_END ){
        // End of file reached while skipping: move to the next file.
        delete _scanner;
        _scanner = NULL;
        input = (*_nextf)();
        if( input == NULL ){
            return NULL;
        }
        _scanner = new Scanner( input );

        } else {
        // The skip loop stopped on a text token.
        result = next->_text;
        break;
        }
    } else {
        // Plain text token: hand it straight back.
        result = next->_text;
        break;
    }
    }

    return result;
}


//  HFPhrases::readPhrases  --Fill the phrase table with candidate
//                phrases.
//
//  Pass 1 splits every input block into words and records each word
//  in _newPtable, together with how often each word follows another.
//  Up to nine further passes then re-read the input; whenever two
//  adjacent phrases have been seen following one another at least
//  twice they are joined (with a single space) into a longer phrase.
//  The passes stop early once no phrase in the new table has an edge
//  with a count of 2 or more.  On return _newPtable holds the
//  candidates and _oldPtable has been deleted and reset to NULL.

void HFPhrases::readPhrases()
{
    char    *block = NULL;
    char    *end;
    int     found_text;
    int     count;
    int     getnext;
    Phrase  phr;
    Phrase  *p_phr, *last, *next, *lookahead;
    PTable  *temp;
    Edge    *current;


    Phrase::initPool();

    _oldPtable = new PTable;
    _newPtable = new PTable;

    // Put all of the words in the file in a dictionary.
    HCStartPhrase();
    HCPhraseLoop(1);
    startInput();
    while( (block = nextInput()) != NULL ){
        last = NULL;
        while( *block != '\0' ){
            // Collect one word into phr: any leading whitespace is
            // kept, and the word ends at the first whitespace after
            // some text or at the end of the block.
            found_text = 0;
            phr._len = 0;
            end = block;
            while( *end != '\0' ){
                // isspace() requires a value representable as
                // unsigned char; a plain (possibly negative) char
                // would be undefined behaviour for 8-bit text.
                if( found_text && isspace( (unsigned char) *end ) ){
                    break;
                } else if( !found_text && !isspace( (unsigned char) *end ) ){
                    found_text = 1;
                }
                if( phr._len == phr._bufLen ){
                    phr._str = (char *) renew(phr._str, 2*phr._bufLen );
                    phr._bufLen *= 2;
                }
                phr._str[phr._len++] = *end++;
            }

            // Create the phrase.
            p_phr = _newPtable->find( &phr );
            if( p_phr != NULL ){
                p_phr->_numUses += 1;
            } else {
                _newPtable->insert( p_phr = new Phrase( phr ) );
            }

            // Count how often this word follows the previous one.
            if( last != NULL ){
                _newPtable->follows( last, p_phr ) += 1;
            }
            last = p_phr;

            // Step over the whitespace character that ended the word.
            if( *end != '\0' ){
                block = end+1;
            } else {
                block = end;
            }
        }
    }


    // Build up longer phrases iteratively with extra
    // passes over the file.

    // NOTE THE ARBITRARY CUTOFF.  I have reason to suspect this
    // algorithm is non-terminating in certain cases.
    for( count=1; count<10; count++ ){
        HCPhraseLoop( count+1 );

        // The table built by the previous pass becomes the reference
        // table; the other one receives this pass's results.
        temp = _oldPtable;
        _oldPtable = _newPtable;
        _newPtable = temp;

        startInput();
        while( (block = nextInput()) != NULL ){
            last = next = lookahead = NULL;
            getnext = 1;
            while( *block != '\0' ){
                // NOTE(review): match() presumably returns the known
                // phrase found at 'block' and advances 'block' past
                // it -- confirm against PTable::match().
                if( getnext ){
                    next = _oldPtable->match( block );
                }
                if( *block != '\0' ){
                    lookahead = _oldPtable->match( block );
                } else {
                    lookahead = NULL;
                }
                if( next == NULL || lookahead == NULL ||
                    _oldPtable->follows( next, lookahead ) < 2 ){
                    // Not worth joining: carry 'next' over unchanged
                    // and let 'lookahead' become the new 'next'.
                    if( next != NULL ){
                        p_phr = _newPtable->find( next );
                        if( p_phr != NULL ){
                            p_phr->_numUses++;
                        } else {
                            _newPtable->insert( p_phr = new Phrase(*next) );
                        }
                        if( last != NULL ){
                            _newPtable->follows( last, p_phr ) += 1;
                        }
                    } else {
                        p_phr = NULL;
                    }

                    next = lookahead;
                    getnext = 0;
                } else {
                    // Set phr to (next + lookahead).
                    phr._len = next->_len + lookahead->_len + 1;
                    if( phr._bufLen < phr._len ){
                        phr._bufLen = phr._len;
                        phr._str = (char *) renew( phr._str, phr._len );
                    }
                    memcpy( phr._str, next->_str, next->_len );
                    phr._str[next->_len] = ' ';
                    memcpy( phr._str+next->_len+1, lookahead->_str,
                            lookahead->_len );

                    p_phr = _newPtable->find( &phr );
                    if( p_phr != NULL ){
                        p_phr->_numUses++;
                    } else {
                        _newPtable->insert( p_phr = new Phrase(phr) );
                    }
                    if( last != NULL ){
                        _newPtable->follows( last, p_phr ) += 1;
                    }

                    // Both halves are used up; fetch a fresh pair.
                    next = NULL;
                    lookahead = NULL;
                    getnext = 1;
                }

                last = p_phr;
            }
            // A phrase may still be pending when the block runs out.
            if( next != NULL ){
                p_phr = _newPtable->find( next );
                if( p_phr != NULL ){
                    p_phr->_numUses++;
                } else {
                    _newPtable->insert( p_phr = new Phrase(*next) );
                }
                if( last != NULL ){
                    _newPtable->follows( last, p_phr ) += 1;
                }
            }
        }

        _oldPtable->clear();

        // Another pass is only useful if some phrase in the new table
        // still has an edge with a count of at least 2.
        _newPtable->start();
        while( (p_phr = _newPtable->next()) != NULL ){
            current = p_phr->_firstEdge;
            while( current != NULL ){
                if( current->_val >= 2 ) break;
                current = current->_next;
            }
            if( current != NULL ) break;
        }
        if( p_phr == NULL ) break;
    }

    HCDoneTick();

    delete _oldPtable;
    _oldPtable = NULL;
}


//  HFPhrases::initHashTable    --Initialize the hash table.

void HFPhrases::initHashTable()
{
    uint_32 hvalue;
    P_String    *curr_str;

    if( _htable == NULL ){
    _htable = new P_String *[HASH_SIZE];
    }
    memset( _htable, 0x00, HASH_SIZE * sizeof( P_String * ) );

    for( int i=0; i<_resultSize; i++ ){
    curr_str = _result[i];
    memcpy( &hvalue, curr_str->_str, PH_MIN_LEN );
    hvalue &= 0xFFFFFF;
    hvalue %= HASH_SIZE;

    curr_str->_next = _htable[hvalue];
    _htable[hvalue] = curr_str;
    }
}


//  HFPhrases::createQueue  --Find all candidate Phrases with a high
//                enough _value field, and add them to a
//                priority queue.

void HFPhrases::createQueue( char const *path )
{
    Phrase      *current;
    int         i;

    _newPtable->prune();

    _resultSize = _newPtable->size();
    _result = new P_String *[_resultSize];

    _newPtable->start();

    OutFile ph_file( path );
    for( i=0; (current = _newPtable->next()) != NULL; i++ ){
    _result[i] = new P_String( *current );

    ph_file.writebuf( _result[i]->_str, 1, _result[i]->_len );
    ph_file.writech( '\r' );
    ph_file.writech( '\n' );

    _result[i]->_index = i;
    }
    ph_file.close();

    // We no longer need the dictionary, or the Phrase queue.
    delete _newPtable;
    _newPtable = NULL;

    Phrase::freePool();

    // Initialize the 'hash table'.
    initHashTable();
}


//  HFPhrases::oldTable --Use a previously created phrase table.
//
//  Reads phrases from the file named by 'path', one per line
//  (terminated by '\n' or end of file; empty lines are ignored),
//  into _result and then builds the hash table.  Loading stops early
//  once the phrase data, counted with a 2-byte index per phrase,
//  would exceed MAX_DATA_SIZE.
//
//  Returns 0 if the file could not be opened (ph_file.bad()),
//  1 otherwise.

int HFPhrases::oldTable( char const *path )
{
    InFile  ph_file( path );
    if( ph_file.bad() ){
        return 0;
    }

    Phrase  current;
    int     ptable_size = PTBL_SIZE;
    int     c = '\0';
    int     totalsize;  // Size of the phrase data loaded.

    _result = new P_String *[ptable_size];
    _resultSize = 0;
    current._len = 0;
    totalsize = 2;  // Size of first 2-byte phrase index.
    while( c != EOF ){
        c = ph_file.nextch();
        if( c == EOF || c == '\n' ){
            // End of a line: store the phrase collected so far.
            if( current._len != 0 ){

                totalsize += current._len+2;    // Phrase size + index size
                if( totalsize > MAX_DATA_SIZE ){
                    break;
                }

                // Double the array when it is full.  The element type
                // is P_String*, so that is the size that must be used.
                if( _resultSize == ptable_size ){
                    _result = (P_String**) renew( _result, 2*ptable_size*sizeof(P_String*) );
                    ptable_size *= 2;
                }
                _result[_resultSize] = new P_String( current );
                _result[_resultSize]->_index = _resultSize;
                _resultSize += 1;
                current._len = 0;
            }
        } else {
            // Ordinary character: append it, growing the buffer if needed.
            if( current._len == current._bufLen ){
                current._str = (char *) renew( current._str, 2*current._bufLen );
                current._bufLen *= 2;
            }
            current._str[current._len++] = (char) c;
        }
    }

    // Initialize the 'hash table'.
    initHashTable();

    return 1;
}



//  HFPhrases::replace  --Go through a block of text and replace
//            common phrases where they appear.
//
//  dst     Receives the converted text.
//  src     Text to convert; only the first 'len' bytes are examined.
//  len     On entry the length of 'src'; on return the number of
//          bytes written to 'dst'.
//
//  At every position the longest phrase from the hash table that
//  matches the text is replaced by a two-byte phrase code; all other
//  bytes are copied unchanged.  Phrases are renumbered as they are
//  first used, so that the phrases actually used occupy the first
//  _numPhrases slots of _result.

void HFPhrases::replace( char * dst, char const *src, int & len )
{
    uint_32     hvalue = 0;
    P_String    *current, *best;
    int         read_pos = 0;
    int         write_pos = 0;

    // The hash key needs PH_MIN_LEN bytes of text; the last two bytes
    // of the block are therefore never the start of a phrase.
    while( read_pos < len-2 ){
        // Compute the key exactly as initHashTable() does, including
        // the 24-bit mask, so that lookups cannot depend on stale high
        // bits left in hvalue by the previous iteration.
        memcpy( &hvalue, src + read_pos, PH_MIN_LEN );
        hvalue &= 0xFFFFFF;
        hvalue %= HASH_SIZE;

        // Find the longest phrase in this bucket that matches here.
        current = _htable[hvalue];
        best = NULL;
        while( current != NULL ){
            if( current->_len <= len - read_pos &&
                memcmp( current->_str, src + read_pos, current->_len ) == 0 ){
                if( best == NULL || best->_len < current->_len ){
                    best = current;
                }
            }
            current = current->_next;
        }

        if( best == NULL ){
            dst[write_pos++] = src[read_pos++];
        } else {
            // First use of this phrase: move it into the next free
            // slot of the 'used' region at the front of _result.
            if( best->_index >= _numPhrases ){
                if( best->_index > _numPhrases ){
                    P_String *temp = _result[_numPhrases];
                    _result[_numPhrases] = _result[best->_index];
                    _result[best->_index] = temp;
                    _result[best->_index]->_index = best->_index;
                    best->_index = _numPhrases;
                }
                _numPhrases = (uint_16) (_numPhrases+1);
            }

            // Convert the index to a WinHelp "phrase code".
            // See "phrases.doc".
            dst[write_pos] = (uint_8) ((( best->_index >> 7 ) & 0xF ) + 1 );
            dst[write_pos+1] = (uint_8) (( best->_index & 0x7f ) << 1 );

            read_pos += best->_len;
            // The low bit of the second byte stands for one space
            // following the phrase.  The phrase may end exactly at the
            // end of the block, so check the bound before looking at
            // the next byte; otherwise a byte beyond 'len' would be
            // read and possibly consumed.
            if( read_pos < len && src[read_pos] == ' ' ){
                dst[write_pos+1] |= 0x1;
                read_pos++;
            }
            write_pos += 2;
        }
    }
    // Copy the tail that is too short to start a phrase.
    while( read_pos < len ){
        dst[write_pos++] = src[read_pos++];
    }

    len = write_pos;
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?