phrase.cpp
来自「开放源码的编译器open watcom 1.6.0版的源代码」· C++ 代码 · 共 1,157 行 · 第 1/2 页
CPP
1,157 行
delete _phrases[i];
}
while( _size < old_size ){
if( _phrases[_size]->_val > 4 ) break;
delete _phrases[_size];
_size++;
}
memmove( _phrases, _phrases+_size, (old_size-_size)*sizeof(Phrase*) );
_size = old_size-_size;
}
// HFPhrases::HFPhrases --Constructor.  Remembers the two callbacks
//                        used to obtain input files, starts with no
//                        tables, no scanner and zero counts, and adds
//                        this object to d_file as "|Phrases".
HFPhrases::HFPhrases( HFSDirectory * d_file, InFile* (*firstf)(),
                      InFile* (*nextf)() )
    : _firstf( firstf ), _nextf( nextf ),
      _oldPtable( NULL ), _newPtable( NULL ),
      _result( NULL ), _htable( NULL ), _scanner( NULL ),
      _size( 0 ), _numPhrases( 0 )
{
    // The only work beyond member initialization: hand ourselves to
    // the directory object.
    d_file->addFile( this, "|Phrases" );
}
// HFPhrases::~HFPhrases --Destructor.  Releases the scanner, both
//                         phrase tables, every P_String in the result
//                         array, the array itself and the hash table.
HFPhrases::~HFPhrases()
{
    // The scanner is only deleted by startInput()/nextInput() when the
    // input runs out; if the object dies before that happens it would
    // otherwise be leaked.  Those functions always reset the pointer to
    // NULL after deleting it, so this cannot double-delete.
    if( _scanner ){
        delete _scanner;
        _scanner = NULL;
    }
    if( _oldPtable ) delete _oldPtable;
    if( _newPtable ) delete _newPtable;
    if( _result ){
        // _result owns each P_String it points to.
        for( int i=0; i<_resultSize ; i++ ){
            delete _result[i];
        }
        delete[] _result;
    }
    if( _htable ){
        // The hash table only holds borrowed pointers into _result,
        // so just the bucket array is freed here.
        delete[] _htable;
    }
}
// HFPhrases::size --Overrides Dumpable::size.
//                   Returns the cached _size when it is already
//                   non-zero; otherwise builds the phrase list if
//                   needed, then totals the header, one uint_16 per
//                   used phrase and the value returned by
//                   CompReader::compress for each phrase.
uint_32 HFPhrases::size()
{
    // A positive _size means the figure was computed on an earlier call.
    if( _size > 0 ){
        return _size;
    }

    // No result list yet: build it (this also writes "phrases.ph").
    if( _result == NULL ){
        createQueue( "phrases.ph" );
    }

    CompWriter  riter;
    CompReader  reader( &riter );

    _size = 10;     // Size of the phrase table header.
    _phSize = 0;    // Running total of the phrase lengths.

    int idx = 0;
    while( idx < _numPhrases ){
        P_String *entry = _result[idx];
        _phSize += entry->_len;
        _size += sizeof(uint_16) + reader.compress( entry->_str, entry->_len );
        ++idx;
    }
    return _size;
}
// HFPhrases::dump --Overrides Dumpable::dump.
//                   Writes the phrase count, a magic word and _phSize,
//                   then a table of uint_16 offsets (one per phrase
//                   plus a final end offset), then the phrase text
//                   pushed through a CompReader.  Always returns 1.
int HFPhrases::dump( OutFile *dest )
{
    const uint_16 magic = 0x0100;

    // Fixed-size header fields.
    dest->writebuf( &_numPhrases, sizeof( uint_16 ), 1 );
    dest->writebuf( &magic, sizeof( uint_16 ), 1 );
    dest->writebuf( &_phSize, sizeof( uint_32 ), 1 );

    // Offset table.  The first offset skips the table itself
    // (_numPhrases+1 entries); each later one adds the previous
    // phrase's length.
    uint_16 offset = (uint_16) ( (_numPhrases+1) * sizeof( uint_16 ) );
    int     idx;
    for( idx=0; idx<_numPhrases; idx++ ){
        dest->writebuf( &offset, sizeof( uint_16 ), 1 );
        offset = (uint_16) (offset + _result[idx]->_len);
    }
    // Closing entry: the offset just past the last phrase.
    dest->writebuf( &offset, sizeof( uint_16 ), 1 );

    // Phrase text, sent to dest through the compressor.
    CompOutFile riter( dest );
    CompReader  reader( &riter );
    for( idx=0; idx<_numPhrases; idx++ ){
        P_String *entry = _result[idx];
        reader.compress( entry->_str, entry->_len );
    }
    reader.flush();

    return 1;
}
// HFPhrases::startInput --Prepare to read the first block
// of input.
void HFPhrases::startInput()
{
InFile *input;
if( _scanner ) delete _scanner;
_scanner = NULL;
input = (*_firstf)();
if( input == NULL ) return;
_scanner = new Scanner( input );
}
// HFPhrases::nextInput --Get the next block of input.
//
// Returns the _text of the next TOK_TEXT token produced by the current
// scanner, moving on to the file supplied by the _nextf callback each
// time a scanner reports TOK_END.  Returns NULL if there is no scanner
// or when _nextf has no more files to offer.
// Tokens that are not text are skipped; the groups introduced by the
// commands "colortbl", "fonttbl", "footnote", "stylesheet" and by a
// "v" command (with no value or a non-zero value) are skipped whole.
// NOTE(review): the returned pointer is the token's own _text member;
// how long it stays valid depends on Scanner -- confirm before keeping it.
char* HFPhrases::nextInput()
{
InFile *input;
Token *next;
char *result;
// startInput() leaves _scanner NULL when there is no input at all.
if( _scanner == NULL ) return NULL;
for( ;; ){
next = _scanner->next();
if( next->_type == TOK_END ){
// Current file is exhausted: drop its scanner and try the next file.
delete _scanner;
_scanner = NULL;
input = (*_nextf)();
if( input == NULL ){
return NULL;
}
_scanner = new Scanner( input );
} else if( next->_type != TOK_TEXT ){
// Not text: consume tokens until text or end-of-file turns up.
// 'done' is set by the switch below once such a token is seen.
int push_level, done = 0;
for( ;; ){
switch( next->_type ){
case TOK_END:
case TOK_TEXT: // deliberate fall-through
done = 1;
break;
case TOK_COMMAND:
if( strcmp( next->_text, "colortbl" )==0 ||
strcmp( next->_text, "fonttbl" )==0 ||
strcmp( next->_text, "footnote" )==0 ||
strcmp( next->_text, "stylesheet" )==0 ){
// Skip everything up to the pop that takes push_level below zero
// (one more pop than pushes seen since the command), or up to TOK_END.
push_level = 0;
do{
next = _scanner->next();
if( next->_type == TOK_PUSH_STATE ){
push_level++;
} else if( next->_type == TOK_POP_STATE ){
push_level--;
} else if( next->_type == TOK_END ){
break;
}
}while( push_level >= 0 );
} else if( strcmp( next->_text, "v" )==0 &&
(!next->_hasValue ||
next->_value != 0 ) ){
// "v" with no value or a non-zero value: skip as above, but also
// stop at a later "v" command whose value is 0.
push_level = 0;
do{
next = _scanner->next();
if( next->_type == TOK_PUSH_STATE ){
push_level++;
} else if( next->_type == TOK_POP_STATE ){
push_level--;
} else if( next->_type == TOK_COMMAND &&
strcmp( next->_text, "v" )==0 &&
next->_hasValue &&
next->_value == 0 ){
break;
} else if( next->_type == TOK_END ){
break;
}
}while( push_level >= 0 );
}
break;
}
if( done ) break;
// NOTE(review): if a skip loop above stopped on TOK_END, this asks the
// scanner for another token after its end; presumably Scanner keeps
// returning TOK_END in that case -- confirm.
next = _scanner->next();
}
// The inner loop only exits on TOK_END or TOK_TEXT.
if( next->_type == TOK_END ){
delete _scanner;
_scanner = NULL;
input = (*_nextf)();
if( input == NULL ){
return NULL;
}
_scanner = new Scanner( input );
} else {
result = next->_text;
break;
}
} else {
// Plain text token: hand it straight back.
result = next->_text;
break;
}
}
return result;
}
// HFPhrases::readPhrases --Fill the phrase table with candidate
//                          phrases.
//
// Pass 1 splits every input block into words and records, in
// _newPtable, how often each word occurs and how often one word
// follows another.  Up to nine further passes re-read the input and
// join a phrase with its successor whenever that pair was seen to
// follow each other at least twice in the previous pass.  The loop
// stops early once no edge in the new table has a count of 2 or more.
// On return _newPtable holds the candidates and _oldPtable is NULL.
//
// The characters handed to isspace() are converted to unsigned char
// first: passing a negative plain char to the <ctype.h> functions is
// undefined behaviour, and input text may contain high-bit characters.
void HFPhrases::readPhrases()
{
    char    *block = NULL;
    char    *end;
    int     found_text;
    int     count;
    int     getnext;
    Phrase  phr;        // Scratch phrase, reused for every lookup.
    Phrase  *p_phr, *last, *next, *lookahead;
    PTable  *temp;
    Edge    *current;

    Phrase::initPool();
    _oldPtable = new PTable;
    _newPtable = new PTable;

    // Put all of the words in the file in a dictionary.
    HCStartPhrase();
    HCPhraseLoop(1);
    startInput();
    while( (block = nextInput()) != NULL ){
        last = NULL;
        while( *block != '\0' ){
            found_text = 0;
            phr._len = 0;
            end = block;

            // Copy any leading whitespace plus the word that follows
            // it; stop at the first whitespace after the word.
            while( *end != '\0' ){
                if( found_text && isspace( (unsigned char) *end ) ){
                    break;
                } else if( !found_text && !isspace( (unsigned char) *end ) ){
                    found_text = 1;
                }
                if( phr._len == phr._bufLen ){
                    phr._str = (char *) renew(phr._str, 2*phr._bufLen );
                    phr._bufLen *= 2;
                }
                phr._str[phr._len++] = *end++;
            }

            // Create the phrase.
            p_phr = _newPtable->find( &phr );
            if( p_phr != NULL ){
                p_phr->_numUses += 1;
            } else {
                _newPtable->insert( p_phr = new Phrase( phr ) );
            }
            // Count this word as a successor of the previous one.
            if( last != NULL ){
                _newPtable->follows( last, p_phr ) += 1;
            }
            last = p_phr;

            // Step over the single separator that ended the word.
            if( *end != '\0' ){
                block = end+1;
            } else {
                block = end;
            }
        }
    }

    // Build up longer phrases iteratively with extra
    // passes over the file.
    // NOTE THE ARBITRARY CUTOFF.  I have reason to suspect this
    // algorithm is non-terminating in certain cases.
    for( count=1; count<10; count++ ){
        HCPhraseLoop( count+1 );

        // Last pass's results become the reference table; the other
        // table receives this pass's output.
        temp = _oldPtable;
        _oldPtable = _newPtable;
        _newPtable = temp;

        startInput();
        while( (block = nextInput()) != NULL ){
            last = next = lookahead = NULL;
            getnext = 1;
            // NOTE(review): the loop relies on PTable::match() moving
            // 'block' forward past what it matched -- confirm in PTable.
            while( *block != '\0' ){
                if( getnext ){
                    next = _oldPtable->match( block );
                }
                if( *block != '\0' ){
                    lookahead = _oldPtable->match( block );
                } else {
                    lookahead = NULL;
                }
                if( next == NULL || lookahead == NULL ||
                    _oldPtable->follows( next, lookahead ) < 2 ){
                    // No worthwhile pair: carry 'next' over unchanged
                    // and let 'lookahead' take its place.
                    if( next != NULL ){
                        p_phr = _newPtable->find( next );
                        if( p_phr != NULL ){
                            p_phr->_numUses++;
                        } else {
                            _newPtable->insert( p_phr = new Phrase(*next) );
                        }
                        if( last != NULL ){
                            _newPtable->follows( last, p_phr ) += 1;
                        }
                    } else {
                        p_phr = NULL;
                    }
                    next = lookahead;
                    getnext = 0;
                } else {
                    // Set phr to (next + lookahead).
                    phr._len = next->_len + lookahead->_len + 1;
                    if( phr._bufLen < phr._len ){
                        phr._bufLen = phr._len;
                        phr._str = (char *) renew( phr._str, phr._len );
                    }
                    memcpy( phr._str, next->_str, next->_len );
                    phr._str[next->_len] = ' ';
                    memcpy( phr._str+next->_len+1, lookahead->_str,
                            lookahead->_len );
                    p_phr = _newPtable->find( &phr );
                    if( p_phr != NULL ){
                        p_phr->_numUses++;
                    } else {
                        _newPtable->insert( p_phr = new Phrase(phr) );
                    }
                    if( last != NULL ){
                        _newPtable->follows( last, p_phr ) += 1;
                    }
                    // Both halves are used up; fetch a fresh 'next'.
                    next = NULL;
                    lookahead = NULL;
                    getnext = 1;
                }
                last = p_phr;
            }
            // Record the phrase still pending when the block ended.
            if( next != NULL ){
                p_phr = _newPtable->find( next );
                if( p_phr != NULL ){
                    p_phr->_numUses++;
                } else {
                    _newPtable->insert( p_phr = new Phrase(*next) );
                }
                if( last != NULL ){
                    _newPtable->follows( last, p_phr ) += 1;
                }
            }
        }
        _oldPtable->clear();

        // Look for any edge with a count of at least 2; if there is
        // none, another pass could not join anything, so stop.
        _newPtable->start();
        while( (p_phr = _newPtable->next()) != NULL ){
            current = p_phr->_firstEdge;
            while( current != NULL ){
                if( current->_val >= 2 ) break;
                current = current->_next;
            }
            if( current != NULL ) break;
        }
        if( p_phr == NULL ) break;
    }
    HCDoneTick();

    delete _oldPtable;
    _oldPtable = NULL;
}
// HFPhrases::initHashTable --Initialize the hash table.
//                            Allocates the bucket array on first use,
//                            empties it, then pushes every entry of
//                            _result onto the front of the bucket
//                            chosen by the first PH_MIN_LEN bytes of
//                            its text.
void HFPhrases::initHashTable()
{
    if( _htable == NULL ){
        _htable = new P_String *[HASH_SIZE];
    }
    memset( _htable, 0x00, HASH_SIZE * sizeof( P_String * ) );

    for( int i=0; i<_resultSize; i++ ){
        P_String *curr_str = _result[i];

        // Start from zero: memcpy only overwrites PH_MIN_LEN bytes, and
        // the remaining bytes must not hold indeterminate values when
        // the hash is computed.
        uint_32 hvalue = 0;
        memcpy( &hvalue, curr_str->_str, PH_MIN_LEN );
        hvalue &= 0xFFFFFF;
        hvalue %= HASH_SIZE;

        // Chain the phrase in at the head of its bucket.
        curr_str->_next = _htable[hvalue];
        _htable[hvalue] = curr_str;
    }
}
// HFPhrases::createQueue --Find all candidate Phrases with a high
// enough _value field, and add them to a
// priority queue.
void HFPhrases::createQueue( char const *path )
{
Phrase *current;
int i;
_newPtable->prune();
_resultSize = _newPtable->size();
_result = new P_String *[_resultSize];
_newPtable->start();
OutFile ph_file( path );
for( i=0; (current = _newPtable->next()) != NULL; i++ ){
_result[i] = new P_String( *current );
ph_file.writebuf( _result[i]->_str, 1, _result[i]->_len );
ph_file.writech( '\r' );
ph_file.writech( '\n' );
_result[i]->_index = i;
}
ph_file.close();
// We no longer need the dictionary, or the Phrase queue.
delete _newPtable;
_newPtable = NULL;
Phrase::freePool();
// Initialize the 'hash table'.
initHashTable();
}
// HFPhrases::oldTable --Use a previously created phrase table.
//                       Reads the file 'path' one character at a time;
//                       each non-empty run of characters ended by '\n'
//                       or end-of-file becomes one entry of _result.
//                       Loading stops early once the accumulated size
//                       would exceed MAX_DATA_SIZE.
//                       Returns 0 if the file could not be opened,
//                       1 otherwise.
int HFPhrases::oldTable( char const *path )
{
    InFile ph_file( path );
    if( ph_file.bad() ){
        return 0;
    }

    Phrase  current;
    int     ptable_size = PTBL_SIZE;
    int     c = '\0';
    int     totalsize;  // Size of the phrase data loaded.

    _result = new P_String *[ptable_size];
    _resultSize = 0;
    current._len = 0;
    totalsize = 2;      // Size of first 2-byte phrase index.

    while( c != EOF ){
        c = ph_file.nextch();
        if( c == EOF || c == '\n' ){
            // End of a line; empty lines are ignored.
            if( current._len != 0 ){
                totalsize += current._len+2;    // Phrase size + index size
                if( totalsize > MAX_DATA_SIZE ){
                    break;
                }
                if( _resultSize == ptable_size ){
                    // Double the array.  The element type is P_String*,
                    // so that is what the byte count is based on.
                    _result = (P_String**) renew( _result,
                                    2*ptable_size*sizeof( P_String * ) );
                    ptable_size *= 2;
                }
                _result[_resultSize] = new P_String( current );
                _result[_resultSize]->_index = _resultSize;
                _resultSize += 1;
                current._len = 0;
            }
        } else {
            // Ordinary character: append it, growing the buffer if full.
            if( current._len == current._bufLen ){
                current._str = (char *) renew( current._str, 2*current._bufLen );
                current._bufLen *= 2;
            }
            current._str[current._len++] = (char) c;
        }
    }

    // Initialize the 'hash table'.
    initHashTable();
    return 1;
}
// HFPhrases::replace --Go through a block of text and replace
//                      common phrases where they appear.
//
//  dst  receives the converted text.
//       NOTE(review): assumes dst has room for at least 'len' bytes --
//       confirm with callers.
//  src  the text to convert; exactly 'len' bytes of it are examined.
//  len  on entry the length of src, on return the number of bytes
//       written to dst.
//
// At each position the longest matching phrase in the hash bucket for
// the next PH_MIN_LEN bytes is replaced by a two-byte phrase code; a
// single space directly after the phrase is folded into the code.
// A phrase used for the first time is moved to slot _numPhrases of
// _result, so the phrases actually used occupy the lowest indices.
void HFPhrases::replace( char * dst, char const *src, int & len )
{
    P_String    *current, *best;
    int         read_pos = 0;
    int         write_pos = 0;

    // Only positions with PH_MIN_LEN bytes still available can be
    // hashed without reading beyond the end of src.
    while( read_pos <= len - (int) PH_MIN_LEN ){
        // Hash exactly as initHashTable() does: zero-filled value,
        // PH_MIN_LEN bytes of text, the same mask, the same modulus.
        uint_32 hvalue = 0;
        memcpy( &hvalue, src + read_pos, PH_MIN_LEN );
        hvalue &= 0xFFFFFF;
        hvalue %= HASH_SIZE;

        // Pick the longest phrase in this bucket that matches here.
        current = _htable[hvalue];
        best = NULL;
        while( current != NULL ){
            if( current->_len <= len - read_pos &&
                memcmp( current->_str, src + read_pos, current->_len ) == 0 ){
                if( best == NULL || best->_len < current->_len ){
                    best = current;
                }
            }
            current = current->_next;
        }

        if( best == NULL ){
            dst[write_pos++] = src[read_pos++];
        } else {
            if( best->_index >= _numPhrases ){
                // First use of this phrase: swap it into the next free
                // slot among the used phrases, keeping both _index
                // fields in step with the new positions.
                if( best->_index > _numPhrases ){
                    P_String *temp = _result[_numPhrases];
                    _result[_numPhrases] = _result[best->_index];
                    _result[best->_index] = temp;
                    _result[best->_index]->_index = best->_index;
                    best->_index = _numPhrases;
                }
                _numPhrases = (uint_16) (_numPhrases+1);
            }

            // Convert the index to a WinHelp "phrase code".
            // See "phrases.doc".
            dst[write_pos] = (uint_8) ((( best->_index >> 7 ) & 0xF ) + 1 );
            dst[write_pos+1] = (uint_8) (( best->_index & 0x7f ) << 1 );
            read_pos += best->_len;

            // A phrase may end exactly at the end of the text, so the
            // bound has to be checked before looking at the next byte.
            if( read_pos < len && src[read_pos] == ' ' ){
                dst[write_pos+1] |= 0x1;
                read_pos++;
            }
            write_pos += 2;
        }
    }

    // Too few bytes left to hold a phrase: copy the tail verbatim.
    while( read_pos < len ){
        dst[write_pos++] = src[read_pos++];
    }
    len = write_pos;
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?