// scanning.cpp
//
// From the source code of the open-source compiler Open Watcom 1.6.0
// (C++ code, 376 lines).
/****************************************************************************
*
*                            Open Watcom Project
*
*    Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
*
*  ========================================================================
*
*    This file contains Original Code and/or Modifications of Original
*    Code as defined in and that are subject to the Sybase Open Watcom
*    Public License version 1.0 (the 'License'). You may not use this file
*    except in compliance with the License. BY USING THIS FILE YOU AGREE TO
*    ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is
*    provided with the Original Code and Modifications, and is also
*    available at www.sybase.com/developer/opensource.
*
*    The Original Code and all software distributed under the License are
*    distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
*    EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM
*    ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
*    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR
*    NON-INFRINGEMENT. Please see the License for the specific language
*    governing rights and limitations under the License.
*
*  ========================================================================
*
* Description:  Scanning of .rtf files.
*
****************************************************************************/


#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include "scanning.h"
#include "hcerrors.h"

#define BUF_SIZE    512


//  Token::Token  --Build a token; its text member is constructed with
//                  BUF_SIZE (presumably the buffer capacity; see the
//                  declaration of _text to confirm).

Token::Token()
    : _text( BUF_SIZE )
{
}


//  Scanner::Scanner  --Attach the scanner to an input file and prime the
//                      lookahead tokens.
//
//  tokens[0] is the slot handed back by next(); tokens[1] and tokens[2]
//  hold the lookahead and are filled here when the source is usable.
//  When the source is bad nothing is read: the buffer is marked empty,
//  which makes nextch() report end-of-input, and the lookahead tokens are
//  marked TOK_END rather than being left uninitialised.

Scanner::Scanner( InFile *src )
    : _buffer( BUF_SIZE ),
      _source( src ),
      _lineNum( 1 )
{
    tokens[2] = new Token;
    tokens[1] = new Token;
    tokens[0] = new Token;

    // Give the read position a defined state in every case; nextch() and
    // putback() consult these members even if nothing was ever read.
    _maxBuf = 0;
    _curPos = 0;

    if( !src->bad() ) {
        _maxBuf = src->readbuf( _buffer, BUF_SIZE );
        getToken( tokens[1] );
        getToken( tokens[2] );
    } else {
        tokens[1]->_type = TOK_END;
        tokens[1]->_lineNum = _lineNum;
        tokens[2]->_type = TOK_END;
        tokens[2]->_lineNum = _lineNum;
    }
}

//  Scanner::~Scanner  --Release the three tokens allocated by the
//                       constructor.

Scanner::~Scanner()
{
    int     slot;

    for( slot = 0; slot < 3; ++slot ) {
        delete tokens[slot];
    }
}


//  Scanner::nextch --Get the next character to be processed.
//            Inlined to save a little speed.
//
//  Returns S_ENDC once the source has no more data.  S_ENDC is given type
//  char so that comparing it with a char (and using it as a case label in
//  a switch on a char) works whether plain char is signed or unsigned;
//  as a bare int 0xFF it could never compare equal to a signed char.
//  Note that an input byte with value 0xFF cannot be told apart from the
//  end marker.

#define     S_ENDC  ((char)0xFF)

inline char Scanner::nextch()
{
    if( _maxBuf == 0 ) {
        // Nothing is buffered and the last read delivered nothing.
        return( S_ENDC );
    }
    if( _curPos == _maxBuf ) {
        // Every buffered character has been consumed; refill from the start.
        _maxBuf = _source->readbuf( &_buffer[0], BUF_SIZE );
        if( _maxBuf == 0 ) {
            return( S_ENDC );
        } else {
            _curPos = 0;
        }
    } else if( _curPos == _maxBuf-1 ) {
        // About to hand out the last buffered character: move it to slot 0
        // and read more data in behind it.  After the return _curPos is at
        // least 1, so putback() has a slot to step back into, and the
        // following character (if any) is already at _buffer[_curPos] for
        // code that peeks at it.
        _buffer[0] = _buffer[_curPos];
        _maxBuf = _source->readbuf( &_buffer[1], BUF_SIZE-1 ) + 1;
        _curPos = 0;
    }
    return( _buffer[_curPos++] );
}


//  Scanner::putback    --Unget a character: step the read position back by
//                        one and store c there, so that the next nextch()
//                        returns it.  Does nothing when the buffer is
//                        empty (_maxBuf is 0).

void Scanner::putback( char c )
{
    if( _maxBuf <= 0 ) {
        return;
    }
    --_curPos;
    _buffer[_curPos] = c;
}


//  Scanner::handleSlash  --Process RTF tokens beginning with a backslash.
//
//  Called after the backslash itself has been consumed.  Fills in the
//  relevant fields of *tok and returns the token type for the caller to
//  store:
//      TOK_END        input ended right after the backslash (warning issued)
//      TOK_COMMAND    a control word; its name is in tok->_text
//      TOK_SPEC_CHAR  an escaped character or a \'hh byte; see tok->_value
//      TOK_NONE       a malformed command (warning issued)

TokenTypes Scanner::handleSlash( Token * tok )
{
    TokenTypes  result;
    char    current = nextch();

    if( current == S_ENDC ) {
        HCWarning( RTF_BADEOF, _source->name() );
        result = TOK_END;
    } else if( current == '*' ) {

        // Certain RTF commands begin with "\*\", not "\".

        current = nextch( );
        if( current != '\\' ) {
            HCWarning( RTF_BADCOMMAND, _lineNum, _source->name() );
            if( current != S_ENDC ) {
                putback( current );
            }
            result = TOK_NONE;
        } else {
            // The second backslash has been consumed; scan what follows it
            // exactly like an ordinary command.
            result = handleSlash( tok );
        }
    } else if( current == '\n' ) {

        // A "\" just before a new-line is the same as "\par".

        strncpy( tok->_text, "par", 4 );
        result = TOK_COMMAND;
        ++_lineNum;
    } else if( isSpecial( current ) ) {

        // Some characters are escaped, like "\{".

        result = TOK_SPEC_CHAR;
        tok->_value = current;
    } else if( current == '\'' ) {

        // "\'hh" signifies the byte with hex value hh.  pullHex() reports
        // a missing hex digit by setting tok->_type to TOK_NONE; that has
        // to be carried into the return value, because the caller
        // overwrites tok->_type with whatever is returned here.

        result = TOK_SPEC_CHAR;
        tok->_type = TOK_SPEC_CHAR;
        pullHex( tok );
        if( tok->_type == TOK_NONE ) {
            HCWarning( RTF_BADCOMMAND, _lineNum, _source->name() );
            result = TOK_NONE;
        }
    } else if( islower( (unsigned char) current ) ) {

        // All RTF commands are in lower case.  (The cast keeps the
        // argument within the range the <ctype.h> functions accept.)

        putback( current );
        result = TOK_COMMAND;
        pullCommand( tok );
    } else {
        HCWarning( RTF_BADCOMMAND, _lineNum, _source->name() );
        result = TOK_NONE;
    }
    return( result );
}


//  Scanner::isSpecial  --Return 1 if c is one of the characters that may
//                        follow a backslash as an escaped character,
//                        0 otherwise.

int Scanner::isSpecial( char c )
{
    static char const escapable[] = "-:\\_{|}\"";

    // strchr() would also find the terminating NUL, which is not a member
    // of the set, so reject it up front.
    if( c == '\0' ) {
        return( 0 );
    }
    return( strchr( escapable, c ) != NULL );
}


//  Scanner::isFootnoteChar  --Return 1 if c is a "footnote" character,
//               0 otherwise.
//               This is a feature specific to the .HLP format.

int Scanner::isFootnoteChar( char c )
{
    return( c == '#'        // Context string
         || c == '$'        // Title
         || c == 'K'        // Keywords
         || c == '+'        // Macros
         || c == '!'        // Browse Sequence Identifiers
         || c == '*' );     // Build Tags (not supported)
}


//  Scanner::pullCommand   --Read the text of an RTF command.
//
//  On entry the next character in the stream is the first letter of the
//  command name.  The run of lower-case letters is stored in tok->_text.
//  If it is followed by a digit or '-', up to six characters of numeric
//  parameter are converted into tok->_value and tok->_hasValue is set to 1;
//  otherwise tok->_hasValue is 0.  A single space ending the command is
//  swallowed; any other terminating character is put back.

void Scanner::pullCommand( Token * tok )
{
    char    current;
    char    num_string[7];      // sign and/or digits (at most 6) plus NUL
    int     i;
    int     len;

    tok->_text[0] = (char)nextch();

    // Check each character before storing it, so that the character which
    // ends the loop is never already part of the name.  Putting it back
    // below therefore cannot duplicate a letter, even when the name is cut
    // short because the text buffer is full.
    for( i=1; ; i++ ) {
        current = nextch( );

        if( i >= BUF_SIZE-1 ) break;
        if( current == S_ENDC || !islower( (unsigned char) current ) ) break;
        tok->_text[i] = current;
    }
    tok->_text[i] = '\0';

    if( current == S_ENDC ||
        ( !isdigit( (unsigned char) current ) && current != '-' ) ) {
        tok->_hasValue = 0;
    } else {
        tok->_hasValue = 1;

        // 'len' counts the characters actually stored, so the terminator
        // always lands directly after the last one and inside the array.
        // Digits beyond the sixth character are not consumed here.
        len = 0;
        do {
            num_string[len++] = current;
            current = nextch( );
        } while( len < (int) sizeof( num_string ) - 1 &&
                 isdigit( (unsigned char) current ) );
        num_string[len] = '\0';
        tok->_value = atoi( num_string );
    }
    if( current != S_ENDC && current != ' ' ) {
        putback( current );
    }
}


//  Scanner::pullText   --Pull a block of plain text from a .RTF file.
//
//  The first character is taken unconditionally.  Further characters are
//  appended to tok->_text until the end of input, a brace, a footnote
//  character, a backslash that does not introduce "\~", or a full text
//  buffer stops the run.  New-lines are counted but not stored, and "\~"
//  is stored as a single HARD_SPACE.  tok->_value receives the number of
//  characters stored.

#define HARD_SPACE  0xA0

void Scanner::pullText( Token * tok )
{
    int     length = 0;
    int     delimited = 0;      // set once a character ends the text run
    char    ch;

    tok->_text[length++] = (char) nextch( );

    do {
        ch = nextch( );

        if( ch == '\n' ) {
            ++_lineNum;
        } else if( ch == S_ENDC || ch == '{' || ch == '}'
                   || isFootnoteChar( ch ) ) {
            delimited = 1;
        } else if( ch == '\\' ) {
            // Peek at the character after the backslash: only "\~" is
            // part of the text.
            if( _curPos < _maxBuf && _buffer[_curPos] == '~' ) {
                nextch();
                tok->_text[length++] = (char) HARD_SPACE;
            } else {
                delimited = 1;
            }
        } else {
            tok->_text[length++] = ch;
        }
    } while( !delimited && length < BUF_SIZE-1 );

    tok->_text[length] = '\0';
    tok->_value = length;

    // A delimiter belongs to the next token; hand it back unless it was
    // the end-of-input marker.
    if( delimited && ch != S_ENDC ) {
        putback( ch );
    }
}


//  Scanner::pullHex    --Get the value of a byte specified by "\'hh".
//
//  Reads at most two hex digits.  On success tok->_hasValue is set to 1
//  and tok->_value to the value of the digits read.  If no hex digit
//  follows, tok->_type is set to TOK_NONE and tok->_hasValue is cleared.
//  A non-hex character that stops the scan is put back.

void Scanner::pullHex( Token * tok )
{
    char    result[3];
    char    current;
    int     i;

    for( i=0; i<2; ++i ) {
        current = nextch( );
        // The cast keeps the argument within the range isxdigit() accepts.
        if( current == S_ENDC || !isxdigit( (unsigned char) current ) ) {
            break;
        }
        result[i] = current;
    }
    result[i] = '\0';
    if( i<2 && current != S_ENDC ) {
        putback( current );
    }
    if( i==0 ) {
        tok->_type = TOK_NONE;
        tok->_hasValue = 0;
    } else {
        tok->_hasValue = 1;
        tok->_value = strtol( result, NULL, 16 );
    }
}


//  Scanner::getToken   --Get the next token from the input stream.
//
//  Fills *tok with the next token: its type, the line it was found on and,
//  depending on the type, its text and/or value.  Token objects are reused
//  by next(), so tok->_hasValue is cleared first; only the helpers that
//  actually parse a value set it again.

void Scanner::getToken( Token * tok )
{
    char current;

    tok->_hasValue = 0;

    // New-lines between tokens are skipped; they are only counted.
    for( ;; ) {
        current = nextch( );
        if( current != '\n' ) break;
        ++_lineNum;
    }

    tok->_lineNum = _lineNum;
    switch( current ) {
    case S_ENDC:
        tok->_type = TOK_END;
        break;

    case '{':
        tok->_type = TOK_PUSH_STATE;
        break;

    case '}':
        tok->_type = TOK_POP_STATE;
        break;

    case '\\':
        current = nextch();
        if( current == '~' ) {
            // "\~" starts a run of text with a hard space: overwrite the
            // '~' just read with HARD_SPACE and let pullText() pick it up.
            tok->_type = TOK_TEXT;
            putback( HARD_SPACE );
            pullText( tok );
        } else {
            putback( current );
            tok->_type = handleSlash( tok );
        }
        break;

    case '\t':
        // A literal tab character is reported as the command "tab".
        tok->_type = TOK_COMMAND;
        strncpy( tok->_text, "tab", 4 );
        break;

    default:
        if( isFootnoteChar( current ) ) {
            tok->_type = TOK_SPEC_CHAR;
            tok->_value = current;
        } else {
            tok->_type = TOK_TEXT;
            putback( current );
            pullText( tok );
        }
    }
}


//  Scanner::next   --Advance the lookahead window by one token and return
//                    the token that is now at its front.  The token object
//                    previously at the front is reused to scan the new
//                    last lookahead token.

Token   *Scanner::next()
{
    Token   *recycled = tokens[0];
    int     slot;

    for( slot = 0; slot < 2; ++slot ) {
        tokens[slot] = tokens[slot+1];
    }
    tokens[2] = recycled;

    getToken( recycled );
    return( tokens[0] );
}

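// Keyboard shortcuts of the code viewer this listing was taken from
// (not part of the program):
//   Copy code           Ctrl + C
//   Search code         Ctrl + F
//   Full-screen mode    F11
//   Increase font size  Ctrl + =
//   Decrease font size  Ctrl + -
//   Show shortcuts      ?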