📄 rtflex.cpp
字号:
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
//
// Use of this source code is subject to the terms of the Microsoft shared
// source or premium shared source license agreement under which you licensed
// this source code. If you did not accept the terms of the license agreement,
// you are not authorized to use this source code. For the terms of the license,
// please see the license agreement between you and Microsoft or, if applicable,
// see the SOURCE.RTF on your install media or the root of your tools installation.
// THE SOURCE CODE IS PROVIDED "AS IS", WITH NO WARRANTIES.
//
/*
* @doc INTERNAL
*
* @module RTFLEX.CPP - RichEdit RTF reader lexical analyzer |
*
* This file contains the implementation of the lexical analyzer part of
* the RTF reader.
*
* Authors: <nl>
* Original RichEdit 1.0 RTF converter: Anthony Francisco <nl>
* Conversion to C++ and RichEdit 2.0: Murray Sargent <nl>
*
* @devnote
* All sz's in the RTF*.? files refer to LPSTRs, not LPTSTRs, unless
* noted as a szUnicode.
*/
#include "_common.h"
#include "_rtfread.h"
#include "hash.h"
ASSERTDATA
#include "tokens.cpp"
#pragma BEGIN_CODESPACE_DATA
// Array used by character classification macros to speed classification
// of chars residing in two or more discontiguous ranges, e.g., alphanumeric
// or hex. The table is indexed by byte value; entries 0x80-0xFF carry no
// class flags. The alphabetics used in RTF control words are lower-case ASCII.
// *** DO NOT DBCS rgbCharClass[] ***

// Combined class flags used only to keep the table below readable:
//	fCS - bytes 0x09-0x0D		fSB - the space character (0x20)
//	fHD - '0'-'9'			fHU - 'A'-'F'		fHL - 'a'-'f'
// Each replacement list is parenthesized so that the macro expands to a
// single operand no matter what expression it is used in.
// NOTE(review): '+' is equivalent to '|' here only if the component flags
// are distinct bits -- presumably they are; confirm against their definition.
#define fCS (fCT + fSP)
#define fSB (fBL + fSP)
#define fHD (fHX + fDG)
#define fHU (fHX + fUC)
#define fHL (fHX + fLC)
const BYTE rgbCharClass[256] =
{
	// 0x00 - 0x0F
	fCT,fCT,fCT,fCT,fCT,fCT,fCT,fCT, fCT,fCS,fCS,fCS,fCS,fCS,fCT,fCT,
	// 0x10 - 0x1F
	fCT,fCT,fCT,fCT,fCT,fCT,fCT,fCT, fCT,fCT,fCT,fCT,fCT,fCT,fCT,fCT,
	// 0x20 - 0x2F: space, then punctuation
	fSB,fPN,fPN,fPN,fPN,fPN,fPN,fPN, fPN,fPN,fPN,fPN,fPN,fPN,fPN,fPN,
	// 0x30 - 0x3F: '0'-'9', then punctuation
	fHD,fHD,fHD,fHD,fHD,fHD,fHD,fHD, fHD,fHD,fPN,fPN,fPN,fPN,fPN,fPN,
	// 0x40 - 0x4F: '@', 'A'-'F' (hex), 'G'-'O'
	fPN,fHU,fHU,fHU,fHU,fHU,fHU,fUC, fUC,fUC,fUC,fUC,fUC,fUC,fUC,fUC,
	// 0x50 - 0x5F: 'P'-'Z', then punctuation
	fUC,fUC,fUC,fUC,fUC,fUC,fUC,fUC, fUC,fUC,fUC,fPN,fPN,fPN,fPN,fPN,
	// 0x60 - 0x6F: '`', 'a'-'f' (hex), 'g'-'o'
	fPN,fHL,fHL,fHL,fHL,fHL,fHL,fLC, fLC,fLC,fLC,fLC,fLC,fLC,fLC,fLC,
	// 0x70 - 0x7F: 'p'-'z', then punctuation (0x7F is classed as fPN too)
	fLC,fLC,fLC,fLC,fLC,fLC,fLC,fLC, fLC,fLC,fLC,fPN,fPN,fPN,fPN,fPN,
	// 0x80 - 0xFF: unclassified
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};

// Signature characters "{\rtf".
// NOTE(review): presumably matched against the start of an RTF stream; the
// use site is not in this part of the file.
const char szRTFSig[] = "{\\rtf";

// Specifies the number of bytes we can safely "UngetChar"
// before possibly underflowing the buffer. FillBuffer() reserves this many
// bytes at the front of the RTF buffer for that purpose.
const int cbBackupMax = 4;
#pragma END_CODESPACE_DATA
// Bug2298 - an RTF writer was found that emits uppercase RTF keywords, so
// keyword scanning uses IsAlphaChar (either case) instead of IsLCAscii.
//
// Returns nonzero iff b is an ASCII letter ('A'-'Z' or 'a'-'z').
inline BOOL IsAlphaChar(BYTE b)
{
	// OR-ing in 0x20 maps 'A'-'Z' onto 'a'-'z' and leaves any byte that
	// already has that bit set unchanged, so a single unsigned range check
	// covers both cases. Values below 'a' wrap to a huge DWORD and fail.
	return (DWORD)((b | 0x20) - 'a') <= (DWORD)('z' - 'a');
}
// Quick and dirty tolower(b): valid only for ASCII letters and for 0, which
// is passed through unchanged (setting bit 0x20 on it would yield a space).
inline BYTE REToLower(BYTE b)
{
	Assert(!b || IsAlphaChar(b));

	if (!b)
		return b;

	return b | 0x20;				// Force the lower-case bit
}
/*
 *	CRTFRead::InitLex()
 *
 *	@mfunc
 *		Initialize the lexical analyzer by allocating its two work buffers:
 *		_szText (cachTextMax + 1 bytes) and _pchRTFBuffer (cachBufferMost
 *		bytes). Both are requested with GMEM_ZEROINIT. If either allocation
 *		fails, the out-of-memory state is signalled through the call manager
 *		and _ecParseError is set to ecLexInitFailed.
 *
 *	@rdesc
 *		TRUE	If lexical analyzer was initialized
 */
BOOL CRTFRead::InitLex()
{
	TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::InitLex");

	AssertSz(cKeywords == i_TokenIndexMax,
		"Keyword index enumeration is incompatible with rgKeyword[]");
	Assert(!_szText && !_pchRTFBuffer);

	// szText gets one extra byte so that hex conversion doesn't have to
	// worry about running off the end if the first char is NULL
	_szText = (BYTE *)PvAlloc(cachTextMax + 1, GMEM_ZEROINIT);
	if (_szText)
	{
		// Only try for the read buffer once the text buffer exists
		_pchRTFBuffer = (BYTE *)PvAlloc(cachBufferMost, GMEM_ZEROINIT);
		if (_pchRTFBuffer)
			return TRUE;					// Lexer is ready
	}

	// One of the allocations failed
	_ped->GetCallMgr()->SetOutOfMemory();
	_ecParseError = ecLexInitFailed;
	return FALSE;
}
/*
* CRTFRead::DeinitLex()
*
* @mfunc
* Shut down lexical analyzer
*/
void CRTFRead::DeinitLex()
{
TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::DeinitLex");
#ifdef KEYWORD_RESOURCE
if (hglbKeywords)
{
FreeResource(hglbKeywords);
hglbKeywords = NULL;
rgKeyword = NULL;
}
#endif
FreePv(_szText);
FreePv(_pchRTFBuffer);
}
/*
 *	CRTFRead::GetChar()
 *
 *	@mfunc
 *		Get next char, filling buffer as needed
 *
 *	@rdesc
 *		BYTE	next char from the buffer; 0 if the buffer is exhausted and
 *				could not be refilled, in which case _ecParseError is set to
 *				ecUnexpectedEOF
 *
 *	@devnote
 *		A 0 byte stored in the stream is also returned as 0; callers tell
 *		the two cases apart by checking _ecParseError.
 */
BYTE CRTFRead::GetChar()
{
	TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::GetChar");

	// Data is available if the buffer isn't drained, or if a refill works
	if (_pchRTFCurrent != _pchRTFEnd || FillBuffer())
		return *_pchRTFCurrent++;

	_ecParseError = ecUnexpectedEOF;
	return 0;
}
/*
 *	CRTFRead::FillBuffer()
 *
 *	@mfunc
 *		Fill RTF buffer & return != 0 if successful
 *
 *	@rdesc
 *		LONG	# chars read; 0 if the stream callback reported an error
 *				(_ecParseError is then set to ecGeneralFailure)
 *
 *	@comm
 *		This routine doesn't bother copying anything down if
 *		pchRTFCurrent <lt> pchRTFEnd so anything not read yet is lost.
 *		The only exception to this is that it always copies down the
 *		most recently read bytes (up to cbBackupMax of them) into the
 *		backup area at the front of the buffer so that UngetChar() will
 *		work. ReadData() actually counts on this behavior, so if you
 *		change it, change ReadData() accordingly.
 *
 *		If the callback fails, the buffer is left empty
 *		(_pchRTFEnd == _pchRTFCurrent) so that a later GetChar() cannot
 *		hand back stale bytes left over from an earlier fill.
 */
LONG CRTFRead::FillBuffer()
{
	TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::FillBuffer");

	// Start from a defined value in case the callback reports success
	// without ever storing a count.
	LONG cchRead = 0;

	if (!_pchRTFCurrent)
	{
		// No data yet, nothing for backup
		// Leave cbBackupMax NULL chars so backup
		// area of buffer doesn't contain garbage.
		for(int i = 0; i < cbBackupMax; i++)
		{
			_pchRTFBuffer[i] = 0;
		}
	}
	else
	{
		Assert(_pchRTFCurrent == _pchRTFEnd);

		// Copy most recently read chars in case
		// we need to back up
		int cbBackup = min(cbBackupMax,
			DiffPtrs(_pchRTFCurrent, &_pchRTFBuffer[cbBackupMax], BYTE));
		int i;

		for(i = -1; i >= -cbBackup; i--)
		{
			_pchRTFBuffer[cbBackupMax + i] = _pchRTFCurrent[i];
		}
		if(cbBackup < cbBackupMax)
		{
			// NULL out the byte just before the first valid character in
			// the backup area (i is now -cbBackup - 1)
			_pchRTFBuffer[cbBackupMax + i] = 0;
		}
	}

	// New data always starts right after the backup area
	_pchRTFCurrent = &_pchRTFBuffer[cbBackupMax];

	// Fill buffer with as much as we can take given our starting offset
	_pes->dwError = _pes->pfnCallback(_pes->dwCookie,
									  _pchRTFCurrent,
									  cachBufferMost - cbBackupMax,
									  &cchRead);
	if (_pes->dwError)
	{
		TRACEERRSZSC("RTFLEX: GetChar()", _pes->dwError);
		_ecParseError = ecGeneralFailure;

		// _pchRTFCurrent was rewound above; collapse the window so the
		// previous fill's contents can't be read again as if they were new.
		_pchRTFEnd = _pchRTFCurrent;
		return 0;
	}

	_pchRTFEnd = &_pchRTFBuffer[cbBackupMax + cchRead];	// Point the end

#if defined(DEBUG) && !defined(MACPORT)
	// Debug aid: echo everything just read to the capture file, if any
	if(_hfileCapture)
	{
		DWORD cbLeftToWrite = cchRead;
		DWORD cbWritten = 0;
		BYTE *pbToWrite = (BYTE *)_pchRTFCurrent;

		// Keep writing until WriteFile fails or nothing is left to write
		while(WriteFile(_hfileCapture,
						pbToWrite,
						cbLeftToWrite,
						&cbWritten,
						NULL) &&
			  (pbToWrite += cbWritten,
			   (cbLeftToWrite -= cbWritten)));
	}
#endif

	return cchRead;
}
/*
 *	CRTFRead::UngetChar()
 *
 *	@mfunc
 *		Bump our file pointer back one char
 *
 *	@rdesc
 *		BOOL	TRUE on success; FALSE (with _ecParseError set to
 *				ecUnGetCharFailed) if nothing has been read yet or the
 *				pointer is already at the start of the buffer
 *
 *	@comm
 *		You can safely UngetChar _at most_ cbBackupMax times without
 *		error.
 */
BOOL CRTFRead::UngetChar()
{
	TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::UngetChar");

	// Backing up needs a live pointer that isn't already at the buffer start
	const BOOL fCanBackUp = _pchRTFCurrent && _pchRTFCurrent != _pchRTFBuffer;

	if (fCanBackUp)
	{
		--_pchRTFCurrent;
		return TRUE;
	}

	Assert(0);
	_ecParseError = ecUnGetCharFailed;
	return FALSE;
}
/*
 *	CRTFRead::UngetChar(cch)
 *
 *	@mfunc
 *		Bump our file pointer back 'cch' chars, one UngetChar() at a time
 *
 *	@rdesc
 *		BOOL	TRUE on success; FALSE as soon as a single-char UngetChar()
 *				fails (chars already backed over stay backed over)
 *
 *	@comm
 *		You can safely UngetChar _at most_ cbBackupMax times without
 *		error.
 */
BOOL CRTFRead::UngetChar(UINT cch)
{
	TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::UngetChar");

	AssertSz(cch <= cbBackupMax, "CRTFRead::UngetChar(): Number of UngetChar's "
		"exceeds size of backup buffer.");

	for (; cch > 0; cch--)
	{
		if (!UngetChar())
			return FALSE;
	}
	return TRUE;
}
/*
 *	CRTFRead::GetHex()
 *
 *	@mfunc
 *		Get next char if hex and return hex value
 *		If not hex, leave char in buffer and return 255
 *
 *	@rdesc
 *		BYTE	hex value (0-15) of GetChar() if hex; else 255
 *
 *	@devnote
 *		A 0 returned by GetChar() is not pushed back.
 *		Keep this in sync with GetHexSkipCRLF below.
 */
BYTE CRTFRead::GetHex()
{
	TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::GetHex");

	const BYTE ch = GetChar();

	if (!IsXDigit(ch))
	{
		if (ch)
			UngetChar();				// Leave the non-hex char for the caller
		return 255;
	}

	if (ch <= '9')
		return ch - '0';

	// Masking with 0x4f folds 'a'-'f' onto 'A'-'F'
	return (ch & 0x4f) - 'A' + 10;
}
/*
 *	CRTFRead::GetHexSkipCRLF()
 *
 *	@mfunc
 *		Skip any CR/LF chars, then get next char if hex and return hex value
 *		If not hex, leave char in buffer and return 255. The skipped CR/LF
 *		chars are not put back.
 *
 *	@rdesc
 *		BYTE	hex value (0-15) of GetChar() if hex; else 255
 *
 *	@devnote
 *		Keep this in sync with GetHex above.
 */
BYTE CRTFRead::GetHexSkipCRLF()
{
	TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::GetHexSkipCRLF");

	BYTE ch;

	// Read until something other than \r \n shows up
	do
	{
		ch = GetChar();
	} while (ch == CR || ch == LF);

	if (!IsXDigit(ch))
	{
		if (ch)
			UngetChar();				// Leave the non-hex char for the caller
		return 255;
	}

	if (ch <= '9')
		return ch - '0';

	// Masking with 0x4f folds 'a'-'f' onto 'A'-'F'
	return (ch & 0x4f) - 'A' + 10;
}
/*
 *	CRTFRead::TokenGetHex()
 *
 *	@mfunc
 *		Get an 8 bit character saved as a 2 hex digit value
 *
 *	@rdesc
 *		TOKEN	value of hex number read in; tokenError if either of the
 *				two chars is not a hex digit. The result is also stored
 *				in _token.
 */
TOKEN CRTFRead::TokenGetHex()
{
	TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::TokenGetHex");

	const BYTE bHigh = GetHex();

	if (bHigh < 16)
	{
		// Only look for the low nibble once the high one is known good
		const BYTE bLow = GetHex();

		if (bLow < 16)
		{
			_token = (bHigh << 4) | bLow;
			return _token;
		}
	}

	_token = tokenError;
	return _token;
}
/*
 *	CRTFRead::SkipToEndOfGroup()
 *
 *	@mfunc
 *		Skip to end of current group. Nested groups are tracked by counting
 *		braces, the char following a backslash is consumed so that it is not
 *		counted as a brace, and \binN data is skipped via SkipBinaryData().
 *
 *	@rdesc
 *		EC		_ecParseError as it stands when the closing brace of the
 *				group, or the end of the input, is reached
 */
EC CRTFRead::SkipToEndOfGroup()
{
	TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::SkipToEndOfGroup");

	INT cOpenGroups = 1;				// We start inside the group to skip

	for (;;)
	{
		BYTE ch = GetChar();

		switch (ch)
		{
			case BSLASH:
			{
				const BYTE chNext = GetChar();

				// EOF: stop; else ignore NULLs
				if (!chNext && _ecParseError == ecUnexpectedEOF)
					return _ecParseError;

				if (chNext == 'b' && UngetChar() &&
					TokenGetKeyword() == tokenBinaryData)
				{
					// We've encountered the \binN tag in the RTF we want
					// to skip. _iParam contains N from \binN once the
					// tag is parsed by TokenGetKeyword()
					SkipBinaryData(_iParam);
				}
				break;
			}

			case LBRACE:
				cOpenGroups++;
				break;

			case RBRACE:
				if (--cOpenGroups <= 0)
					return _ecParseError;	// Closed the group we started in
				break;

			case 0:
				// A 0 means end of input only if GetChar() flagged it
				if (_ecParseError == ecUnexpectedEOF)
					return _ecParseError;
				// Otherwise falls through (into default when PWD_JUPITER)

#ifdef PWD_JUPITER
			default:
				// GuyBark JupiterJ 50034: Detect Lead bytes here.
				if (IsLeadByte(ch, _nCodePage))
				{
					// Get the trailing byte and ignore it. Extra check
					// just in case we hit the end of the file.
					if (!GetChar() && _ecParseError == ecUnexpectedEOF)
						return _ecParseError;
				}
				break;
#endif // PWD_JUPITER
		}
	}
}
/*
* CRTFRead::TokenFindKeyword(szKeyword)
*
* @mfunc
* Find keyword <p szKeyword> and return its token value
*
* @rdesc
* TOKEN token number of keyword
*/
TOKEN CRTFRead::TokenFindKeyword(
BYTE * szKeyword) // @parm Keyword to find
{
TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::TokenFindKeyword");
INT iMin;
INT iMax;
INT iMid;
INT nComp;
BYTE * pchCandidate;
BYTE * pchKeyword;
const KEYWORD * pk;
AssertSz(szKeyword[0],
"CRTFRead::TokenFindKeyword: null keyword");
#ifdef RTF_HASHCACHE
if ( _rtfHashInited )
{
// Hash is 23% faster than the following binary search on finds
// and 55% faster on misses: For 97 words stored in a 257 cache.
// Performance numbers will change when the total stored goes up.
pk = HashKeyword_Fetch ( (CHAR *) szKeyword );
}
else
#endif
{
iMin = 0;
iMax = cKeywords - 1;
pk = NULL;
do // Note (MS3): Hash would be quicker than binary search
{
iMid = (iMin + iMax) / 2;
pchCandidate = (BYTE *)rgKeyword[iMid].szKeyword;
pchKeyword = szKeyword;
while (!(nComp = REToLower(*pchKeyword) - *pchCandidate) // Be sure to match
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -