📄 rtflex.cpp

📁 Windows CE 6.0 Word Application 源码
💻 CPP
📖 第 1 页 / 共 2 页
字号:
12 下一页
//
// Copyright (c) Microsoft Corporation.  All rights reserved.
//
//
// Use of this source code is subject to the terms of the Microsoft shared
// source or premium shared source license agreement under which you licensed
// this source code. If you did not accept the terms of the license agreement,
// you are not authorized to use this source code. For the terms of the license,
// please see the license agreement between you and Microsoft or, if applicable,
// see the SOURCE.RTF on your install media or the root of your tools installation.
// THE SOURCE CODE IS PROVIDED "AS IS", WITH NO WARRANTIES.
//
/*
 *	@doc INTERNAL
 *
 *	@module RTFLEX.CPP - RichEdit RTF reader lexical analyzer |
 *
 *		This file contains the implementation of the lexical analyzer part of
 *		the RTF reader.
 *
 *	Authors: <nl>
 *		Original RichEdit 1.0 RTF converter: Anthony Francisco <nl>
 *		Conversion to C++ and RichEdit 2.0:  Murray Sargent <nl>
 *
 *	@devnote
 *		All sz's in the RTF*.? files refer to LPSTRs, not LPTSTRs, unless
 *		noted as szUnicode.
 */

#include "_common.h"
#include "_rtfread.h"
#include "hash.h"

ASSERTDATA

#include "tokens.cpp"

#pragma BEGIN_CODESPACE_DATA

// Array used by character classification macros to speed classification
// of chars residing in two or more discontiguous ranges, e.g., alphanumeric
// or hex.  The alphabetics used in RTF control words are lower-case ASCII.
// *** DO NOT DBCS rgbCharClass[] ***

// Composite character-class flags, used to keep the rows of rgbCharClass[]
// compact.  Each one is the sum of two basic f* class flags that are defined
// elsewhere.  The replacement lists are parenthesized so that each macro
// expands to a single value even when it appears inside a larger expression
// (for example as an operand of & or *), not only as a bare initializer.
#define	fCS		(fCT + fSP)
#define fSB		(fBL + fSP)
#define fHD		(fHX + fDG)
#define	fHU		(fHX + fUC)
#define	fHL		(fHX + fLC)

// Classification table with one entry per byte value (0x00-0xFF).  Each
// entry is a basic f* class flag or one of the composite flags defined
// above; each row below covers 16 consecutive byte values.
// NOTE(review): the basic flags are defined elsewhere; presumably
// fCT = control, fSP = space, fBL = blank, fPN = punctuation, fDG = digit,
// fHX = hex digit, fUC = upper case, fLC = lower case -- confirm against
// their definitions.
const BYTE rgbCharClass[256] =
{
	fCT,fCT,fCT,fCT,fCT,fCT,fCT,fCT, fCT,fCS,fCS,fCS,fCS,fCS,fCT,fCT,	// 0x00-0x0F: 0x09-0x0D (TAB, LF, VT, FF, CR) are fCS
	fCT,fCT,fCT,fCT,fCT,fCT,fCT,fCT, fCT,fCT,fCT,fCT,fCT,fCT,fCT,fCT,	// 0x10-0x1F
	fSB,fPN,fPN,fPN,fPN,fPN,fPN,fPN, fPN,fPN,fPN,fPN,fPN,fPN,fPN,fPN,	// 0x20-0x2F: ' ' is fSB, '!'..'/' are fPN
	fHD,fHD,fHD,fHD,fHD,fHD,fHD,fHD, fHD,fHD,fPN,fPN,fPN,fPN,fPN,fPN,	// 0x30-0x3F: '0'..'9' are fHD, ':'..'?' are fPN

	fPN,fHU,fHU,fHU,fHU,fHU,fHU,fUC, fUC,fUC,fUC,fUC,fUC,fUC,fUC,fUC,	// 0x40-0x4F: '@' is fPN, 'A'..'F' are fHU, 'G'..'O' are fUC
	fUC,fUC,fUC,fUC,fUC,fUC,fUC,fUC, fUC,fUC,fUC,fPN,fPN,fPN,fPN,fPN,	// 0x50-0x5F: 'P'..'Z' are fUC, '['..'_' are fPN
	fPN,fHL,fHL,fHL,fHL,fHL,fHL,fLC, fLC,fLC,fLC,fLC,fLC,fLC,fLC,fLC,	// 0x60-0x6F: '`' is fPN, 'a'..'f' are fHL, 'g'..'o' are fLC
	fLC,fLC,fLC,fLC,fLC,fLC,fLC,fLC, fLC,fLC,fLC,fPN,fPN,fPN,fPN,fPN,	// 0x70-0x7F: 'p'..'z' are fLC, '{'..'~' and 0x7F are fPN
	// NOTE(review): 0x7F (DEL) is entered as fPN rather than fCT -- confirm
	// that this is intended.

	// 0x80-0xFF: no class flags set
	0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,

	0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
};

// The five characters {\rtf (the backslash is escaped in the literal).
// NOTE(review): presumably matched against the start of the input to
// recognize RTF; the use is not visible in this part of the file -- confirm.
const char szRTFSig[] = "{\\rtf";

// Specifies the number of bytes we can safely "UngetChar"
// before possibly underflowing the buffer.  FillBuffer() reserves this many
// bytes at the front of _pchRTFBuffer and starts reading new data after them.
const int cbBackupMax = 4;

#pragma END_CODESPACE_DATA

// Returns nonzero (1) if b is an ASCII letter 'a'..'z' or 'A'..'Z', else 0.
// Bug2298 - an RTF writer was found which emits uppercase RTF keywords, so
//			keyword scanning accepts both cases (IsAlphaChar) instead of
//			lower case only (IsLCAscii).
inline BOOL IsAlphaChar(BYTE b)
{
	const bool fLower = (b >= 'a') && (b <= 'z');
	const bool fUpper = (b >= 'A') && (b <= 'Z');

	return fLower || fUpper;
}

// Quick and dirty tolower(b): sets bit 0x20, which maps 'A'..'Z' onto
// 'a'..'z' and leaves 'a'..'z' unchanged.  A zero byte is returned as is.
// Only meaningful for letters and 0, which the Assert checks.
inline BYTE REToLower(BYTE b)
{
	Assert(!b || IsAlphaChar(b));

	if (!b)
		return b;

	return b | 0x20;
}


/*
 *	CRTFRead::InitLex()
 *
 *	@mfunc
 *		Initialize the lexical analyzer by allocating its two zero-filled
 *		work buffers: _szText (cachTextMax + 1 bytes) and _pchRTFBuffer
 *		(cachBufferMost bytes).  On allocation failure the call manager is
 *		told about the out-of-memory condition and _ecParseError is set to
 *		ecLexInitFailed.  Note: in RichEdit 2.0, loading the keyword list
 *		from a resource file is currently not supported.
 *
 *	@rdesc
 *		TRUE				If lexical analyzer was initialized
 */
BOOL CRTFRead::InitLex()
{
	TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::InitLex");

	AssertSz(cKeywords == i_TokenIndexMax,
		"Keyword index enumeration is incompatible with rgKeyword[]");
	Assert(!_szText && !_pchRTFBuffer);

	// _szText gets one byte more than cachTextMax so that hex conversion
	// doesn't have to worry about running off the end if the first char
	// is NULL
	_szText = (BYTE *)PvAlloc(cachTextMax + 1, GMEM_ZEROINIT);
	if (_szText)
	{
		// The input buffer is only requested once the text buffer exists
		_pchRTFBuffer = (BYTE *)PvAlloc(cachBufferMost, GMEM_ZEROINIT);
		if (_pchRTFBuffer)
			return TRUE;				// Both buffers ready: lexer is initialized
	}

	// At least one allocation failed
	_ped->GetCallMgr()->SetOutOfMemory();
	_ecParseError = ecLexInitFailed;
	return FALSE;
}

/*
 *	CRTFRead::DeinitLex()
 *
 *	@mfunc
 *		Shut down lexical analyzer
 */
void CRTFRead::DeinitLex()
{
	TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::DeinitLex");

#ifdef KEYWORD_RESOURCE
	if (hglbKeywords)
	{
		FreeResource(hglbKeywords);
		hglbKeywords = NULL;
		rgKeyword = NULL;
	}
#endif

	FreePv(_szText);
	FreePv(_pchRTFBuffer);
}

/*
 *	CRTFRead::GetChar()
 *
 *	@mfunc
 *		Return the next byte of RTF input and advance past it, refilling
 *		the buffer through FillBuffer() once it has been used up
 *
 *	@rdesc
 *		BYTE			next input byte; 0 if the buffer was used up and
 *						FillBuffer() returned 0, in which case
 *						_ecParseError is set to ecUnexpectedEOF
 */
BYTE CRTFRead::GetChar()
{
	TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::GetChar");

	// FillBuffer() is only called when nothing unread is left in the buffer
	if (_pchRTFCurrent != _pchRTFEnd || FillBuffer())
	{
		const BYTE bNext = *_pchRTFCurrent;

		++_pchRTFCurrent;
		return bNext;
	}

	_ecParseError = ecUnexpectedEOF;
	return 0;
}

/*
 *	CRTFRead::FillBuffer()
 *
 *	@mfunc
 *		Fill RTF buffer & return != 0 if successful.  The first cbBackupMax
 *		bytes of the buffer form a backup area that receives the most
 *		recently read chars; new data from the stream callback is stored
 *		after it.
 *
 *	@rdesc
 *		LONG			# chars read; 0 if the stream callback reported an
 *						error (_ecParseError is then set to
 *						ecGeneralFailure) or delivered no data
 *
 *	@comm
 *		This routine doesn't bother copying anything down if
 *		pchRTFCurrent <lt> pchRTFEnd so anything not read yet is lost.
 *		The only exception to this is that it always copies down the
 *		last bytes read (up to cbBackupMax of them) so that UngetChar()
 *		will work. ReadData() actually counts on this behavior, so if you
 *		change it, change ReadData() accordingly.
 */
LONG CRTFRead::FillBuffer()
{
	TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::FillBuffer");

	// Start from 0 so that a callback which reports success without storing
	// a count is treated as having delivered no data, instead of the buffer
	// end being computed from an uninitialized value.
	LONG cchRead = 0;

	if (!_pchRTFCurrent)				
	{									
		// No data yet, nothing for backup
		// Leave cbBackupMax NULL chars so backup
		// area of buffer doesn't contain garbage.

		for(int i = 0; i < cbBackupMax; i++)
		{
			_pchRTFBuffer[i] = 0;
		}
	}
	else
	{
		Assert(_pchRTFCurrent == _pchRTFEnd);

		// Copy most recently read chars in case
		//  we need to back up.  Only chars located after the backup area
		//  of the current buffer contents are candidates, so fewer than
		//  cbBackupMax may be available.

		int cbBackup = min(cbBackupMax, 
				DiffPtrs(_pchRTFCurrent, &_pchRTFBuffer[cbBackupMax], BYTE)); 
		int i;

		// Walk backwards from the last char read, placing the chars so
		// that they end right where the new data will start
		for(i = -1; i >= -cbBackup; i--)
		{
			_pchRTFBuffer[cbBackupMax + i] = _pchRTFCurrent[i];
		}

		if(cbBackup < cbBackupMax)
		{
			// i is now -(cbBackup + 1): NULL the byte just before the
			// first valid character in the backup area
			_pchRTFBuffer[cbBackupMax + i] = 0;
		}
	}
	_pchRTFCurrent = &_pchRTFBuffer[cbBackupMax];

	// Fill buffer with as much as we can take given our starting offset
	_pes->dwError = _pes->pfnCallback(_pes->dwCookie,
									  _pchRTFCurrent,
									  cachBufferMost - cbBackupMax,
									  &cchRead);
	if (_pes->dwError)
	{
		TRACEERRSZSC("RTFLEX: GetChar()", _pes->dwError);
		_ecParseError = ecGeneralFailure;
		return 0;
	}

	// A count outside of what was requested would put _pchRTFEnd outside
	// of the data actually stored in the buffer
	AssertSz(cchRead >= 0 && cchRead <= cachBufferMost - cbBackupMax,
		"CRTFRead::FillBuffer():  stream callback returned an out-of-range "
		"byte count.");

	_pchRTFEnd = &_pchRTFBuffer[cbBackupMax + cchRead];		// Point the end

#if defined(DEBUG) && !defined(MACPORT)
	// Debug aid: mirror the newly read data into the capture file, if any,
	// repeating the write until everything is written or a write fails
	if(_hfileCapture)
	{
		DWORD cbLeftToWrite = cchRead;
		DWORD cbWritten = 0;
		BYTE *pbToWrite = (BYTE *)_pchRTFCurrent;
		
		while(WriteFile(_hfileCapture,
						pbToWrite,
						cbLeftToWrite,
						&cbWritten,
						NULL) && 
						(pbToWrite += cbWritten,
						(cbLeftToWrite -= cbWritten)));
	}
#endif

	return cchRead;
}

/*
 *	CRTFRead::UngetChar()
 *
 *	@mfunc
 *		Step the read position back by one char so that the char most
 *		recently returned by GetChar() is read again
 *
 *	@rdesc
 *		BOOL				TRUE on success; FALSE (with _ecParseError set
 *							to ecUnGetCharFailed) if nothing has been read
 *							yet or the position is already at the very
 *							start of the buffer
 *
 *	@comm
 *		You can safely UngetChar _at most_ cbBackupMax times without
 *		error.
 */
BOOL CRTFRead::UngetChar()
{
	TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::UngetChar");

	// Backing up needs a valid position that is not the buffer start
	if (_pchRTFCurrent && _pchRTFCurrent != _pchRTFBuffer)
	{
		--_pchRTFCurrent;
		return TRUE;
	}

	Assert(0);
	_ecParseError = ecUnGetCharFailed;
	return FALSE;
}

/*
 *	CRTFRead::UngetChar(cch)
 *
 *	@mfunc
 *		Step the read position back by 'cch' chars, one char at a time
 *
 *	@rdesc
 *		BOOL				TRUE on success; FALSE as soon as a single-char
 *							UngetChar() fails (chars backed up before the
 *							failure stay backed up)
 *
 *	@comm
 *		You can safely UngetChar _at most_ cbBackupMax times without
 *		error.
 */
BOOL CRTFRead::UngetChar(UINT cch)
{
	TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::UngetChar");

	AssertSz(cch <= cbBackupMax, "CRTFRead::UngetChar():  Number of UngetChar's "
								"exceeds size of backup buffer.");

	for (UINT cchLeft = cch; cchLeft != 0; --cchLeft)
	{
		if (!UngetChar())
			return FALSE;
	}

	return TRUE;
}

/*
 *	CRTFRead::GetHex()
 *
 *	@mfunc
 *		Read the next char and, if it is a hex digit, return its value
 *		(0 - 15).  A nonzero char that is not a hex digit is put back with
 *		UngetChar(); a zero char is not put back.
 *
 *	@rdesc
 *		BYTE			hex value of GetChar() if hex; else 255
 */
BYTE CRTFRead::GetHex()
{
	TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::GetHex");

	BYTE bRead = GetChar();

	if (!IsXDigit(bRead))
	{
		if (bRead)
			UngetChar();
		return 255;
	}

	if (bRead <= '9')
		return bRead - '0';

	// Masking with 0x4f clears bit 0x20, so 'a'..'f' and 'A'..'F' both
	// become 'A'..'F' before being turned into 10..15
	return (bRead & 0x4f) - 'A' + 10;
}

/*
 *	CRTFRead::GetHexSkipCRLF()
 *
 *	@mfunc
 *		Like GetHex(), but any CR and LF chars in front of the char are
 *		read and discarded first.  If the char that follows is a hex digit,
 *		its value (0 - 15) is returned.  A nonzero char that is not a hex
 *		digit is put back with a single UngetChar(); the discarded CR/LF
 *		chars are not put back.
 *
 *	@rdesc
 *		BYTE			hex value of GetChar() if hex; else 255
 *
 *	@devnote
 *		Keep this in sync with GetHex above.
 */
BYTE CRTFRead::GetHexSkipCRLF()
{
	TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::GetHexSkipCRLF");

	BYTE bRead;

	// Read until something other than \r or \n shows up
	do
	{
		bRead = GetChar();
	} while (bRead == CR || bRead == LF);

	if (!IsXDigit(bRead))
	{
		if (bRead)
			UngetChar();
		return 255;
	}

	if (bRead <= '9')
		return bRead - '0';

	// Masking with 0x4f clears bit 0x20, so 'a'..'f' and 'A'..'F' both
	// become 'A'..'F' before being turned into 10..15
	return (bRead & 0x4f) - 'A' + 10;
}

/*
 *	CRTFRead::TokenGetHex()
 *
 *	@mfunc
 *		Read an 8 bit character stored as two hex digits (high digit
 *		first) and store the result in _token.  The second digit is only
 *		read when the first one is valid.
 *
 *	@rdesc
 *		TOKEN			value of hex number read in; tokenError if either
 *						digit is missing
 */
TOKEN CRTFRead::TokenGetHex()
{
	TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::TokenGetHex");

	const BYTE bHigh = GetHex();
	BYTE bLow = 255;				// Stays "not hex" unless a second digit is read

	if (bHigh < 16)
		bLow = GetHex();

	// bLow can only be a digit value if bHigh was one as well
	if (bLow < 16)
		_token = bHigh << 4 | bLow;
	else
		_token = tokenError;

	return _token;
}

/*
 *	CRTFRead::SkipToEndOfGroup()
 *
 *	@mfunc
 *		Skip to end of current group: read and discard input up to and
 *		including the right brace that closes the group the reader is in,
 *		counting braces to step over nested groups.
 *
 *	@rdesc
 *		EC				An error code (the value of _ecParseError on exit;
 *						this function itself does not modify it when the
 *						closing brace is found)
 *
 *	@comm
 *		The char following a backslash is consumed together with the
 *		backslash, so it is never counted as a brace.  A keyword starting
 *		with 'b' is parsed with TokenGetKeyword() so that the data of a
 *		\binN keyword can be stepped over with SkipBinaryData().
 */
EC CRTFRead::SkipToEndOfGroup()
{
	TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::SkipToEndOfGroup");

	INT		nDepth = 1;		// Nesting level; 1 = the group being skipped
	BYTE	ach;

	// The loop is only left through the "goto done" statements below
	while(TRUE)
	{
		ach = GetChar();
		switch(ach)
		{
			case BSLASH:
			{
				BYTE achNext = GetChar();

				// EOF: goto done; else ignore NULLs
				if(!achNext && _ecParseError == ecUnexpectedEOF)
					goto done;

				// Only for 'b' is the char put back and the whole keyword
				// parsed; for anything else the char stays consumed
				if(achNext == 'b' && UngetChar() && 
					TokenGetKeyword() == tokenBinaryData)
				{
					// We've encountered the \binN tag in the RTF we want
					//	to skip.  _iParam contains N from \binN once the 
					// 	tag is parsed by TokenGetKeyword()
					SkipBinaryData(_iParam);
				}
				break;
			}

			case LBRACE:
				nDepth++;
				break;

			case RBRACE:
				// Leave once the brace closing the starting group is seen
				if (--nDepth <= 0)
					goto done;
				break;

			case 0:
				// A zero char at EOF ends the skip; any other zero char
				// falls through (into default: if PWD_JUPITER is defined,
				// otherwise out of the switch)
				if(_ecParseError == ecUnexpectedEOF)
					goto done;

#ifdef PWD_JUPITER
			default:

				// GuyBark JupiterJ 50034: Detect Lead bytes here.
				if(IsLeadByte(ach, _nCodePage))
				{
					// Get the trailing byte and ignore it.
					ach = GetChar();

					// Extra check just in case we hit the end of the file.
					if(ach == 0)
					{
					    if(_ecParseError == ecUnexpectedEOF)
					        goto done;
					}

				}

				break;
#endif // PWD_JUPITER                
		}
	} 

	// NOTE(review): the loop above has no exit other than "goto done", so
	// the two statements below look unreachable -- confirm.
	Assert(!_ecParseError);
	_ecParseError = ecUnexpectedEOF;

done:
	return _ecParseError;
}

/*
 *	CRTFRead::TokenFindKeyword(szKeyword)
 *
 *	@mfunc
 *		Find keyword <p szKeyword> and return its token value
 *
 *	@rdesc
 *		TOKEN			token number of keyword
 */
TOKEN CRTFRead::TokenFindKeyword(
	BYTE *	szKeyword)			// @parm Keyword to find
{
	TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::TokenFindKeyword");

	INT				iMin;
	INT				iMax;
	INT				iMid;
	INT				nComp;
	BYTE *			pchCandidate;
	BYTE *			pchKeyword;
	const KEYWORD *	pk;

	AssertSz(szKeyword[0],
		"CRTFRead::TokenFindKeyword: null keyword");

#ifdef RTF_HASHCACHE
	if ( _rtfHashInited )
	{
		// Hash is 23% faster than the following binary search on finds
		//  and 55% faster on misses: For 97 words stored in a 257 cache.
		//  Performance numbers will change when the total stored goes up.
		pk = HashKeyword_Fetch ( (CHAR *) szKeyword );
	}
	else
#endif
	{
		iMin = 0;
		iMax = cKeywords - 1;
		pk = NULL;
		do				// Note (MS3): Hash would be quicker than binary search
		{
			iMid		 = (iMin + iMax) / 2;
			pchCandidate = (BYTE *)rgKeyword[iMid].szKeyword;
			pchKeyword	 = szKeyword;
			while (!(nComp = REToLower(*pchKeyword) - *pchCandidate)	// Be sure to match
12 下一页
💿 文件大小 877 K
👤 上传用户 maige49
📂 所属分类 Windows CE
🏷️ 相关标签

#Application #Windows #Word #6.0
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -