⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 rtflex.cpp

📁 Windows CE 6.0 Word Application 源码
💻 CPP
📖 第 1 页 / 共 2 页
字号:
				&& *pchKeyword)											//  terminating 0's
			{
				pchKeyword++;
				pchCandidate++;
			}
			if (nComp < 0)
				iMax = iMid - 1;
			else if (nComp)
				iMin = iMid + 1;
			else
			{
				pk = &rgKeyword[iMid];
				break;
			}
		} while (iMin <= iMax);
	}


	if( pk )
	{
		_token = pk->token;
		
		// Here, we log the RTF keyword scan to aid in tracking RTF tag coverage
// TODO: Implement RTF tag logging for the Mac and WinCE
#if defined(DEBUG) && !defined(MACPORT) && !defined(PEGASUS)
		if(_prtflg) 
		{
#ifdef RTF_HASCACHE
			_prtflg->AddAt(szKeyword); 
#else
			_prtflg->AddAt((size_t)iMid);
#endif
		}
#endif
	}
	else
		_token = tokenUnknownKeyword;		// No match: TODO: place to take

	return _token;				 			//  care of unrecognized RTF
}

/*
 *	CRTFRead::TokenGetKeyword()
 *
 *	@mfunc
 *		Collect a keyword and its parameter. Return token's keyword
 *
 *	@rdesc
 *		TOKEN				token number of keyword, tokenEndParagraph for a
 *							hex-encoded CR/LF in document text, the result of
 *							TokenGetText() for a symbol that turns out to be
 *							plain text, or tokenError on failure
 *
 *	@comm
 *		Most RTF control words (keywords) consist of a span of lower-case
 *		ASCII letters possibly followed by a span of decimal digits. Other
 *		control words consist of a single character that isn't LC ASCII. No
 *		control words contain upper-case characters.
 *
 *	@devnote
 *		Called after the leading backslash has been read. The keyword is
 *		collected into a fixed-size stack buffer, so its length is bounded
 *		explicitly: at most cachKeywordMax - 1 letters plus the terminating
 *		NUL. A longer run of letters sets _ecParseError = ecKeywordTooLong
 *		and returns tokenError instead of writing past the buffer.
 *		The numeric parameter is stored both as text in _szParam and as a
 *		number in _iParam (negated when the text starts with '-').
 */
TOKEN CRTFRead::TokenGetKeyword()
{
	TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::TokenGetKeyword");

	BYTE		ach = GetChar();
	BYTE *		pach;
	const BYTE * pachSymbol;
	SHORT		cachKeyword = 1;				// Count of chars in szKeyword
	BYTE		szKeyword[cachKeywordMax];

	_szParam[0] = '\0';							// Clear parameter
	_iParam = 0;

	if(!IsAlphaChar(ach))						// Not alpha, i.e.,
	{											//  single char
		if (ach == '\'')						// Most common case needs
		{										//  special treatment
			// Convert hex to char and store result in _token
			if(TokenGetHex() == tokenError)
			{
				_ecParseError = ecUnexpectedChar;
				goto TokenError;
			}
			if((_token == CR || _token == LF) && FInDocTextDest())
			{
				// Add raw CR or LF in the byte stream as a \par
				return tokenEndParagraph;
			}
		}
		else
		{
			pachSymbol = szSymbolKeywords;		// Check for other known
			while (ach != *pachSymbol && *pachSymbol)	//  symbols
				pachSymbol++;
			if (*pachSymbol)					// Found one
			{
				_token = tokenSymbol[pachSymbol - szSymbolKeywords];
				if(_token > 0x7F)				// Token or larger Unicode
					return _token;				//  value
			}
			else if (!ach)						// No more input chars
				goto TokenError;
			else								// Code for unrecognized RTF
				_token = ach;					// We'll just insert it for now
		}
		// What is left in _token is a single 8-bit char: treat it as the
		// first char of a text run
		_token = TokenGetText((BYTE)_token);
		return _token;
	}

	szKeyword[0] = ach;							// Collect keyword that starts
	pach = szKeyword + 1;						// 	with an alphabetic char
	while (IsAlphaChar(ach = GetChar()))
	{
		// Keep one slot free for the terminating NUL. The count has to be
		// advanced with every stored char; otherwise the bound is never
		// reached and a long keyword overruns szKeyword.
		if (cachKeyword >= cachKeywordMax - 1)
		{
			_ecParseError = ecKeywordTooLong;
			goto TokenError;
		}
		*pach++ = ach;
		cachKeyword++;
	}
	*pach = '\0';								// Terminate keyword

	if (IsDigit(ach) || ach == '-')				// Collect parameter
	{
		int cch = 0;							// Count of chars in _szParam

		pach = _szParam;
		*pach++ = ach;
		cch++;
		if(ach != '-')
			_iParam = ach - '0';				// Get parameter value

		// Stop one short of cachParamMax so that the terminator written
		// below stays within cachParamMax bytes.
		// NOTE(review): the declaration of _szParam isn't visible here; this
		// assumes it holds cachParamMax bytes - confirm in _rtfread.h.
		// A digit read but not stored is pushed back by UngetChar() below.
		while (IsDigit(ach = GetChar()) && cch < cachParamMax - 1)
		{
			_iParam = _iParam*10 + ach - '0';
			*pach++ = ach;
			cch++;
		}
		*pach = '\0';							// Terminate parameter string
		if (_szParam[0] == '-')
			_iParam = -_iParam;
	}

	if (!_ecParseError &&						// We overshot:
		(ach == ' ' || UngetChar()))			//  if not ' ', unget char
			return TokenFindKeyword(szKeyword);	// Find and return keyword

TokenError:
	TRACEERRSZSC("TokenGetKeyword()", _ecParseError);
	return _token = tokenError;
}

/*
 *	CRTFRead::TokenGetText(ach)
 *
 *	@mfunc
 *		Collect a string of text starting with the char <p ach> and treat as a
 *		single token. The string ends when a LBRACE, RBRACE, or single '\\' is found.
 *
 *	@devnote
 *		We peek past the '\\' for \\'xx, which we decode and keep on going;
 *		else we return in a state where the next character is the '\\'.
 *
 *		The collected bytes are written to _szText and NUL-terminated on the
 *		successful exit only. While trail bytes of a multi-byte character are
 *		still outstanding (cTrailBytesNeeded != 0), delimiters are taken as
 *		raw trail bytes and the length limit is ignored, so that a character
 *		is never split across two tokens.
 *
 *		TokenGetHex() leaves the decoded byte in _token, so the error exit
 *		assigns tokenError explicitly instead of relying on the value that
 *		was stored in _token on entry.
 *
 *	@rdesc
 *		TOKEN			Token number of next token (tokenASCIIText if every
 *						collected byte is <= 0x7F, else tokenText; tokenError
 *						on failure)
 */
TOKEN CRTFRead::TokenGetText(
	BYTE ach)				// @parm First char of 8-bit text string
{
	TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::TokenGetText");

	BYTE *	pach = _szText;						// Next free slot in _szText
	SHORT	cachText = 0;						// Count of bytes stored
	LONG	CodePage = _pstateStackTop->nCodePage;
	BOOL	fAllASCII = TRUE;					// No byte > 0x7F seen so far
	int     cTrailBytesNeeded = 0;				// Trail bytes still expected

	// GuyBark JupiterJ 11/16/98
	// This routine's old use of "fLeadByte" has been replaced with the current
	// RichEdit 3.0 use of "cTrailBytesNeeded". This came to light when I 
	// investigated 50477, which happened when an attempt to paste UTF8 failed.
	// It turns out the old routine couldn't handle UTF8 as that's not DBCS.
	// Instead it can have multiple trail bytes.

	_token = tokenError;						// Default error

	// FUTURE(BradO):  This 'goto' into a while loop is pretty weak.
	//	Restructure this 'while' loop such that the 'goto' is removed.

	// Add the character passed into the routine
	goto add;

	// If cTrailBytesNeeded is non-zero, we need to get all the trail bytes.  Otherwise,
	// a string end in the middle of a DBC or UTF-8 will cause bad display/print problem
	// - 5 to allow extra space for up to 4 bytes for UTF-8 and Null char
	while (cachText < cachTextMax - 5 || cTrailBytesNeeded)
	{
		ach = GetChar();
		switch (ach)
		{
			case BSLASH:
			{
				// FUTURE(BradO):  This code looks a lot like TokenGetKeyword.
				//	We should combine the two into a common routine.

				BYTE achNext;

				// Get char after BSLASH
				if(!(achNext = GetChar()))
				{
					goto error;
				}

				if(achNext == '\'')					// Handle most frequent
				{									//  case here
					if(TokenGetHex() == tokenError)
					{
						if(cTrailBytesNeeded)
						{
							// The trail-byte must be a raw BSLASH.
							// Unget the single-quote.

							if(!UngetChar())
							{
								goto error;
							}
							// Go on to add the BSLASH (ach is still BSLASH)
						}
						else
						{
							_ecParseError = ecUnexpectedChar;
							goto error;
						}
					}
					else
					{
						// TokenGetHex() left the decoded byte in _token
						ach = (BYTE)_token;

						if((cTrailBytesNeeded == 0) && (ach == CR || ach == LF) &&
							FInDocTextDest())
						{
							// Here, we have a raw CR or LF in document text.  
							// Unget the whole lot of characters (the four
							// chars of \'xx) and bail out.  
							// TokenGetKeyword will convert this CR or LF into
							// a \par.

							if(!UngetChar(4))
							{
								goto error;
							}
							goto done;
						}
					}
					goto add;
				}

				// If we are not expecting a trail byte, then
				// check next byte against list of RTF symbol

				// GuyBark JupiterJ 50478:
				// Forget about only doing this if we're not expecting a trail byte.
				// Say we have a "\{" as the trail byte of a DBCS character. Before,
				// if we were trying to find the trail byte, we'd simply pick up the
				// backslash and use that. The subsequent '{' would then start the 
				// next group, and everything fails. This RTF really means "the trail 
				// byte has the value of the '{' character". By always executing the
				// following code we interpret the RTF as it's meant to be.

				const BYTE *pachSymbol;

				// Check for other known symbols
				pachSymbol = szSymbolKeywords;

				while(achNext != *pachSymbol && *pachSymbol)
				{
					pachSymbol++;
				}

				TOKEN tokenTmp;

				// Only symbols that map to a 7-bit char value are added to the
				// text run here
				if(*pachSymbol && 
					(tokenTmp = tokenSymbol[pachSymbol - szSymbolKeywords])
						 <= 0x7F)
				{
					ach = tokenTmp;
					goto add;
				}

				// We will want to unget the byte following the BSLASH
				if(!UngetChar())
				{
					goto error;
				}

				// GuyBark JupiterJ 50203:
				// The following test has been made tighter with the checking of
				// IsAlphaChar() here, (lifted from RichEdit3). This is to trap
				// hitting RTF tokens when we were expecting the trail byte of a 
				// DBCS character. For example, we can get RTF that contains this
				// in the middle of a FE text run...
				//
				// 0x81\plain
				//
				// 0x81 is a lead byte so we're expecting a trail byte to follow.
				// Previously we'd pick up the backslash as the trail byte, and then
				// add "plain" to the main text run. In fact, this RTF is bad, but
				// we can get it anyway. What we now do is consider 0x81 to be an
				// SBCS extended character, and then the "\plain" is treated as the
				// RTF token it is.

				if(cTrailBytesNeeded && !IsAlphaChar(achNext))
				{
					// This BSLASH is a raw BSLASH which is the trail 
					// byte for a DBCS character.
					// Add the raw BSLASH
					goto add;
				}

				// Here, my guess is that the BSLASH begins the next RTF 
				// keyword, so unget the BSLASH
				if(!UngetChar())
				{
					goto error;
				}
				goto done;
			}

			case LBRACE:						// End of text string
			case RBRACE:
				if(cTrailBytesNeeded)
				{
					// Previous char was a lead-byte of a DBCS pair, which makes
					// this char a raw trail-byte.
					goto add;
				}

				if(!UngetChar())				// Unget delimiter
				{
					goto error;
				}
				goto done;

			case LF:							// Throw away noise chars
			case CR:
				break;

			case 0:
				if(_ecParseError == ecUnexpectedEOF)
					goto done;
				ach = ' ';						// Replace NULL by blank
												// Fall thru to default
			default:							// Collect chars
add:
				// Outstanding chars to be skipped after \uN tag
				if(_cbSkipForUnicode)
				{
					_cbSkipForUnicode--;
					continue;
				}

				*pach++ = ach;

				++cachText;

				if(ach > 0x7F)
				{
					fAllASCII = FALSE;
				}

				// GuyBark JupiterJ 50477:
				// RichEdit 3.0 now calls a new routine here to determine if
				// this is a lead byte. But with the exception of UTF8, we
				// already determine this ok with the call to IsLeadByte()
				// below. But for UTF8 we must take special action here.

				// Check if we are expecting more trail bytes
				if (cTrailBytesNeeded)
				{
					cTrailBytesNeeded--;
				}
				else
				{
					// Not already processing trail bytes. Is this UTF8?
					if(CodePage == CP_UTF8)
					{
						// Yes. Take exactly the same steps here for UTF8 as the new 
						// RichEdit 3.0 does. The maximum trail byte count we use here 
						// is 3, even though in theory it could go up to 5. I queried 
						// this with Murray Sargent who said that the number is 3 for 
						// the foreseeable future (covers all the Unicode surrogates).

						if (ach >= 0x0F0)
						{
							cTrailBytesNeeded = 3;
						}
						else if (ach >= 0x0E0)
						{
							cTrailBytesNeeded = 2;
						}
						else if (ach >= 0x0C0)
						{
							cTrailBytesNeeded = 1;
						}
						else
						{
							cTrailBytesNeeded = 0;
						}
					}
					else
					{
						// This is not UTF8, so IsLeadByte() will do nicely.
						cTrailBytesNeeded = (IsLeadByte(ach, CodePage) ? 1 : 0);
					}
				}

				Assert(cTrailBytesNeeded >= 0);
		}
	}

done:
	_token = fAllASCII ? tokenASCIIText : tokenText;
	*pach = '\0';								// Terminate token string
	return _token;

error:
	// _token may hold a byte decoded by an earlier TokenGetHex() call rather
	// than the default set on entry, so report the failure explicitly.
	return _token = tokenError;
}
 
/*
 *	CRTFRead::TokenGetToken()
 *
 *	@mfunc
 *		Read the next token from the input stream. The previous token is
 *		saved in _tokenLast first. CR and LF are skipped; a brace yields a
 *		group token, a backslash hands over to TokenGetKeyword(), and any
 *		other char starts either object/picture data or a text run.
 *
 *	@rdesc
 *		TOKEN				token number of next token (tokenEOF when a 0
 *							char is read with ecUnexpectedEOF set)
 */
TOKEN CRTFRead::TokenGetToken()
{
	TRACEBEGIN(TRCSUBSYSRTFR, TRCSCOPEINTERN, "CRTFRead::TokenGetToken");

	BYTE		bNext;

	_tokenLast	= _token;					// Used by \* destinations and FE
	_token = tokenEOF;

	do										// Skip over noise chars
	{
		bNext = GetChar();
	} while (bNext == CR || bNext == LF);

	if (bNext == LBRACE)
	{
		_token = tokenStartGroup;
	}
	else if (bNext == RBRACE)
	{
		_token = tokenEndGroup;
	}
	else if (bNext == BSLASH)
	{
		_token = TokenGetKeyword();
	}
	else if (bNext != 0 || _ecParseError != ecUnexpectedEOF)
	{
		// Anything but a 0 char at unexpected end of input lands here;
		// in that one case _token simply stays tokenEOF
		if (bNext == 0)
		{
			bNext = ' ';					// A NULL is treated as a blank
		}

		if( !_pstateStackTop )
		{
			TRACEWARNSZ("Unexpected token in rtf file");
			Assert(_token == tokenEOF);
		}
		else if (_pstateStackTop->sDest == destObjectData || 
				 _pstateStackTop->sDest == destPicture )
		{
			// Data rather than text: the token is offset by the destination,
			// and the char just read is put back
			_token = tokenObjectDataValue + _pstateStackTop->sDest - destObjectData;
			UngetChar();
		}
		else
		{
			_token = TokenGetText(bNext);
		}
	}

	return _token;
}


/*
 *	CRTFRead::FInDocTextDest()
 *
 *	@mfunc
 *		Tell whether the destination at the top of the state stack is one
 *		in which document text can be encountered.
 *
 *	@rdesc
 *		BOOL	TRUE if the current destination may contain document text;
 *				FALSE for the destinations listed as non-text below. A
 *				destination not listed at all asserts and yields TRUE.
 */
BOOL CRTFRead::FInDocTextDest() const
{
	BOOL fDocText = TRUE;					// Assume text unless ruled out

	switch(_pstateStackTop->sDest)
	{
		// Destinations that never carry document text
		case destFontTable:
		case destRealFontName:
		case destObjectClass:
		case destObjectName:
		case destFollowingPunct:
		case destLeadingPunct:
		case destColorTable:
		case destBinary:
		case destObject:
		case destObjectData:
		case destPicture:
		case destDocumentArea:
			fDocText = FALSE;
			break;

		// Destinations that may carry document text
		case destRTF:
		case destField:
		case destFieldResult:
		case destFieldInstruction:
		case destParaNumbering:
		case destParaNumText:
		case destNULL:
			break;

		// Unlisted destination: flag it in debug builds, keep the TRUE
		default:
			AssertSz(0, "CRTFRead::FInDocTextDest():  New destination "
							"encountered - update enum in _rtfread.h");
			break;
	}

	return fDocText;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -