📄 reader.c
字号:
}
}
}
}
/*
 * Fetch the next token and return its class.
 *
 * Once the current class is rtfEOF nothing more is read and rtfEOF is
 * returned again. Otherwise each token produced by _RTFGetToken() is
 * first shown to the read hook (if one is installed). Text tokens whose
 * major value is CR, LF or NUL are then dropped silently, unless the
 * input format is SF_TEXT, and the next token is fetched instead.
 */
int RTFGetToken(RTF_Info *info)
{
    RTFFuncPtr hook;
    int discard;

    TRACE("\n");

    /* EOF is sticky: never try to read past it */
    if (info->rtfClass == rtfEOF)
        return rtfEOF;

    do
    {
        _RTFGetToken (info);

        hook = RTFGetReadHook (info);
        if (hook != NULL)
            (*hook) (info);     /* let the read hook inspect the token */

        /* newlines, carriage returns and nulls are skipped for non-text input */
        discard = info->rtfClass == rtfText
               && info->rtfFormat != SF_TEXT
               && (info->rtfMajor == '\r'
                   || info->rtfMajor == '\n'
                   || info->rtfMajor == '\0');
    } while (discard);

    return info->rtfClass;
}
/*
 * Token reader hook accessors.
 *
 * RTFSetReadHook stores f as the read hook of this reader; RTFGetToken()
 * calls the stored hook for every token it fetches. Passing NULL removes
 * the hook, since RTFGetToken() only calls a non-NULL hook.
 */
void RTFSetReadHook(RTF_Info *info, RTFFuncPtr f)
{
    info->readHook = f;
}
/*
 * Return the read hook currently stored in info (whatever was last
 * assigned to info->readHook).
 */
RTFFuncPtr RTFGetReadHook(RTF_Info *info)
{
    RTFFuncPtr hook = info->readHook;

    return hook;
}
/*
 * Save the current token (class, major, minor, param and text) into the
 * pushed* fields of info, where _RTFGetToken() looks for a pushed-back
 * token.
 *
 * Only one token can be held. An error is logged when a token is already
 * pushed back or when there is no current token (negative class), but the
 * current token state is saved in either case.
 */
void RTFUngetToken(RTF_Info *info)
{
    TRACE("\n");

    /* diagnostics only: neither condition prevents the save below */
    if (info->pushedClass >= 0)
        ERR ("cannot unget two tokens\n");
    if (info->rtfClass < 0)
        ERR ("no token to unget\n");

    /* snapshot the token text, then its numeric fields */
    lstrcpyA (info->pushedTextBuf, info->rtfTextBuf);
    info->pushedParam = info->rtfParam;
    info->pushedMinor = info->rtfMinor;
    info->pushedMajor = info->rtfMajor;
    info->pushedClass = info->rtfClass;
}
/*
 * Look at the next token: read it with _RTFGetToken(), immediately push
 * it back with RTFUngetToken(), and return its class.
 *
 * The token is fetched through _RTFGetToken() directly, so the read hook
 * is not called and the CR/LF/NUL filtering done by RTFGetToken() does
 * not apply here.
 */
int RTFPeekToken(RTF_Info *info)
{
    int peekedClass;

    _RTFGetToken (info);
    RTFUngetToken (info);

    peekedClass = info->rtfClass;
    return peekedClass;
}
/*
 * Produce the next token in info, from one of three sources:
 *
 *  - SF_TEXT input: one character is read and returned as an rtfText
 *    token (rtfEOF at end of input) with an empty text buffer. This
 *    path returns before the pushed-back token is ever checked.
 *  - a token pushed back by RTFUngetToken(): it is restored and the
 *    push-back slot is cleared.
 *  - otherwise the input is scanned by _RTFGetToken2().
 */
static void _RTFGetToken(RTF_Info *info)
{
    TRACE("\n");

    if (info->rtfFormat == SF_TEXT)
    {
        info->rtfMajor = GetChar (info);
        info->rtfMinor = 0;
        info->rtfParam = rtfNoParam;

        /* GetChar() appended the character; plain text tokens carry no text */
        info->rtfTextLen = 0;
        info->rtfTextBuf[0] = '\0';

        info->rtfClass = (info->rtfMajor == EOF) ? rtfEOF : rtfText;
        return;
    }

    if (info->pushedClass < 0)
    {
        /*
         * Nothing pushed back. Beyond this point, no token is ever seen
         * twice, which is important, e.g., for making sure no "}" pops
         * the font stack twice.
         */
        _RTFGetToken2 (info);
        return;
    }

    /* replay the token saved by RTFUngetToken() and free the slot */
    info->rtfClass = info->pushedClass;
    info->rtfMajor = info->pushedMajor;
    info->rtfMinor = info->pushedMinor;
    info->rtfParam = info->pushedParam;
    lstrcpyA (info->rtfTextBuf, info->pushedTextBuf);
    info->rtfTextLen = lstrlenA(info->rtfTextBuf);
    info->pushedClass = -1;
}
/*
 * Map a font charset identifier to the code page used to decode text
 * in that charset.
 *
 * info     reader state; not used by this function.
 * charset  one of the *_CHARSET identifiers.
 *
 * Returns the code page (or one of the CP_* pseudo code pages). Charsets
 * without a fixed entry below are resolved with TranslateCharsetInfo();
 * if that fails too, an error is logged and 0 is returned.
 */
int
RTFCharSetToCodePage(RTF_Info *info, int charset)
{
    switch (charset)
    {
        case ANSI_CHARSET:        return 1252;
        case DEFAULT_CHARSET:     return CP_ACP;
        case SYMBOL_CHARSET:      return CP_SYMBOL;
        case MAC_CHARSET:         return CP_MACCP;
        case SHIFTJIS_CHARSET:    return 932;
        case HANGEUL_CHARSET:     return 949;
        case JOHAB_CHARSET:       return 1361;
        case GB2312_CHARSET:      return 936;
        case CHINESEBIG5_CHARSET: return 950;
        case GREEK_CHARSET:       return 1253;
        case TURKISH_CHARSET:     return 1254;
        case VIETNAMESE_CHARSET:  return 1258;
        case HEBREW_CHARSET:      return 1255;
        case ARABIC_CHARSET:      return 1256;
        case BALTIC_CHARSET:      return 1257;
        case RUSSIAN_CHARSET:     return 1251;
        case THAI_CHARSET:        return 874;
        case EASTEUROPE_CHARSET:  return 1250;
        case OEM_CHARSET:         return CP_OEMCP;
        default:
        {
            CHARSETINFO csi;
            DWORD n = charset;

            /*
             * Last resort only; the fixed table above is preferred.
             *
             * With TCI_SRCCHARSET the charset is passed *as the value*
             * of the first argument, not through a pointer to it, so the
             * number is cast to the pointer type rather than passing &n
             * (which would make the lookup compare an address against
             * charset ids and never match).
             */
            if (TranslateCharsetInfo((DWORD *)(size_t)n, &csi, TCI_SRCCHARSET))
                return csi.ciACP;

            /* charset is a signed int, so print it with %d */
            ERR("%s: unknown charset %d\n", __FUNCTION__, charset);
        }
    }
    return 0;
}
/*
 * Scan one token from the input and store it in info.
 * This shouldn't be called anywhere but from _RTFGetToken().
 *
 * The token class starts out as rtfUnknown with no parameter and an
 * empty text buffer. Depending on the first character(s) the result is:
 *   - rtfEOF            no more input
 *   - rtfGroup          '{' (rtfBeginGroup) or '}' (rtfEndGroup)
 *   - rtfControl        a literal tab (rtfSpecialChar / rtfTab)
 *   - rtfText           a literal character, a \'hh hex escape, or one of
 *                       the escaped characters \: \{ \} \\
 *   - whatever Lookup() assigns, for control symbols and control words
 *   - rtfUnknown        when the input ends right after a backslash or
 *                       inside a hex escape
 *
 * Every character obtained through GetChar() is also appended to
 * info->rtfTextBuf by GetChar() itself.
 * NOTE(review): no bound on rtfTextLen is checked here while a control
 * word or its parameter is collected - confirm the buffer is large enough
 * for untrusted input.
 */
static void _RTFGetToken2(RTF_Info *info)
{
int sign;
int c;
TRACE("\n");
/* initialize token vars */
info->rtfClass = rtfUnknown;
info->rtfParam = rtfNoParam;
info->rtfTextBuf[info->rtfTextLen = 0] = '\0';
/* get first character, which may be a pushback from previous token */
if (info->pushedChar != EOF)
{
/* the pushed character bypasses GetChar(), so append it by hand */
c = info->pushedChar;
info->rtfTextBuf[info->rtfTextLen++] = c;
info->rtfTextBuf[info->rtfTextLen] = '\0';
info->pushedChar = EOF;
}
else if ((c = GetChar (info)) == EOF)
{
info->rtfClass = rtfEOF;
return;
}
/* group delimiters; rtfMinor is left untouched for these */
if (c == '{')
{
info->rtfClass = rtfGroup;
info->rtfMajor = rtfBeginGroup;
return;
}
if (c == '}')
{
info->rtfClass = rtfGroup;
info->rtfMajor = rtfEndGroup;
return;
}
if (c != '\\')
{
/*
* Two possibilities here:
* 1) ASCII 9, effectively like \tab control symbol
* 2) literal text char
*/
if (c == '\t') /* ASCII 9 */
{
info->rtfClass = rtfControl;
info->rtfMajor = rtfSpecialChar;
info->rtfMinor = rtfTab;
}
else
{
info->rtfClass = rtfText;
info->rtfMajor = c;
}
return;
}
/* c was a backslash: the next character decides what kind of control this is */
if ((c = GetChar (info)) == EOF)
{
/* early eof, whoops (class is rtfUnknown) */
return;
}
if (!isalpha (c))
{
/*
* Three possibilities here:
* 1) hex encoded text char, e.g., \'d5, \'d3
* 2) special escaped text char, e.g., \{, \}
* 3) control symbol, e.g., \_, \-, \|, \<10>
*/
if (c == '\'') /* hex char */
{
int c2;
if ((c = GetChar (info)) != EOF && (c2 = GetChar (info)) != EOF)
{
/* should do isxdigit check! */
/* the two characters are combined as high and low hex digit */
info->rtfClass = rtfText;
info->rtfMajor = RTFCharToHex (c) * 16 + RTFCharToHex (c2);
return;
}
/* early eof, whoops (class is rtfUnknown) */
return;
}
/* escaped char */
/*if (index (":{}\\", c) != (char *) NULL)*/ /* escaped char */
if (c == ':' || c == '{' || c == '}' || c == '\\')
{
info->rtfClass = rtfText;
info->rtfMajor = c;
return;
}
/* control symbol: the text buffer holds the backslash and the symbol */
Lookup (info, info->rtfTextBuf); /* sets class, major, minor */
return;
}
/* control word: keep reading while letters follow */
while (isalpha (c))
{
if ((c = GetChar (info)) == EOF)
break;
}
/*
* At this point, the control word is all collected, so the
* major/minor numbers are determined before the parameter
* (if any) is scanned. There will be one too many characters
* in the buffer, though (the delimiter, unless input ended), so
* fix up before and restore after looking up.
*/
if (c != EOF)
info->rtfTextBuf[info->rtfTextLen-1] = '\0';
Lookup (info, info->rtfTextBuf); /* sets class, major, minor */
if (c != EOF)
info->rtfTextBuf[info->rtfTextLen-1] = c;
/*
* Should be looking at first digit of parameter if there
* is one, unless it's negative. In that case, next char
* is '-', so need to gobble next char, and remember sign.
* Note that the '-' is consumed even when no digit follows it.
*/
sign = 1;
if (c == '-')
{
sign = -1;
c = GetChar (info);
}
if (c != EOF && isdigit (c))
{
info->rtfParam = 0;
/* NOTE(review): the accumulation below has no overflow check */
while (isdigit (c)) /* gobble parameter */
{
info->rtfParam = info->rtfParam * 10 + c - '0';
if ((c = GetChar (info)) == EOF)
break;
}
info->rtfParam *= sign;
}
/*
* If the control word delimiter was a blank, gobble it.
* Otherwise the character is first char of next token, so
* push it back for next call. In either case, delete the
* delimiter from the token buffer.
*/
if (c != EOF)
{
if (c != ' ')
info->pushedChar = c;
info->rtfTextBuf[--info->rtfTextLen] = '\0';
}
}
/*
 * Read the next character from the input via _RTFGetChar() and return
 * it (EOF at end of input). Any character other than EOF is also
 * appended to info->rtfTextBuf, which is kept NUL-terminated.
 * NOTE(review): the append is not bounds-checked here - confirm callers
 * cannot overrun the buffer.
 *
 * The current line number and position-within-line are maintained as a
 * side effect. They come out right whether lines end with CR, LF, or
 * CRLF (the last being the tricky case: the LF of a CRLF pair must not
 * start another line).
 *
 * info->bumpLine records whether the line number should be incremented
 * on the *next* input character.
 */
static int GetChar(RTF_Info *info)
{
    int ch;
    int startsNewLine;

    TRACE("\n");

    ch = _RTFGetChar(info);
    if (ch != EOF)
    {
        info->rtfTextBuf[info->rtfTextLen] = ch;
        info->rtfTextLen++;
        info->rtfTextBuf[info->rtfTextLen] = '\0';
    }

    /* a previous EOF always makes this character begin a line */
    if (info->prevChar == EOF)
        startsNewLine = 1;
    else
        startsNewLine = info->bumpLine; /* non-zero if prev char was line ending */

    /* LF right after CR: the CR was not really a line ending of its own */
    if (ch == '\n' && info->prevChar == '\r')
        startsNewLine = 0;

    /* remember whether the character after this one begins a new line */
    if (ch == '\r' || ch == '\n')
        info->bumpLine = 1;
    else
        info->bumpLine = 0;

    ++info->rtfLinePos;
    if (startsNewLine)
    {
        ++info->rtfLineNum;
        info->rtfLinePos = 1;
    }

    info->prevChar = ch;
    return ch;
}
/*
 * Synthesize a token by storing the supplied class, major, minor and
 * param values in info. Typically this is followed with a call
 * to RTFRouteToken().
 *
 * The token text becomes a copy of text; when param is anything other
 * than rtfNoParam, its decimal representation is appended to the text.
 * info->rtfTextLen is updated to the length of the resulting string.
 */
void RTFSetToken(RTF_Info *info, int class, int major, int minor, int param, const char *text)
{
    TRACE("\n");

    info->rtfClass = class;
    info->rtfMajor = major;
    info->rtfMinor = minor;
    info->rtfParam = param;

    if (param != rtfNoParam)
        sprintf (info->rtfTextBuf, "%s%d", text, param);    /* text followed by the parameter */
    else
        lstrcpyA(info->rtfTextBuf, text);                   /* text only */

    info->rtfTextLen = lstrlenA (info->rtfTextBuf);
}
/* ---------------------------------------------------------------------- */
/*
* Special destination readers. They gobble the destination so the
* writer doesn't have to deal with them. That's wrong for any
* translator that wants to process any of these itself. In that
* case, these readers should be overridden by installing a different
* destination callback.
*
* NOTE: The last token read by each of these readers will be the
* destination's terminating '}', which will then be the current token.
* That '}' token is passed to RTFRouteToken() - the writer has already
* seen the '{' that began the destination group, and may have pushed a
* state; it also needs to know at the end of the group that a state
* should be popped.
*
* It's important that rtf.h and the control token lookup table list
* as many symbols as possible, because these destination readers
* unfortunately make strict assumptions about the input they expect,
* and a token of class rtfUnknown will throw them off easily.
*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -