📄 reader.c

📁 winNT技术操作系统,国外开放的源代码,和Linux一样
💻 C
📖 第 1 页 / 共 5 页
字号:
			}
		}
	}
}


/*
 * Fetch the next token and return its class.
 *
 * Every token obtained from _RTFGetToken() is first shown to the read
 * hook, if one is installed.  Outside plain-text mode (rtfFormat other
 * than SF_TEXT), text tokens whose major number is CR, LF or NUL are
 * then skipped and the next token is fetched instead.
 *
 * Once the current class is rtfEOF, every further call returns rtfEOF
 * without reading any more input.
 */

int RTFGetToken(RTF_Info *info)
{
	RTFFuncPtr	hook;

	TRACE("\n");

	/* EOF is sticky: never try to read past it */
	if (info->rtfClass == rtfEOF)
		return rtfEOF;

	do
	{
		_RTFGetToken (info);
		hook = RTFGetReadHook (info);
		if (hook != NULL)
			(*hook) (info);	/* the hook sees the token before any filtering */
	}
	while (info->rtfClass == rtfText
		&& info->rtfFormat != SF_TEXT
		&& (info->rtfMajor == '\r'
			|| info->rtfMajor == '\n'
			|| info->rtfMajor == '\0'));

	return info->rtfClass;
}


/*
 * Install f as the token read hook for this reader.  The hook is
 * stored in info->readHook; RTFGetToken() calls it with each token it
 * reads.  Passing NULL removes the hook (RTFGetToken() skips the call
 * when the stored pointer is NULL).
 */

void RTFSetReadHook(RTF_Info *info, RTFFuncPtr f)
{
	info->readHook = f;
}


/*
 * Return the currently installed token read hook (info->readHook).
 * RTFGetToken() treats a NULL result as "no hook installed".
 */
RTFFuncPtr RTFGetReadHook(RTF_Info *info)
{
	return (info->readHook);
}


/*
 * Push the current token back so that the next token fetch returns it
 * again.  The class, major, minor, param and text of the current token
 * are copied into the pushed* fields of info.
 *
 * Only one token of pushback exists.  Ungetting while a token is
 * already pushed, or while there is no current token (class < 0), is
 * reported through ERR; no early return follows either report, so the
 * copy below is still performed.
 */
void RTFUngetToken(RTF_Info *info)
{
	TRACE("\n");

	if (!(info->pushedClass < 0))
	{
		/* the pushback slot is already occupied */
		ERR ("cannot unget two tokens\n");
	}
	if (info->rtfClass < 0)
	{
		ERR ("no token to unget\n");
	}

	/* save the complete current token in the pushback slot */
	lstrcpyA (info->pushedTextBuf, info->rtfTextBuf);
	info->pushedParam = info->rtfParam;
	info->pushedMinor = info->rtfMinor;
	info->pushedMajor = info->rtfMajor;
	info->pushedClass = info->rtfClass;
}


/*
 * Look at the next token without consuming it: read it with
 * _RTFGetToken(), push it straight back with RTFUngetToken(), and
 * return its class.  The current-token fields of info keep describing
 * the peeked token afterwards.
 *
 * _RTFGetToken() is called directly rather than RTFGetToken(), so the
 * read hook is not invoked here and CR/LF/NUL text tokens are not
 * filtered out.
 *
 * NOTE(review): in SF_TEXT mode _RTFGetToken() returns before it looks
 * at the pushed token, so a token peeked in that mode does not appear
 * to be re-delivered - confirm whether peeking is ever used there.
 */
int RTFPeekToken(RTF_Info *info)
{
	_RTFGetToken (info);
	RTFUngetToken (info);
	return (info->rtfClass);
}


/*
 * Produce the next token in info, from one of three sources:
 *
 *  - plain-text mode (rtfFormat == SF_TEXT): each input character is
 *    its own rtfText token with an empty token text, and end of input
 *    yields rtfEOF;
 *  - a token previously saved by RTFUngetToken(), which is restored
 *    and the pushback slot emptied;
 *  - otherwise a freshly scanned token from _RTFGetToken2().
 */
static void _RTFGetToken(RTF_Info *info)
{
	TRACE("\n");

	if (info->rtfFormat == SF_TEXT)
	{
		/*
		 * GetChar() appends the character to the text buffer, so
		 * the buffer is emptied only after the read.
		 */
		info->rtfMajor = GetChar (info);
		info->rtfMinor = 0;
		info->rtfParam = rtfNoParam;
		info->rtfTextLen = 0;
		info->rtfTextBuf[0] = '\0';
		info->rtfClass = (info->rtfMajor == EOF) ? rtfEOF : rtfText;
		return;
	}

	if (info->pushedClass < 0)
	{
		/*
		 * Nothing was pushed back.  From here on no token is ever
		 * seen twice, which matters, e.g., so that no "}" pops the
		 * font stack twice.
		 */
		_RTFGetToken2 (info);
		return;
	}

	/* hand back the token saved by RTFUngetToken() and free the slot */
	info->rtfClass = info->pushedClass;
	info->rtfMajor = info->pushedMajor;
	info->rtfMinor = info->pushedMinor;
	info->rtfParam = info->pushedParam;
	lstrcpyA (info->rtfTextBuf, info->pushedTextBuf);
	info->rtfTextLen = lstrlenA(info->rtfTextBuf);
	info->pushedClass = -1;
}


/*
 * Map a font charset identifier (ANSI_CHARSET, SHIFTJIS_CHARSET, ...)
 * to the code page used to decode text in that charset.
 *
 * The well-known charsets are answered from the fixed table below.
 * Anything else is handed to TranslateCharsetInfo() as a fallback.
 * If that fails too, the problem is reported through ERR and 0 is
 * returned.
 *
 * info is not used by this function.
 */
int
RTFCharSetToCodePage(RTF_Info *info, int charset)
{
	switch (charset)
	{
		case ANSI_CHARSET:
			return 1252;
		case DEFAULT_CHARSET:
			return CP_ACP;
		case SYMBOL_CHARSET:
			return CP_SYMBOL;
		case MAC_CHARSET:
			return CP_MACCP;
		case SHIFTJIS_CHARSET:
			return 932;
		case HANGEUL_CHARSET:
			return 949;
		case JOHAB_CHARSET:
			return 1361;
		case GB2312_CHARSET:
			return 936;
		case CHINESEBIG5_CHARSET:
			return 950;
		case GREEK_CHARSET:
			return 1253;
		case TURKISH_CHARSET:
			return 1254;
		case VIETNAMESE_CHARSET:
			return 1258;
		case HEBREW_CHARSET:
			return 1255;
		case ARABIC_CHARSET:
			return 1256;
		case BALTIC_CHARSET:
			return 1257;
		case RUSSIAN_CHARSET:
			return 1251;
		case THAI_CHARSET:
			return 874;
		case EASTEUROPE_CHARSET:
			return 1250;
		case OEM_CHARSET:
			return CP_OEMCP;
		default:
		{
			CHARSETINFO csi;
			DWORD n = charset;

			/*
			 * The table above is consulted first; this call is
			 * only the last resort.
			 *
			 * With TCI_SRCCHARSET the first argument is not a
			 * pointer to the charset: the charset value itself
			 * is passed, cast to the pointer type.  Passing the
			 * address of a local makes the lookup compare an
			 * address against charset ids, so it never matches.
			 */
			if (TranslateCharsetInfo((DWORD *)(size_t)n, &csi, TCI_SRCCHARSET))
				return csi.ciACP;
			ERR("%s: unknown charset %u\n", __FUNCTION__, charset);
		}
	}
	return 0;
}


/*
 * Scan one raw token from the input.  This shouldn't be called
 * anywhere but from _RTFGetToken().
 *
 * On return the token class is one of:
 *   rtfEOF     - no more input
 *   rtfGroup   - '{' or '}' (major is rtfBeginGroup / rtfEndGroup)
 *   rtfText    - a literal character, a \'hh hex escape, or one of the
 *                escaped characters \: \{ \} \\ (major is the character)
 *   rtfControl - an ASCII tab, reported as the tab special character
 *   rtfUnknown - input ended in the middle of a token, or a \'hh
 *                escape did not consist of two hex digits
 * or whatever Lookup() assigns for a control symbol or control word.
 *
 * The token text is collected in info->rtfTextBuf by GetChar().  The
 * character that terminates a control word is kept in info->pushedChar
 * for the next call, unless it is a blank, which is swallowed.
 */

static void _RTFGetToken2(RTF_Info *info)
{
	int	sign;
	int	c;

	TRACE("\n");

	/* initialize token vars */

	info->rtfClass = rtfUnknown;
	info->rtfParam = rtfNoParam;
	info->rtfTextBuf[info->rtfTextLen = 0] = '\0';

	/* get first character, which may be a pushback from previous token */

	if (info->pushedChar != EOF)
	{
		c = info->pushedChar;
		info->rtfTextBuf[info->rtfTextLen++] = c;
		info->rtfTextBuf[info->rtfTextLen] = '\0';
		info->pushedChar = EOF;
	}
	else if ((c = GetChar (info)) == EOF)
	{
		info->rtfClass = rtfEOF;
		return;
	}

	if (c == '{')
	{
		info->rtfClass = rtfGroup;
		info->rtfMajor = rtfBeginGroup;
		return;
	}
	if (c == '}')
	{
		info->rtfClass = rtfGroup;
		info->rtfMajor = rtfEndGroup;
		return;
	}
	if (c != '\\')
	{
		/*
		 * Two possibilities here:
		 * 1) ASCII 9, effectively like \tab control symbol
		 * 2) literal text char
		 */
		if (c == '\t')			/* ASCII 9 */
		{
			info->rtfClass = rtfControl;
			info->rtfMajor = rtfSpecialChar;
			info->rtfMinor = rtfTab;
		}
		else
		{
			info->rtfClass = rtfText;
			info->rtfMajor = c;
		}
		return;
	}
	if ((c = GetChar (info)) == EOF)
	{
		/* early eof, whoops (class is rtfUnknown) */
		return;
	}
	/*
	 * The <ctype.h> classifiers are only defined for EOF and values
	 * representable as unsigned char; c is never EOF where they are
	 * called below, so the cast only guards against negative values.
	 */
	if (!isalpha ((unsigned char) c))
	{
		/*
		 * Three possibilities here:
		 * 1) hex encoded text char, e.g., \'d5, \'d3
		 * 2) special escaped text char, e.g., \{, \}
		 * 3) control symbol, e.g., \_, \-, \|, \<10>
		 */
		if (c == '\'')				/* hex char */
		{
			int	c2;

			/*
			 * Both characters are consumed either way, but the
			 * token only becomes text when they really are hex
			 * digits; otherwise the converted value would be
			 * meaningless.
			 */
			if ((c = GetChar (info)) != EOF && (c2 = GetChar (info)) != EOF
				&& isxdigit ((unsigned char) c) && isxdigit ((unsigned char) c2))
			{
				info->rtfClass = rtfText;
				info->rtfMajor = RTFCharToHex (c) * 16 + RTFCharToHex (c2);
				return;
			}
			/* early eof or malformed escape (class is rtfUnknown) */
			return;
		}

		/* escaped char */
		if (c == ':' || c == '{' || c == '}' || c == '\\')
		{
			info->rtfClass = rtfText;
			info->rtfMajor = c;
			return;
		}

		/* control symbol */
		Lookup (info, info->rtfTextBuf);	/* sets class, major, minor */
		return;
	}
	/* control word: read on until the first non-letter */
	while (isalpha ((unsigned char) c))
	{
		if ((c = GetChar (info)) == EOF)
			break;
	}

	/*
	 * At this point, the control word is all collected, so the
	 * major/minor numbers are determined before the parameter
	 * (if any) is scanned.  There will be one too many characters
	 * in the buffer, though, so fix up before and restore after
	 * looking up.
	 */

	if (c != EOF)
		info->rtfTextBuf[info->rtfTextLen-1] = '\0';
	Lookup (info, info->rtfTextBuf);	/* sets class, major, minor */
	if (c != EOF)
		info->rtfTextBuf[info->rtfTextLen-1] = c;

	/*
	 * Should be looking at first digit of parameter if there
	 * is one, unless it's negative.  In that case, next char
	 * is '-', so need to gobble next char, and remember sign.
	 */

	sign = 1;
	if (c == '-')
	{
		sign = -1;
		c = GetChar (info);
	}
	if (c != EOF && isdigit ((unsigned char) c))
	{
		/*
		 * NOTE(review): the accumulation below has no range
		 * check, so an overlong digit run overflows rtfParam.
		 */
		info->rtfParam = 0;
		while (isdigit ((unsigned char) c))	/* gobble parameter */
		{
			info->rtfParam = info->rtfParam * 10 + c - '0';
			if ((c = GetChar (info)) == EOF)
				break;
		}
		info->rtfParam *= sign;
	}
	/*
	 * If control symbol delimiter was a blank, gobble it.
	 * Otherwise the character is first char of next token, so
	 * push it back for next call.  In either case, delete the
	 * delimiter from the token buffer.
	 */
	if (c != EOF)
	{
		if (c != ' ')
			info->pushedChar = c;
		info->rtfTextBuf[--info->rtfTextLen] = '\0';
	}
}


/*
 * Read the next character from the input via _RTFGetChar() and return
 * it (EOF at end of input).
 *
 * Every character other than EOF is appended to info->rtfTextBuf and
 * the buffer is kept NUL-terminated.  No bounds check is made here, so
 * the buffer has to be large enough for the longest token.
 *
 * The current line number and position-within-line are maintained as
 * well.  They come out right whether lines end with CR, LF, or CRLF
 * (the last being the tricky case).  info->bumpLine records whether
 * the line number should be incremented on the *next* character.
 */


static int GetChar(RTF_Info *info)
{
	int	ch;
	int	startsNewLine;

	TRACE("\n");

	ch = _RTFGetChar(info);
	if (ch != EOF)
	{
		info->rtfTextBuf[info->rtfTextLen] = ch;
		info->rtfTextLen++;
		info->rtfTextBuf[info->rtfTextLen] = '\0';
	}

	/* a previous character of EOF also makes this one start a line */
	if (info->prevChar == EOF)
		info->bumpLine = 1;

	/* non-zero if the previous character was a line ending ... */
	startsNewLine = info->bumpLine;
	/* ... unless this is the LF of a CRLF pair, which is still part of it */
	if (ch == '\n' && info->prevChar == '\r')
		startsNewLine = 0;

	/* remember for the next call whether this character ends a line */
	info->bumpLine = (ch == '\r' || ch == '\n') ? 1 : 0;

	if (startsNewLine)
	{
		++info->rtfLineNum;
		info->rtfLinePos = 1;
	}
	else
	{
		++info->rtfLinePos;
	}

	info->prevChar = ch;
	return ch;
}


/*
 * Synthesize a token by storing the supplied class, major, minor and
 * param in the current-token fields of info.  Typically this is
 * followed with a call to RTFRouteToken().
 *
 * The token text is set to text.  If param is anything other than
 * rtfNoParam, its decimal form is appended to the text.  The text is
 * written to info->rtfTextBuf without a length check, so the caller
 * must make sure it fits.
 */

void RTFSetToken(RTF_Info *info, int class, int major, int minor, int param, const char *text)
{
	TRACE("\n");

	info->rtfClass = class;
	info->rtfMajor = major;
	info->rtfMinor = minor;
	info->rtfParam = param;

	if (param != rtfNoParam)
	{
		/* the parameter becomes part of the token text */
		sprintf (info->rtfTextBuf, "%s%d", text, param);
	}
	else
	{
		lstrcpyA(info->rtfTextBuf, text);
	}

	info->rtfTextLen = lstrlenA (info->rtfTextBuf);
}


/* ---------------------------------------------------------------------- */

/*
 * Special destination readers.  They gobble the destination so the
 * writer doesn't have to deal with them.  That's wrong for any
 * translator that wants to process any of these itself.  In that
 * case, these readers should be overridden by installing a different
 * destination callback.
 *
 * NOTE: The last token read by each of these readers will be the
 * destination's terminating '}', which will then be the current token.
 * That '}' token is passed to RTFRouteToken() - the writer has already
 * seen the '{' that began the destination group, and may have pushed a
 * state; it also needs to know at the end of the group that a state
 * should be popped.
 *
 * It's important that rtf.h and the control token lookup table list
 * as many symbols as possible, because these destination readers
 * unfortunately make strict assumptions about the input they expect,
 * and a token of class rtfUnknown will throw them off easily.
 */
💿 文件大小 34543 K
👤 上传用户 ybsscauc
📂 所属分类操作系统开发
🏷️ 相关标签

#winNT #LIUX #操作系统 #代码
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -