⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 word2text.c

📁 这是一个同样来自贝尔实验室的和UNIX有着渊源的操作系统, 其简洁的设计和实现易于我们学习和理解
💻 C
📖 第 1 页 / 共 3 页
字号:
/* * word2text.c * Copyright (C) 1998-2005 A.J. van Os; Released under GNU GPL * * Description: * MS Word to "text" functions */#include <stdio.h>#include <stdlib.h>#include <string.h>#include <ctype.h>#if defined(__riscos)#include "DeskLib:Hourglass.h"#include "drawfile.h"#endif /* __riscos */#include "antiword.h"#define INITIAL_SIZE		40#define EXTENTION_SIZE		20/* Macros to make sure all such statements will be identical */#define OUTPUT_LINE()		\	do {\		vAlign2Window(pDiag, pAnchor, lWidthMax, ucAlignment);\		TRACE_MSG("after vAlign2Window");\		pAnchor = pStartNewOutput(pAnchor, NULL);\		pOutput = pAnchor;\	} while(0)#define RESET_LINE()		\	do {\		pAnchor = pStartNewOutput(pAnchor, NULL);\		pOutput = pAnchor;\	} while(0)#if defined(__riscos)/* Length of the document in characters */static ULONG	ulDocumentLength;/* Number of characters processed so far */static ULONG	ulCharCounter;static int	iCurrPct, iPrevPct;#endif /* __riscos *//* The document is in the format belonging to this version of Word */static int	iWordVersion = -1;/* Special treatment for files from Word 4/5/6 on an Apple Macintosh */static BOOL	bOldMacFile = FALSE;/* Section Information */static const section_block_type	*pSection = NULL;static const section_block_type	*pSectionNext = NULL;/* All the (command line) options */static options_type	tOptions;/* Needed for reading a complete table row */static const row_block_type	*pRowInfo = NULL;static BOOL	bStartRow = FALSE;static BOOL	bEndRowNorm = FALSE;static BOOL	bEndRowFast = FALSE;static BOOL	bIsTableRow = FALSE;/* Index of the next style and font information */static USHORT	usIstdNext = ISTD_NORMAL;/* Needed for finding the start of a style */static const style_block_type	*pStyleInfo = NULL;static style_block_type		tStyleNext;static BOOL	bStartStyle = FALSE;static BOOL	bStartStyleNext = FALSE;/* Needed for finding the start of a font */static const font_block_type	*pFontInfo = NULL;static font_block_type		tFontNext;static BOOL	bStartFont = FALSE;static BOOL	bStartFontNext = FALSE;/* Needed for finding an image */static ULONG	ulFileOffsetImage = FC_INVALID;/* * vUpdateCounters - Update the counters for the hourglass */static voidvUpdateCounters(void){#if defined(__riscos)	ulCharCounter++;	iCurrPct = (int)((ulCharCounter * 100) / ulDocumentLength);	if (iCurrPct != iPrevPct) {		Hourglass_Percentage(iCurrPct);		iPrevPct = iCurrPct;	}#endif /* __riscos */} /* end of vUpdateCounters *//* * bOutputContainsText - see if the output contains more than white space */BOOLbOutputContainsText(const output_type *pAnchor){	const output_type	*pCurr;	size_t	tIndex;	fail(pAnchor == NULL);	for (pCurr = pAnchor; pCurr != NULL; pCurr = pCurr->pNext) {		fail(pCurr->lStringWidth < 0);		for (tIndex = 0; tIndex < pCurr->tNextFree; tIndex++) {			if (isspace((int)(UCHAR)pCurr->szStorage[tIndex])) {				continue;			}#if defined(DEBUG)			if (pCurr->szStorage[tIndex] == FILLER_CHAR) {				continue;			}#endif /* DEBUG */			return TRUE;		}	}	return FALSE;} /* end of bOutputContainsText *//* * lTotalStringWidth - compute the total width of the output string */static longlTotalStringWidth(const output_type *pAnchor){	const output_type	*pCurr;	long		lTotal;	lTotal = 0;	for (pCurr = pAnchor; pCurr != NULL; pCurr = pCurr->pNext) {		DBG_DEC_C(pCurr->lStringWidth < 0, pCurr->lStringWidth);		fail(pCurr->lStringWidth < 0);		lTotal += pCurr->lStringWidth;	}	return lTotal;} /* end of lTotalStringWidth *//* * vStoreByte - store one byte */static voidvStoreByte(UCHAR ucChar, output_type *pOutput){	fail(pOutput == NULL);	if (ucChar == 0) {		pOutput->szStorage[pOutput->tNextFree] = '\0';		return;	}	while (pOutput->tNextFree + 2 > pOutput->tStorageSize) {		pOutput->tStorageSize += EXTENTION_SIZE;		pOutput->szStorage = xrealloc(pOutput->szStorage,					pOutput->tStorageSize);	}	pOutput->szStorage[pOutput->tNextFree] = (char)ucChar;	pOutput->szStorage[pOutput->tNextFree + 1] = '\0';	pOutput->tNextFree++;} /* end of vStoreByte *//* * vStoreChar - store a character as one or more bytes */static voidvStoreChar(ULONG ulChar, BOOL bChangeAllowed, output_type *pOutput){	char	szResult[4];	size_t	tIndex, tLen;	fail(pOutput == NULL);	if (tOptions.eEncoding == encoding_utf_8 && bChangeAllowed) {		DBG_HEX_C(ulChar > 0xffff, ulChar);		fail(ulChar > 0xffff);		tLen = tUcs2Utf8(ulChar, szResult, sizeof(szResult));		for (tIndex = 0; tIndex < tLen; tIndex++) {			vStoreByte((UCHAR)szResult[tIndex], pOutput);		}	} else {		DBG_HEX_C(ulChar > 0xff, ulChar);		fail(ulChar > 0xff);		vStoreByte((UCHAR)ulChar, pOutput);		tLen = 1;	}	pOutput->lStringWidth += lComputeStringWidth(				pOutput->szStorage + pOutput->tNextFree - tLen,				tLen,				pOutput->tFontRef,				pOutput->usFontSize);} /* end of vStoreChar *//* * vStoreCharacter - store one character */static voidvStoreCharacter(ULONG ulChar, output_type *pOutput){	vStoreChar(ulChar, TRUE, pOutput);} /* end of vStoreCharacter *//* * vStoreString - store a string */static voidvStoreString(const char *szString, size_t tStringLength, output_type *pOutput){	size_t	tIndex;	fail(szString == NULL || pOutput == NULL);	for (tIndex = 0; tIndex < tStringLength; tIndex++) {		vStoreCharacter((ULONG)(UCHAR)szString[tIndex], pOutput);	}} /* end of vStoreString *//* * vStoreNumberAsDecimal - store a number as a decimal number */static voidvStoreNumberAsDecimal(UINT uiNumber, output_type *pOutput){	size_t	tLen;	char	szString[3 * sizeof(UINT) + 1];	fail(uiNumber == 0);	fail(pOutput == NULL);	tLen = (size_t)sprintf(szString, "%u", uiNumber);	vStoreString(szString, tLen, pOutput);} /* end of vStoreNumberAsDecimal *//* * vStoreNumberAsRoman - store a number as a roman numerical */static voidvStoreNumberAsRoman(UINT uiNumber, output_type *pOutput){	size_t	tLen;	char	szString[15];	fail(uiNumber == 0);	fail(pOutput == NULL);	tLen = tNumber2Roman(uiNumber, FALSE, szString);	vStoreString(szString, tLen, pOutput);} /* end of vStoreNumberAsRoman *//* * vStoreStyle - store a style */static voidvStoreStyle(diagram_type *pDiag, output_type *pOutput,	const style_block_type *pStyle){	size_t	tLen;	char	szString[120];	fail(pDiag == NULL);	fail(pOutput == NULL);	fail(pStyle == NULL);	if (tOptions.eConversionType == conversion_xml) {		vSetHeaders(pDiag, pStyle->usIstd);	} else {		tLen = tStyle2Window(szString, sizeof(szString),					pStyle, pSection);		vStoreString(szString, tLen, pOutput);	}} /* end of vStoreStyle *//* * vPutIndentation - output the specified amount of indentation */static voidvPutIndentation(diagram_type *pDiag, output_type *pOutput,	BOOL bNoMarks, BOOL bFirstLine,	UINT uiListNumber, UCHAR ucNFC, const char *szListChar,	long lLeftIndentation, long lLeftIndentation1){	long	lWidth;	size_t	tIndex, tNextFree;	char	szLine[30];	fail(pDiag == NULL);	fail(pOutput == NULL);	fail(szListChar == NULL);	fail(lLeftIndentation < 0);	if (tOptions.eConversionType == conversion_xml) {		/* XML does its own indentation at rendering time */		return;	}	if (bNoMarks) {		if (bFirstLine) {			lLeftIndentation += lLeftIndentation1;		}		if (lLeftIndentation < 0) {			lLeftIndentation = 0;		}		vSetLeftIndentation(pDiag, lLeftIndentation);		return;	}	if (lLeftIndentation <= 0) {		DBG_HEX_C(ucNFC != 0x00, ucNFC);		vSetLeftIndentation(pDiag, 0);		return;	}#if defined(DEBUG)	if (tOptions.eEncoding == encoding_utf_8) {		fail(strlen(szListChar) > 3);	} else {		DBG_HEX_C(iscntrl((int)szListChar[0]), szListChar[0]);		fail(iscntrl((int)szListChar[0]));		fail(szListChar[1] != '\0');	}#endif /* DEBUG */	switch (ucNFC) {	case LIST_ARABIC_NUM:	case LIST_NUMBER_TXT:		tNextFree = (size_t)sprintf(szLine, "%u", uiListNumber);		break;	case LIST_UPPER_ROMAN:	case LIST_LOWER_ROMAN:		tNextFree = tNumber2Roman(uiListNumber,				ucNFC == LIST_UPPER_ROMAN, szLine);		break;	case LIST_UPPER_ALPHA:	case LIST_LOWER_ALPHA:		tNextFree = tNumber2Alpha(uiListNumber,				ucNFC == LIST_UPPER_ALPHA, szLine);		break;	case LIST_ORDINAL_NUM:	case LIST_ORDINAL_TXT:		if (uiListNumber % 10 == 1 && uiListNumber != 11) {			tNextFree =				(size_t)sprintf(szLine, "%ust", uiListNumber);		} else if (uiListNumber % 10 == 2 && uiListNumber != 12) {			tNextFree =				(size_t)sprintf(szLine, "%und", uiListNumber);		} else if (uiListNumber % 10 == 3 && uiListNumber != 13) {			tNextFree =				(size_t)sprintf(szLine, "%urd", uiListNumber);		} else {			tNextFree =				(size_t)sprintf(szLine, "%uth", uiListNumber);		}		break;	case LIST_OUTLINE_NUM:		tNextFree = (size_t)sprintf(szLine, "%02u", uiListNumber);		break;	case LIST_SPECIAL:	case LIST_SPECIAL2:	case LIST_BULLETS:		tNextFree = 0;		break;	default:		DBG_HEX(ucNFC);		DBG_FIXME();		tNextFree = (size_t)sprintf(szLine, "%u", uiListNumber);		break;	}	tNextFree += (size_t)sprintf(szLine + tNextFree, "%.3s", szListChar);	szLine[tNextFree++] = ' ';	szLine[tNextFree] = '\0';	lWidth = lComputeStringWidth(szLine, tNextFree,				pOutput->tFontRef, pOutput->usFontSize);	lLeftIndentation -= lWidth;	if (lLeftIndentation < 0) {		lLeftIndentation = 0;	}	vSetLeftIndentation(pDiag, lLeftIndentation);	for (tIndex = 0; tIndex < tNextFree; tIndex++) {		vStoreChar((ULONG)(UCHAR)szLine[tIndex], FALSE, pOutput);	}} /* end of vPutIndentation *//* * vPutSeparatorLine - output a separator line * * A separator line is a horizontal line two inches long. * Two inches equals 144000 millipoints. */static voidvPutSeparatorLine(output_type *pOutput){	long	lCharWidth;	int	iCounter, iChars;	char	szOne[2];	fail(pOutput == NULL);	szOne[0] = OUR_EM_DASH;	szOne[1] = '\0';	lCharWidth = lComputeStringWidth(szOne, 1,				pOutput->tFontRef, pOutput->usFontSize);	NO_DBG_DEC(lCharWidth);	iChars = (int)((144000 + lCharWidth / 2) / lCharWidth);	NO_DBG_DEC(iChars);	for (iCounter = 0; iCounter < iChars; iCounter++) {		vStoreCharacter((ULONG)(UCHAR)OUR_EM_DASH, pOutput);	}} /* end of vPutSeparatorLine *//* * pStartNextOutput - start the next output record * * returns a pointer to the next record */static output_type *pStartNextOutput(output_type *pCurrent){	output_type	*pNew;	TRACE_MSG("pStartNextOutput");	if (pCurrent->tNextFree == 0) {		/* The current record is empty, re-use */		fail(pCurrent->szStorage[0] != '\0');		fail(pCurrent->lStringWidth != 0);		return pCurrent;	}	/* The current record is in use, make a new one */	pNew = xmalloc(sizeof(*pNew));	pCurrent->pNext = pNew;	pNew->tStorageSize = INITIAL_SIZE;	pNew->szStorage = xmalloc(pNew->tStorageSize);	pNew->szStorage[0] = '\0';	pNew->tNextFree = 0;	pNew->lStringWidth = 0;	pNew->ucFontColor = FONT_COLOR_DEFAULT;	pNew->usFontStyle = FONT_REGULAR;	pNew->tFontRef = (drawfile_fontref)0;	pNew->usFontSize = DEFAULT_FONT_SIZE;	pNew->pPrev = pCurrent;	pNew->pNext = NULL;	return pNew;} /* end of pStartNextOutput *//* * pStartNewOutput */static output_type *pStartNewOutput(output_type *pAnchor, output_type *pLeftOver){	output_type	*pCurr, *pNext;	USHORT		usFontStyle, usFontSize;	drawfile_fontref	tFontRef;	UCHAR		ucFontColor;	TRACE_MSG("pStartNewOutput");	ucFontColor = FONT_COLOR_DEFAULT;	usFontStyle = FONT_REGULAR;	tFontRef = (drawfile_fontref)0;	usFontSize = DEFAULT_FONT_SIZE;	/* Free the old output space */	pCurr = pAnchor;	while (pCurr != NULL) {		TRACE_MSG("Free the old output space");		pNext = pCurr->pNext;		pCurr->szStorage = xfree(pCurr->szStorage);		if (pCurr->pNext == NULL) {			ucFontColor = pCurr->ucFontColor;			usFontStyle = pCurr->usFontStyle;			tFontRef = pCurr->tFontRef;			usFontSize = pCurr->usFontSize;		}		pCurr = xfree(pCurr);		pCurr = pNext;	}	if (pLeftOver == NULL) {		/* Create new output space */		TRACE_MSG("Create new output space");		pLeftOver = xmalloc(sizeof(*pLeftOver));		pLeftOver->tStorageSize = INITIAL_SIZE;		NO_DBG_DEC(pLeftOver->tStorageSize);		TRACE_MSG("before 2nd xmalloc");		pLeftOver->szStorage = xmalloc(pLeftOver->tStorageSize);		TRACE_MSG("after 2nd xmalloc");		pLeftOver->szStorage[0] = '\0';		pLeftOver->tNextFree = 0;		pLeftOver->lStringWidth = 0;		pLeftOver->ucFontColor = ucFontColor;		pLeftOver->usFontStyle = usFontStyle;		pLeftOver->tFontRef = tFontRef;		pLeftOver->usFontSize = usFontSize;		pLeftOver->pPrev = NULL;		pLeftOver->pNext = NULL;	}	fail(!bCheckDoubleLinkedList(pLeftOver));	return pLeftOver;} /* end of pStartNewOutput *//* * ulGetChar - get the next character from the specified list * * returns the next character of EOF */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -