⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 word2text.c

📁 这是一个同样来自贝尔实验室的和UNIX有着渊源的操作系统, 其简洁的设计和实现易于我们学习和理解
💻 C
📖 第 1 页 / 共 3 页
字号:
static ULONGulGetChar(FILE *pFile, list_id_enum eListID){	const font_block_type	*pCurr;	ULONG		ulChar, ulFileOffset, ulCharPos;	row_info_enum	eRowInfo;	USHORT		usChar, usPropMod;	BOOL		bSkip;	fail(pFile == NULL);	pCurr = pFontInfo;	bSkip = FALSE;	for (;;) {		usChar = usNextChar(pFile, eListID,				&ulFileOffset, &ulCharPos, &usPropMod);		if (usChar == (USHORT)EOF) {			return (ULONG)EOF;		}		vUpdateCounters();		eRowInfo = ePropMod2RowInfo(usPropMod, iWordVersion);		if (!bStartRow) {#if 0			bStartRow = eRowInfo == found_a_cell ||				(pRowInfo != NULL &&				 ulFileOffset == pRowInfo->ulFileOffsetStart &&				 eRowInfo != found_not_a_cell);#else			bStartRow = pRowInfo != NULL &&				ulFileOffset == pRowInfo->ulFileOffsetStart;#endif			NO_DBG_HEX_C(bStartRow, pRowInfo->ulFileOffsetStart);		}		if (!bEndRowNorm) {#if 0			bEndRow = eRowInfo == found_end_of_row ||				(pRowInfo != NULL &&				 ulFileOffset == pRowInfo->ulFileOffsetEnd &&				 eRowInfo != found_not_end_of_row);#else			bEndRowNorm = pRowInfo != NULL &&				ulFileOffset == pRowInfo->ulFileOffsetEnd;#endif			NO_DBG_HEX_C(bEndRowNorm, pRowInfo->ulFileOffsetEnd);		}		if (!bEndRowFast) {			bEndRowFast = eRowInfo == found_end_of_row;			NO_DBG_HEX_C(bEndRowFast, pRowInfo->ulFileOffsetEnd);		}		if (!bStartStyle) {			bStartStyle = pStyleInfo != NULL &&				ulFileOffset == pStyleInfo->ulFileOffset;			NO_DBG_HEX_C(bStartStyle, ulFileOffset);		}		if (pCurr != NULL && ulFileOffset == pCurr->ulFileOffset) {			bStartFont = TRUE;			NO_DBG_HEX(ulFileOffset);			pFontInfo = pCurr;			pCurr = pGetNextFontInfoListItem(pCurr);		}		/* Skip embedded characters */		if (usChar == START_EMBEDDED) {			bSkip = TRUE;			continue;		}		if (usChar == END_IGNORE || usChar == END_EMBEDDED) {			bSkip = FALSE;			continue;		}		if (bSkip) {			continue;		}		ulChar = ulTranslateCharacters(usChar,					ulFileOffset,					iWordVersion,					tOptions.eConversionType,					tOptions.eEncoding,					bOldMacFile);		if (ulChar == IGNORE_CHARACTER) {			continue;		}		if (ulChar == PICTURE) {			ulFileOffsetImage = ulGetPictInfoListItem(ulFileOffset);		} else {			ulFileOffsetImage = FC_INVALID;		}		if (ulChar == PAR_END) {			/* End of paragraph seen, prepare for the next */			vFillStyleFromStylesheet(usIstdNext, &tStyleNext);			vCorrectStyleValues(&tStyleNext);			bStartStyleNext = TRUE;			vFillFontFromStylesheet(usIstdNext, &tFontNext);			vCorrectFontValues(&tFontNext);			bStartFontNext = TRUE;		}		if (ulChar == PAGE_BREAK) {			/* Might be the start of a new section */			pSectionNext = pGetSectionInfo(pSection, ulCharPos);		}		return ulChar;	}} /* end of ulGetChar *//* * lGetWidthMax - get the maximum line width from the paragraph break value * * Returns the maximum line width in millipoints */static longlGetWidthMax(int iParagraphBreak){	fail(iParagraphBreak < 0);	if (iParagraphBreak == 0) {		return LONG_MAX;	}	if (iParagraphBreak < MIN_SCREEN_WIDTH) {		return lChar2MilliPoints(MIN_SCREEN_WIDTH);	}	if (iParagraphBreak > MAX_SCREEN_WIDTH) {		return lChar2MilliPoints(MAX_SCREEN_WIDTH);	}	return lChar2MilliPoints(iParagraphBreak);} /* end of lGetWidthMax *//* * bWordDecryptor - turn Word to something more useful * * returns TRUE when succesful, otherwise FALSE */BOOLbWordDecryptor(FILE *pFile, long lFilesize, diagram_type *pDiag){	imagedata_type	tImage;	const style_block_type	*pStyleTmp;	const font_block_type	*pFontTmp;	const char	*szListChar;	output_type	*pAnchor, *pOutput, *pLeftOver;	ULONG	ulChar;	long	lBeforeIndentation, lAfterIndentation;	long	lLeftIndentation, lLeftIndentation1, lRightIndentation;	long	lWidthCurr, lWidthMax, lDefaultTabWidth, lHalfSpaceWidth, lTmp;	list_id_enum 	eListID;	image_info_enum	eRes;	UINT	uiFootnoteNumber, uiEndnoteNumber, uiTmp;	int	iListSeqNumber;	BOOL	bWasTableRow, bTableFontClosed, bWasEndOfParagraph;	BOOL	bInList, bWasInList, bNoMarks, bFirstLine;	BOOL	bAllCapitals, bHiddenText, bMarkDelText, bSuccess;	USHORT	usListNumber;	USHORT	usFontStyle, usFontStyleMinimal, usFontSize, usTmp;	UCHAR	ucFontNumber, ucFontColor;	UCHAR	ucNFC, ucAlignment;	fail(pFile == NULL || lFilesize <= 0 || pDiag == NULL);	TRACE_MSG("bWordDecryptor");	iWordVersion = iInitDocument(pFile, lFilesize);	if (iWordVersion < 0) {		DBG_DEC(iWordVersion);		return FALSE;	}	vGetOptions(&tOptions);	bOldMacFile = bIsOldMacFile();	vPrepareHdrFtrText(pFile);	vPrepareFootnoteText(pFile);	vPrologue2(pDiag, iWordVersion);	/* Initialisation */#if defined(__riscos)	ulCharCounter = 0;	iCurrPct = 0;	iPrevPct = -1;	ulDocumentLength = ulGetDocumentLength();#endif /* __riscos */	pSection = pGetSectionInfo(NULL, 0);	pSectionNext = pSection;	lDefaultTabWidth = lGetDefaultTabWidth();	DBG_DEC_C(lDefaultTabWidth != 36000, lDefaultTabWidth);	pRowInfo = pGetNextRowInfoListItem();	DBG_HEX_C(pRowInfo != NULL, pRowInfo->ulFileOffsetStart);	DBG_HEX_C(pRowInfo != NULL, pRowInfo->ulFileOffsetEnd);	DBG_MSG_C(pRowInfo == NULL, "No rows at all");	bStartRow = FALSE;	bEndRowNorm = FALSE;	bEndRowFast = FALSE;	bIsTableRow = FALSE;	bWasTableRow = FALSE;	vResetStyles();	pStyleInfo = pGetNextTextStyle(NULL);	bStartStyle = FALSE;	bInList = FALSE;	bWasInList = FALSE;	iListSeqNumber = 0;	usIstdNext = ISTD_NORMAL;	pAnchor = NULL;	pFontInfo = pGetNextFontInfoListItem(NULL);	DBG_HEX_C(pFontInfo != NULL, pFontInfo->ulFileOffset);	DBG_MSG_C(pFontInfo == NULL, "No fonts at all");	bStartFont = FALSE;	ucFontNumber = 0;	usFontStyleMinimal = FONT_REGULAR;	usFontStyle = FONT_REGULAR;	usFontSize = DEFAULT_FONT_SIZE;	ucFontColor = FONT_COLOR_DEFAULT;	pAnchor = pStartNewOutput(pAnchor, NULL);	pOutput = pAnchor;	pOutput->ucFontColor = ucFontColor;	pOutput->usFontStyle = usFontStyle;	pOutput->tFontRef = tOpenFont(ucFontNumber, usFontStyle, usFontSize);	pOutput->usFontSize = usFontSize;	bTableFontClosed = TRUE;	lBeforeIndentation = 0;	lAfterIndentation = 0;	lLeftIndentation = 0;	lLeftIndentation1 = 0;	lRightIndentation = 0;	bWasEndOfParagraph = TRUE;	bNoMarks = TRUE;	bFirstLine = TRUE;	ucNFC = LIST_BULLETS;	if (pStyleInfo != NULL) {		szListChar = pStyleInfo->szListChar;		pStyleTmp = pStyleInfo;	} else {		if (tStyleNext.szListChar[0] == '\0') {			vGetBulletValue(tOptions.eConversionType,				tOptions.eEncoding, tStyleNext.szListChar, 4);		}		szListChar = tStyleNext.szListChar;		pStyleTmp = &tStyleNext;	}	usListNumber = 0;	ucAlignment = ALIGNMENT_LEFT;	bAllCapitals = FALSE;	bHiddenText = FALSE;	bMarkDelText = FALSE;	lWidthMax = lGetWidthMax(tOptions.iParagraphBreak);	NO_DBG_DEC(lWidthMax);	Hourglass_On();	uiFootnoteNumber = 0;	uiEndnoteNumber = 0;	eListID = text_list;	for(;;) {		ulChar = ulGetChar(pFile, eListID);		if (ulChar == (ULONG)EOF) {			if (bOutputContainsText(pAnchor)) {				OUTPUT_LINE();			} else {				RESET_LINE();			}			switch (eListID) {			case text_list:				if (tOptions.eConversionType !=							conversion_xml) {					eListID = footnote_list;					if (uiFootnoteNumber != 0) {						vPutSeparatorLine(pAnchor);						OUTPUT_LINE();						uiFootnoteNumber = 0;					}					break;				}				/* No break or return */			case footnote_list:				eListID = endnote_list;				if (uiEndnoteNumber != 0) {					vPutSeparatorLine(pAnchor);					OUTPUT_LINE();					uiEndnoteNumber = 0;				}				break;			case endnote_list:				eListID = textbox_list;				if (bExistsTextBox()) {					vPutSeparatorLine(pAnchor);					OUTPUT_LINE();				}				break;			case textbox_list:				eListID = hdrtextbox_list;				if (bExistsHdrTextBox()) {					vPutSeparatorLine(pAnchor);					OUTPUT_LINE();				}				break;			case hdrtextbox_list:			default:				eListID = end_of_lists;				break;			}			if (eListID == end_of_lists) {				break;			}			continue;		}		if (ulChar == UNKNOWN_NOTE_CHAR) {			switch (eListID) {			case footnote_list:				ulChar = FOOTNOTE_CHAR;				break;			case endnote_list:				ulChar = ENDNOTE_CHAR;				break;			default:				break;			}		}		if (bStartRow) {			/* Begin of a tablerow found */			if (bOutputContainsText(pAnchor)) {				OUTPUT_LINE();			} else {				RESET_LINE();			}			fail(pAnchor != pOutput);			if (bTableFontClosed) {				/* Start special table font */				vCloseFont();				/*				 * Compensate for the fact that Word uses				 * proportional fonts for its tables and we				 * only one fixed-width font				 */				uiTmp = ((UINT)usFontSize * 5 + 3) / 6;				if (uiTmp < MIN_TABLEFONT_SIZE) {					uiTmp = MIN_TABLEFONT_SIZE;				} else if (uiTmp > MAX_TABLEFONT_SIZE) {					uiTmp = MAX_TABLEFONT_SIZE;				}				pOutput->usFontSize = (USHORT)uiTmp;				pOutput->tFontRef =					tOpenTableFont(pOutput->usFontSize);				pOutput->usFontStyle = FONT_REGULAR;				pOutput->ucFontColor = FONT_COLOR_BLACK;				bTableFontClosed = FALSE;			}			bIsTableRow = TRUE;			bStartRow = FALSE;		}		if (bWasTableRow &&		    !bIsTableRow &&		    ulChar != PAR_END &&		    ulChar != HARD_RETURN &&		    ulChar != PAGE_BREAK &&		    ulChar != COLUMN_FEED) {			/*			 * The end of a table should be followed by an			 * empty line, like the end of a paragraph			 */			OUTPUT_LINE();			vEndOfParagraph(pDiag,					pOutput->tFontRef,					pOutput->usFontSize,					(long)pOutput->usFontSize * 600);		}		switch (ulChar) {		case PAGE_BREAK:		case COLUMN_FEED:			if (bIsTableRow) {				/* Ignore when in a table */				break;			}			if (bOutputContainsText(pAnchor)) {				OUTPUT_LINE();			} else {				RESET_LINE();			}			if (ulChar == PAGE_BREAK) {				vEndOfPage(pDiag, lAfterIndentation,						pSection != pSectionNext);			} else {				vEndOfParagraph(pDiag,					pOutput->tFontRef,					pOutput->usFontSize,					lAfterIndentation);			}			break;		default:			break;		}		if (bStartFont || (bStartFontNext && ulChar != PAR_END)) {			/* Begin of a font found */			if (bStartFont) {				/* bStartFont takes priority */				fail(pFontInfo == NULL);				pFontTmp = pFontInfo;			} else {				pFontTmp = &tFontNext;			}			bAllCapitals = bIsCapitals(pFontTmp->usFontStyle);			bHiddenText = bIsHidden(pFontTmp->usFontStyle);			bMarkDelText = bIsMarkDel(pFontTmp->usFontStyle);			usTmp = pFontTmp->usFontStyle &				(FONT_BOLD|FONT_ITALIC|FONT_UNDERLINE|				 FONT_STRIKE|FONT_MARKDEL|				 FONT_SUPERSCRIPT|FONT_SUBSCRIPT);			if (!bIsTableRow &&			    (usFontSize != pFontTmp->usFontSize ||			     ucFontNumber != pFontTmp->ucFontNumber ||			     usFontStyleMinimal != usTmp ||			     ucFontColor != pFontTmp->ucFontColor)) {				pOutput = pStartNextOutput(pOutput);				vCloseFont();				pOutput->ucFontColor = pFontTmp->ucFontColor;				pOutput->usFontStyle = pFontTmp->usFontStyle;				pOutput->usFontSize = pFontTmp->usFontSize;				pOutput->tFontRef = tOpenFont(						pFontTmp->ucFontNumber,						pFontTmp->usFontStyle,						pFontTmp->usFontSize);				fail(!bCheckDoubleLinkedList(pAnchor));			}			ucFontNumber = pFontTmp->ucFontNumber;			usFontSize = pFontTmp->usFontSize;			ucFontColor = pFontTmp->ucFontColor;			usFontStyle = pFontTmp->usFontStyle;			usFontStyleMinimal = usTmp;			if (bStartFont) {				/* Get the next font info */				pFontInfo = pGetNextFontInfoListItem(pFontInfo);				NO_DBG_HEX_C(pFontInfo != NULL,						pFontInfo->ulFileOffset);				DBG_MSG_C(pFontInfo == NULL, "No more fonts");			}			bStartFont = FALSE;			bStartFontNext = FALSE;		}		if (bStartStyle || (bStartStyleNext && ulChar != PAR_END)) {			bFirstLine = TRUE;			/* Begin of a style found */			if (bStartStyle) {				/* bStartStyle takes priority */				fail(pStyleInfo == NULL);				pStyleTmp = pStyleInfo;			} else {				pStyleTmp = &tStyleNext;			}			if (!bIsTableRow) {				vStoreStyle(pDiag, pOutput, pStyleTmp);			}			usIstdNext = pStyleTmp->usIstdNext;			lBeforeIndentation =				lTwips2MilliPoints(pStyleTmp->usBeforeIndent);			lAfterIndentation =				lTwips2MilliPoints(pStyleTmp->usAfterIndent);			lLeftIndentation =				lTwips2MilliPoints(pStyleTmp->sLeftIndent);			lLeftIndentation1 =				lTwips2MilliPoints(pStyleTmp->sLeftIndent1);			lRightIndentation =				lTwips2MilliPoints(pStyleTmp->sRightIndent);			bInList = bStyleImpliesList(pStyleTmp, iWordVersion);			bNoMarks = !bInList || pStyleTmp->bNumPause;			ucNFC = pStyleTmp->ucNFC;			szListChar = pStyleTmp->szListChar;			ucAlignment = pStyleTmp->ucAlignment;			if (bInList && !bWasInList) {				/* Start of a list */				iListSeqNumber++;				vStartOfList(pDiag, ucNFC,						bWasTableRow && !bIsTableRow);			}			if (!bInList && bWasInList) {				/* End of a list */				vEndOfList(pDiag);			}			bWasInList = bInList;			if (bStartStyle) {				pStyleInfo = pGetNextTextStyle(pStyleInfo);				NO_DBG_HEX_C(pStyleInfo != NULL,						pStyleInfo->ulFileOffset);				DBG_MSG_C(pStyleInfo == NULL,						"No more styles");			}			bStartStyle = FALSE;			bStartStyleNext = FALSE;		}		if (bWasEndOfParagraph) {			vStartOfParagraph1(pDiag, lBeforeIndentation);		}		if (!bIsTableRow &&		    lTotalStringWidth(pAnchor) == 0) {			if (!bNoMarks) {				usListNumber = usGetListValue(iListSeqNumber,

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -