⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 word2text.c

📁 这是一个同样来自贝尔实验室的和UNIX有着渊源的操作系统, 其简洁的设计和实现易于我们学习和理解
💻 C
📖 第 1 页 / 共 3 页
字号:
							iWordVersion,							pStyleTmp);			}			if (bInList && bFirstLine) {				vStartOfListItem(pDiag, bNoMarks);			}			vPutIndentation(pDiag, pAnchor, bNoMarks, bFirstLine,					usListNumber, ucNFC, szListChar,					lLeftIndentation, lLeftIndentation1);			bFirstLine = FALSE;			/* One number or mark per paragraph will do */			bNoMarks = TRUE;		}		if (bWasEndOfParagraph) {			vStartOfParagraph2(pDiag);			bWasEndOfParagraph = FALSE;		}		switch (ulChar) {		case PICTURE:			(void)memset(&tImage, 0, sizeof(tImage));			eRes = eExamineImage(pFile, ulFileOffsetImage, &tImage);			switch (eRes) {			case image_no_information:				bSuccess = FALSE;				break;			case image_minimal_information:			case image_full_information:#if 0				if (bOutputContainsText(pAnchor)) {					OUTPUT_LINE();				} else {					RESET_LINE();				}#endif				bSuccess = bTranslateImage(pDiag, pFile,					eRes == image_minimal_information,					ulFileOffsetImage, &tImage);				break;			default:				DBG_DEC(eRes);				bSuccess = FALSE;				break;			}			if (!bSuccess) {				vStoreString("[pic]", 5, pOutput);			}			break;		case FOOTNOTE_CHAR:			uiFootnoteNumber++;			if (tOptions.eConversionType == conversion_xml) {				vStoreCharacter((ULONG)FOOTNOTE_OR_ENDNOTE,								pOutput);				break;			}			vStoreCharacter((ULONG)'[', pOutput);			vStoreNumberAsDecimal(uiFootnoteNumber, pOutput);			vStoreCharacter((ULONG)']', pOutput);			break;		case ENDNOTE_CHAR:			uiEndnoteNumber++;			vStoreCharacter((ULONG)'[', pOutput);			vStoreNumberAsRoman(uiEndnoteNumber, pOutput);			vStoreCharacter((ULONG)']', pOutput);			break;		case UNKNOWN_NOTE_CHAR:			vStoreString("[?]", 3, pOutput);			break;		case PAR_END:			if (bIsTableRow) {				vStoreCharacter((ULONG)'\n', pOutput);				break;			}			if (bOutputContainsText(pAnchor)) {				OUTPUT_LINE();			} else {				vMove2NextLine(pDiag,					pOutput->tFontRef, pOutput->usFontSize);				RESET_LINE();			}			vEndOfParagraph(pDiag,					pOutput->tFontRef,					pOutput->usFontSize,					lAfterIndentation);			bWasEndOfParagraph = TRUE;			break;		case HARD_RETURN:			if (bIsTableRow) {				vStoreCharacter((ULONG)'\n', pOutput);				break;			}			if (bOutputContainsText(pAnchor)) {				OUTPUT_LINE();			} else {				vMove2NextLine(pDiag,					pOutput->tFontRef, pOutput->usFontSize);				RESET_LINE();			}			break;		case PAGE_BREAK:		case COLUMN_FEED:			pSection = pSectionNext;			break;		case TABLE_SEPARATOR:			if (bIsTableRow) {				vStoreCharacter(ulChar, pOutput);				break;			}			vStoreCharacter((ULONG)' ', pOutput);			vStoreCharacter((ULONG)TABLE_SEPARATOR_CHAR, pOutput);			break;		case TAB:			if (bIsTableRow ||			    tOptions.eConversionType == conversion_xml) {				vStoreCharacter((ULONG)' ', pOutput);				break;			}			if (tOptions.iParagraphBreak == 0 &&			    (tOptions.eConversionType == conversion_text ||			     tOptions.eConversionType == conversion_fmt_text)) {				/* No logical lines, so no tab expansion */				vStoreCharacter(TAB, pOutput);				break;			}			lHalfSpaceWidth = (lComputeSpaceWidth(					pOutput->tFontRef,					pOutput->usFontSize) + 1) / 2;			lTmp = lTotalStringWidth(pAnchor);			lTmp += lDrawUnits2MilliPoints(pDiag->lXleft);			lTmp /= lDefaultTabWidth;			do {				vStoreCharacter((ULONG)FILLER_CHAR, pOutput);				lWidthCurr = lTotalStringWidth(pAnchor);				lWidthCurr +=					lDrawUnits2MilliPoints(pDiag->lXleft);			} while (lTmp == lWidthCurr / lDefaultTabWidth &&				 lWidthCurr < lWidthMax + lRightIndentation);			break;		default:			if (bHiddenText && tOptions.bHideHiddenText) {				continue;			}			if (bMarkDelText && tOptions.bRemoveRemovedText) {				continue;			}			if (ulChar == UNICODE_ELLIPSIS &&			    tOptions.eEncoding != encoding_utf_8) {				vStoreString("...", 3, pOutput);			} else {				if (bAllCapitals) {					ulChar = ulToUpper(ulChar);				}				vStoreCharacter(ulChar, pOutput);			}			break;		}		if (bWasTableRow && !bIsTableRow) {			/* End of a table */			vEndOfTable(pDiag);			/* Resume normal font */			NO_DBG_MSG("End of table font");			vCloseFont();			bTableFontClosed = TRUE;			pOutput->ucFontColor = ucFontColor;			pOutput->usFontStyle = usFontStyle;			pOutput->usFontSize = usFontSize;			pOutput->tFontRef = tOpenFont(					ucFontNumber, usFontStyle, usFontSize);		}		bWasTableRow = bIsTableRow;		if (bIsTableRow) {			fail(pAnchor != pOutput);			if (!bEndRowNorm && !bEndRowFast) {				continue;			}			/* End of a table row */			if (bEndRowNorm) {				fail(pRowInfo == NULL);				vTableRow2Window(pDiag, pAnchor, pRowInfo,						tOptions.eConversionType,						tOptions.iParagraphBreak);			} else {				fail(!bEndRowFast);			}			/* Reset */			pAnchor = pStartNewOutput(pAnchor, NULL);			pOutput = pAnchor;			if (bEndRowNorm) {				pRowInfo = pGetNextRowInfoListItem();			}			bIsTableRow = FALSE;			bEndRowNorm = FALSE;			bEndRowFast = FALSE;			NO_DBG_HEX_C(pRowInfo != NULL,						pRowInfo->ulFileOffsetStart);			NO_DBG_HEX_C(pRowInfo != NULL,						pRowInfo->ulFileOffsetEnd);			continue;		}		lWidthCurr = lTotalStringWidth(pAnchor);		lWidthCurr += lDrawUnits2MilliPoints(pDiag->lXleft);		if (lWidthCurr < lWidthMax + lRightIndentation) {			continue;		}		pLeftOver = pSplitList(pAnchor);		vJustify2Window(pDiag, pAnchor,				lWidthMax, lRightIndentation, ucAlignment);		pAnchor = pStartNewOutput(pAnchor, pLeftOver);		for (pOutput = pAnchor;		     pOutput->pNext != NULL;		     pOutput = pOutput->pNext)			;	/* EMPTY */		fail(pOutput == NULL);		if (lTotalStringWidth(pAnchor) > 0) {			vSetLeftIndentation(pDiag, lLeftIndentation);		}	}	pAnchor = pStartNewOutput(pAnchor, NULL);	pAnchor->szStorage = xfree(pAnchor->szStorage);	pAnchor = xfree(pAnchor);	vCloseFont();	vFreeDocument();	Hourglass_Off();	return TRUE;} /* end of bWordDecryptor *//* * lLastStringWidth - compute the width of the last part of the output string */static longlLastStringWidth(const output_type *pAnchor){	const output_type	*pCurr, *pStart;	pStart = NULL;	for (pCurr = pAnchor; pCurr != NULL; pCurr = pCurr->pNext) {		if (pCurr->tNextFree == 1 &&		    (pCurr->szStorage[0] == PAR_END ||		     pCurr->szStorage[0] == HARD_RETURN)) {			/* Found a separator. Start after the separator */			pStart = pCurr->pNext;		}	}	if (pStart == NULL) {		/* No separators. Use the whole output string */		pStart = pAnchor;	}	return lTotalStringWidth(pStart);} /* end of lLastStringWidth *//* * pHdrFtrDecryptor - turn a header/footer list element to something useful */output_type *pHdrFtrDecryptor(FILE *pFile, ULONG ulCharPosStart, ULONG ulCharPosNext){	output_type	*pAnchor, *pOutput, *pLeftOver;	ULONG	ulChar, ulFileOffset, ulCharPos;	long	lWidthCurr, lWidthMax;	long	lRightIndentation;	USHORT	usChar;	UCHAR	ucAlignment;	BOOL	bSkip;	fail(iWordVersion < 0);	fail(tOptions.eConversionType == conversion_unknown);	fail(tOptions.eEncoding == 0);	if (ulCharPosStart == ulCharPosNext) {		/* There are no bytes to decrypt */		return NULL;	}	lRightIndentation = 0;	ucAlignment = ALIGNMENT_LEFT;	bSkip = FALSE;	lWidthMax = lGetWidthMax(tOptions.iParagraphBreak);	pAnchor = pStartNewOutput(NULL, NULL);	pOutput = pAnchor;	pOutput->tFontRef = tOpenFont(0, FONT_REGULAR, DEFAULT_FONT_SIZE);	usChar = usToHdrFtrPosition(pFile, ulCharPosStart);	ulCharPos = ulCharPosStart;	ulFileOffset = ulCharPos2FileOffset(ulCharPos);	while (usChar != (USHORT)EOF && ulCharPos != ulCharPosNext) {		/* Skip embedded characters */		if (usChar == START_EMBEDDED) {			bSkip = TRUE;		} else if (usChar == END_IGNORE || usChar == END_EMBEDDED) {			bSkip = FALSE;		}		/* Translate character */		if (bSkip || usChar == END_IGNORE || usChar == END_EMBEDDED) {			ulChar = IGNORE_CHARACTER;		} else {			ulChar = ulTranslateCharacters(usChar,					ulFileOffset,					iWordVersion,					tOptions.eConversionType,					tOptions.eEncoding,					bOldMacFile);		}		/* Process character */		if (ulChar != IGNORE_CHARACTER) {			switch (ulChar) {			case PICTURE:				vStoreString("[pic]", 5, pOutput);				break;			case PAR_END:			case HARD_RETURN:			case PAGE_BREAK:			case COLUMN_FEED:				/* To the next substring */				pOutput = pStartNextOutput(pOutput);				vCloseFont();				pOutput->tFontRef = tOpenFont(0,					FONT_REGULAR, DEFAULT_FONT_SIZE);				/* A substring with just one character */				if (ulChar == HARD_RETURN) {					vStoreCharacter(HARD_RETURN, pOutput);				} else {					vStoreCharacter(PAR_END, pOutput);				}				/* To the next substring */				pOutput = pStartNextOutput(pOutput);				vCloseFont();				pOutput->tFontRef = tOpenFont(0,					FONT_REGULAR, DEFAULT_FONT_SIZE);				fail(!bCheckDoubleLinkedList(pAnchor));				break;			case TABLE_SEPARATOR:				vStoreCharacter((ULONG)' ', pOutput);				vStoreCharacter((ULONG)TABLE_SEPARATOR_CHAR,							pOutput);				break;			case TAB:				vStoreCharacter((ULONG)FILLER_CHAR, pOutput);				break;			default:				vStoreCharacter(ulChar, pOutput);				break;			}		}		lWidthCurr = lLastStringWidth(pAnchor);		if (lWidthCurr >= lWidthMax + lRightIndentation) {			pLeftOver = pSplitList(pAnchor);			for (pOutput = pAnchor;			     pOutput->pNext != NULL;			     pOutput = pOutput->pNext)				;	/* EMPTY */			fail(pOutput == NULL);			/* To the next substring */			pOutput = pStartNextOutput(pOutput);			/* A substring with just one HARD_RETURN */			vStoreCharacter(HARD_RETURN, pOutput);			/* Put the leftover piece(s) at the end */			pOutput->pNext = pLeftOver;			if (pLeftOver != NULL) {				pLeftOver->pPrev = pOutput;			}			fail(!bCheckDoubleLinkedList(pAnchor));			for (pOutput = pAnchor;			     pOutput->pNext != NULL;			     pOutput = pOutput->pNext)				;	/* EMPTY */			fail(pOutput == NULL);		}		usChar = usNextChar(pFile, hdrftr_list,					&ulFileOffset, &ulCharPos, NULL);	}	vCloseFont();	if (bOutputContainsText(pAnchor)) {		return pAnchor;	}	pAnchor = pStartNewOutput(pAnchor, NULL);	pAnchor->szStorage = xfree(pAnchor->szStorage);	pAnchor = xfree(pAnchor);	return NULL;} /* end of pHdrFtrDecryptor *//* * pFootnoteDecryptor - turn a footnote text list element into text */char *szFootnoteDecryptor(FILE *pFile, ULONG ulCharPosStart, ULONG ulCharPosNext){	char	*szText;	ULONG	ulChar, ulFileOffset, ulCharPos;	USHORT	usChar;	size_t	tLen, tIndex, tNextFree, tStorageSize;	char	szResult[6];	BOOL	bSkip;	fail(iWordVersion < 0);	fail(tOptions.eConversionType == conversion_unknown);	fail(tOptions.eEncoding == 0);	if (ulCharPosStart == ulCharPosNext) {		/* There are no bytes to decrypt */		return NULL;	}	if (tOptions.eConversionType != conversion_xml) {		/* Only implemented for XML output */		return NULL;	}	bSkip = FALSE;	/* Initialise the text buffer */	tStorageSize = INITIAL_SIZE;	szText = xmalloc(tStorageSize);	tNextFree = 0;	szText[tNextFree] = '\0';	/* Goto the start */	usChar = usToFootnotePosition(pFile, ulCharPosStart);	ulCharPos = ulCharPosStart;	ulFileOffset = ulCharPos2FileOffset(ulCharPos);	/* Skip the unwanted starting characters */	while (usChar != (USHORT)EOF && ulCharPos != ulCharPosNext &&	       (usChar == FOOTNOTE_OR_ENDNOTE ||		usChar == PAR_END ||		usChar == TAB ||		usChar == (USHORT)' ')) {		usChar = usNextChar(pFile, footnote_list,					&ulFileOffset, &ulCharPos, NULL);	}	/* Process the footnote text */	while (usChar != (USHORT)EOF && ulCharPos != ulCharPosNext) {		/* Skip embedded characters */		if (usChar == START_EMBEDDED) {			bSkip = TRUE;		} else if (usChar == END_IGNORE || usChar == END_EMBEDDED) {			bSkip = FALSE;		}		/* Translate character */		if (bSkip ||		    usChar == END_IGNORE ||		    usChar == END_EMBEDDED ||		    usChar == FOOTNOTE_OR_ENDNOTE) {			ulChar = IGNORE_CHARACTER;		} else {			ulChar = ulTranslateCharacters(usChar,					ulFileOffset,					iWordVersion,					tOptions.eConversionType,					tOptions.eEncoding,					bOldMacFile);		}		/* Process character */		if (ulChar == PICTURE) {			tLen = 5;			strcpy(szResult, "[pic]");		} else if (ulChar == IGNORE_CHARACTER) {			tLen = 0;			szResult[0] = '\0';		} else {			switch (ulChar) {			case PAR_END:			case HARD_RETURN:			case PAGE_BREAK:			case COLUMN_FEED:				ulChar = (ULONG)PAR_END;				break;			case TAB:				ulChar = (ULONG)' ';				break;			default:				break;			}			tLen = tUcs2Utf8(ulChar, szResult, sizeof(szResult));		}		/* Add the results to the text */		if (tNextFree + tLen + 1 > tStorageSize) {			tStorageSize += EXTENTION_SIZE;			szText = xrealloc(szText, tStorageSize);		}		for (tIndex = 0; tIndex < tLen; tIndex++) {			szText[tNextFree++] = szResult[tIndex];		}		szText[tNextFree] = '\0';		/* Next character */		usChar = usNextChar(pFile, footnote_list,					&ulFileOffset, &ulCharPos, NULL);	}	/* Remove redundant spaces */	while (tNextFree != 0 && szText[tNextFree - 1] == ' ') {		szText[tNextFree - 1] = '\0';		tNextFree--;	}	if (tNextFree == 0) {		/* No text */		szText = xfree(szText);		return NULL;	}	return szText;} /* end of szFootnoteDecryptor */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -