⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 findtext.c

📁 这是一个同样来自贝尔实验室的和UNIX有着渊源的操作系统, 其简洁的设计和实现易于我们学习和理解
💻 C
字号:
/* * findtext.c * Copyright (C) 1998-2004 A.J. van Os; Released under GNU GPL * * Description: * Find the blocks that contain the text of MS Word files */#include <stdio.h>#include <stdlib.h>#include "antiword.h"/* * bAddTextBlocks - Add the blocks to the text block list * * Returns TRUE when successful, FALSE if not */BOOLbAddTextBlocks(ULONG ulCharPosFirst, ULONG ulTotalLength,	BOOL bUsesUnicode, USHORT usPropMod,	ULONG ulStartBlock, const ULONG *aulBBD, size_t tBBDLen){	text_block_type	tTextBlock;	ULONG	ulCharPos, ulOffset, ulIndex;	long	lToGo;	fail(ulTotalLength > (ULONG)LONG_MAX / 2);	fail(ulStartBlock > MAX_BLOCKNUMBER && ulStartBlock != END_OF_CHAIN);	fail(aulBBD == NULL);	NO_DBG_HEX(ulCharPosFirst);	NO_DBG_DEC(ulTotalLength);	if (bUsesUnicode) {		/* One character equals two bytes */		NO_DBG_MSG("Uses Unicode");		lToGo = (long)ulTotalLength * 2;	} else {		/* One character equals one byte */		NO_DBG_MSG("Uses ASCII");		lToGo = (long)ulTotalLength;	}	ulCharPos = ulCharPosFirst;	ulOffset = ulCharPosFirst;	for (ulIndex = ulStartBlock;	     ulIndex != END_OF_CHAIN && lToGo > 0;	     ulIndex = aulBBD[ulIndex]) {		if (ulIndex >= (ULONG)tBBDLen) {			DBG_DEC(ulIndex);			DBG_DEC(tBBDLen);			werr(1, "The Big Block Depot is damaged");		}		if (ulOffset >= BIG_BLOCK_SIZE) {			ulOffset -= BIG_BLOCK_SIZE;			continue;		}		tTextBlock.ulFileOffset =			(ulIndex + 1) * BIG_BLOCK_SIZE + ulOffset;		tTextBlock.ulCharPos = ulCharPos;		tTextBlock.ulLength = min(BIG_BLOCK_SIZE - ulOffset,						(ULONG)lToGo);		tTextBlock.bUsesUnicode = bUsesUnicode;		tTextBlock.usPropMod = usPropMod;		ulOffset = 0;		if (!bAdd2TextBlockList(&tTextBlock)) {			DBG_HEX(tTextBlock.ulFileOffset);			DBG_HEX(tTextBlock.ulCharPos);			DBG_DEC(tTextBlock.ulLength);			DBG_DEC(tTextBlock.bUsesUnicode);			DBG_DEC(tTextBlock.usPropMod);			return FALSE;		}		ulCharPos += tTextBlock.ulLength;		lToGo -= (long)tTextBlock.ulLength;	}	DBG_DEC_C(lToGo != 0, lToGo);	return lToGo == 0;} /* end of bAddTextBlocks *//* * bGet6DocumentText - make a list of the text blocks of Word 6/7 files * * Code for "fast saved" files. * * Returns TRUE when successful, FALSE if not */BOOLbGet6DocumentText(FILE *pFile, BOOL bUsesUnicode, ULONG ulStartBlock,	const ULONG *aulBBD, size_t tBBDLen, const UCHAR *aucHeader){	UCHAR	*aucBuffer;	ULONG	ulBeginTextInfo, ulTextOffset, ulTotLength;	size_t	tTextInfoLen;	int	iIndex, iType, iOff, iLen, iPieces;	USHORT	usPropMod;	DBG_MSG("bGet6DocumentText");	fail(pFile == NULL);	fail(aulBBD == NULL);	fail(aucHeader == NULL);	ulBeginTextInfo = ulGetLong(0x160, aucHeader);	/* fcClx */	DBG_HEX(ulBeginTextInfo);	tTextInfoLen = (size_t)ulGetLong(0x164, aucHeader);	/* lcbClx */	DBG_DEC(tTextInfoLen);	aucBuffer = xmalloc(tTextInfoLen);	if (!bReadBuffer(pFile, ulStartBlock,			aulBBD, tBBDLen, BIG_BLOCK_SIZE,			aucBuffer, ulBeginTextInfo, tTextInfoLen)) {		aucBuffer = xfree(aucBuffer);		return FALSE;	}	NO_DBG_PRINT_BLOCK(aucBuffer, tTextInfoLen);	iOff = 0;	while ((size_t)iOff < tTextInfoLen) {		iType = (int)ucGetByte(iOff, aucBuffer);		iOff++;		if (iType == 0) {			DBG_FIXME();			iOff++;			continue;		}		if (iType == 1) {			iLen = (int)usGetWord(iOff, aucBuffer);			vAdd2PropModList(aucBuffer + iOff);			iOff += iLen + 2;			continue;		}		if (iType != 2) {			werr(0, "Unknown type of 'fastsaved' format");			aucBuffer = xfree(aucBuffer);			return FALSE;		}		/* Type 2 */		iLen = (int)usGetWord(iOff, aucBuffer);		NO_DBG_DEC(iLen);		iOff += 4;		iPieces = (iLen - 4) / 12;		DBG_DEC(iPieces);		for (iIndex = 0; iIndex < iPieces; iIndex++) {			ulTextOffset = ulGetLong(				iOff + (iPieces + 1) * 4 + iIndex * 8 + 2,				aucBuffer);			usPropMod = usGetWord(				iOff + (iPieces + 1) * 4 + iIndex * 8 + 6,				aucBuffer);			ulTotLength = ulGetLong(iOff + (iIndex + 1) * 4,						aucBuffer) -					ulGetLong(iOff + iIndex * 4,						aucBuffer);			NO_DBG_HEX_C(usPropMod != 0, usPropMod);			if (!bAddTextBlocks(ulTextOffset, ulTotLength,					bUsesUnicode, usPropMod,					ulStartBlock,					aulBBD, tBBDLen)) {				aucBuffer = xfree(aucBuffer);				return FALSE;			}		}		break;	}	aucBuffer = xfree(aucBuffer);	return TRUE;} /* end of bGet6DocumentText *//* * bGet8DocumentText - make a list of the text blocks of Word 8/97 files * * Returns TRUE when successful, FALSE if not */BOOLbGet8DocumentText(FILE *pFile, const pps_info_type *pPPS,	const ULONG *aulBBD, size_t tBBDLen,	const ULONG *aulSBD, size_t tSBDLen,	const UCHAR *aucHeader){	const ULONG	*aulBlockDepot;	UCHAR	*aucBuffer;	ULONG	ulTextOffset, ulBeginTextInfo;	ULONG	ulTotLength, ulLen;	long	lIndex, lPieces, lOff;	size_t	tTextInfoLen, tBlockDepotLen, tBlockSize;	int	iType, iLen;	BOOL	bUsesUnicode;	USHORT	usPropMod;	DBG_MSG("bGet8DocumentText");	fail(pFile == NULL || pPPS == NULL);	fail(aulBBD == NULL || aulSBD == NULL);	fail(aucHeader == NULL);  	ulBeginTextInfo = ulGetLong(0x1a2, aucHeader);	/* fcClx */	DBG_HEX(ulBeginTextInfo);	tTextInfoLen = (size_t)ulGetLong(0x1a6, aucHeader);	/* lcbClx */	DBG_DEC(tTextInfoLen);	DBG_DEC(pPPS->tTable.ulSB);	DBG_HEX(pPPS->tTable.ulSize);	if (pPPS->tTable.ulSize == 0) {		return FALSE;	}	if (pPPS->tTable.ulSize < MIN_SIZE_FOR_BBD_USE) {	  	/* Use the Small Block Depot */		aulBlockDepot = aulSBD;		tBlockDepotLen = tSBDLen;		tBlockSize = SMALL_BLOCK_SIZE;	} else {	  	/* Use the Big Block Depot */		aulBlockDepot = aulBBD;		tBlockDepotLen = tBBDLen;		tBlockSize = BIG_BLOCK_SIZE;	}	aucBuffer = xmalloc(tTextInfoLen);	if (!bReadBuffer(pFile, pPPS->tTable.ulSB,			aulBlockDepot, tBlockDepotLen, tBlockSize,			aucBuffer, ulBeginTextInfo, tTextInfoLen)) {		aucBuffer = xfree(aucBuffer);		return FALSE;	}	NO_DBG_PRINT_BLOCK(aucBuffer, tTextInfoLen);	lOff = 0;	while (lOff < (long)tTextInfoLen) {		iType = (int)ucGetByte(lOff, aucBuffer);		lOff++;		if (iType == 0) {			DBG_FIXME();			lOff++;			continue;		}		if (iType == 1) {			iLen = (int)usGetWord(lOff, aucBuffer);			vAdd2PropModList(aucBuffer + lOff);			lOff += (long)iLen + 2;			continue;		}		if (iType != 2) {			werr(0, "Unknown type of 'fastsaved' format");			aucBuffer = xfree(aucBuffer);			return FALSE;		}		/* Type 2 */		ulLen = ulGetLong(lOff, aucBuffer);		if (ulLen < 4) {			DBG_DEC(ulLen);			return FALSE;		}		lOff += 4;		lPieces = (long)((ulLen - 4) / 12);		DBG_DEC(lPieces);		for (lIndex = 0; lIndex < lPieces; lIndex++) {			ulTextOffset = ulGetLong(				lOff + (lPieces + 1) * 4 + lIndex * 8 + 2,				aucBuffer);			usPropMod = usGetWord(				lOff + (lPieces + 1) * 4 + lIndex * 8 + 6,				aucBuffer);			ulTotLength = ulGetLong(lOff + (lIndex + 1) * 4,						aucBuffer) -					ulGetLong(lOff + lIndex * 4,						aucBuffer);			if ((ulTextOffset & BIT(30)) == 0) {				bUsesUnicode = TRUE;			} else {				bUsesUnicode = FALSE;				ulTextOffset &= ~BIT(30);				ulTextOffset /= 2;			}			NO_DBG_HEX_C(usPropMod != 0, usPropMod);			if (!bAddTextBlocks(ulTextOffset, ulTotLength,					bUsesUnicode, usPropMod,					pPPS->tWordDocument.ulSB,					aulBBD, tBBDLen)) {				aucBuffer = xfree(aucBuffer);				return FALSE;			}		}		break;	}	aucBuffer = xfree(aucBuffer);	return TRUE;} /* end of bGet8DocumentText */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -