📄 wordole.c
字号:
/* * wordole.c * Copyright (C) 1998-2004 A.J. van Os; Released under GPL * * Description: * Deal with the OLE internals of a MS Word file */#include <string.h>#include "antiword.h"/* Private type for Property Set Storage entries */typedef struct pps_entry_tag { ULONG ulNext; ULONG ulPrevious; ULONG ulDir; ULONG ulSB; ULONG ulSize; int iLevel; char szName[32]; UCHAR ucType;} pps_entry_type;/* Show that a PPS number or index should not be used */#define PPS_NUMBER_INVALID 0xffffffffUL/* Macro to make sure all such statements will be identical */#define FREE_ALL() \ do {\ vDestroySmallBlockList();\ aulRootList = xfree(aulRootList);\ aulSbdList = xfree(aulSbdList);\ aulBbdList = xfree(aulBbdList);\ aulSBD = xfree(aulSBD);\ aulBBD = xfree(aulBBD);\ } while(0)/* * ulReadLong - read four bytes from the given file and offset */static ULONGulReadLong(FILE *pFile, ULONG ulOffset){ UCHAR aucBytes[4]; fail(pFile == NULL); if (!bReadBytes(aucBytes, 4, ulOffset, pFile)) { werr(1, "Read long 0x%lx not possible", ulOffset); } return ulGetLong(0, aucBytes);} /* end of ulReadLong *//* * vName2String - turn the name into a proper string. */static voidvName2String(char *szName, const UCHAR *aucBytes, size_t tNameSize){ char *pcChar; size_t tIndex; fail(aucBytes == NULL || szName == NULL); if (tNameSize < 2) { szName[0] = '\0'; return; } for (tIndex = 0, pcChar = szName; tIndex < 2 * tNameSize; tIndex += 2, pcChar++) { *pcChar = (char)aucBytes[tIndex]; } szName[tNameSize - 1] = '\0';} /* end of vName2String *//* * tReadBlockIndices - read the Big/Small Block Depot indices * * Returns the number of indices read */static size_ttReadBlockIndices(FILE *pFile, ULONG *aulBlockDepot, size_t tMaxRec, ULONG ulOffset){ size_t tDone; int iIndex; UCHAR aucBytes[BIG_BLOCK_SIZE]; fail(pFile == NULL || aulBlockDepot == NULL); fail(tMaxRec == 0); /* Read a big block with BBD or SBD indices */ if (!bReadBytes(aucBytes, BIG_BLOCK_SIZE, ulOffset, pFile)) { werr(0, "Reading big block from 0x%lx is not possible", ulOffset); return 0; } /* Split the big block into indices, an index is four bytes */ tDone = min(tMaxRec, (size_t)BIG_BLOCK_SIZE / 4); for (iIndex = 0; iIndex < (int)tDone; iIndex++) { aulBlockDepot[iIndex] = ulGetLong(4 * iIndex, aucBytes); NO_DBG_DEC(aulBlockDepot[iIndex]); } return tDone;} /* end of tReadBlockIndices *//* * bGetBBD - get the Big Block Depot indices from the index-blocks */static BOOLbGetBBD(FILE *pFile, const ULONG *aulDepot, size_t tDepotLen, ULONG *aulBBD, size_t tBBDLen){ ULONG ulBegin; size_t tToGo, tDone; int iIndex; fail(pFile == NULL || aulDepot == NULL || aulBBD == NULL); DBG_MSG("bGetBBD"); tToGo = tBBDLen; for (iIndex = 0; iIndex < (int)tDepotLen && tToGo != 0; iIndex++) { ulBegin = (aulDepot[iIndex] + 1) * BIG_BLOCK_SIZE; NO_DBG_HEX(ulBegin); tDone = tReadBlockIndices(pFile, aulBBD, tToGo, ulBegin); fail(tDone > tToGo); if (tDone == 0) { return FALSE; } aulBBD += tDone; tToGo -= tDone; } return tToGo == 0;} /* end of bGetBBD *//* * bGetSBD - get the Small Block Depot indices from the index-blocks */static BOOLbGetSBD(FILE *pFile, const ULONG *aulDepot, size_t tDepotLen, ULONG *aulSBD, size_t tSBDLen){ ULONG ulBegin; size_t tToGo, tDone; int iIndex; fail(pFile == NULL || aulDepot == NULL || aulSBD == NULL); DBG_MSG("bGetSBD"); tToGo = tSBDLen; for (iIndex = 0; iIndex < (int)tDepotLen && tToGo != 0; iIndex++) { fail(aulDepot[iIndex] >= ULONG_MAX / BIG_BLOCK_SIZE); ulBegin = (aulDepot[iIndex] + 1) * BIG_BLOCK_SIZE; NO_DBG_HEX(ulBegin); tDone = tReadBlockIndices(pFile, aulSBD, tToGo, ulBegin); fail(tDone > tToGo); if (tDone == 0) { return FALSE; } aulSBD += tDone; tToGo -= tDone; } return tToGo == 0;} /* end of bGetSBD *//* * vComputePPSlevels - compute the levels of the Property Set Storage entries */static voidvComputePPSlevels(pps_entry_type *atPPSlist, pps_entry_type *pNode, int iLevel, int iRecursionLevel){ fail(atPPSlist == NULL || pNode == NULL); fail(iLevel < 0 || iRecursionLevel < 0); if (iRecursionLevel > 25) { /* This removes the possibility of an infinite recursion */ DBG_DEC(iRecursionLevel); return; } if (pNode->iLevel <= iLevel) { /* Avoid entering a loop */ DBG_DEC(iLevel); DBG_DEC(pNode->iLevel); return; } pNode->iLevel = iLevel; if (pNode->ulDir != PPS_NUMBER_INVALID) { vComputePPSlevels(atPPSlist, &atPPSlist[pNode->ulDir], iLevel + 1, iRecursionLevel + 1); } if (pNode->ulNext != PPS_NUMBER_INVALID) { vComputePPSlevels(atPPSlist, &atPPSlist[pNode->ulNext], iLevel, iRecursionLevel + 1); } if (pNode->ulPrevious != PPS_NUMBER_INVALID) { vComputePPSlevels(atPPSlist, &atPPSlist[pNode->ulPrevious], iLevel, iRecursionLevel + 1); }} /* end of vComputePPSlevels *//* * bGetPPS - search the Property Set Storage for three sets * * Return TRUE if the WordDocument PPS is found */static BOOLbGetPPS(FILE *pFile, const ULONG *aulRootList, size_t tRootListLen, pps_info_type *pPPS){ pps_entry_type *atPPSlist; ULONG ulBegin, ulOffset, ulTmp; size_t tNbrOfPPS, tNameSize; int iIndex, iStartBlock, iRootIndex; BOOL bWord, bExcel; UCHAR aucBytes[PROPERTY_SET_STORAGE_SIZE]; fail(pFile == NULL || aulRootList == NULL || pPPS == NULL); DBG_MSG("bGetPPS"); NO_DBG_DEC(tRootListLen); bWord = FALSE; bExcel = FALSE; (void)memset(pPPS, 0, sizeof(*pPPS)); /* Read and store all the Property Set Storage entries */ tNbrOfPPS = tRootListLen * BIG_BLOCK_SIZE / PROPERTY_SET_STORAGE_SIZE; atPPSlist = xcalloc(tNbrOfPPS, sizeof(pps_entry_type)); iRootIndex = 0; for (iIndex = 0; iIndex < (int)tNbrOfPPS; iIndex++) { ulTmp = (ULONG)iIndex * PROPERTY_SET_STORAGE_SIZE; iStartBlock = (int)(ulTmp / BIG_BLOCK_SIZE); ulOffset = ulTmp % BIG_BLOCK_SIZE; ulBegin = (aulRootList[iStartBlock] + 1) * BIG_BLOCK_SIZE + ulOffset; NO_DBG_HEX(ulBegin); if (!bReadBytes(aucBytes, PROPERTY_SET_STORAGE_SIZE, ulBegin, pFile)) { werr(0, "Reading PPS %d is not possible", iIndex); atPPSlist = xfree(atPPSlist); return FALSE; } tNameSize = (size_t)usGetWord(0x40, aucBytes); tNameSize = (tNameSize + 1) / 2; vName2String(atPPSlist[iIndex].szName, aucBytes, tNameSize); atPPSlist[iIndex].ucType = ucGetByte(0x42, aucBytes); if (atPPSlist[iIndex].ucType == 5) { iRootIndex = iIndex; } atPPSlist[iIndex].ulPrevious = ulGetLong(0x44, aucBytes); atPPSlist[iIndex].ulNext = ulGetLong(0x48, aucBytes); atPPSlist[iIndex].ulDir = ulGetLong(0x4c, aucBytes); atPPSlist[iIndex].ulSB = ulGetLong(0x74, aucBytes); atPPSlist[iIndex].ulSize = ulGetLong(0x78, aucBytes); atPPSlist[iIndex].iLevel = INT_MAX; if ((atPPSlist[iIndex].ulPrevious >= (ULONG)tNbrOfPPS && atPPSlist[iIndex].ulPrevious != PPS_NUMBER_INVALID) || (atPPSlist[iIndex].ulNext >= (ULONG)tNbrOfPPS && atPPSlist[iIndex].ulNext != PPS_NUMBER_INVALID) || (atPPSlist[iIndex].ulDir >= (ULONG)tNbrOfPPS && atPPSlist[iIndex].ulDir != PPS_NUMBER_INVALID)) { DBG_DEC(iIndex); DBG_DEC(atPPSlist[iIndex].ulPrevious); DBG_DEC(atPPSlist[iIndex].ulNext); DBG_DEC(atPPSlist[iIndex].ulDir); DBG_DEC(tNbrOfPPS); werr(0, "The Property Set Storage is damaged"); atPPSlist = xfree(atPPSlist); return FALSE; } }#if 0 /* defined(DEBUG) */ DBG_MSG("Before"); for (iIndex = 0; iIndex < (int)tNbrOfPPS; iIndex++) { DBG_MSG(atPPSlist[iIndex].szName); DBG_HEX(atPPSlist[iIndex].ulDir); DBG_HEX(atPPSlist[iIndex].ulPrevious); DBG_HEX(atPPSlist[iIndex].ulNext); DBG_DEC(atPPSlist[iIndex].ulSB); DBG_HEX(atPPSlist[iIndex].ulSize); DBG_DEC(atPPSlist[iIndex].iLevel); }#endif /* DEBUG */ /* Add level information to each entry */ vComputePPSlevels(atPPSlist, &atPPSlist[iRootIndex], 0, 0); /* Check the entries on level 1 for the required information */ NO_DBG_MSG("After"); for (iIndex = 0; iIndex < (int)tNbrOfPPS; iIndex++) {#if 0 /* defined(DEBUG) */ DBG_MSG(atPPSlist[iIndex].szName); DBG_HEX(atPPSlist[iIndex].ulDir); DBG_HEX(atPPSlist[iIndex].ulPrevious); DBG_HEX(atPPSlist[iIndex].ulNext); DBG_DEC(atPPSlist[iIndex].ulSB); DBG_HEX(atPPSlist[iIndex].ulSize); DBG_DEC(atPPSlist[iIndex].iLevel);#endif /* DEBUG */ if (atPPSlist[iIndex].iLevel != 1 || atPPSlist[iIndex].ucType != 2 || atPPSlist[iIndex].szName[0] == '\0' || atPPSlist[iIndex].ulSize == 0) { /* This entry can be ignored */ continue; } if (pPPS->tWordDocument.ulSize == 0 && STREQ(atPPSlist[iIndex].szName, "WordDocument")) { pPPS->tWordDocument.ulSB = atPPSlist[iIndex].ulSB; pPPS->tWordDocument.ulSize = atPPSlist[iIndex].ulSize; bWord = TRUE; } else if (pPPS->tData.ulSize == 0 && STREQ(atPPSlist[iIndex].szName, "Data")) { pPPS->tData.ulSB = atPPSlist[iIndex].ulSB; pPPS->tData.ulSize = atPPSlist[iIndex].ulSize; } else if (pPPS->t0Table.ulSize == 0 && STREQ(atPPSlist[iIndex].szName, "0Table")) { pPPS->t0Table.ulSB = atPPSlist[iIndex].ulSB; pPPS->t0Table.ulSize = atPPSlist[iIndex].ulSize; } else if (pPPS->t1Table.ulSize == 0 && STREQ(atPPSlist[iIndex].szName, "1Table")) { pPPS->t1Table.ulSB = atPPSlist[iIndex].ulSB; pPPS->t1Table.ulSize = atPPSlist[iIndex].ulSize; } else if (pPPS->tSummaryInfo.ulSize == 0 && STREQ(atPPSlist[iIndex].szName, "\005SummaryInformation")) { pPPS->tSummaryInfo.ulSB = atPPSlist[iIndex].ulSB; pPPS->tSummaryInfo.ulSize = atPPSlist[iIndex].ulSize; } else if (pPPS->tDocSummaryInfo.ulSize == 0 && STREQ(atPPSlist[iIndex].szName, "\005DocumentSummaryInformation")) { pPPS->tDocSummaryInfo.ulSB = atPPSlist[iIndex].ulSB; pPPS->tDocSummaryInfo.ulSize = atPPSlist[iIndex].ulSize; } else if (STREQ(atPPSlist[iIndex].szName, "Book") || STREQ(atPPSlist[iIndex].szName, "Workbook")) { bExcel = TRUE; } } /* Free the space for the Property Set Storage entries */ atPPSlist = xfree(atPPSlist); /* Draw your conclusions */ if (bWord) { return TRUE; } if (bExcel) { werr(0, "Sorry, but this is an Excel spreadsheet"); } else { werr(0, "This OLE file does not contain a Word document"); } return FALSE;} /* end of bGetPPS *//* * vGetBbdList - make a list of the places to find big blocks */static voidvGetBbdList(FILE *pFile, int iNbr, ULONG *aulBbdList, ULONG ulOffset){ int iIndex; fail(pFile == NULL); fail(iNbr > 127); fail(aulBbdList == NULL); NO_DBG_DEC(iNbr); for (iIndex = 0; iIndex < iNbr; iIndex++) { aulBbdList[iIndex] = ulReadLong(pFile, ulOffset + 4 * (ULONG)iIndex); NO_DBG_DEC(iIndex); NO_DBG_HEX(aulBbdList[iIndex]); }} /* end of vGetBbdList *//* * bGetDocumentText - make a list of the text blocks of a Word document * * Return TRUE when succesful, otherwise FALSE */static BOOLbGetDocumentText(FILE *pFile, const pps_info_type *pPPS, const ULONG *aulBBD, size_t tBBDLen, const ULONG *aulSBD, size_t tSBDLen,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -