📄 word2text.c
字号:
/* * word2text.c * Copyright (C) 1998-2005 A.J. van Os; Released under GNU GPL * * Description: * MS Word to "text" functions */#include <stdio.h>#include <stdlib.h>#include <string.h>#include <ctype.h>#if defined(__riscos)#include "DeskLib:Hourglass.h"#include "drawfile.h"#endif /* __riscos */#include "antiword.h"#define INITIAL_SIZE 40#define EXTENTION_SIZE 20/* Macros to make sure all such statements will be identical */#define OUTPUT_LINE() \ do {\ vAlign2Window(pDiag, pAnchor, lWidthMax, ucAlignment);\ TRACE_MSG("after vAlign2Window");\ pAnchor = pStartNewOutput(pAnchor, NULL);\ pOutput = pAnchor;\ } while(0)#define RESET_LINE() \ do {\ pAnchor = pStartNewOutput(pAnchor, NULL);\ pOutput = pAnchor;\ } while(0)#if defined(__riscos)/* Length of the document in characters */static ULONG ulDocumentLength;/* Number of characters processed so far */static ULONG ulCharCounter;static int iCurrPct, iPrevPct;#endif /* __riscos *//* The document is in the format belonging to this version of Word */static int iWordVersion = -1;/* Special treatment for files from Word 4/5/6 on an Apple Macintosh */static BOOL bOldMacFile = FALSE;/* Section Information */static const section_block_type *pSection = NULL;static const section_block_type *pSectionNext = NULL;/* All the (command line) options */static options_type tOptions;/* Needed for reading a complete table row */static const row_block_type *pRowInfo = NULL;static BOOL bStartRow = FALSE;static BOOL bEndRowNorm = FALSE;static BOOL bEndRowFast = FALSE;static BOOL bIsTableRow = FALSE;/* Index of the next style and font information */static USHORT usIstdNext = ISTD_NORMAL;/* Needed for finding the start of a style */static const style_block_type *pStyleInfo = NULL;static style_block_type tStyleNext;static BOOL bStartStyle = FALSE;static BOOL bStartStyleNext = FALSE;/* Needed for finding the start of a font */static const font_block_type *pFontInfo = NULL;static font_block_type tFontNext;static BOOL bStartFont = FALSE;static BOOL bStartFontNext = FALSE;/* Needed for finding an image */static ULONG ulFileOffsetImage = FC_INVALID;/* * vUpdateCounters - Update the counters for the hourglass */static voidvUpdateCounters(void){#if defined(__riscos) ulCharCounter++; iCurrPct = (int)((ulCharCounter * 100) / ulDocumentLength); if (iCurrPct != iPrevPct) { Hourglass_Percentage(iCurrPct); iPrevPct = iCurrPct; }#endif /* __riscos */} /* end of vUpdateCounters *//* * bOutputContainsText - see if the output contains more than white space */BOOLbOutputContainsText(const output_type *pAnchor){ const output_type *pCurr; size_t tIndex; fail(pAnchor == NULL); for (pCurr = pAnchor; pCurr != NULL; pCurr = pCurr->pNext) { fail(pCurr->lStringWidth < 0); for (tIndex = 0; tIndex < pCurr->tNextFree; tIndex++) { if (isspace((int)(UCHAR)pCurr->szStorage[tIndex])) { continue; }#if defined(DEBUG) if (pCurr->szStorage[tIndex] == FILLER_CHAR) { continue; }#endif /* DEBUG */ return TRUE; } } return FALSE;} /* end of bOutputContainsText *//* * lTotalStringWidth - compute the total width of the output string */static longlTotalStringWidth(const output_type *pAnchor){ const output_type *pCurr; long lTotal; lTotal = 0; for (pCurr = pAnchor; pCurr != NULL; pCurr = pCurr->pNext) { DBG_DEC_C(pCurr->lStringWidth < 0, pCurr->lStringWidth); fail(pCurr->lStringWidth < 0); lTotal += pCurr->lStringWidth; } return lTotal;} /* end of lTotalStringWidth *//* * vStoreByte - store one byte */static voidvStoreByte(UCHAR ucChar, output_type *pOutput){ fail(pOutput == NULL); if (ucChar == 0) { pOutput->szStorage[pOutput->tNextFree] = '\0'; return; } while (pOutput->tNextFree + 2 > pOutput->tStorageSize) { pOutput->tStorageSize += EXTENTION_SIZE; pOutput->szStorage = xrealloc(pOutput->szStorage, pOutput->tStorageSize); } pOutput->szStorage[pOutput->tNextFree] = (char)ucChar; pOutput->szStorage[pOutput->tNextFree + 1] = '\0'; pOutput->tNextFree++;} /* end of vStoreByte *//* * vStoreChar - store a character as one or more bytes */static voidvStoreChar(ULONG ulChar, BOOL bChangeAllowed, output_type *pOutput){ char szResult[4]; size_t tIndex, tLen; fail(pOutput == NULL); if (tOptions.eEncoding == encoding_utf_8 && bChangeAllowed) { DBG_HEX_C(ulChar > 0xffff, ulChar); fail(ulChar > 0xffff); tLen = tUcs2Utf8(ulChar, szResult, sizeof(szResult)); for (tIndex = 0; tIndex < tLen; tIndex++) { vStoreByte((UCHAR)szResult[tIndex], pOutput); } } else { DBG_HEX_C(ulChar > 0xff, ulChar); fail(ulChar > 0xff); vStoreByte((UCHAR)ulChar, pOutput); tLen = 1; } pOutput->lStringWidth += lComputeStringWidth( pOutput->szStorage + pOutput->tNextFree - tLen, tLen, pOutput->tFontRef, pOutput->usFontSize);} /* end of vStoreChar *//* * vStoreCharacter - store one character */static voidvStoreCharacter(ULONG ulChar, output_type *pOutput){ vStoreChar(ulChar, TRUE, pOutput);} /* end of vStoreCharacter *//* * vStoreString - store a string */static voidvStoreString(const char *szString, size_t tStringLength, output_type *pOutput){ size_t tIndex; fail(szString == NULL || pOutput == NULL); for (tIndex = 0; tIndex < tStringLength; tIndex++) { vStoreCharacter((ULONG)(UCHAR)szString[tIndex], pOutput); }} /* end of vStoreString *//* * vStoreNumberAsDecimal - store a number as a decimal number */static voidvStoreNumberAsDecimal(UINT uiNumber, output_type *pOutput){ size_t tLen; char szString[3 * sizeof(UINT) + 1]; fail(uiNumber == 0); fail(pOutput == NULL); tLen = (size_t)sprintf(szString, "%u", uiNumber); vStoreString(szString, tLen, pOutput);} /* end of vStoreNumberAsDecimal *//* * vStoreNumberAsRoman - store a number as a roman numerical */static voidvStoreNumberAsRoman(UINT uiNumber, output_type *pOutput){ size_t tLen; char szString[15]; fail(uiNumber == 0); fail(pOutput == NULL); tLen = tNumber2Roman(uiNumber, FALSE, szString); vStoreString(szString, tLen, pOutput);} /* end of vStoreNumberAsRoman *//* * vStoreStyle - store a style */static voidvStoreStyle(diagram_type *pDiag, output_type *pOutput, const style_block_type *pStyle){ size_t tLen; char szString[120]; fail(pDiag == NULL); fail(pOutput == NULL); fail(pStyle == NULL); if (tOptions.eConversionType == conversion_xml) { vSetHeaders(pDiag, pStyle->usIstd); } else { tLen = tStyle2Window(szString, sizeof(szString), pStyle, pSection); vStoreString(szString, tLen, pOutput); }} /* end of vStoreStyle *//* * vPutIndentation - output the specified amount of indentation */static voidvPutIndentation(diagram_type *pDiag, output_type *pOutput, BOOL bNoMarks, BOOL bFirstLine, UINT uiListNumber, UCHAR ucNFC, const char *szListChar, long lLeftIndentation, long lLeftIndentation1){ long lWidth; size_t tIndex, tNextFree; char szLine[30]; fail(pDiag == NULL); fail(pOutput == NULL); fail(szListChar == NULL); fail(lLeftIndentation < 0); if (tOptions.eConversionType == conversion_xml) { /* XML does its own indentation at rendering time */ return; } if (bNoMarks) { if (bFirstLine) { lLeftIndentation += lLeftIndentation1; } if (lLeftIndentation < 0) { lLeftIndentation = 0; } vSetLeftIndentation(pDiag, lLeftIndentation); return; } if (lLeftIndentation <= 0) { DBG_HEX_C(ucNFC != 0x00, ucNFC); vSetLeftIndentation(pDiag, 0); return; }#if defined(DEBUG) if (tOptions.eEncoding == encoding_utf_8) { fail(strlen(szListChar) > 3); } else { DBG_HEX_C(iscntrl((int)szListChar[0]), szListChar[0]); fail(iscntrl((int)szListChar[0])); fail(szListChar[1] != '\0'); }#endif /* DEBUG */ switch (ucNFC) { case LIST_ARABIC_NUM: case LIST_NUMBER_TXT: tNextFree = (size_t)sprintf(szLine, "%u", uiListNumber); break; case LIST_UPPER_ROMAN: case LIST_LOWER_ROMAN: tNextFree = tNumber2Roman(uiListNumber, ucNFC == LIST_UPPER_ROMAN, szLine); break; case LIST_UPPER_ALPHA: case LIST_LOWER_ALPHA: tNextFree = tNumber2Alpha(uiListNumber, ucNFC == LIST_UPPER_ALPHA, szLine); break; case LIST_ORDINAL_NUM: case LIST_ORDINAL_TXT: if (uiListNumber % 10 == 1 && uiListNumber != 11) { tNextFree = (size_t)sprintf(szLine, "%ust", uiListNumber); } else if (uiListNumber % 10 == 2 && uiListNumber != 12) { tNextFree = (size_t)sprintf(szLine, "%und", uiListNumber); } else if (uiListNumber % 10 == 3 && uiListNumber != 13) { tNextFree = (size_t)sprintf(szLine, "%urd", uiListNumber); } else { tNextFree = (size_t)sprintf(szLine, "%uth", uiListNumber); } break; case LIST_OUTLINE_NUM: tNextFree = (size_t)sprintf(szLine, "%02u", uiListNumber); break; case LIST_SPECIAL: case LIST_SPECIAL2: case LIST_BULLETS: tNextFree = 0; break; default: DBG_HEX(ucNFC); DBG_FIXME(); tNextFree = (size_t)sprintf(szLine, "%u", uiListNumber); break; } tNextFree += (size_t)sprintf(szLine + tNextFree, "%.3s", szListChar); szLine[tNextFree++] = ' '; szLine[tNextFree] = '\0'; lWidth = lComputeStringWidth(szLine, tNextFree, pOutput->tFontRef, pOutput->usFontSize); lLeftIndentation -= lWidth; if (lLeftIndentation < 0) { lLeftIndentation = 0; } vSetLeftIndentation(pDiag, lLeftIndentation); for (tIndex = 0; tIndex < tNextFree; tIndex++) { vStoreChar((ULONG)(UCHAR)szLine[tIndex], FALSE, pOutput); }} /* end of vPutIndentation *//* * vPutSeparatorLine - output a separator line * * A separator line is a horizontal line two inches long. * Two inches equals 144000 millipoints. */static voidvPutSeparatorLine(output_type *pOutput){ long lCharWidth; int iCounter, iChars; char szOne[2]; fail(pOutput == NULL); szOne[0] = OUR_EM_DASH; szOne[1] = '\0'; lCharWidth = lComputeStringWidth(szOne, 1, pOutput->tFontRef, pOutput->usFontSize); NO_DBG_DEC(lCharWidth); iChars = (int)((144000 + lCharWidth / 2) / lCharWidth); NO_DBG_DEC(iChars); for (iCounter = 0; iCounter < iChars; iCounter++) { vStoreCharacter((ULONG)(UCHAR)OUR_EM_DASH, pOutput); }} /* end of vPutSeparatorLine *//* * pStartNextOutput - start the next output record * * returns a pointer to the next record */static output_type *pStartNextOutput(output_type *pCurrent){ output_type *pNew; TRACE_MSG("pStartNextOutput"); if (pCurrent->tNextFree == 0) { /* The current record is empty, re-use */ fail(pCurrent->szStorage[0] != '\0'); fail(pCurrent->lStringWidth != 0); return pCurrent; } /* The current record is in use, make a new one */ pNew = xmalloc(sizeof(*pNew)); pCurrent->pNext = pNew; pNew->tStorageSize = INITIAL_SIZE; pNew->szStorage = xmalloc(pNew->tStorageSize); pNew->szStorage[0] = '\0'; pNew->tNextFree = 0; pNew->lStringWidth = 0; pNew->ucFontColor = FONT_COLOR_DEFAULT; pNew->usFontStyle = FONT_REGULAR; pNew->tFontRef = (drawfile_fontref)0; pNew->usFontSize = DEFAULT_FONT_SIZE; pNew->pPrev = pCurrent; pNew->pNext = NULL; return pNew;} /* end of pStartNextOutput *//* * pStartNewOutput */static output_type *pStartNewOutput(output_type *pAnchor, output_type *pLeftOver){ output_type *pCurr, *pNext; USHORT usFontStyle, usFontSize; drawfile_fontref tFontRef; UCHAR ucFontColor; TRACE_MSG("pStartNewOutput"); ucFontColor = FONT_COLOR_DEFAULT; usFontStyle = FONT_REGULAR; tFontRef = (drawfile_fontref)0; usFontSize = DEFAULT_FONT_SIZE; /* Free the old output space */ pCurr = pAnchor; while (pCurr != NULL) { TRACE_MSG("Free the old output space"); pNext = pCurr->pNext; pCurr->szStorage = xfree(pCurr->szStorage); if (pCurr->pNext == NULL) { ucFontColor = pCurr->ucFontColor; usFontStyle = pCurr->usFontStyle; tFontRef = pCurr->tFontRef; usFontSize = pCurr->usFontSize; } pCurr = xfree(pCurr); pCurr = pNext; } if (pLeftOver == NULL) { /* Create new output space */ TRACE_MSG("Create new output space"); pLeftOver = xmalloc(sizeof(*pLeftOver)); pLeftOver->tStorageSize = INITIAL_SIZE; NO_DBG_DEC(pLeftOver->tStorageSize); TRACE_MSG("before 2nd xmalloc"); pLeftOver->szStorage = xmalloc(pLeftOver->tStorageSize); TRACE_MSG("after 2nd xmalloc"); pLeftOver->szStorage[0] = '\0'; pLeftOver->tNextFree = 0; pLeftOver->lStringWidth = 0; pLeftOver->ucFontColor = ucFontColor; pLeftOver->usFontStyle = usFontStyle; pLeftOver->tFontRef = tFontRef; pLeftOver->usFontSize = usFontSize; pLeftOver->pPrev = NULL; pLeftOver->pNext = NULL; } fail(!bCheckDoubleLinkedList(pLeftOver)); return pLeftOver;} /* end of pStartNewOutput *//* * ulGetChar - get the next character from the specified list * * returns the next character of EOF */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -