📄 word2text.c
字号:
static ULONGulGetChar(FILE *pFile, list_id_enum eListID){ const font_block_type *pCurr; ULONG ulChar, ulFileOffset, ulCharPos; row_info_enum eRowInfo; USHORT usChar, usPropMod; BOOL bSkip; fail(pFile == NULL); pCurr = pFontInfo; bSkip = FALSE; for (;;) { usChar = usNextChar(pFile, eListID, &ulFileOffset, &ulCharPos, &usPropMod); if (usChar == (USHORT)EOF) { return (ULONG)EOF; } vUpdateCounters(); eRowInfo = ePropMod2RowInfo(usPropMod, iWordVersion); if (!bStartRow) {#if 0 bStartRow = eRowInfo == found_a_cell || (pRowInfo != NULL && ulFileOffset == pRowInfo->ulFileOffsetStart && eRowInfo != found_not_a_cell);#else bStartRow = pRowInfo != NULL && ulFileOffset == pRowInfo->ulFileOffsetStart;#endif NO_DBG_HEX_C(bStartRow, pRowInfo->ulFileOffsetStart); } if (!bEndRowNorm) {#if 0 bEndRow = eRowInfo == found_end_of_row || (pRowInfo != NULL && ulFileOffset == pRowInfo->ulFileOffsetEnd && eRowInfo != found_not_end_of_row);#else bEndRowNorm = pRowInfo != NULL && ulFileOffset == pRowInfo->ulFileOffsetEnd;#endif NO_DBG_HEX_C(bEndRowNorm, pRowInfo->ulFileOffsetEnd); } if (!bEndRowFast) { bEndRowFast = eRowInfo == found_end_of_row; NO_DBG_HEX_C(bEndRowFast, pRowInfo->ulFileOffsetEnd); } if (!bStartStyle) { bStartStyle = pStyleInfo != NULL && ulFileOffset == pStyleInfo->ulFileOffset; NO_DBG_HEX_C(bStartStyle, ulFileOffset); } if (pCurr != NULL && ulFileOffset == pCurr->ulFileOffset) { bStartFont = TRUE; NO_DBG_HEX(ulFileOffset); pFontInfo = pCurr; pCurr = pGetNextFontInfoListItem(pCurr); } /* Skip embedded characters */ if (usChar == START_EMBEDDED) { bSkip = TRUE; continue; } if (usChar == END_IGNORE || usChar == END_EMBEDDED) { bSkip = FALSE; continue; } if (bSkip) { continue; } ulChar = ulTranslateCharacters(usChar, ulFileOffset, iWordVersion, tOptions.eConversionType, tOptions.eEncoding, bOldMacFile); if (ulChar == IGNORE_CHARACTER) { continue; } if (ulChar == PICTURE) { ulFileOffsetImage = ulGetPictInfoListItem(ulFileOffset); } else { ulFileOffsetImage = FC_INVALID; } if (ulChar == PAR_END) { /* End of paragraph seen, prepare for the next */ vFillStyleFromStylesheet(usIstdNext, &tStyleNext); vCorrectStyleValues(&tStyleNext); bStartStyleNext = TRUE; vFillFontFromStylesheet(usIstdNext, &tFontNext); vCorrectFontValues(&tFontNext); bStartFontNext = TRUE; } if (ulChar == PAGE_BREAK) { /* Might be the start of a new section */ pSectionNext = pGetSectionInfo(pSection, ulCharPos); } return ulChar; }} /* end of ulGetChar *//* * lGetWidthMax - get the maximum line width from the paragraph break value * * Returns the maximum line width in millipoints */static longlGetWidthMax(int iParagraphBreak){ fail(iParagraphBreak < 0); if (iParagraphBreak == 0) { return LONG_MAX; } if (iParagraphBreak < MIN_SCREEN_WIDTH) { return lChar2MilliPoints(MIN_SCREEN_WIDTH); } if (iParagraphBreak > MAX_SCREEN_WIDTH) { return lChar2MilliPoints(MAX_SCREEN_WIDTH); } return lChar2MilliPoints(iParagraphBreak);} /* end of lGetWidthMax *//* * bWordDecryptor - turn Word to something more useful * * returns TRUE when succesful, otherwise FALSE */BOOLbWordDecryptor(FILE *pFile, long lFilesize, diagram_type *pDiag){ imagedata_type tImage; const style_block_type *pStyleTmp; const font_block_type *pFontTmp; const char *szListChar; output_type *pAnchor, *pOutput, *pLeftOver; ULONG ulChar; long lBeforeIndentation, lAfterIndentation; long lLeftIndentation, lLeftIndentation1, lRightIndentation; long lWidthCurr, lWidthMax, lDefaultTabWidth, lHalfSpaceWidth, lTmp; list_id_enum eListID; image_info_enum eRes; UINT uiFootnoteNumber, uiEndnoteNumber, uiTmp; int iListSeqNumber; BOOL bWasTableRow, bTableFontClosed, bWasEndOfParagraph; BOOL bInList, bWasInList, bNoMarks, bFirstLine; BOOL bAllCapitals, bHiddenText, bMarkDelText, bSuccess; USHORT usListNumber; USHORT usFontStyle, usFontStyleMinimal, usFontSize, usTmp; UCHAR ucFontNumber, ucFontColor; UCHAR ucNFC, ucAlignment; fail(pFile == NULL || lFilesize <= 0 || pDiag == NULL); TRACE_MSG("bWordDecryptor"); iWordVersion = iInitDocument(pFile, lFilesize); if (iWordVersion < 0) { DBG_DEC(iWordVersion); return FALSE; } vGetOptions(&tOptions); bOldMacFile = bIsOldMacFile(); vPrepareHdrFtrText(pFile); vPrepareFootnoteText(pFile); vPrologue2(pDiag, iWordVersion); /* Initialisation */#if defined(__riscos) ulCharCounter = 0; iCurrPct = 0; iPrevPct = -1; ulDocumentLength = ulGetDocumentLength();#endif /* __riscos */ pSection = pGetSectionInfo(NULL, 0); pSectionNext = pSection; lDefaultTabWidth = lGetDefaultTabWidth(); DBG_DEC_C(lDefaultTabWidth != 36000, lDefaultTabWidth); pRowInfo = pGetNextRowInfoListItem(); DBG_HEX_C(pRowInfo != NULL, pRowInfo->ulFileOffsetStart); DBG_HEX_C(pRowInfo != NULL, pRowInfo->ulFileOffsetEnd); DBG_MSG_C(pRowInfo == NULL, "No rows at all"); bStartRow = FALSE; bEndRowNorm = FALSE; bEndRowFast = FALSE; bIsTableRow = FALSE; bWasTableRow = FALSE; vResetStyles(); pStyleInfo = pGetNextTextStyle(NULL); bStartStyle = FALSE; bInList = FALSE; bWasInList = FALSE; iListSeqNumber = 0; usIstdNext = ISTD_NORMAL; pAnchor = NULL; pFontInfo = pGetNextFontInfoListItem(NULL); DBG_HEX_C(pFontInfo != NULL, pFontInfo->ulFileOffset); DBG_MSG_C(pFontInfo == NULL, "No fonts at all"); bStartFont = FALSE; ucFontNumber = 0; usFontStyleMinimal = FONT_REGULAR; usFontStyle = FONT_REGULAR; usFontSize = DEFAULT_FONT_SIZE; ucFontColor = FONT_COLOR_DEFAULT; pAnchor = pStartNewOutput(pAnchor, NULL); pOutput = pAnchor; pOutput->ucFontColor = ucFontColor; pOutput->usFontStyle = usFontStyle; pOutput->tFontRef = tOpenFont(ucFontNumber, usFontStyle, usFontSize); pOutput->usFontSize = usFontSize; bTableFontClosed = TRUE; lBeforeIndentation = 0; lAfterIndentation = 0; lLeftIndentation = 0; lLeftIndentation1 = 0; lRightIndentation = 0; bWasEndOfParagraph = TRUE; bNoMarks = TRUE; bFirstLine = TRUE; ucNFC = LIST_BULLETS; if (pStyleInfo != NULL) { szListChar = pStyleInfo->szListChar; pStyleTmp = pStyleInfo; } else { if (tStyleNext.szListChar[0] == '\0') { vGetBulletValue(tOptions.eConversionType, tOptions.eEncoding, tStyleNext.szListChar, 4); } szListChar = tStyleNext.szListChar; pStyleTmp = &tStyleNext; } usListNumber = 0; ucAlignment = ALIGNMENT_LEFT; bAllCapitals = FALSE; bHiddenText = FALSE; bMarkDelText = FALSE; lWidthMax = lGetWidthMax(tOptions.iParagraphBreak); NO_DBG_DEC(lWidthMax); Hourglass_On(); uiFootnoteNumber = 0; uiEndnoteNumber = 0; eListID = text_list; for(;;) { ulChar = ulGetChar(pFile, eListID); if (ulChar == (ULONG)EOF) { if (bOutputContainsText(pAnchor)) { OUTPUT_LINE(); } else { RESET_LINE(); } switch (eListID) { case text_list: if (tOptions.eConversionType != conversion_xml) { eListID = footnote_list; if (uiFootnoteNumber != 0) { vPutSeparatorLine(pAnchor); OUTPUT_LINE(); uiFootnoteNumber = 0; } break; } /* No break or return */ case footnote_list: eListID = endnote_list; if (uiEndnoteNumber != 0) { vPutSeparatorLine(pAnchor); OUTPUT_LINE(); uiEndnoteNumber = 0; } break; case endnote_list: eListID = textbox_list; if (bExistsTextBox()) { vPutSeparatorLine(pAnchor); OUTPUT_LINE(); } break; case textbox_list: eListID = hdrtextbox_list; if (bExistsHdrTextBox()) { vPutSeparatorLine(pAnchor); OUTPUT_LINE(); } break; case hdrtextbox_list: default: eListID = end_of_lists; break; } if (eListID == end_of_lists) { break; } continue; } if (ulChar == UNKNOWN_NOTE_CHAR) { switch (eListID) { case footnote_list: ulChar = FOOTNOTE_CHAR; break; case endnote_list: ulChar = ENDNOTE_CHAR; break; default: break; } } if (bStartRow) { /* Begin of a tablerow found */ if (bOutputContainsText(pAnchor)) { OUTPUT_LINE(); } else { RESET_LINE(); } fail(pAnchor != pOutput); if (bTableFontClosed) { /* Start special table font */ vCloseFont(); /* * Compensate for the fact that Word uses * proportional fonts for its tables and we * only one fixed-width font */ uiTmp = ((UINT)usFontSize * 5 + 3) / 6; if (uiTmp < MIN_TABLEFONT_SIZE) { uiTmp = MIN_TABLEFONT_SIZE; } else if (uiTmp > MAX_TABLEFONT_SIZE) { uiTmp = MAX_TABLEFONT_SIZE; } pOutput->usFontSize = (USHORT)uiTmp; pOutput->tFontRef = tOpenTableFont(pOutput->usFontSize); pOutput->usFontStyle = FONT_REGULAR; pOutput->ucFontColor = FONT_COLOR_BLACK; bTableFontClosed = FALSE; } bIsTableRow = TRUE; bStartRow = FALSE; } if (bWasTableRow && !bIsTableRow && ulChar != PAR_END && ulChar != HARD_RETURN && ulChar != PAGE_BREAK && ulChar != COLUMN_FEED) { /* * The end of a table should be followed by an * empty line, like the end of a paragraph */ OUTPUT_LINE(); vEndOfParagraph(pDiag, pOutput->tFontRef, pOutput->usFontSize, (long)pOutput->usFontSize * 600); } switch (ulChar) { case PAGE_BREAK: case COLUMN_FEED: if (bIsTableRow) { /* Ignore when in a table */ break; } if (bOutputContainsText(pAnchor)) { OUTPUT_LINE(); } else { RESET_LINE(); } if (ulChar == PAGE_BREAK) { vEndOfPage(pDiag, lAfterIndentation, pSection != pSectionNext); } else { vEndOfParagraph(pDiag, pOutput->tFontRef, pOutput->usFontSize, lAfterIndentation); } break; default: break; } if (bStartFont || (bStartFontNext && ulChar != PAR_END)) { /* Begin of a font found */ if (bStartFont) { /* bStartFont takes priority */ fail(pFontInfo == NULL); pFontTmp = pFontInfo; } else { pFontTmp = &tFontNext; } bAllCapitals = bIsCapitals(pFontTmp->usFontStyle); bHiddenText = bIsHidden(pFontTmp->usFontStyle); bMarkDelText = bIsMarkDel(pFontTmp->usFontStyle); usTmp = pFontTmp->usFontStyle & (FONT_BOLD|FONT_ITALIC|FONT_UNDERLINE| FONT_STRIKE|FONT_MARKDEL| FONT_SUPERSCRIPT|FONT_SUBSCRIPT); if (!bIsTableRow && (usFontSize != pFontTmp->usFontSize || ucFontNumber != pFontTmp->ucFontNumber || usFontStyleMinimal != usTmp || ucFontColor != pFontTmp->ucFontColor)) { pOutput = pStartNextOutput(pOutput); vCloseFont(); pOutput->ucFontColor = pFontTmp->ucFontColor; pOutput->usFontStyle = pFontTmp->usFontStyle; pOutput->usFontSize = pFontTmp->usFontSize; pOutput->tFontRef = tOpenFont( pFontTmp->ucFontNumber, pFontTmp->usFontStyle, pFontTmp->usFontSize); fail(!bCheckDoubleLinkedList(pAnchor)); } ucFontNumber = pFontTmp->ucFontNumber; usFontSize = pFontTmp->usFontSize; ucFontColor = pFontTmp->ucFontColor; usFontStyle = pFontTmp->usFontStyle; usFontStyleMinimal = usTmp; if (bStartFont) { /* Get the next font info */ pFontInfo = pGetNextFontInfoListItem(pFontInfo); NO_DBG_HEX_C(pFontInfo != NULL, pFontInfo->ulFileOffset); DBG_MSG_C(pFontInfo == NULL, "No more fonts"); } bStartFont = FALSE; bStartFontNext = FALSE; } if (bStartStyle || (bStartStyleNext && ulChar != PAR_END)) { bFirstLine = TRUE; /* Begin of a style found */ if (bStartStyle) { /* bStartStyle takes priority */ fail(pStyleInfo == NULL); pStyleTmp = pStyleInfo; } else { pStyleTmp = &tStyleNext; } if (!bIsTableRow) { vStoreStyle(pDiag, pOutput, pStyleTmp); } usIstdNext = pStyleTmp->usIstdNext; lBeforeIndentation = lTwips2MilliPoints(pStyleTmp->usBeforeIndent); lAfterIndentation = lTwips2MilliPoints(pStyleTmp->usAfterIndent); lLeftIndentation = lTwips2MilliPoints(pStyleTmp->sLeftIndent); lLeftIndentation1 = lTwips2MilliPoints(pStyleTmp->sLeftIndent1); lRightIndentation = lTwips2MilliPoints(pStyleTmp->sRightIndent); bInList = bStyleImpliesList(pStyleTmp, iWordVersion); bNoMarks = !bInList || pStyleTmp->bNumPause; ucNFC = pStyleTmp->ucNFC; szListChar = pStyleTmp->szListChar; ucAlignment = pStyleTmp->ucAlignment; if (bInList && !bWasInList) { /* Start of a list */ iListSeqNumber++; vStartOfList(pDiag, ucNFC, bWasTableRow && !bIsTableRow); } if (!bInList && bWasInList) { /* End of a list */ vEndOfList(pDiag); } bWasInList = bInList; if (bStartStyle) { pStyleInfo = pGetNextTextStyle(pStyleInfo); NO_DBG_HEX_C(pStyleInfo != NULL, pStyleInfo->ulFileOffset); DBG_MSG_C(pStyleInfo == NULL, "No more styles"); } bStartStyle = FALSE; bStartStyleNext = FALSE; } if (bWasEndOfParagraph) { vStartOfParagraph1(pDiag, lBeforeIndentation); } if (!bIsTableRow && lTotalStringWidth(pAnchor) == 0) { if (!bNoMarks) { usListNumber = usGetListValue(iListSeqNumber,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -