📄 word2text.c
字号:
iWordVersion, pStyleTmp); } if (bInList && bFirstLine) { vStartOfListItem(pDiag, bNoMarks); } vPutIndentation(pDiag, pAnchor, bNoMarks, bFirstLine, usListNumber, ucNFC, szListChar, lLeftIndentation, lLeftIndentation1); bFirstLine = FALSE; /* One number or mark per paragraph will do */ bNoMarks = TRUE; } if (bWasEndOfParagraph) { vStartOfParagraph2(pDiag); bWasEndOfParagraph = FALSE; } switch (ulChar) { case PICTURE: (void)memset(&tImage, 0, sizeof(tImage)); eRes = eExamineImage(pFile, ulFileOffsetImage, &tImage); switch (eRes) { case image_no_information: bSuccess = FALSE; break; case image_minimal_information: case image_full_information:#if 0 if (bOutputContainsText(pAnchor)) { OUTPUT_LINE(); } else { RESET_LINE(); }#endif bSuccess = bTranslateImage(pDiag, pFile, eRes == image_minimal_information, ulFileOffsetImage, &tImage); break; default: DBG_DEC(eRes); bSuccess = FALSE; break; } if (!bSuccess) { vStoreString("[pic]", 5, pOutput); } break; case FOOTNOTE_CHAR: uiFootnoteNumber++; if (tOptions.eConversionType == conversion_xml) { vStoreCharacter((ULONG)FOOTNOTE_OR_ENDNOTE, pOutput); break; } vStoreCharacter((ULONG)'[', pOutput); vStoreNumberAsDecimal(uiFootnoteNumber, pOutput); vStoreCharacter((ULONG)']', pOutput); break; case ENDNOTE_CHAR: uiEndnoteNumber++; vStoreCharacter((ULONG)'[', pOutput); vStoreNumberAsRoman(uiEndnoteNumber, pOutput); vStoreCharacter((ULONG)']', pOutput); break; case UNKNOWN_NOTE_CHAR: vStoreString("[?]", 3, pOutput); break; case PAR_END: if (bIsTableRow) { vStoreCharacter((ULONG)'\n', pOutput); break; } if (bOutputContainsText(pAnchor)) { OUTPUT_LINE(); } else { vMove2NextLine(pDiag, pOutput->tFontRef, pOutput->usFontSize); RESET_LINE(); } vEndOfParagraph(pDiag, pOutput->tFontRef, pOutput->usFontSize, lAfterIndentation); bWasEndOfParagraph = TRUE; break; case HARD_RETURN: if (bIsTableRow) { vStoreCharacter((ULONG)'\n', pOutput); break; } if (bOutputContainsText(pAnchor)) { OUTPUT_LINE(); } else { vMove2NextLine(pDiag, pOutput->tFontRef, 
pOutput->usFontSize); RESET_LINE(); } break; case PAGE_BREAK: case COLUMN_FEED: pSection = pSectionNext; break; case TABLE_SEPARATOR: if (bIsTableRow) { vStoreCharacter(ulChar, pOutput); break; } vStoreCharacter((ULONG)' ', pOutput); vStoreCharacter((ULONG)TABLE_SEPARATOR_CHAR, pOutput); break; case TAB: if (bIsTableRow || tOptions.eConversionType == conversion_xml) { vStoreCharacter((ULONG)' ', pOutput); break; } if (tOptions.iParagraphBreak == 0 && (tOptions.eConversionType == conversion_text || tOptions.eConversionType == conversion_fmt_text)) { /* No logical lines, so no tab expansion */ vStoreCharacter(TAB, pOutput); break; } lHalfSpaceWidth = (lComputeSpaceWidth( pOutput->tFontRef, pOutput->usFontSize) + 1) / 2; lTmp = lTotalStringWidth(pAnchor); lTmp += lDrawUnits2MilliPoints(pDiag->lXleft); lTmp /= lDefaultTabWidth; do { vStoreCharacter((ULONG)FILLER_CHAR, pOutput); lWidthCurr = lTotalStringWidth(pAnchor); lWidthCurr += lDrawUnits2MilliPoints(pDiag->lXleft); } while (lTmp == lWidthCurr / lDefaultTabWidth && lWidthCurr < lWidthMax + lRightIndentation); break; default: if (bHiddenText && tOptions.bHideHiddenText) { continue; } if (bMarkDelText && tOptions.bRemoveRemovedText) { continue; } if (ulChar == UNICODE_ELLIPSIS && tOptions.eEncoding != encoding_utf_8) { vStoreString("...", 3, pOutput); } else { if (bAllCapitals) { ulChar = ulToUpper(ulChar); } vStoreCharacter(ulChar, pOutput); } break; } if (bWasTableRow && !bIsTableRow) { /* End of a table */ vEndOfTable(pDiag); /* Resume normal font */ NO_DBG_MSG("End of table font"); vCloseFont(); bTableFontClosed = TRUE; pOutput->ucFontColor = ucFontColor; pOutput->usFontStyle = usFontStyle; pOutput->usFontSize = usFontSize; pOutput->tFontRef = tOpenFont( ucFontNumber, usFontStyle, usFontSize); } bWasTableRow = bIsTableRow; if (bIsTableRow) { fail(pAnchor != pOutput); if (!bEndRowNorm && !bEndRowFast) { continue; } /* End of a table row */ if (bEndRowNorm) { fail(pRowInfo == NULL); vTableRow2Window(pDiag, pAnchor, 
pRowInfo, tOptions.eConversionType, tOptions.iParagraphBreak); } else { fail(!bEndRowFast); } /* Reset */ pAnchor = pStartNewOutput(pAnchor, NULL); pOutput = pAnchor; if (bEndRowNorm) { pRowInfo = pGetNextRowInfoListItem(); } bIsTableRow = FALSE; bEndRowNorm = FALSE; bEndRowFast = FALSE; NO_DBG_HEX_C(pRowInfo != NULL, pRowInfo->ulFileOffsetStart); NO_DBG_HEX_C(pRowInfo != NULL, pRowInfo->ulFileOffsetEnd); continue; } lWidthCurr = lTotalStringWidth(pAnchor); lWidthCurr += lDrawUnits2MilliPoints(pDiag->lXleft); if (lWidthCurr < lWidthMax + lRightIndentation) { continue; } pLeftOver = pSplitList(pAnchor); vJustify2Window(pDiag, pAnchor, lWidthMax, lRightIndentation, ucAlignment); pAnchor = pStartNewOutput(pAnchor, pLeftOver); for (pOutput = pAnchor; pOutput->pNext != NULL; pOutput = pOutput->pNext) ; /* EMPTY */ fail(pOutput == NULL); if (lTotalStringWidth(pAnchor) > 0) { vSetLeftIndentation(pDiag, lLeftIndentation); } } pAnchor = pStartNewOutput(pAnchor, NULL); pAnchor->szStorage = xfree(pAnchor->szStorage); pAnchor = xfree(pAnchor); vCloseFont(); vFreeDocument(); Hourglass_Off(); return TRUE;} /* end of bWordDecryptor */

/*
 * lLastStringWidth - compute the width of the last part of the output string
 *
 * Walks the list starting at pAnchor and remembers the element that follows
 * the last separator. A separator is an element that holds exactly one
 * character (tNextFree == 1) and that character is PAR_END or HARD_RETURN.
 *
 * The whole list is measured when no separator is found, and also when the
 * last separator is the final element of the list (its pNext is NULL).
 *
 * returns the value of lTotalStringWidth() for the chosen start element
 */
static long
lLastStringWidth(const output_type *pAnchor)
{
	const output_type	*pCurr, *pStart;

	pStart = NULL;
	for (pCurr = pAnchor; pCurr != NULL; pCurr = pCurr->pNext) {
		if (pCurr->tNextFree == 1 &&
		    (pCurr->szStorage[0] == PAR_END ||
		     pCurr->szStorage[0] == HARD_RETURN)) {
			/* Found a separator. Start after the separator */
			pStart = pCurr->pNext;
		}
	}
	if (pStart == NULL) {
		/* No separators. Use the whole output string */
		pStart = pAnchor;
	}
	return lTotalStringWidth(pStart);
} /* end of lLastStringWidth */

/*
 * pHdrFtrDecryptor - turn a header/footer list element to something useful
 *
 * Reads characters from pFile, starting at character position ulCharPosStart
 * and stopping at ulCharPosNext (or at EOF), and stores them in a list of
 * output substrings. Paragraph ends, hard returns, page breaks and column
 * feeds each get a substring of their own that holds one PAR_END or
 * HARD_RETURN character. When the part after the last separator becomes as
 * wide as lWidthMax the list is split and a HARD_RETURN substring is
 * inserted before the leftover piece(s).
 *
 * returns the anchor of the output list when bOutputContainsText() reports
 * text, otherwise NULL (also when ulCharPosStart equals ulCharPosNext)
 */
output_type *
pHdrFtrDecryptor(FILE *pFile, ULONG ulCharPosStart, ULONG ulCharPosNext)
{
	output_type	*pAnchor, *pOutput, *pLeftOver;
	ULONG	ulChar, ulFileOffset, ulCharPos;
	long	lWidthCurr, lWidthMax;
	long	lRightIndentation;
	USHORT	usChar;
	UCHAR	ucAlignment;
	BOOL	bSkip;

	fail(iWordVersion < 0);
	fail(tOptions.eConversionType == conversion_unknown);
	fail(tOptions.eEncoding == 0);

	if (ulCharPosStart == ulCharPosNext) {
		/* There are no bytes to decrypt */
		return NULL;
	}

	lRightIndentation = 0;
	ucAlignment = ALIGNMENT_LEFT;
	bSkip = FALSE;
	lWidthMax = lGetWidthMax(tOptions.iParagraphBreak);
	pAnchor = pStartNewOutput(NULL, NULL);
	pOutput = pAnchor;
	pOutput->tFontRef = tOpenFont(0, FONT_REGULAR, DEFAULT_FONT_SIZE);
	usChar = usToHdrFtrPosition(pFile, ulCharPosStart);
	ulCharPos = ulCharPosStart;
	ulFileOffset = ulCharPos2FileOffset(ulCharPos);
	while (usChar != (USHORT)EOF && ulCharPos != ulCharPosNext) {
		/* Skip embedded characters */
		if (usChar == START_EMBEDDED) {
			bSkip = TRUE;
		} else if (usChar == END_IGNORE || usChar == END_EMBEDDED) {
			bSkip = FALSE;
		}
		/* Translate character */
		if (bSkip || usChar == END_IGNORE || usChar == END_EMBEDDED) {
			ulChar = IGNORE_CHARACTER;
		} else {
			ulChar = ulTranslateCharacters(usChar,
					ulFileOffset,
					iWordVersion,
					tOptions.eConversionType,
					tOptions.eEncoding,
					bOldMacFile);
		}
		/* Process character */
		if (ulChar != IGNORE_CHARACTER) {
			switch (ulChar) {
			case PICTURE:
				vStoreString("[pic]", 5, pOutput);
				break;
			case PAR_END:
			case HARD_RETURN:
			case PAGE_BREAK:
			case COLUMN_FEED:
				/* To the next substring */
				pOutput = pStartNextOutput(pOutput);
				vCloseFont();
				pOutput->tFontRef = tOpenFont(0,
					FONT_REGULAR, DEFAULT_FONT_SIZE);
				/* A substring with just one character */
				if (ulChar == HARD_RETURN) {
					vStoreCharacter(HARD_RETURN, pOutput);
				} else {
					vStoreCharacter(PAR_END, pOutput);
				}
				/* To the next substring */
				pOutput = pStartNextOutput(pOutput);
				vCloseFont();
				pOutput->tFontRef = tOpenFont(0,
					FONT_REGULAR, DEFAULT_FONT_SIZE);
				fail(!bCheckDoubleLinkedList(pAnchor));
				break;
			case TABLE_SEPARATOR:
				vStoreCharacter((ULONG)' ', pOutput);
				vStoreCharacter((ULONG)TABLE_SEPARATOR_CHAR,
							pOutput);
				break;
			case TAB:
				vStoreCharacter((ULONG)FILLER_CHAR, pOutput);
				break;
			default:
				vStoreCharacter(ulChar, pOutput);
				break;
			}
		}
		lWidthCurr = lLastStringWidth(pAnchor);
		if (lWidthCurr >= lWidthMax + lRightIndentation) {
			pLeftOver = pSplitList(pAnchor);
			/* Find the last element of the remaining list */
			for (pOutput = pAnchor;
			     pOutput->pNext != NULL;
			     pOutput = pOutput->pNext)
				;	/* EMPTY */
			fail(pOutput == NULL);
			/* To the next substring */
			pOutput = pStartNextOutput(pOutput);
			/* A substring with just one HARD_RETURN */
			vStoreCharacter(HARD_RETURN, pOutput);
			/* Put the leftover piece(s) at the end */
			pOutput->pNext = pLeftOver;
			if (pLeftOver != NULL) {
				pLeftOver->pPrev = pOutput;
			}
			fail(!bCheckDoubleLinkedList(pAnchor));
			/* Continue storing at the new end of the list */
			for (pOutput = pAnchor;
			     pOutput->pNext != NULL;
			     pOutput = pOutput->pNext)
				;	/* EMPTY */
			fail(pOutput == NULL);
		}
		usChar = usNextChar(pFile, hdrftr_list,
					&ulFileOffset, &ulCharPos, NULL);
	}
	vCloseFont();
	if (bOutputContainsText(pAnchor)) {
		return pAnchor;
	}
	/* No text: release the output list */
	pAnchor = pStartNewOutput(pAnchor, NULL);
	pAnchor->szStorage = xfree(pAnchor->szStorage);
	pAnchor = xfree(pAnchor);
	return NULL;
} /* end of pHdrFtrDecryptor */

/*
 * szFootnoteDecryptor - turn a footnote text list element into text
 *
 * Reads characters from pFile, starting at character position ulCharPosStart
 * and stopping at ulCharPosNext (or at EOF), and collects the converted
 * bytes in a NUL-terminated buffer. Leading FOOTNOTE_OR_ENDNOTE, PAR_END,
 * TAB and space characters are skipped and trailing spaces are removed.
 * PAR_END, HARD_RETURN, PAGE_BREAK and COLUMN_FEED are all stored as
 * PAR_END, a TAB is stored as a space and a picture as "[pic]".
 *
 * returns NULL when the positions are equal, when the conversion type is
 * not conversion_xml, or when no text is left; otherwise the buffer that
 * was allocated here with xmalloc/xrealloc (NOTE(review): releasing it is
 * presumably up to the caller - confirm)
 */
char *
szFootnoteDecryptor(FILE *pFile, ULONG ulCharPosStart, ULONG ulCharPosNext)
{
	char	*szText;
	ULONG	ulChar, ulFileOffset, ulCharPos;
	USHORT	usChar;
	size_t	tLen, tIndex, tNextFree, tStorageSize;
	char	szResult[6];
	BOOL	bSkip;

	fail(iWordVersion < 0);
	fail(tOptions.eConversionType == conversion_unknown);
	fail(tOptions.eEncoding == 0);

	if (ulCharPosStart == ulCharPosNext) {
		/* There are no bytes to decrypt */
		return NULL;
	}

	if (tOptions.eConversionType != conversion_xml) {
		/* Only implemented for XML output */
		return NULL;
	}

	bSkip = FALSE;

	/* Initialise the text buffer */
	tStorageSize = INITIAL_SIZE;
	szText = xmalloc(tStorageSize);
	tNextFree = 0;
	szText[tNextFree] = '\0';

	/* Goto the start */
	usChar = usToFootnotePosition(pFile, ulCharPosStart);
	ulCharPos = ulCharPosStart;
	ulFileOffset = ulCharPos2FileOffset(ulCharPos);
	/* Skip the unwanted starting characters */
	while (usChar != (USHORT)EOF && ulCharPos != ulCharPosNext &&
	       (usChar == FOOTNOTE_OR_ENDNOTE ||
		usChar == PAR_END ||
		usChar == TAB ||
		usChar == (USHORT)' ')) {
		usChar = usNextChar(pFile, footnote_list,
					&ulFileOffset, &ulCharPos, NULL);
	}
	/* Process the footnote text */
	while (usChar != (USHORT)EOF && ulCharPos != ulCharPosNext) {
		/* Skip embedded characters */
		if (usChar == START_EMBEDDED) {
			bSkip = TRUE;
		} else if (usChar == END_IGNORE || usChar == END_EMBEDDED) {
			bSkip = FALSE;
		}
		/* Translate character */
		if (bSkip ||
		    usChar == END_IGNORE ||
		    usChar == END_EMBEDDED ||
		    usChar == FOOTNOTE_OR_ENDNOTE) {
			ulChar = IGNORE_CHARACTER;
		} else {
			ulChar = ulTranslateCharacters(usChar,
					ulFileOffset,
					iWordVersion,
					tOptions.eConversionType,
					tOptions.eEncoding,
					bOldMacFile);
		}
		/* Process character */
		if (ulChar == PICTURE) {
			/* "[pic]" plus its terminator fills szResult exactly */
			tLen = 5;
			strcpy(szResult, "[pic]");
		} else if (ulChar == IGNORE_CHARACTER) {
			tLen = 0;
			szResult[0] = '\0';
		} else {
			switch (ulChar) {
			case PAR_END:
			case HARD_RETURN:
			case PAGE_BREAK:
			case COLUMN_FEED:
				ulChar = (ULONG)PAR_END;
				break;
			case TAB:
				ulChar = (ULONG)' ';
				break;
			default:
				break;
			}
			tLen = tUcs2Utf8(ulChar, szResult, sizeof(szResult));
		}
		/*
		 * Add the results to the text. Keep growing until the new
		 * bytes and the terminator fit, so the copy below stays in
		 * bounds whatever the value of EXTENTION_SIZE is.
		 */
		while (tNextFree + tLen + 1 > tStorageSize) {
			tStorageSize += EXTENTION_SIZE;
			szText = xrealloc(szText, tStorageSize);
		}
		for (tIndex = 0; tIndex < tLen; tIndex++) {
			szText[tNextFree++] = szResult[tIndex];
		}
		szText[tNextFree] = '\0';
		/* Next character */
		usChar = usNextChar(pFile, footnote_list,
					&ulFileOffset, &ulCharPos, NULL);
	}
	/* Remove redundant spaces */
	while (tNextFree != 0 && szText[tNextFree - 1] == ' ') {
		szText[tNextFree - 1] = '\0';
		tNextFree--;
	}
	if (tNextFree == 0) {
		/* No text */
		szText = xfree(szText);
		return NULL;
	}
	return szText;
} /* end of szFootnoteDecryptor */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -