📄 textoutputdev.cc
字号:
line3 = line4 = NULL; if (rawOrder) { if (lineList->yMin < yLimit && lineList->xMax > blk1->xMin && lineList->xMin < blk1->xMax) { line3 = NULL; line4 = lineList; } } else { for (line1 = NULL, line2 = lineList; line2 && line2->yMin < yLimit; line1 = line2, line2 = line2->next) { if (line2->xMax > blk1->xMin && line2->xMin < blk1->xMax) { line3 = line1; line4 = line2; break; } } } // if there is an overlapping line and it fits in the block, add // it to the block if (line4 && blockFit(blk1, line4)) { if (line3) { line3->next = line4->next; } else { lineList = line4->next; } line0->next = line0->flowNext = line4; line4->next = NULL; if (line4->xMin < blk1->xMin) { blk1->xMin = line4->xMin; } else if (line4->xMax > blk1->xMax) { blk1->xMax = line4->xMax; } if (line4->yMax > blk1->yMax) { blk1->yMax = line4->yMax; } if (line4->xSpaceL > blk1->xSpaceL) { blk1->xSpaceL = line4->xSpaceL; } if (line4->xSpaceR < blk1->xSpaceR) { blk1->xSpaceR = line4->xSpaceR; } if (line4->fontSize > blk1->maxFontSize) { blk1->maxFontSize = line4->fontSize; } line0 = line4; // otherwise, we're done with this block } else { break; } } // insert block on list, in yx order if (rawOrder) { blk2 = blk0; blk3 = NULL; blk0 = blk1; } else { for (blk2 = NULL, blk3 = yxBlocks; blk3 && !blk1->yxBefore(blk3); blk2 = blk3, blk3 = blk3->next) ; } blk1->next = blk3; if (blk2) { blk2->next = blk1; } else { yxBlocks = blk1; } }#if 0 // for debugging printf("*** blocks in yx order ***\n"); for (blk0 = yxBlocks; blk0; blk0 = blk0->next) { printf("[block: x=%.2f..%.2f y=%.2f..%.2f]\n", blk0->xMin, blk0->xMax, blk0->yMin, blk0->yMax); for (line0 = blk0->lines; line0; line0 = line0->next) { printf(" [line: x=%.2f..%.2f y=%.2f..%.2f base=%.2f len=%d]\n", line0->xMin, line0->xMax, line0->yMin, line0->yMax, line0->yBase, line0->len); for (word0 = line0->words; word0; word0 = word0->next) { printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f space=%d: '", word0->xMin, word0->xMax, word0->yMin, word0->yMax, word0->yBase, word0->spaceAfter); for (i = 0; i < word0->len; ++i) { fputc(word0->text[i] & 0xff, stdout); } printf("'\n"); } } } printf("\n"); fflush(stdout);#endif //----- merge lines and blocks, sort blocks into reading order if (rawOrder) { blocks = yxBlocks; } else { blocks = NULL; blk0 = NULL; blkStack = NULL; while (yxBlocks) { // find the next two blocks: // - if the depth-first traversal stack is empty, take the first // (upper-left-most) two blocks on the yx-sorted block list // - otherwise, find the two upper-left-most blocks under the top // block on the stack if (blkStack) { blk3 = blk4 = blk5 = blk6 = NULL; for (blk1 = NULL, blk2 = yxBlocks; blk2; blk1 = blk2, blk2 = blk2->next) { if (blk2->yMin > blkStack->yMin && blk2->xMax > blkStack->xMin && blk2->xMin < blkStack->xMax) { if (!blk4 || blk2->yxBefore(blk4)) { blk5 = blk3; blk6 = blk4; blk3 = blk1; blk4 = blk2; } else if (!blk6 || blk2->yxBefore(blk6)) { blk5 = blk1; blk6 = blk2; } } } } else { blk3 = NULL; blk4 = yxBlocks; blk5 = yxBlocks; blk6 = yxBlocks->next; } // merge case 1: // | | | // | blkStack | | blkStack // +---------------------+ --> +-------------- // +------+ +------+ +-----------+ // | blk4 | | blk6 | ... | blk4+blk6 | // +------+ +------+ +-----------+ if (blkStack) { yLimit = blkStack->yMax + blkMaxSpacing * blkStack->lines->fontSize; } if (blkStack && blk4 && blk6 && !blk4->lines->next && !blk6->lines->next && lineFit2(blk4->lines, blk6->lines) && blk4->yMin < yLimit && blk4->xMin > blkStack->xSpaceL && blkStack->xMin > blk4->xSpaceL && blk6->xMax < blkStack->xSpaceR) { blk4->mergeRight(blk6); if (blk5) { blk5->next = blk6->next; } else { yxBlocks = blk6->next; } delete blk6; // merge case 2: // | | | | // | blkStack | | | // +---------------------+ --> | blkStack+blk2 | // +---------------------+ | | // | blk4 | | | // | | | | } else if (blkStack && blk4 && blk4->yMin < yLimit && blockFit2(blkStack, blk4)) { blkStack->mergeBelow(blk4); if (blk3) { blk3->next = blk4->next; } else { yxBlocks = blk4->next; } delete blk4; // if any of: // 1. no block found // 2. non-fully overlapping block found // 3. large vertical gap above the overlapping block // then pop the stack and try again } else if (!blk4 || (blkStack && (blk4->xMin < blkStack->xSpaceL || blk4->xMax > blkStack->xSpaceR || blk4->yMin - blkStack->yMax > blkMaxSortSpacing * blkStack->maxFontSize))) { blkStack = blkStack->stackNext; // add a block to the sorted list } else { // remove the block from the yx-sorted list if (blk3) { blk3->next = blk4->next; } else { yxBlocks = blk4->next; } blk4->next = NULL; // append the block to the reading-order list if (blk0) { blk0->next = blk4; } else { blocks = blk4; } blk0 = blk4; // push the block on the traversal stack blk4->stackNext = blkStack; blkStack = blk4; } } } // (!rawOrder)#if 0 // for debugging printf("*** blocks in reading order (after merging) ***\n"); for (blk0 = blocks; blk0; blk0 = blk0->next) { printf("[block: x=%.2f..%.2f y=%.2f..%.2f]\n", blk0->xMin, blk0->xMax, blk0->yMin, blk0->yMax); for (line0 = blk0->lines; line0; line0 = line0->next) { printf(" [line: x=%.2f..%.2f y=%.2f..%.2f base=%.2f len=%d]\n", line0->xMin, line0->xMax, line0->yMin, line0->yMax, line0->yBase, line0->len); for (word0 = line0->words; word0; word0 = word0->next) { printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f space=%d: '", word0->xMin, word0->xMax, word0->yMin, word0->yMax, word0->yBase, word0->spaceAfter); for (i = 0; i < word0->len; ++i) { fputc(word0->text[i] & 0xff, stdout); } printf("'\n"); } } } printf("\n"); fflush(stdout);#endif //----- assemble blocks into flows if (rawOrder) { // one flow per block flow0 = NULL; while (blocks) { flow1 = new TextFlow(); flow1->blocks = blocks; flow1->lines = blocks->lines; flow1->yMin = blocks->yMin; flow1->yMax = blocks->yMax; blocks = blocks->next; flow1->blocks->next = NULL; if (flow0) { flow0->next = flow1; } else { flows = flow1; } flow0 = flow1; } } else { // compute whitespace above and below each block for (blk0 = blocks; blk0; blk0 = blk0->next) { blk0->ySpaceT = 0; blk0->ySpaceB = pageHeight; // check each horizontally overlapping block for (blk1 = blocks; blk1; blk1 = blk1->next) { if (blk1 != blk0 && blk1->xMin < blk0->xMax && blk1->xMax > blk0->xMin) { if (blk1->yMax < blk0->yMin) { if (blk1->yMax > blk0->ySpaceT) { blk0->ySpaceT = blk1->yMax; } } else if (blk1->yMin > blk0->yMax) { if (blk1->yMin < blk0->ySpaceB) { blk0->ySpaceB = blk1->yMin; } } } } } flow0 = NULL; while (blocks) { // build a new flow object flow1 = new TextFlow(); flow1->blocks = blocks; flow1->lines = blocks->lines; flow1->yMin = blocks->yMin; flow1->yMax = blocks->yMax; flow1->ySpaceT = blocks->ySpaceT; flow1->ySpaceB = blocks->ySpaceB; // find subsequent blocks in the flow for (blk1 = blocks, blk2 = blocks->next; blk2 && flowFit(flow1, blk2); blk1 = blk2, blk2 = blk2->next) { if (blk2->yMin < flow1->yMin) { flow1->yMin = blk2->yMin; } if (blk2->yMax > flow1->yMax) { flow1->yMax = blk2->yMax; } if (blk2->ySpaceT > flow1->ySpaceT) { flow1->ySpaceT = blk2->ySpaceT; } if (blk2->ySpaceB < flow1->ySpaceB) { flow1->ySpaceB = blk2->ySpaceB; } for (line1 = blk1->lines; line1->next; line1 = line1->next) ; line1->flowNext = blk2->lines; } // chop the block list blocks = blk1->next; blk1->next = NULL; // append the flow to the list if (flow0) { flow0->next = flow1; } else { flows = flow1; } flow0 = flow1; } }#if 0 // for debugging printf("*** flows ***\n"); for (flow0 = flows; flow0; flow0 = flow0->next) { printf("[flow]\n"); for (blk0 = flow0->blocks; blk0; blk0 = blk0->next) { printf(" [block: x=%.2f..%.2f y=%.2f..%.2f ySpaceT=%.2f ySpaceB=%.2f]\n", blk0->xMin, blk0->xMax, blk0->yMin, blk0->yMax, blk0->ySpaceT, blk0->ySpaceB); for (line0 = blk0->lines; line0; line0 = line0->next) { printf(" [line: x=%.2f..%.2f y=%.2f..%.2f base=%.2f len=%d]\n", line0->xMin, line0->xMax, line0->yMin, line0->yMax, line0->yBase, line0->len); for (word0 = line0->words; word0; word0 = word0->next) { printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f space=%d: '", word0->xMin, word0->xMax, word0->yMin, word0->yMax, word0->yBase, word0->spaceAfter); for (i = 0; i < word0->len; ++i) { fputc(word0->text[i] & 0xff, stdout); } printf("'\n"); } } } } printf("\n"); fflush(stdout);#endif //----- sort lines into yx order // (the block/line merging process doesn't maintain the full-page // linked list of lines) lines = NULL; if (rawOrder) { line0 = NULL; for (flow0 = flows; flow0; flow0 = flow0->next) { for (line1 = flow0->lines; line1; line1 = line1->flowNext) { if (line0) { line0->pageNext = line1; } else { lines = line1; } line0 = line1; } } } else { for (flow0 = flows; flow0; flow0 = flow0->next) { for (line0 = flow0->lines; line0; line0 = line0->flowNext) { for (line1 = NULL, line2 = lines; line2 && !line0->yxBefore(line2); line1 = line2, line2 = line2->pageNext) ; if (line1) { line1->pageNext = line0; } else { lines = line0; } line0->pageNext = line2; } } }#if 0 // for debugging printf("*** lines in yx order ***\n"); for (line0 = lines; line0; line0 = line0->pageNext) { printf("[line: x=%.2f..%.2f y=%.2f..%.2f base=%.2f xSpaceL=%.2f xSpaceR=%.2f col=%d len=%d]\n", line0->xMin, line0->xMax, line0->yMin, line0->yMax, line0->yBase, line0->xSpaceL, line0->xSpaceR, line0->col[0], line0->len); for (word0 = line0->words; word0; word0 = word0->next) { printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f space=%d: '", word0->xMin, word0->xMax, word0->yMin, word0->yMax, word0->yBase, word0->spaceAfter); for (i = 0; i < word0->len; ++i) { fputc(word0->text[i] & 0xff, stdout); } printf("'\n"); } } printf("\n"); fflush(stdout);#endif}// Returns a non-negative number if <word> can be added to <line>// (whose last word is <lastWord>). A smaller return value indicates// a better fit. If <word> cannot be added to <line> at all, returns// a negative number.double TextPage::lineFit(TextLine *line, TextWord *lastWord, TextWord *word) { double fontSize0, fontSize1; double dx, dxLimit; fontSize0 = line->fontSize; fontSize1 = word->fontSize; dx = word->xMin - lastWord->xMax; dxLimit = fontSize0 * line->font->maxSpaceWidth; // check inter-word spacing if (dx < fontSize0 * lineMinDeltaX || dx > dxLimit) { return -1; } // ensure a non-negative return value if (dx < 0) { dx = 0; } // look for adjacent words with close baselines and close font sizes if (fabs(line->yBase - word->yBase) < lineMaxBaselineDelta * fontSize0 && fontSize0 < lineMaxFontSizeRatio * fontSize1 && fontSize1 < lineMaxFontSizeRatio * fontSize0) { return dx; } // look for a superscript if (fontSize1 > lineMinSuperscriptFontSizeRatio * fontSize0 && fontSize1 < lineMaxSuperscriptFontSizeRatio * fontSize0 && (word->yMax < lastWord->yMax || word->yBase < lastWord->yBase) && word->yMax - lastWord->yMin > lineMinSuperscriptOverlap * fontSize0 && dx < fontSize0 * lineMaxSuperscriptDeltaX) { return dx; } // look for a subscript if (fontSize1 > lineMinSubscriptFontSizeRatio * fontSize0 && fontSize1 < lineMaxSubscriptFontSizeRatio * fontSize0 && (word->yMin > lastWord->yMin || word->yBase > lastWord->yBase) && line->yMax - word->yMin > lineMinSubscriptOverlap * fontSize0 && dx < fontSize0 * lineMaxSubscriptDeltaX) { return dx; } return -1;}// Returns true if <line0> and <line1> can be merged into a single// line, ignoring max word spacing.GBool TextPage::lineFit2(TextLine *line0, TextLine *line1) { double fontSize0, fontSize1; double dx; fontSize0 = line0->fontSize; fontSize1 = line1->fontSize; dx = line1->xMin - line0->xMax; // check inter-word spacing if (dx < fontSize0 * lineMinDeltaX) { return gFalse; } // look for close baselines and close font sizes if (fabs(line0->yBase - line1->yBase) < lineMaxBaselineDelta * fontSize0 && fontSize0 < lineMaxFontSizeRatio * fontSize1 && fontSize1 < lineMaxFontSizeRatio * fontSize0) { return gTrue; } return gFalse;}// Returns true if <line> can be added to <blk>. Assumes the y// coordinates are within range.GBool TextPage::blockFit(TextBlock *blk, TextLine *line) { double fontSize0, fontSize1; // check edges if (line->xMin < blk->xSpaceL || line->xMax > blk->xSpaceR || blk->xMin < line->xSpaceL || blk->xMax > line->xSpaceR) { return gFalse; } // check font sizes fontSize0 = blk->lines->fontSize; fontSize1 = line->fontSize; if (fontSize0 > blkMaxFontSizeRatio * fontSize1 ||
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -