📄 textoutputdev.cc
字号:
h1 = -h1;
    }

    // add the characters to the current word
    w1 /= uLen;
    h1 /= uLen;
    for (i = 0; i < uLen; ++i) {
      curWord->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, u[i]);
    }
  }
  if (curWord) {
    curWord->charLen += nBytes;
  }
  charPos += nBytes;
}

// Finish the word currently being built, if there is one.
//
// While nest is positive the call only decrements nest and returns
// without touching curWord.  Otherwise a non-NULL curWord is passed to
// addWord() and curWord is reset to NULL, so a second call in a row
// does nothing.
void TextPage::endWord() {
  // This check is needed because Type 3 characters can contain
  // text-drawing operations (when TextPage is being used via
  // {X,Win}SplashOutputDev rather than TextOutputDev).
  if (nest > 0) {
    --nest;
    return;
  }

  if (curWord) {
    addWord(curWord);
    curWord = NULL;
  }
}

// File a completed word on the page.
//
// A word whose len is zero is deleted here and not stored.  Otherwise:
//   - if rawOrder is set, the word is linked onto the end of the list
//     that starts at rawWords (rawLastWord is the current tail);
//   - if not, the word is passed to addWord() on pools[word->rot].
//
// NOTE(review): the raw-order path does not assign word->next; it
// presumably relies on a new word's next already being NULL -- confirm
// against the TextWord constructor.
void TextPage::addWord(TextWord *word) {
  // throw away zero-length words -- they don't have valid xMin/xMax
  // values, and they're useless anyway
  if (word->len == 0) {
    delete word;
    return;
  }

  if (rawOrder) {
    if (rawLastWord) {
      // non-empty list: hook the word after the current tail
      rawLastWord->next = word;
    } else {
      // empty list: the word becomes the head
      rawWords = word;
    }
    rawLastWord = word;
  } else {
    pools[word->rot]->addWord(word);
  }
}

// Record an underline: allocate a TextUnderline from the four
// coordinates and append it to the underlines list.
void TextPage::addUnderline(double x0, double y0, double x1, double y1) {
  underlines->append(new TextUnderline(x0, y0, x1, y1));
}

// Record a link region: allocate a TextLink from the bounding box and
// the Link pointer, and append it to the links list.
//
// NOTE(review): the Link pointer is passed straight through to the
// TextLink constructor; who owns it is not visible here -- confirm.
void TextPage::addLink(int xMin, int yMin, int xMax, int yMax, Link *link) {
  links->append(new TextLink(xMin, yMin, xMax, yMax, link));
}

void TextPage::coalesce(GBool physLayout, GBool doHTML) {
  UnicodeMap *uMap;
  TextPool *pool;
  TextWord *word0, *word1, *word2;
  TextLine *line;
  TextBlock *blkList, *blkStack, *blk, *lastBlk, *blk0, *blk1;
  TextBlock **blkArray;
  TextFlow *flow, *lastFlow;
  TextUnderline *underline;
  TextLink *link;
  int rot, poolMinBaseIdx, baseIdx, startBaseIdx, endBaseIdx;
  double minBase, maxBase, newMinBase, newMaxBase;
  double fontSize, colSpace1, colSpace2, lineSpace, intraLineSpace, blkSpace;
  GBool found;
  int count[4];
  int lrCount;
  int firstBlkIdx, nBlocksLeft;
  int col1, col2;
  int i, j, n;

  if (rawOrder) {
    primaryRot = 0;
    primaryLR = gTrue;
    return;
  }

  uMap = globalParams->getTextEncoding();
  blkList = NULL;
  lastBlk = NULL;
  nBlocks = 0;
  primaryRot = -1;

#if 0 // for debugging
  printf("*** initial words ***\n");
  for (rot = 0; rot < 4; ++rot) {
    pool = pools[rot];
    for (baseIdx =
pool->minBaseIdx; baseIdx <= pool->maxBaseIdx; ++baseIdx) { for (word0 = pool->getPool(baseIdx); word0; word0 = word0->next) { printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f fontSize=%.2f rot=%d link=%p '", word0->xMin, word0->xMax, word0->yMin, word0->yMax, word0->base, word0->fontSize, rot*90, word0->link); for (i = 0; i < word0->len; ++i) { fputc(word0->text[i] & 0xff, stdout); } printf("'\n"); } } } printf("\n");#endif#if 0 //~ for debugging for (i = 0; i < underlines->getLength(); ++i) { underline = (TextUnderline *)underlines->get(i); printf("underline: x=%g..%g y=%g..%g horiz=%d\n", underline->x0, underline->x1, underline->y0, underline->y1, underline->horiz); }#endif if (doHTML) { //----- handle underlining for (i = 0; i < underlines->getLength(); ++i) { underline = (TextUnderline *)underlines->get(i); if (underline->horiz) { // rot = 0 if (pools[0]->minBaseIdx <= pools[0]->maxBaseIdx) { startBaseIdx = pools[0]->getBaseIdx(underline->y0 + minUnderlineGap); endBaseIdx = pools[0]->getBaseIdx(underline->y0 + maxUnderlineGap); for (j = startBaseIdx; j <= endBaseIdx; ++j) { for (word0 = pools[0]->getPool(j); word0; word0 = word0->next) { //~ need to check the y value against the word baseline if (underline->x0 < word0->xMin + underlineSlack && word0->xMax - underlineSlack < underline->x1) { word0->underlined = gTrue; } } } } // rot = 2 if (pools[2]->minBaseIdx <= pools[2]->maxBaseIdx) { startBaseIdx = pools[2]->getBaseIdx(underline->y0 - maxUnderlineGap); endBaseIdx = pools[2]->getBaseIdx(underline->y0 - minUnderlineGap); for (j = startBaseIdx; j <= endBaseIdx; ++j) { for (word0 = pools[2]->getPool(j); word0; word0 = word0->next) { if (underline->x0 < word0->xMin + underlineSlack && word0->xMax - underlineSlack < underline->x1) { word0->underlined = gTrue; } } } } } else { // rot = 1 if (pools[1]->minBaseIdx <= pools[1]->maxBaseIdx) { startBaseIdx = pools[1]->getBaseIdx(underline->x0 - maxUnderlineGap); endBaseIdx = pools[1]->getBaseIdx(underline->x0 - 
minUnderlineGap); for (j = startBaseIdx; j <= endBaseIdx; ++j) { for (word0 = pools[1]->getPool(j); word0; word0 = word0->next) { if (underline->y0 < word0->yMin + underlineSlack && word0->yMax - underlineSlack < underline->y1) { word0->underlined = gTrue; } } } } // rot = 3 if (pools[3]->minBaseIdx <= pools[3]->maxBaseIdx) { startBaseIdx = pools[3]->getBaseIdx(underline->x0 + minUnderlineGap); endBaseIdx = pools[3]->getBaseIdx(underline->x0 + maxUnderlineGap); for (j = startBaseIdx; j <= endBaseIdx; ++j) { for (word0 = pools[3]->getPool(j); word0; word0 = word0->next) { if (underline->y0 < word0->yMin + underlineSlack && word0->yMax - underlineSlack < underline->y1) { word0->underlined = gTrue; } } } } } } //----- handle links for (i = 0; i < links->getLength(); ++i) { link = (TextLink *)links->get(i); // rot = 0 if (pools[0]->minBaseIdx <= pools[0]->maxBaseIdx) { startBaseIdx = pools[0]->getBaseIdx(link->yMin); endBaseIdx = pools[0]->getBaseIdx(link->yMax); for (j = startBaseIdx; j <= endBaseIdx; ++j) { for (word0 = pools[0]->getPool(j); word0; word0 = word0->next) { if (link->xMin < word0->xMin + hyperlinkSlack && word0->xMax - hyperlinkSlack < link->xMax && link->yMin < word0->yMin + hyperlinkSlack && word0->yMax - hyperlinkSlack < link->yMax) { word0->link = link->link; } } } } // rot = 2 if (pools[2]->minBaseIdx <= pools[2]->maxBaseIdx) { startBaseIdx = pools[2]->getBaseIdx(link->yMin); endBaseIdx = pools[2]->getBaseIdx(link->yMax); for (j = startBaseIdx; j <= endBaseIdx; ++j) { for (word0 = pools[2]->getPool(j); word0; word0 = word0->next) { if (link->xMin < word0->xMin + hyperlinkSlack && word0->xMax - hyperlinkSlack < link->xMax && link->yMin < word0->yMin + hyperlinkSlack && word0->yMax - hyperlinkSlack < link->yMax) { word0->link = link->link; } } } } // rot = 1 if (pools[1]->minBaseIdx <= pools[1]->maxBaseIdx) { startBaseIdx = pools[1]->getBaseIdx(link->xMin); endBaseIdx = pools[1]->getBaseIdx(link->xMax); for (j = startBaseIdx; j <= endBaseIdx; ++j) { 
for (word0 = pools[1]->getPool(j); word0; word0 = word0->next) { if (link->yMin < word0->yMin + hyperlinkSlack && word0->yMax - hyperlinkSlack < link->yMax && link->xMin < word0->xMin + hyperlinkSlack && word0->xMax - hyperlinkSlack < link->xMax) { word0->link = link->link; } } } } // rot = 3 if (pools[3]->minBaseIdx <= pools[3]->maxBaseIdx) { startBaseIdx = pools[3]->getBaseIdx(link->xMin); endBaseIdx = pools[3]->getBaseIdx(link->xMax); for (j = startBaseIdx; j <= endBaseIdx; ++j) { for (word0 = pools[3]->getPool(j); word0; word0 = word0->next) { if (link->yMin < word0->yMin + hyperlinkSlack && word0->yMax - hyperlinkSlack < link->yMax && link->xMin < word0->xMin + hyperlinkSlack && word0->xMax - hyperlinkSlack < link->xMax) { word0->link = link->link; } } } } } } //----- assemble the blocks //~ add an outer loop for writing mode (vertical text) // build blocks for each rotation value for (rot = 0; rot < 4; ++rot) { pool = pools[rot]; poolMinBaseIdx = pool->minBaseIdx; count[rot] = 0; // add blocks until no more words are left while (1) { // find the first non-empty line in the pool for (; poolMinBaseIdx <= pool->maxBaseIdx && !pool->getPool(poolMinBaseIdx); ++poolMinBaseIdx) ; if (poolMinBaseIdx > pool->maxBaseIdx) { break; } // look for the left-most word in the first four lines of the // pool -- this avoids starting with a superscript word startBaseIdx = poolMinBaseIdx; for (baseIdx = poolMinBaseIdx + 1; baseIdx < poolMinBaseIdx + 4 && baseIdx <= pool->maxBaseIdx; ++baseIdx) { if (!pool->getPool(baseIdx)) { continue; } if (pool->getPool(baseIdx)->primaryCmp(pool->getPool(startBaseIdx)) < 0) { startBaseIdx = baseIdx; } } // create a new block word0 = pool->getPool(startBaseIdx); pool->setPool(startBaseIdx, word0->next); word0->next = NULL; blk = new TextBlock(this, rot); blk->addWord(word0); fontSize = word0->fontSize; minBase = maxBase = word0->base; colSpace1 = minColSpacing1 * fontSize; colSpace2 = minColSpacing2 * fontSize; lineSpace = maxLineSpacingDelta * 
fontSize; intraLineSpace = maxIntraLineDelta * fontSize; // add words to the block do { found = gFalse; // look for words on the line above the current top edge of // the block newMinBase = minBase; for (baseIdx = pool->getBaseIdx(minBase); baseIdx >= pool->getBaseIdx(minBase - lineSpace); --baseIdx) { word0 = NULL; word1 = pool->getPool(baseIdx); while (word1) { if (word1->base < minBase && word1->base >= minBase - lineSpace && ((rot == 0 || rot == 2) ? (word1->xMin < blk->xMax && word1->xMax > blk->xMin) : (word1->yMin < blk->yMax && word1->yMax > blk->yMin)) && fabs(word1->fontSize - fontSize) < maxBlockFontSizeDelta1 * fontSize) { word2 = word1; if (word0) { word0->next = word1->next; } else { pool->setPool(baseIdx, word1->next); } word1 = word1->next; word2->next = NULL; blk->addWord(word2); found = gTrue; newMinBase = word2->base; } else { word0 = word1; word1 = word1->next; } } } minBase = newMinBase; // look for words on the line below the current bottom edge of // the block newMaxBase = maxBase; for (baseIdx = pool->getBaseIdx(maxBase); baseIdx <= pool->getBaseIdx(maxBase + lineSpace); ++baseIdx) { word0 = NULL; word1 = pool->getPool(baseIdx); while (word1) { if (word1->base > maxBase && word1->base <= maxBase + lineSpace && ((rot == 0 || rot == 2) ? 
(word1->xMin < blk->xMax && word1->xMax > blk->xMin) : (word1->yMin < blk->yMax && word1->yMax > blk->yMin)) && fabs(word1->fontSize - fontSize) < maxBlockFontSizeDelta1 * fontSize) { word2 = word1; if (word0) { word0->next = word1->next; } else { pool->setPool(baseIdx, word1->next); } word1 = word1->next; word2->next = NULL; blk->addWord(word2); found = gTrue; newMaxBase = word2->base; } else { word0 = word1; word1 = word1->next; } } } maxBase = newMaxBase; // look for words that are on lines already in the block, and // that overlap the block horizontally for (baseIdx = pool->getBaseIdx(minBase - intraLineSpace); baseIdx <= pool->getBaseIdx(maxBase + intraLineSpace); ++baseIdx) { word0 = NULL; word1 = pool->getPool(baseIdx); while (word1) { if (word1->base >= minBase - intraLineSpace && word1->base <= maxBase + intraLineSpace && ((rot == 0 || rot == 2) ? (word1->xMin < blk->xMax + colSpace1 && word1->xMax > blk->xMin - colSpace1) : (word1->yMin < blk->yMax + colSpace1 && word1->yMax > blk->yMin - colSpace1)) && fabs(word1->fontSize - fontSize) < maxBlockFontSizeDelta2 * fontSize) { word2 = word1; if (word0) { word0->next = word1->next; } else { pool->setPool(baseIdx, word1->next); } word1 = word1->next; word2->next = NULL; blk->addWord(word2); found = gTrue; } else { word0 = word1; word1 = word1->next; } } } // only check for outlying words (the next two chunks of code) // if we didn't find anything else if (found) { continue; } // scan down the left side of the block, looking for words // that are near (but not overlapping) the block; if there are // three or fewer, add them to the block n = 0; for (baseIdx = pool->getBaseIdx(minBase - intraLineSpace); baseIdx <= pool->getBaseIdx(maxBase + intraLineSpace); ++baseIdx) { word1 = pool->getPool(baseIdx); while (word1) { if (word1->base >= minBase - intraLineSpace && word1->base <= maxBase + intraLineSpace && ((rot == 0 || rot == 2) ? 
(word1->xMax <= blk->xMin && word1->xMax > blk->xMin - colSpace2) : (word1->yMax <= blk->yMin && word1->yMax > blk->yMin - colSpace2)) && fabs(word1->fontSize - fontSize) < maxBlockFontSizeDelta3 * fontSize) { ++n; break; } word1 = word1->next; } } if (n > 0 && n <= 3) { for (baseIdx = pool->getBaseIdx(minBase - intraLineSpace); baseIdx <= pool->getBaseIdx(maxBase + intraLineSpace); ++baseIdx) { word0 = NULL; word1 = pool->getPool(baseIdx); while (word1) { if (word1->base >= minBase - intraLineSpace && word1->base <= maxBase + intraLineSpace && ((rot == 0 || rot == 2) ? (word1->xMax <= blk->xMin && word1->xMax > blk->xMin - colSpace2) : (word1->yMax <= blk->yMin && word1->yMax > blk->yMin - colSpace2)) && fabs(word1->fontSize - fontSize) < maxBlockFontSizeDelta3 * fontSize) { word2 = word1; if (word0) { word0->next = word1->next; } else { pool->setPool(baseIdx, word1->next); } word1 = word1->next; word2->next = NULL; blk->addWord(word2); if (word2->base < minBase) { minBase = word2->base; } else if (word2->base > maxBase) { maxBase = word2->base; } found = gTrue; break; } else { word0 = word1; word1 = word1->next; } } } } // scan down the right side of the block, looking for words // that are near (but not overlapping) the block; if there are // three or fewer, add them to the block n = 0; for (baseIdx = pool->getBaseIdx(minBase - intraLineSpace); baseIdx <= pool->getBaseIdx(maxBase + intraLineSpace); ++baseIdx) { word1 = pool->getPool(baseIdx); while (word1) { if (word1->base >= minBase - intraLineSpace && word1->base <= maxBase + intraLineSpace && ((rot == 0 || rot == 2) ? 
(word1->xMin >= blk->xMax && word1->xMin < blk->xMax + colSpace2) : (word1->yMin >= blk->yMax && word1->yMin < blk->yMax + colSpace2)) && fabs(word1->fontSize - fontSize) < maxBlockFontSizeDelta3 * fontSize) { ++n; break; } word1 = word1->next; } } if (n > 0 && n <= 3) { for (baseIdx = pool->getBaseIdx(minBase - intraLineSpace); baseIdx <= pool->getBaseIdx(maxBase + intraLineSpace); ++baseIdx) { word0 = NU
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -