📄 textoutputdev.cc
字号:
} }}TextWordList::~TextWordList() { delete words;}int TextWordList::getLength() { return words->getLength();}TextWord *TextWordList::get(int idx) { if (idx < 0 || idx >= words->getLength()) { return NULL; } return (TextWord *)words->get(idx);}#endif // TEXTOUT_WORD_LIST//------------------------------------------------------------------------// TextPage//------------------------------------------------------------------------TextPage::TextPage(GBool rawOrderA) { int rot; rawOrder = rawOrderA; curWord = NULL; charPos = 0; curFont = NULL; curFontSize = 0; nest = 0; nTinyChars = 0; lastCharOverlap = gFalse; if (!rawOrder) { for (rot = 0; rot < 4; ++rot) { pools[rot] = new TextPool(); } } flows = NULL; blocks = NULL; rawWords = NULL; rawLastWord = NULL; fonts = new GList(); lastFindXMin = lastFindYMin = 0; haveLastFind = gFalse;}TextPage::~TextPage() { int rot; clear(); if (!rawOrder) { for (rot = 0; rot < 4; ++rot) { delete pools[rot]; } } delete fonts;}void TextPage::startPage(GfxState *state) { clear(); if (state) { pageWidth = state->getPageWidth(); pageHeight = state->getPageHeight(); } else { pageWidth = pageHeight = 0; }}void TextPage::endPage() { if (curWord) { endWord(); }}void TextPage::clear() { int rot; TextFlow *flow; TextWord *word; if (curWord) { delete curWord; curWord = NULL; } if (rawOrder) { while (rawWords) { word = rawWords; rawWords = rawWords->next; delete word; } } else { for (rot = 0; rot < 4; ++rot) { delete pools[rot]; } while (flows) { flow = flows; flows = flows->next; delete flow; } gfree(blocks); } deleteGList(fonts, TextFontInfo); curWord = NULL; charPos = 0; curFont = NULL; curFontSize = 0; nest = 0; nTinyChars = 0; if (!rawOrder) { for (rot = 0; rot < 4; ++rot) { pools[rot] = new TextPool(); } } flows = NULL; blocks = NULL; rawWords = NULL; rawLastWord = NULL; fonts = new GList();}void TextPage::updateFont(GfxState *state) { GfxFont *gfxFont; double *fm; char *name; int code, mCode, letterCode, anyCode; double w; int i; // get the font info object curFont = NULL; for (i = 0; i < fonts->getLength(); ++i) { curFont = (TextFontInfo *)fonts->get(i); if (curFont->matches(state)) { break; } curFont = NULL; } if (!curFont) { curFont = new TextFontInfo(state); fonts->append(curFont); } // adjust the font size gfxFont = state->getFont(); curFontSize = state->getTransformedFontSize(); if (gfxFont && gfxFont->getType() == fontType3) { // This is a hack which makes it possible to deal with some Type 3 // fonts. The problem is that it's impossible to know what the // base coordinate system used in the font is without actually // rendering the font. This code tries to guess by looking at the // width of the character 'm' (which breaks if the font is a // subset that doesn't contain 'm'). mCode = letterCode = anyCode = -1; for (code = 0; code < 256; ++code) { name = ((Gfx8BitFont *)gfxFont)->getCharName(code); if (name && name[0] == 'm' && name[1] == '\0') { mCode = code; } if (letterCode < 0 && name && name[1] == '\0' && ((name[0] >= 'A' && name[0] <= 'Z') || (name[0] >= 'a' && name[0] <= 'z'))) { letterCode = code; } if (anyCode < 0 && name && ((Gfx8BitFont *)gfxFont)->getWidth(code) > 0) { anyCode = code; } } if (mCode >= 0 && (w = ((Gfx8BitFont *)gfxFont)->getWidth(mCode)) > 0) { // 0.6 is a generic average 'm' width -- yes, this is a hack curFontSize *= w / 0.6; } else if (letterCode >= 0 && (w = ((Gfx8BitFont *)gfxFont)->getWidth(letterCode)) > 0) { // even more of a hack: 0.5 is a generic letter width curFontSize *= w / 0.5; } else if (anyCode >= 0 && (w = ((Gfx8BitFont *)gfxFont)->getWidth(anyCode)) > 0) { // better than nothing: 0.5 is a generic character width curFontSize *= w / 0.5; } fm = gfxFont->getFontMatrix(); if (fm[0] != 0) { curFontSize *= fabs(fm[3] / fm[0]); } }}void TextPage::beginWord(GfxState *state, double x0, double y0) { double *fontm; double m[4], m2[4]; int rot; // This check is needed because Type 3 characters can contain // text-drawing operations (when TextPage is being used via // {X,Win}SplashOutputDev rather than TextOutputDev). if (curWord) { ++nest; return; } // compute the rotation state->getFontTransMat(&m[0], &m[1], &m[2], &m[3]); if (state->getFont()->getType() == fontType3) { fontm = state->getFont()->getFontMatrix(); m2[0] = fontm[0] * m[0] + fontm[1] * m[2]; m2[1] = fontm[0] * m[1] + fontm[1] * m[3]; m2[2] = fontm[2] * m[0] + fontm[3] * m[2]; m2[3] = fontm[2] * m[1] + fontm[3] * m[3]; m[0] = m2[0]; m[1] = m2[1]; m[2] = m2[2]; m[3] = m2[3]; } if (fabs(m[0] * m[3]) > fabs(m[1] * m[2])) { rot = (m[3] < 0) ? 0 : 2; } else { rot = (m[2] > 0) ? 1 : 3; } curWord = new TextWord(state, rot, x0, y0, charPos, curFont, curFontSize);}void TextPage::addChar(GfxState *state, double x, double y, double dx, double dy, CharCode c, int nBytes, Unicode *u, int uLen) { double x1, y1, w1, h1, dx2, dy2, base, sp, delta; GBool overlap; int i; // throw away chars that aren't inside the page bounds state->transform(x, y, &x1, &y1); if (x1 < 0 || x1 > pageWidth || y1 < 0 || y1 > pageHeight) { charPos += nBytes; return; } // subtract char and word spacing from the dx,dy values sp = state->getCharSpace(); if (c == (CharCode)0x20) { sp += state->getWordSpace(); } state->textTransformDelta(sp * state->getHorizScaling(), 0, &dx2, &dy2); dx -= dx2; dy -= dy2; state->transformDelta(dx, dy, &w1, &h1); // check the tiny chars limit if (!globalParams->getTextKeepTinyChars() && fabs(w1) < 3 && fabs(h1) < 3) { if (++nTinyChars > 50000) { charPos += nBytes; return; } } // break words at space character if (uLen == 1 && u[0] == (Unicode)0x20) { if (curWord) { ++curWord->charLen; } charPos += nBytes; endWord(); return; } // start a new word if: // (1) this character doesn't fall in the right place relative to // the end of the previous word (this places upper and lower // constraints on the position deltas along both the primary // and secondary axes), or // (2) this character overlaps the previous one (duplicated text), or // (3) the previous character was an overlap (we want each duplicated // character to be in a word by itself at this stage) if (curWord && curWord->len > 0) { base = sp = delta = 0; // make gcc happy switch (curWord->rot) { case 0: base = y1; sp = x1 - curWord->xMax; delta = x1 - curWord->edge[curWord->len - 1]; break; case 1: base = x1; sp = y1 - curWord->yMax; delta = y1 - curWord->edge[curWord->len - 1]; break; case 2: base = y1; sp = curWord->xMin - x1; delta = curWord->edge[curWord->len - 1] - x1; break; case 3: base = x1; sp = curWord->yMin - y1; delta = curWord->edge[curWord->len - 1] - y1; break; } overlap = fabs(delta) < dupMaxPriDelta * curWord->fontSize && fabs(base - curWord->base) < dupMaxSecDelta * curWord->fontSize; if (overlap || lastCharOverlap || sp < -minDupBreakOverlap * curWord->fontSize || sp > minWordBreakSpace * curWord->fontSize || fabs(base - curWord->base) > 0.5) { endWord(); } lastCharOverlap = overlap; } else { lastCharOverlap = gFalse; } if (uLen != 0) { // start a new word if needed if (!curWord) { beginWord(state, x, y); } // page rotation and/or transform matrices can cause text to be // drawn in reverse order -- in this case, swap the begin/end // coordinates and break text into individual chars if ((curWord->rot == 0 && w1 < 0) || (curWord->rot == 1 && h1 < 0) || (curWord->rot == 2 && w1 > 0) || (curWord->rot == 3 && h1 > 0)) { endWord(); beginWord(state, x + dx, y + dy); x1 += w1; y1 += h1; w1 = -w1; h1 = -h1; } // add the characters to the current word w1 /= uLen; h1 /= uLen; for (i = 0; i < uLen; ++i) { curWord->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, u[i]); } } if (curWord) { curWord->charLen += nBytes; } charPos += nBytes;}void TextPage::endWord() { // This check is needed because Type 3 characters can contain // text-drawing operations (when TextPage is being used via // {X,Win}SplashOutputDev rather than TextOutputDev). if (nest > 0) { --nest; return; } if (curWord) { addWord(curWord); curWord = NULL; }}void TextPage::addWord(TextWord *word) { // throw away zero-length words -- they don't have valid xMin/xMax // values, and they're useless anyway if (word->len == 0) { delete word; return; } if (rawOrder) { if (rawLastWord) { rawLastWord->next = word; } else { rawWords = word; } rawLastWord = word; } else { pools[word->rot]->addWord(word); }}void TextPage::coalesce(GBool physLayout) { UnicodeMap *uMap; TextPool *pool; TextWord *word0, *word1, *word2; TextLine *line; TextBlock *blkList, *blkStack, *blk, *lastBlk, *blk0, *blk1; TextBlock **blkArray; TextFlow *flow, *lastFlow; int rot, poolMinBaseIdx, baseIdx, startBaseIdx; double minBase, maxBase, newMinBase, newMaxBase; double fontSize, colSpace1, colSpace2, lineSpace, intraLineSpace, blkSpace; GBool found; int count[4]; int lrCount; int firstBlkIdx, nBlocksLeft; int col1, col2; int i, j, n; if (rawOrder) { primaryRot = 0; primaryLR = gTrue; return; } uMap = globalParams->getTextEncoding(); blkList = NULL; lastBlk = NULL; nBlocks = 0; primaryRot = -1;#if 0 // for debugging printf("*** initial words ***\n"); for (rot = 0; rot < 4; ++rot) { pool = pools[rot]; for (baseIdx = pool->minBaseIdx; baseIdx <= pool->maxBaseIdx; ++baseIdx) { for (word0 = pool->getPool(baseIdx); word0; word0 = word0->next) { printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f fontSize=%.2f rot=%d '", word0->xMin, word0->xMax, word0->yMin, word0->yMax, word0->base, word0->fontSize, rot*90); for (i = 0; i < word0->len; ++i) { fputc(word0->text[i] & 0xff, stdout); } printf("'\n"); } } } printf("\n");#endif //----- assemble the blocks //~ add an outer loop for writing mode (vertical text) // build blocks for each rotation value for (rot = 0; rot < 4; ++rot) { pool = pools[rot]; poolMinBaseIdx = pool->minBaseIdx; count[rot] = 0; // add blocks until no more words are left while (1) { // find the first non-empty line in the pool for (; poolMinBaseIdx <= pool->maxBaseIdx && !pool->getPool(poolMinBaseIdx); ++poolMinBaseIdx) ; if (poolMinBaseIdx > pool->maxBaseIdx) { break; } // look for the left-most word in the first four lines of the // pool -- this avoids starting with a superscript word startBaseIdx = poolMinBaseIdx; for (baseIdx = poolMinBaseIdx + 1; baseIdx < poolMinBaseIdx + 4 && baseIdx <= pool->maxBaseIdx; ++baseIdx) { if (!pool->getPool(baseIdx)) { continue; } if (pool->getPool(baseIdx)->primaryCmp(pool->getPool(startBaseIdx)) < 0) { startBaseIdx = baseIdx; } } // create a new block word0 = pool->getPool(startBaseIdx); pool->setPool(startBaseIdx, word0->next); word0->next = NULL; blk = new TextBlock(this, rot); blk->addWord(word0); fontSize = word0->fontSize; minBase = maxBase = word0->base; colSpace1 = minColSpacing1 * fontSize; colSpace2 = minColSpacing2 * fontSize; lineSpace = maxLineSpacingDelta * fontSize; intraLineSpace = maxIntraLineDelta * fontSize; // add words to the block do { found = gFalse; // look for words on the line above the current top edge of // the block newMinBase = minBase; for (baseIdx = pool->getBaseIdx(minBase); baseIdx >= pool->getBaseIdx(minBase - lineSpace); --baseIdx) { word0 = NULL; word1 = pool->getPool(baseIdx); while (word1) { if (word1->base < minBase && word1->base >= minBase - lineSpace && ((rot == 0 || rot == 2) ? (word1->xMin < blk->xMax && word1->xMax > blk->xMin) : (word1->yMin < blk->yMax && word1->yMax > blk->yMin)) && fabs(word1->fontSize - fontSize) < maxBlockFontSizeDelta1 * fontSize) { word2 = word1; if (word0) { word0->next = word1->next; } else { pool->setPool(baseIdx, word1->next); } word1 = word1->next; word2->next = NULL; blk->addWord(word2); found = gTrue; newMinBase = word2->base; } else { word0 = word1; word1 = word1->next; } } }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -