📄 textoutputdev.cc
字号:
delta = xMin - blk->xMax; break; case 2: delta = yMin - blk->yMax; break; case 3: delta = blk->xMin - xMax; break; } return delta;}GBool TextBlock::isBelow(TextBlock *blk) { GBool below; below = gFalse; // make gcc happy switch (page->primaryRot) { case 0: below = xMin >= blk->priMin && xMax <= blk->priMax && yMin > blk->yMin; break; case 1: below = yMin >= blk->priMin && yMax <= blk->priMax && xMax < blk->xMax; break; case 2: below = xMin >= blk->priMin && xMax <= blk->priMax && yMax < blk->yMax; break; case 3: below = yMin >= blk->priMin && yMax <= blk->priMax && xMin > blk->xMin; break; } return below;}//------------------------------------------------------------------------// TextFlow//------------------------------------------------------------------------TextFlow::TextFlow(TextPage *pageA, TextBlock *blk) { page = pageA; xMin = blk->xMin; xMax = blk->xMax; yMin = blk->yMin; yMax = blk->yMax; priMin = blk->priMin; priMax = blk->priMax; blocks = lastBlk = blk; next = NULL;}TextFlow::~TextFlow() { TextBlock *blk; while (blocks) { blk = blocks; blocks = blocks->next; delete blk; }}void TextFlow::addBlock(TextBlock *blk) { if (lastBlk) { lastBlk->next = blk; } else { blocks = blk; } lastBlk = blk; if (blk->xMin < xMin) { xMin = blk->xMin; } if (blk->xMax > xMax) { xMax = blk->xMax; } if (blk->yMin < yMin) { yMin = blk->yMin; } if (blk->yMax > yMax) { yMax = blk->yMax; }}GBool TextFlow::blockFits(TextBlock *blk, TextBlock *prevBlk) { GBool fits; // lower blocks must use smaller fonts if (blk->lines->words->fontSize > lastBlk->lines->words->fontSize) { return gFalse; } fits = gFalse; // make gcc happy switch (page->primaryRot) { case 0: fits = blk->xMin >= priMin && blk->xMax <= priMax; break; case 1: fits = blk->yMin >= priMin && blk->yMax <= priMax; break; case 2: fits = blk->xMin >= priMin && blk->xMax <= priMax; break; case 3: fits = blk->yMin >= priMin && blk->yMax <= priMax; break; } return fits;}#if TEXTOUT_WORD_LIST//------------------------------------------------------------------------// TextWordList//------------------------------------------------------------------------TextWordList::TextWordList(TextPage *text, GBool physLayout) { TextFlow *flow; TextBlock *blk; TextLine *line; TextWord *word; TextWord **wordArray; int nWords, i; words = new GList(); if (text->rawOrder) { for (word = text->rawWords; word; word = word->next) { words->append(word); } } else if (physLayout) { // this is inefficient, but it's also the least useful of these // three cases nWords = 0; for (flow = text->flows; flow; flow = flow->next) { for (blk = flow->blocks; blk; blk = blk->next) { for (line = blk->lines; line; line = line->next) { for (word = line->words; word; word = word->next) { ++nWords; } } } } wordArray = (TextWord **)gmallocn(nWords, sizeof(TextWord *)); i = 0; for (flow = text->flows; flow; flow = flow->next) { for (blk = flow->blocks; blk; blk = blk->next) { for (line = blk->lines; line; line = line->next) { for (word = line->words; word; word = word->next) { wordArray[i++] = word; } } } } qsort(wordArray, nWords, sizeof(TextWord *), &TextWord::cmpYX); for (i = 0; i < nWords; ++i) { words->append(wordArray[i]); } gfree(wordArray); } else { for (flow = text->flows; flow; flow = flow->next) { for (blk = flow->blocks; blk; blk = blk->next) { for (line = blk->lines; line; line = line->next) { for (word = line->words; word; word = word->next) { words->append(word); } } } } }}TextWordList::~TextWordList() { delete words;}int TextWordList::getLength() { return words->getLength();}TextWord *TextWordList::get(int idx) { if (idx < 0 || idx >= words->getLength()) { return NULL; } return (TextWord *)words->get(idx);}#endif // TEXTOUT_WORD_LIST//------------------------------------------------------------------------// TextPage//------------------------------------------------------------------------TextPage::TextPage(GBool rawOrderA) { int rot; rawOrder = rawOrderA; curWord = NULL; charPos = 0; curFont = NULL; curFontSize = 0; nest = 0; nTinyChars = 0; lastCharOverlap = gFalse; if (!rawOrder) { for (rot = 0; rot < 4; ++rot) { pools[rot] = new TextPool(); } } flows = NULL; blocks = NULL; rawWords = NULL; rawLastWord = NULL; fonts = new GList(); lastFindXMin = lastFindYMin = 0; haveLastFind = gFalse; underlines = new GList(); links = new GList();}TextPage::~TextPage() { int rot; clear(); if (!rawOrder) { for (rot = 0; rot < 4; ++rot) { delete pools[rot]; } } delete fonts; deleteGList(underlines, TextUnderline); deleteGList(links, TextLink);}void TextPage::startPage(GfxState *state) { clear(); if (state) { pageWidth = state->getPageWidth(); pageHeight = state->getPageHeight(); } else { pageWidth = pageHeight = 0; }}void TextPage::endPage() { if (curWord) { endWord(); }}void TextPage::clear() { int rot; TextFlow *flow; TextWord *word; if (curWord) { delete curWord; curWord = NULL; } if (rawOrder) { while (rawWords) { word = rawWords; rawWords = rawWords->next; delete word; } } else { for (rot = 0; rot < 4; ++rot) { delete pools[rot]; } while (flows) { flow = flows; flows = flows->next; delete flow; } gfree(blocks); } deleteGList(fonts, TextFontInfo); curWord = NULL; charPos = 0; curFont = NULL; curFontSize = 0; nest = 0; nTinyChars = 0; if (!rawOrder) { for (rot = 0; rot < 4; ++rot) { pools[rot] = new TextPool(); } } flows = NULL; blocks = NULL; rawWords = NULL; rawLastWord = NULL; fonts = new GList();}void TextPage::updateFont(GfxState *state) { GfxFont *gfxFont; double *fm; char *name; int code, mCode, letterCode, anyCode; double w; int i; // get the font info object curFont = NULL; for (i = 0; i < fonts->getLength(); ++i) { curFont = (TextFontInfo *)fonts->get(i); if (curFont->matches(state)) { break; } curFont = NULL; } if (!curFont) { curFont = new TextFontInfo(state); fonts->append(curFont); } // adjust the font size gfxFont = state->getFont(); curFontSize = state->getTransformedFontSize(); if (gfxFont && gfxFont->getType() == fontType3) { // This is a hack which makes it possible to deal with some Type 3 // fonts. The problem is that it's impossible to know what the // base coordinate system used in the font is without actually // rendering the font. This code tries to guess by looking at the // width of the character 'm' (which breaks if the font is a // subset that doesn't contain 'm'). mCode = letterCode = anyCode = -1; for (code = 0; code < 256; ++code) { name = ((Gfx8BitFont *)gfxFont)->getCharName(code); if (name && name[0] == 'm' && name[1] == '\0') { mCode = code; } if (letterCode < 0 && name && name[1] == '\0' && ((name[0] >= 'A' && name[0] <= 'Z') || (name[0] >= 'a' && name[0] <= 'z'))) { letterCode = code; } if (anyCode < 0 && name && ((Gfx8BitFont *)gfxFont)->getWidth(code) > 0) { anyCode = code; } } if (mCode >= 0 && (w = ((Gfx8BitFont *)gfxFont)->getWidth(mCode)) > 0) { // 0.6 is a generic average 'm' width -- yes, this is a hack curFontSize *= w / 0.6; } else if (letterCode >= 0 && (w = ((Gfx8BitFont *)gfxFont)->getWidth(letterCode)) > 0) { // even more of a hack: 0.5 is a generic letter width curFontSize *= w / 0.5; } else if (anyCode >= 0 && (w = ((Gfx8BitFont *)gfxFont)->getWidth(anyCode)) > 0) { // better than nothing: 0.5 is a generic character width curFontSize *= w / 0.5; } fm = gfxFont->getFontMatrix(); if (fm[0] != 0) { curFontSize *= fabs(fm[3] / fm[0]); } }}void TextPage::beginWord(GfxState *state, double x0, double y0) { double *fontm; double m[4], m2[4]; int rot; // This check is needed because Type 3 characters can contain // text-drawing operations (when TextPage is being used via // {X,Win}SplashOutputDev rather than TextOutputDev). if (curWord) { ++nest; return; } // compute the rotation state->getFontTransMat(&m[0], &m[1], &m[2], &m[3]); if (state->getFont()->getType() == fontType3) { fontm = state->getFont()->getFontMatrix(); m2[0] = fontm[0] * m[0] + fontm[1] * m[2]; m2[1] = fontm[0] * m[1] + fontm[1] * m[3]; m2[2] = fontm[2] * m[0] + fontm[3] * m[2]; m2[3] = fontm[2] * m[1] + fontm[3] * m[3]; m[0] = m2[0]; m[1] = m2[1]; m[2] = m2[2]; m[3] = m2[3]; } if (fabs(m[0] * m[3]) > fabs(m[1] * m[2])) { rot = (m[3] < 0) ? 0 : 2; } else { rot = (m[2] > 0) ? 1 : 3; } curWord = new TextWord(state, rot, x0, y0, charPos, curFont, curFontSize);}void TextPage::addChar(GfxState *state, double x, double y, double dx, double dy, CharCode c, int nBytes, Unicode *u, int uLen) { double x1, y1, w1, h1, dx2, dy2, base, sp, delta; GBool overlap; int i; // subtract char and word spacing from the dx,dy values sp = state->getCharSpace(); if (c == (CharCode)0x20) { sp += state->getWordSpace(); } state->textTransformDelta(sp * state->getHorizScaling(), 0, &dx2, &dy2); dx -= dx2; dy -= dy2; state->transformDelta(dx, dy, &w1, &h1); // throw away chars that aren't inside the page bounds // (and also do a sanity check on the character size) state->transform(x, y, &x1, &y1); if (x1 + w1 < 0 || x1 > pageWidth || y1 + h1 < 0 || y1 > pageHeight || w1 > pageWidth || h1 > pageHeight) { charPos += nBytes; return; } // check the tiny chars limit if (!globalParams->getTextKeepTinyChars() && fabs(w1) < 3 && fabs(h1) < 3) { if (++nTinyChars > 50000) { charPos += nBytes; return; } } // break words at space character if (uLen == 1 && u[0] == (Unicode)0x20) { if (curWord) { ++curWord->charLen; } charPos += nBytes; endWord(); return; } // start a new word if: // (1) this character doesn't fall in the right place relative to // the end of the previous word (this places upper and lower // constraints on the position deltas along both the primary // and secondary axes), or // (2) this character overlaps the previous one (duplicated text), or // (3) the previous character was an overlap (we want each duplicated // character to be in a word by itself at this stage), // (4) the font size has changed if (curWord && curWord->len > 0) { base = sp = delta = 0; // make gcc happy switch (curWord->rot) { case 0: base = y1; sp = x1 - curWord->xMax; delta = x1 - curWord->edge[curWord->len - 1]; break; case 1: base = x1; sp = y1 - curWord->yMax; delta = y1 - curWord->edge[curWord->len - 1]; break; case 2: base = y1; sp = curWord->xMin - x1; delta = curWord->edge[curWord->len - 1] - x1; break; case 3: base = x1; sp = curWord->yMin - y1; delta = curWord->edge[curWord->len - 1] - y1; break; } overlap = fabs(delta) < dupMaxPriDelta * curWord->fontSize && fabs(base - curWord->base) < dupMaxSecDelta * curWord->fontSize; if (overlap || lastCharOverlap || sp < -minDupBreakOverlap * curWord->fontSize || sp > minWordBreakSpace * curWord->fontSize || fabs(base - curWord->base) > 0.5 || curFontSize != curWord->fontSize) { endWord(); } lastCharOverlap = overlap; } else { lastCharOverlap = gFalse; } if (uLen != 0) { // start a new word if needed if (!curWord) { beginWord(state, x, y); } // page rotation and/or transform matrices can cause text to be // drawn in reverse order -- in this case, swap the begin/end // coordinates and break text into individual chars if ((curWord->rot == 0 && w1 < 0) || (curWord->rot == 1 && h1 < 0) || (curWord->rot == 2 && w1 > 0) || (curWord->rot == 3 && h1 > 0)) { endWord(); beginWord(state, x + dx, y + dy); x1 += w1; y1 += h1; w1 = -w1;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -