📄 textoutputdev.cc
字号:
//========================================================================//// TextOutputDev.cc//// Copyright 1997-2002 Glyph & Cog, LLC////========================================================================#include <aconf.h>#ifdef USE_GCC_PRAGMAS#pragma implementation#endif#include <stdio.h>#include <stdlib.h>#include <stddef.h>#include <math.h>#include <ctype.h>#include "gmem.h"#include "GString.h"#include "GList.h"#include "config.h"#include "Error.h"#include "GlobalParams.h"#include "UnicodeMap.h"#include "GfxState.h"#include "TextOutputDev.h"#ifdef MACOS// needed for setting type/creator of MacOS files#include "ICSupport.h"#endif//------------------------------------------------------------------------// parameters//------------------------------------------------------------------------// Minium and maximum inter-word spacing (as a fraction of the average// character width).#define wordMinSpaceWidth 0.3#define wordMaxSpaceWidth 2.0// Default min and max inter-word spacing (when the average character// width is unknown).#define wordDefMinSpaceWidth 0.2#define wordDefMaxSpaceWidth 1.5// Max difference in x,y coordinates (as a fraction of the font size)// allowed for duplicated text (fake boldface, drop shadows) which is// to be discarded.#define dupMaxDeltaX 0.2#define dupMaxDeltaY 0.2// Min overlap (as a fraction of the font size) required for two// lines to be considered vertically overlapping.#define lineOverlapSlack 0.5// Max difference in baseline y coordinates (as a fraction of the font// size) allowed for words which are to be grouped into a line, not// including sub/superscripts.#define lineMaxBaselineDelta 0.1// Max ratio of font sizes allowed for words which are to be grouped// into a line, not including sub/superscripts.#define lineMaxFontSizeRatio 1.4// Min spacing (as a fraction of the font size) allowed between words// which are to be grouped into a line.#define lineMinDeltaX -0.5// Minimum vertical overlap (as a fraction of the font size) required// for superscript and subscript words.#define lineMinSuperscriptOverlap 0.3#define lineMinSubscriptOverlap 0.3// Min/max ratio of font sizes allowed for sub/superscripts compared to// the base text.#define lineMinSubscriptFontSizeRatio 0.4#define lineMaxSubscriptFontSizeRatio 1.01#define lineMinSuperscriptFontSizeRatio 0.4#define lineMaxSuperscriptFontSizeRatio 1.01// Max horizontal spacing (as a fraction of the font size) allowed// before sub/superscripts.#define lineMaxSubscriptDeltaX 0.2#define lineMaxSuperscriptDeltaX 0.2// Maximum vertical spacing (as a fraction of the font size) allowed// for lines which are to be grouped into a block.#define blkMaxSpacing 2.0// Max ratio of primary font sizes allowed for lines which are to be// grouped into a block.#define blkMaxFontSizeRatio 1.3// Min overlap (as a fraction of the font size) required for two// blocks to be considered vertically overlapping.#define blkOverlapSlack 0.5// Max vertical spacing (as a fraction of the font size) allowed// between blocks which are 'adjacent' when sorted by reading order.#define blkMaxSortSpacing 2.0// Max vertical offset (as a fraction of the font size) of the top and// bottom edges allowed for blocks which are to be grouped into a// flow.#define flowMaxDeltaY 1.0//------------------------------------------------------------------------// TextFontInfo//------------------------------------------------------------------------TextFontInfo::TextFontInfo(GfxState *state) { double *textMat; double t1, t2, avgWidth, w; int n, i; gfxFont = state->getFont(); textMat = state->getTextMat(); horizScaling = state->getHorizScaling(); if ((t1 = fabs(textMat[0])) > 0.01 && (t2 = fabs(textMat[3])) > 0.01) { horizScaling *= t1 / t2; } if (!gfxFont) { minSpaceWidth = horizScaling * wordDefMinSpaceWidth; maxSpaceWidth = horizScaling * wordDefMaxSpaceWidth; } else if (gfxFont->isCIDFont()) { //~ handle 16-bit fonts minSpaceWidth = horizScaling * wordDefMinSpaceWidth; maxSpaceWidth = horizScaling * wordDefMaxSpaceWidth; } else { avgWidth = 0; n = 0; for (i = 0; i < 256; ++i) { w = ((Gfx8BitFont *)gfxFont)->getWidth(i); if (w > 0) { avgWidth += w; ++n; } } avgWidth /= n; minSpaceWidth = horizScaling * wordMinSpaceWidth * avgWidth; maxSpaceWidth = horizScaling * wordMaxSpaceWidth * avgWidth; }}TextFontInfo::~TextFontInfo() {}GBool TextFontInfo::matches(GfxState *state) { double *textMat; double t1, t2, h; textMat = state->getTextMat(); h = state->getHorizScaling(); if ((t1 = fabs(textMat[0])) > 0.01 && (t2 = fabs(textMat[3])) > 0.01) { h *= t1 / t2; } return state->getFont() == gfxFont && fabs(h - horizScaling) < 0.01;}//------------------------------------------------------------------------// TextWord//------------------------------------------------------------------------TextWord::TextWord(GfxState *state, double x0, double y0, TextFontInfo *fontA, double fontSizeA) { GfxFont *gfxFont; double x, y; font = fontA; fontSize = fontSizeA; state->transform(x0, y0, &x, &y); if ((gfxFont = font->gfxFont)) { yMin = y - gfxFont->getAscent() * fontSize; yMax = y - gfxFont->getDescent() * fontSize; } else { // this means that the PDF file draws text without a current font, // which should never happen yMin = y - 0.95 * fontSize; yMax = y + 0.35 * fontSize; } if (yMin == yMax) { // this is a sanity check for a case that shouldn't happen -- but // if it does happen, we want to avoid dividing by zero later yMin = y; yMax = y + 1; } yBase = y; text = NULL; xRight = NULL; len = size = 0; spaceAfter = gFalse; next = NULL;}TextWord::~TextWord() { gfree(text); gfree(xRight);}void TextWord::addChar(GfxState *state, double x, double y, double dx, double dy, Unicode u) { if (len == size) { size += 16; text = (Unicode *)grealloc(text, size * sizeof(Unicode)); xRight = (double *)grealloc(xRight, size * sizeof(double)); } text[len] = u; if (len == 0) { xMin = x; } xMax = xRight[len] = x + dx; ++len;}// Returns true if <this> comes before <word2> in xy order.GBool TextWord::xyBefore(TextWord *word2) { return xMin < word2->xMin || (xMin == word2->xMin && yMin < word2->yMin);}// Merge another word onto the end of this one.void TextWord::merge(TextWord *word2) { int i; xMax = word2->xMax; if (word2->yMin < yMin) { yMin = word2->yMin; } if (word2->yMax > yMax) { yMax = word2->yMax; } if (len + word2->len > size) { size = len + word2->len; text = (Unicode *)grealloc(text, size * sizeof(Unicode)); xRight = (double *)grealloc(xRight, size * sizeof(double)); } for (i = 0; i < word2->len; ++i) { text[len + i] = word2->text[i]; xRight[len + i] = word2->xRight[i]; } len += word2->len;}//------------------------------------------------------------------------// TextLine//------------------------------------------------------------------------TextLine::TextLine() { words = NULL; text = NULL; xRight = NULL; col = NULL; len = 0; hyphenated = gFalse; pageNext = NULL; next = NULL; flowNext = NULL;}TextLine::~TextLine() { TextWord *w1, *w2; for (w1 = words; w1; w1 = w2) { w2 = w1->next; delete w1; } gfree(text); gfree(xRight); gfree(col);}// Returns true if <this> comes before <line2> in yx order, allowing// slack for vertically overlapping lines.GBool TextLine::yxBefore(TextLine *line2) { double dy; dy = lineOverlapSlack * fontSize; // non-overlapping case if (line2->yMin > yMax - dy || line2->yMax < yMin + dy) { return yMin < line2->yMin || (yMin == line2->yMin && xMin < line2->xMin); } // overlapping case return xMin < line2->xMin;}// Merge another line's words onto the end of this line.void TextLine::merge(TextLine *line2) { TextWord *word; int newLen, i; xMax = line2->xMax; if (line2->yMin < yMin) { yMin = line2->yMin; } if (line2->yMax > yMax) { yMax = line2->yMax; } xSpaceR = line2->xSpaceR; for (word = words; word->next; word = word->next) ; word->spaceAfter = gTrue; word->next = line2->words; line2->words = NULL; newLen = len + 1 + line2->len; text = (Unicode *)grealloc(text, newLen * sizeof(Unicode)); xRight = (double *)grealloc(xRight, newLen * sizeof(double)); text[len] = (Unicode)0x0020; xRight[len] = line2->xMin; for (i = 0; i < line2->len; ++i) { text[len + 1 + i] = line2->text[i]; xRight[len + 1 + i] = line2->xRight[i]; } len = newLen; convertedLen += line2->convertedLen; hyphenated = line2->hyphenated;}//------------------------------------------------------------------------// TextBlock//------------------------------------------------------------------------TextBlock::TextBlock() { lines = NULL; next = NULL;}TextBlock::~TextBlock() { TextLine *l1, *l2; for (l1 = lines; l1; l1 = l2) { l2 = l1->next; delete l1; }}// Returns true if <this> comes before <blk2> in xy order, allowing// slack for vertically overlapping blocks.GBool TextBlock::yxBefore(TextBlock *blk2) { double dy; dy = blkOverlapSlack * lines->fontSize; // non-overlapping case if (blk2->yMin > yMax - dy || blk2->yMax < yMin + dy) { return yMin < blk2->yMin || (yMin == blk2->yMin && xMin < blk2->xMin); } // overlapping case return xMin < blk2->xMin;}// Merge another block's line onto the right of this one.void TextBlock::mergeRight(TextBlock *blk2) { lines->merge(blk2->lines); xMax = lines->xMax; yMin = lines->yMin; yMax = lines->yMax; xSpaceR = lines->xSpaceR;}// Merge another block's lines onto the bottom of this block.void TextBlock::mergeBelow(TextBlock *blk2) { TextLine *line; if (blk2->xMin < xMin) { xMin = blk2->xMin; } if (blk2->xMax > xMax) { xMax = blk2->xMax; } yMax = blk2->yMax; if (blk2->xSpaceL > xSpaceL) { xSpaceL = blk2->xSpaceL; } if (blk2->xSpaceR < xSpaceR) { xSpaceR = blk2->xSpaceR; } if (blk2->maxFontSize > maxFontSize) { maxFontSize = blk2->maxFontSize; } for (line = lines; line->next; line = line->next) ; line->next = line->flowNext = blk2->lines; blk2->lines = NULL;}//------------------------------------------------------------------------// TextFlow//------------------------------------------------------------------------TextFlow::TextFlow() { blocks = NULL; next = NULL;}TextFlow::~TextFlow() { TextBlock *b1, *b2; for (b1 = blocks; b1; b1 = b2) { b2 = b1->next; delete b1; }}//------------------------------------------------------------------------// TextPage//------------------------------------------------------------------------TextPage::TextPage(GBool rawOrderA) { rawOrder = rawOrderA; curWord = NULL; font = NULL; fontSize = 0; nest = 0; nTinyChars = 0; words = wordPtr = NULL; lines = NULL; flows = NULL; fonts = new GList();}TextPage::~TextPage() { clear(); delete fonts;}void TextPage::updateFont(GfxState *state) { GfxFont *gfxFont; double *fm; char *name; int code, mCode, letterCode, anyCode; double w; int i; // get the font info object font = NULL; for (i = 0; i < fonts->getLength(); ++i) { font = (TextFontInfo *)fonts->get(i); if (font->matches(state)) { break; } font = NULL; } if (!font) { font = new TextFontInfo(state); fonts->append(font); } // adjust the font size gfxFont = state->getFont(); fontSize = state->getTransformedFontSize(); if (gfxFont && gfxFont->getType() == fontType3) { // This is a hack which makes it possible to deal with some Type 3 // fonts. The problem is that it's impossible to know what the // base coordinate system used in the font is without actually // rendering the font. This code tries to guess by looking at the // width of the character 'm' (which breaks if the font is a // subset that doesn't contain 'm'). mCode = letterCode = anyCode = -1; for (code = 0; code < 256; ++code) { name = ((Gfx8BitFont *)gfxFont)->getCharName(code); if (name && name[0] == 'm' && name[1] == '\0') { mCode = code; } if (letterCode < 0 && name && name[1] == '\0' && ((name[0] >= 'A' && name[0] <= 'Z') || (name[0] >= 'a' && name[0] <= 'z'))) { letterCode = code; } if (anyCode < 0 && name && ((Gfx8BitFont *)gfxFont)->getWidth(code) > 0) { anyCode = code; } } if (mCode >= 0 && (w = ((Gfx8BitFont *)gfxFont)->getWidth(mCode)) > 0) { // 0.6 is a generic average 'm' width -- yes, this is a hack fontSize *= w / 0.6; } else if (letterCode >= 0 && (w = ((Gfx8BitFont *)gfxFont)->getWidth(letterCode)) > 0) { // even more of a hack: 0.5 is a generic letter width fontSize *= w / 0.5; } else if (anyCode >= 0 && (w = ((Gfx8BitFont *)gfxFont)->getWidth(anyCode)) > 0) { // better than nothing: 0.5 is a generic character width fontSize *= w / 0.5; } fm = gfxFont->getFontMatrix(); if (fm[0] != 0) { fontSize *= fabs(fm[3] / fm[0]); } }}void TextPage::beginWord(GfxState *state, double x0, double y0) { // This check is needed because Type 3 characters can contain // text-drawing operations (when TextPage is being used via // XOutputDev rather than TextOutputDev). if (curWord) { ++nest; return; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -