📄 textoutputdev.cc
字号:
//========================================================================//// TextOutputDev.cc//// Copyright 1997-2003 Glyph & Cog, LLC////========================================================================#include <aconf.h>#ifdef USE_GCC_PRAGMAS#pragma implementation#endif#include <stdio.h>#include <stdlib.h>#include <stddef.h>#include <math.h>#include <ctype.h>#ifdef WIN32#include <fcntl.h> // for O_BINARY#include <io.h> // for setmode#endif#include "gmem.h"#include "GString.h"#include "GList.h"#include "config.h"#include "Error.h"#include "GlobalParams.h"#include "UnicodeMap.h"#include "UnicodeTypeTable.h"#include "GfxState.h"#include "Link.h"#include "TextOutputDev.h"#ifdef MACOS// needed for setting type/creator of MacOS files#include "ICSupport.h"#endif//------------------------------------------------------------------------// parameters//------------------------------------------------------------------------// Each bucket in a text pool includes baselines within a range of// this many points.#define textPoolStep 4// Inter-character space width which will cause addChar to start a new// word.#define minWordBreakSpace 0.1// Negative inter-character space width, i.e., overlap, which will// cause addChar to start a new word.#define minDupBreakOverlap 0.2// Max distance between baselines of two lines within a block, as a// fraction of the font size.#define maxLineSpacingDelta 1.5// Max difference in primary font sizes on two lines in the same// block. Delta1 is used when examining new lines above and below the// current block; delta2 is used when examining text that overlaps the// current block; delta3 is used when examining text to the left and// right of the current block.#define maxBlockFontSizeDelta1 0.05#define maxBlockFontSizeDelta2 0.6#define maxBlockFontSizeDelta3 0.2// Max difference in font sizes inside a word.#define maxWordFontSizeDelta 0.05// Maximum distance between baselines of two words on the same line,// e.g., distance between subscript or superscript and the primary// baseline, as a fraction of the font size.#define maxIntraLineDelta 0.5// Minimum inter-word spacing, as a fraction of the font size. (Only// used for raw ordering.)#define minWordSpacing 0.15// Maximum inter-word spacing, as a fraction of the font size.#define maxWordSpacing 1.5// Maximum horizontal spacing which will allow a word to be pulled// into a block.#define minColSpacing1 0.3// Minimum spacing between columns, as a fraction of the font size.#define minColSpacing2 1.0// Maximum vertical spacing between blocks within a flow, as a// multiple of the font size.#define maxBlockSpacing 2.5// Minimum spacing between characters within a word, as a fraction of// the font size.#define minCharSpacing -0.2// Maximum spacing between characters within a word, as a fraction of// the font size, when there is no obvious extra-wide character// spacing.#define maxCharSpacing 0.03// When extra-wide character spacing is detected, the inter-character// space threshold is set to the minimum inter-character space// multiplied by this constant.#define maxWideCharSpacingMul 1.3// Upper limit on spacing between characters in a word.#define maxWideCharSpacing 0.4// Max difference in primary,secondary coordinates (as a fraction of// the font size) allowed for duplicated text (fake boldface, drop// shadows) which is to be discarded.#define dupMaxPriDelta 0.1#define dupMaxSecDelta 0.2// Max width of underlines (in points).#define maxUnderlineWidth 3// Min distance between baseline and underline (in points).//~ this should be font-size-dependent#define minUnderlineGap -2// Max distance between baseline and underline (in points).//~ this should be font-size-dependent#define maxUnderlineGap 4// Max horizontal distance between edge of word and start of underline// (in points).//~ this should be font-size-dependent#define underlineSlack 1// Max distance between edge of text and edge of link border#define hyperlinkSlack 2//------------------------------------------------------------------------// TextUnderline//------------------------------------------------------------------------class TextUnderline {public: TextUnderline(double x0A, double y0A, double x1A, double y1A) { x0 = x0A; y0 = y0A; x1 = x1A; y1 = y1A; horiz = y0 == y1; } ~TextUnderline() {} double x0, y0, x1, y1; GBool horiz;};//------------------------------------------------------------------------// TextLink//------------------------------------------------------------------------class TextLink {public: TextLink(int xMinA, int yMinA, int xMaxA, int yMaxA, Link *linkA) { xMin = xMinA; yMin = yMinA; xMax = xMaxA; yMax = yMaxA; link = linkA; } ~TextLink() {} int xMin, yMin, xMax, yMax; Link *link;};//------------------------------------------------------------------------// TextFontInfo//------------------------------------------------------------------------TextFontInfo::TextFontInfo(GfxState *state) { gfxFont = state->getFont();#if TEXTOUT_WORD_LIST fontName = (gfxFont && gfxFont->getOrigName()) ? gfxFont->getOrigName()->copy() : (GString *)NULL; flags = gfxFont ? gfxFont->getFlags() : 0;#endif}TextFontInfo::~TextFontInfo() {#if TEXTOUT_WORD_LIST if (fontName) { delete fontName; }#endif}GBool TextFontInfo::matches(GfxState *state) { return state->getFont() == gfxFont;}//------------------------------------------------------------------------// TextWord//------------------------------------------------------------------------TextWord::TextWord(GfxState *state, int rotA, double x0, double y0, int charPosA, TextFontInfo *fontA, double fontSizeA) { GfxFont *gfxFont; double x, y, ascent, descent; rot = rotA; charPos = charPosA; charLen = 0; font = fontA; fontSize = fontSizeA; state->transform(x0, y0, &x, &y); if ((gfxFont = font->gfxFont)) { ascent = gfxFont->getAscent() * fontSize; descent = gfxFont->getDescent() * fontSize; } else { // this means that the PDF file draws text without a current font, // which should never happen ascent = 0.95 * fontSize; descent = -0.35 * fontSize; } switch (rot) { case 0: yMin = y - ascent; yMax = y - descent; if (yMin == yMax) { // this is a sanity check for a case that shouldn't happen -- but // if it does happen, we want to avoid dividing by zero later yMin = y; yMax = y + 1; } base = y; break; case 1: xMin = x + descent; xMax = x + ascent; if (xMin == xMax) { // this is a sanity check for a case that shouldn't happen -- but // if it does happen, we want to avoid dividing by zero later xMin = x; xMax = x + 1; } base = x; break; case 2: yMin = y + descent; yMax = y + ascent; if (yMin == yMax) { // this is a sanity check for a case that shouldn't happen -- but // if it does happen, we want to avoid dividing by zero later yMin = y; yMax = y + 1; } base = y; break; case 3: xMin = x - ascent; xMax = x - descent; if (xMin == xMax) { // this is a sanity check for a case that shouldn't happen -- but // if it does happen, we want to avoid dividing by zero later xMin = x; xMax = x + 1; } base = x; break; } text = NULL; edge = NULL; len = size = 0; spaceAfter = gFalse; next = NULL;#if TEXTOUT_WORD_LIST GfxRGB rgb; if ((state->getRender() & 3) == 1) { state->getStrokeRGB(&rgb); } else { state->getFillRGB(&rgb); } colorR = colToDbl(rgb.r); colorG = colToDbl(rgb.g); colorB = colToDbl(rgb.b);#endif underlined = gFalse; link = NULL;}TextWord::~TextWord() { gfree(text); gfree(edge);}void TextWord::addChar(GfxState *state, double x, double y, double dx, double dy, Unicode u) { if (len == size) { size += 16; text = (Unicode *)greallocn(text, size, sizeof(Unicode)); edge = (double *)greallocn(edge, size + 1, sizeof(double)); } text[len] = u; switch (rot) { case 0: if (len == 0) { xMin = x; } edge[len] = x; xMax = edge[len+1] = x + dx; break; case 1: if (len == 0) { yMin = y; } edge[len] = y; yMax = edge[len+1] = y + dy; break; case 2: if (len == 0) { xMax = x; } edge[len] = x; xMin = edge[len+1] = x + dx; break; case 3: if (len == 0) { yMax = y; } edge[len] = y; yMin = edge[len+1] = y + dy; break; } ++len;}void TextWord::merge(TextWord *word) { int i; if (word->xMin < xMin) { xMin = word->xMin; } if (word->yMin < yMin) { yMin = word->yMin; } if (word->xMax > xMax) { xMax = word->xMax; } if (word->yMax > yMax) { yMax = word->yMax; } if (len + word->len > size) { size = len + word->len; text = (Unicode *)greallocn(text, size, sizeof(Unicode)); edge = (double *)greallocn(edge, size + 1, sizeof(double)); } for (i = 0; i < word->len; ++i) { text[len + i] = word->text[i]; edge[len + i] = word->edge[i]; } edge[len + word->len] = word->edge[word->len]; len += word->len; charLen += word->charLen;}inline int TextWord::primaryCmp(TextWord *word) { double cmp; cmp = 0; // make gcc happy switch (rot) { case 0: cmp = xMin - word->xMin; break; case 1: cmp = yMin - word->yMin; break; case 2: cmp = word->xMax - xMax; break; case 3: cmp = word->yMax - yMax; break; } return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;}double TextWord::primaryDelta(TextWord *word) { double delta; delta = 0; // make gcc happy switch (rot) { case 0: delta = word->xMin - xMax; break; case 1: delta = word->yMin - yMax; break; case 2: delta = xMin - word->xMax; break; case 3: delta = yMin - word->yMax; break; } return delta;}int TextWord::cmpYX(const void *p1, const void *p2) { TextWord *word1 = *(TextWord **)p1; TextWord *word2 = *(TextWord **)p2; double cmp; cmp = word1->yMin - word2->yMin; if (cmp == 0) { cmp = word1->xMin - word2->xMin; } return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;}#if TEXTOUT_WORD_LISTGString *TextWord::getText() { GString *s; UnicodeMap *uMap; char buf[8]; int n, i; s = new GString(); if (!(uMap = globalParams->getTextEncoding())) { return s; } for (i = 0; i < len; ++i) { n = uMap->mapUnicode(text[i], buf, sizeof(buf)); s->append(buf, n); } uMap->decRefCnt(); return s;}void TextWord::getCharBBox(int charIdx, double *xMinA, double *yMinA, double *xMaxA, double *yMaxA) { if (charIdx < 0 || charIdx >= len) { return; } switch (rot) { case 0: *xMinA = edge[charIdx]; *xMaxA = edge[charIdx + 1]; *yMinA = yMin; *yMaxA = yMax; break; case 1: *xMinA = xMin; *xMaxA = xMax; *yMinA = edge[charIdx]; *yMaxA = edge[charIdx + 1]; break; case 2: *xMinA = edge[charIdx + 1]; *xMaxA = edge[charIdx]; *yMinA = yMin; *yMaxA = yMax; break; case 3: *xMinA = xMin; *xMaxA = xMax; *yMinA = edge[charIdx + 1]; *yMaxA = edge[charIdx]; break; }}#endif // TEXTOUT_WORD_LIST//------------------------------------------------------------------------// TextPool//------------------------------------------------------------------------TextPool::TextPool() { minBaseIdx = 0; maxBaseIdx = -1; pool = NULL; cursor = NULL; cursorBaseIdx = -1;}TextPool::~TextPool() { int baseIdx; TextWord *word, *word2; for (baseIdx = minBaseIdx; baseIdx <= maxBaseIdx; ++baseIdx) { for (word = pool[baseIdx - minBaseIdx]; word; word = word2) { word2 = word->next; delete word; } } gfree(pool);}int TextPool::getBaseIdx(double base) { int baseIdx; baseIdx = (int)(base / textPoolStep); if (baseIdx < minBaseIdx) { return minBaseIdx; } if (baseIdx > maxBaseIdx) { return maxBaseIdx; } return baseIdx;}void TextPool::addWord(TextWord *word) { TextWord **newPool; int wordBaseIdx, newMinBaseIdx, newMaxBaseIdx, baseIdx; TextWord *w0, *w1;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -