📄 textoutputdev.cc
字号:
//========================================================================
//
// TextOutputDev.cc
//
// Copyright 1997-2003 Glyph & Cog, LLC
//
//========================================================================

#include <aconf.h>

#ifdef USE_GCC_PRAGMAS
#pragma implementation
#endif

#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h> // for memcpy
#include <limits.h> // for INT_MIN / INT_MAX
#include <math.h>
#include <ctype.h>
#ifdef WIN32
#include <fcntl.h> // for O_BINARY
#include <io.h> // for setmode
#endif
#include "gmem.h"
#include "GString.h"
#include "GList.h"
#include "config.h"
#include "Error.h"
#include "GlobalParams.h"
#include "UnicodeMap.h"
#include "UnicodeTypeTable.h"
#include "GfxState.h"
#include "TextOutputDev.h"

#ifdef MACOS
// needed for setting type/creator of MacOS files
#include "ICSupport.h"
#endif

//------------------------------------------------------------------------
// parameters
//------------------------------------------------------------------------

// Each bucket in a text pool includes baselines within a range of
// this many points.
#define textPoolStep 4

// Inter-character space width which will cause addChar to start a new
// word.
#define minWordBreakSpace 0.1

// Negative inter-character space width, i.e., overlap, which will
// cause addChar to start a new word.
#define minDupBreakOverlap 0.2

// Max distance between baselines of two lines within a block, as a
// fraction of the font size.
#define maxLineSpacingDelta 1.5

// Max difference in primary font sizes on two lines in the same
// block.  Delta1 is used when examining new lines above and below the
// current block; delta2 is used when examining text that overlaps the
// current block; delta3 is used when examining text to the left and
// right of the current block.
#define maxBlockFontSizeDelta1 0.05
#define maxBlockFontSizeDelta2 0.6
#define maxBlockFontSizeDelta3 0.2

// Max difference in font sizes inside a word.
#define maxWordFontSizeDelta 0.05

// Maximum distance between baselines of two words on the same line,
// e.g., distance between subscript or superscript and the primary
// baseline, as a fraction of the font size.
#define maxIntraLineDelta 0.5

// Minimum inter-word spacing, as a fraction of the font size.  (Only
// used for raw ordering.)
#define minWordSpacing 0.15

// Maximum inter-word spacing, as a fraction of the font size.
#define maxWordSpacing 1.5

// Maximum horizontal spacing which will allow a word to be pulled
// into a block.
#define minColSpacing1 0.3

// Minimum spacing between columns, as a fraction of the font size.
#define minColSpacing2 1.0

// Maximum vertical spacing between blocks within a flow, as a
// multiple of the font size.
#define maxBlockSpacing 2.5

// Minimum spacing between characters within a word, as a fraction of
// the font size.
#define minCharSpacing -0.2

// Maximum spacing between characters within a word, as a fraction of
// the font size, when there is no obvious extra-wide character
// spacing.
#define maxCharSpacing 0.03

// When extra-wide character spacing is detected, the inter-character
// space threshold is set to the minimum inter-character space
// multiplied by this constant.
#define maxWideCharSpacingMul 1.3

// Max difference in primary,secondary coordinates (as a fraction of
// the font size) allowed for duplicated text (fake boldface, drop
// shadows) which is to be discarded.
#define dupMaxPriDelta 0.1
#define dupMaxSecDelta 0.2

//------------------------------------------------------------------------
TextFontInfo//------------------------------------------------------------------------TextFontInfo::TextFontInfo(GfxState *state) { gfxFont = state->getFont();#if TEXTOUT_WORD_LIST fontName = (gfxFont && gfxFont->getOrigName()) ? gfxFont->getOrigName()->copy() : (GString *)NULL;#endif}TextFontInfo::~TextFontInfo() {#if TEXTOUT_WORD_LIST if (fontName) { delete fontName; }#endif}GBool TextFontInfo::matches(GfxState *state) { return state->getFont() == gfxFont;}//------------------------------------------------------------------------// TextWord//------------------------------------------------------------------------TextWord::TextWord(GfxState *state, int rotA, double x0, double y0, int charPosA, TextFontInfo *fontA, double fontSizeA) { GfxFont *gfxFont; double x, y, ascent, descent; rot = rotA; charPos = charPosA; charLen = 0; font = fontA; fontSize = fontSizeA; state->transform(x0, y0, &x, &y); if ((gfxFont = font->gfxFont)) { ascent = gfxFont->getAscent() * fontSize; descent = gfxFont->getDescent() * fontSize; } else { // this means that the PDF file draws text without a current font, // which should never happen ascent = 0.95 * fontSize; descent = -0.35 * fontSize; } switch (rot) { case 0: yMin = y - ascent; yMax = y - descent; if (yMin == yMax) { // this is a sanity check for a case that shouldn't happen -- but // if it does happen, we want to avoid dividing by zero later yMin = y; yMax = y + 1; } base = y; break; case 1: xMin = x + descent; xMax = x + ascent; if (xMin == xMax) { // this is a sanity check for a case that shouldn't happen -- but // if it does happen, we want to avoid dividing by zero later xMin = x; xMax = x + 1; } base = x; break; case 2: yMin = y + descent; yMax = y + ascent; if (yMin == yMax) { // this is a sanity check for a case that shouldn't happen -- but // if it does happen, we want to avoid dividing by zero later yMin = y; yMax = y + 1; } base = y; break; case 3: xMin = x - ascent; xMax = x - descent; if (xMin == xMax) { // this is a 
sanity check for a case that shouldn't happen -- but // if it does happen, we want to avoid dividing by zero later xMin = x; xMax = x + 1; } base = x; break; } text = NULL; edge = NULL; len = size = 0; spaceAfter = gFalse; next = NULL;#if TEXTOUT_WORD_LIST GfxRGB rgb; if ((state->getRender() & 3) == 1) { state->getStrokeRGB(&rgb); } else { state->getFillRGB(&rgb); } colorR = colToDbl(rgb.r); colorG = colToDbl(rgb.g); colorB = colToDbl(rgb.b);#endif}TextWord::~TextWord() { gfree(text); gfree(edge);}void TextWord::addChar(GfxState *state, double x, double y, double dx, double dy, Unicode u) { if (len == size) { size += 16; text = (Unicode *)greallocn(text, size, sizeof(Unicode)); edge = (double *)greallocn(edge, size + 1, sizeof(double)); } text[len] = u; switch (rot) { case 0: if (len == 0) { xMin = x; } edge[len] = x; xMax = edge[len+1] = x + dx; break; case 1: if (len == 0) { yMin = y; } edge[len] = y; yMax = edge[len+1] = y + dy; break; case 2: if (len == 0) { xMax = x; } edge[len] = x; xMin = edge[len+1] = x + dx; break; case 3: if (len == 0) { yMax = y; } edge[len] = y; yMin = edge[len+1] = y + dy; break; } ++len;}void TextWord::merge(TextWord *word) { int i; if (word->xMin < xMin) { xMin = word->xMin; } if (word->yMin < yMin) { yMin = word->yMin; } if (word->xMax > xMax) { xMax = word->xMax; } if (word->yMax > yMax) { yMax = word->yMax; } if (len + word->len > size) { size = len + word->len; text = (Unicode *)greallocn(text, size, sizeof(Unicode)); edge = (double *)greallocn(edge, size + 1, sizeof(double)); } for (i = 0; i < word->len; ++i) { text[len + i] = word->text[i]; edge[len + i] = word->edge[i]; } edge[len + word->len] = word->edge[word->len]; len += word->len; charLen += word->charLen;}inline int TextWord::primaryCmp(TextWord *word) { double cmp; cmp = 0; // make gcc happy switch (rot) { case 0: cmp = xMin - word->xMin; break; case 1: cmp = yMin - word->yMin; break; case 2: cmp = word->xMax - xMax; break; case 3: cmp = word->yMax - yMax; break; } 
return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;}double TextWord::primaryDelta(TextWord *word) { double delta; delta = 0; // make gcc happy switch (rot) { case 0: delta = word->xMin - xMax; break; case 1: delta = word->yMin - yMax; break; case 2: delta = xMin - word->xMax; break; case 3: delta = yMin - word->yMax; break; } return delta;}int TextWord::cmpYX(const void *p1, const void *p2) { TextWord *word1 = *(TextWord **)p1; TextWord *word2 = *(TextWord **)p2; double cmp; cmp = word1->yMin - word2->yMin; if (cmp == 0) { cmp = word1->xMin - word2->xMin; } return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;}#if TEXTOUT_WORD_LISTGString *TextWord::getText() { GString *s; UnicodeMap *uMap; char buf[8]; int n, i; s = new GString(); if (!(uMap = globalParams->getTextEncoding())) { return s; } for (i = 0; i < len; ++i) { n = uMap->mapUnicode(text[i], buf, sizeof(buf)); s->append(buf, n); } uMap->decRefCnt(); return s;}#endif // TEXTOUT_WORD_LIST//------------------------------------------------------------------------// TextPool//------------------------------------------------------------------------TextPool::TextPool() { minBaseIdx = 0; maxBaseIdx = -1; pool = NULL; cursor = NULL; cursorBaseIdx = -1;}TextPool::~TextPool() { int baseIdx; TextWord *word, *word2; for (baseIdx = minBaseIdx; baseIdx <= maxBaseIdx; ++baseIdx) { for (word = pool[baseIdx - minBaseIdx]; word; word = word2) { word2 = word->next; delete word; } } gfree(pool);}int TextPool::getBaseIdx(double base) { int baseIdx; baseIdx = (int)(base / textPoolStep); if (baseIdx < minBaseIdx) { return minBaseIdx; } if (baseIdx > maxBaseIdx) { return maxBaseIdx; } return baseIdx;}void TextPool::addWord(TextWord *word) { TextWord **newPool; int wordBaseIdx, newMinBaseIdx, newMaxBaseIdx, baseIdx; TextWord *w0, *w1; // expand the array if needed wordBaseIdx = (int)(word->base / textPoolStep); if (minBaseIdx > maxBaseIdx) { minBaseIdx = wordBaseIdx - 128; maxBaseIdx = wordBaseIdx + 128; pool = (TextWord **)gmallocn(maxBaseIdx - minBaseIdx + 1, 
sizeof(TextWord *)); for (baseIdx = minBaseIdx; baseIdx <= maxBaseIdx; ++baseIdx) { pool[baseIdx - minBaseIdx] = NULL; } } else if (wordBaseIdx < minBaseIdx) { newMinBaseIdx = wordBaseIdx - 128; newPool = (TextWord **)gmallocn(maxBaseIdx - newMinBaseIdx + 1, sizeof(TextWord *)); for (baseIdx = newMinBaseIdx; baseIdx < minBaseIdx; ++baseIdx) { newPool[baseIdx - newMinBaseIdx] = NULL; } memcpy(&newPool[minBaseIdx - newMinBaseIdx], pool, (maxBaseIdx - minBaseIdx + 1) * sizeof(TextWord *)); gfree(pool); pool = newPool; minBaseIdx = newMinBaseIdx; } else if (wordBaseIdx > maxBaseIdx) { newMaxBaseIdx = wordBaseIdx + 128; pool = (TextWord **)greallocn(pool, newMaxBaseIdx - minBaseIdx + 1, sizeof(TextWord *)); for (baseIdx = maxBaseIdx + 1; baseIdx <= newMaxBaseIdx; ++baseIdx) { pool[baseIdx - minBaseIdx] = NULL; } maxBaseIdx = newMaxBaseIdx; } // insert the new word if (cursor && wordBaseIdx == cursorBaseIdx && word->primaryCmp(cursor) > 0) { w0 = cursor; w1 = cursor->next; } else { w0 = NULL; w1 = pool[wordBaseIdx - minBaseIdx]; } for (; w1 && word->primaryCmp(w1) > 0; w0 = w1, w1 = w1->next) ; word->next = w1; if (w0) { w0->next = word; } else { pool[wordBaseIdx - minBaseIdx] = word; } cursor = word; cursorBaseIdx = wordBaseIdx;}//------------------------------------------------------------------------// TextLine//------------------------------------------------------------------------TextLine::TextLine(TextBlock *blkA, int rotA, double baseA) { blk = blkA; rot = rotA; xMin = yMin = 0; xMax = yMax = -1; base = baseA; words = lastWord = NULL; text = NULL; edge = NULL; col = NULL; len = 0; convertedLen = 0; hyphenated = gFalse; next = NULL;}TextLine::~TextLine() { TextWord *word; while (words) { word = words; words = words->next; delete word; } gfree(text); gfree(edge); gfree(col);}void TextLine::addWord(TextWord *word) { if (lastWord) { lastWord->next = word; } else { words = word; } lastWord = word; if (xMin > xMax) { xMin = word->xMin; xMax = word->xMax; yMin = 
word->yMin; yMax = word->yMax; } else {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -