⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 textoutputdev.cc

📁 source code: Convert TXT to PDF
💻 CC
📖 第 1 页 / 共 4 页
字号:
//========================================================================//// TextOutputDev.cc//// Copyright 1997-2002 Glyph & Cog, LLC////========================================================================#include <aconf.h>#ifdef USE_GCC_PRAGMAS#pragma implementation#endif#include <stdio.h>#include <stdlib.h>#include <stddef.h>#include <math.h>#include <ctype.h>#include "gmem.h"#include "GString.h"#include "GList.h"#include "config.h"#include "Error.h"#include "GlobalParams.h"#include "UnicodeMap.h"#include "GfxState.h"#include "TextOutputDev.h"#ifdef MACOS// needed for setting type/creator of MacOS files#include "ICSupport.h"#endif//------------------------------------------------------------------------// parameters//------------------------------------------------------------------------// Minium and maximum inter-word spacing (as a fraction of the average// character width).#define wordMinSpaceWidth 0.3#define wordMaxSpaceWidth 2.0// Default min and max inter-word spacing (when the average character// width is unknown).#define wordDefMinSpaceWidth 0.2#define wordDefMaxSpaceWidth 1.5// Max difference in x,y coordinates (as a fraction of the font size)// allowed for duplicated text (fake boldface, drop shadows) which is// to be discarded.#define dupMaxDeltaX 0.2#define dupMaxDeltaY 0.2// Min overlap (as a fraction of the font size) required for two// lines to be considered vertically overlapping.#define lineOverlapSlack 0.5// Max difference in baseline y coordinates (as a fraction of the font// size) allowed for words which are to be grouped into a line, not// including sub/superscripts.#define lineMaxBaselineDelta 0.1// Max ratio of font sizes allowed for words which are to be grouped// into a line, not including sub/superscripts.#define lineMaxFontSizeRatio 1.4// Min spacing (as a fraction of the font size) allowed between words// which are to be grouped into a line.#define lineMinDeltaX -0.5// Minimum vertical overlap (as a fraction of the font size) 
required// for superscript and subscript words.#define lineMinSuperscriptOverlap 0.3#define lineMinSubscriptOverlap   0.3// Min/max ratio of font sizes allowed for sub/superscripts compared to// the base text.#define lineMinSubscriptFontSizeRatio   0.4#define lineMaxSubscriptFontSizeRatio   1.01#define lineMinSuperscriptFontSizeRatio 0.4#define lineMaxSuperscriptFontSizeRatio 1.01// Max horizontal spacing (as a fraction of the font size) allowed// before sub/superscripts.#define lineMaxSubscriptDeltaX   0.2#define lineMaxSuperscriptDeltaX 0.2// Maximum vertical spacing (as a fraction of the font size) allowed// for lines which are to be grouped into a block.#define blkMaxSpacing 2.0// Max ratio of primary font sizes allowed for lines which are to be// grouped into a block.#define blkMaxFontSizeRatio 1.3// Min overlap (as a fraction of the font size) required for two// blocks to be considered vertically overlapping.#define blkOverlapSlack 0.5// Max vertical spacing (as a fraction of the font size) allowed// between blocks which are 'adjacent' when sorted by reading order.#define blkMaxSortSpacing 2.0// Max vertical offset (as a fraction of the font size) of the top and// bottom edges allowed for blocks which are to be grouped into a// flow.#define flowMaxDeltaY 1.0//------------------------------------------------------------------------// TextFontInfo//------------------------------------------------------------------------TextFontInfo::TextFontInfo(GfxState *state) {  double *textMat;  double t1, t2, avgWidth, w;  int n, i;  gfxFont = state->getFont();  textMat = state->getTextMat();  horizScaling = state->getHorizScaling();  if ((t1 = fabs(textMat[0])) > 0.01 &&      (t2 = fabs(textMat[3])) > 0.01) {    horizScaling *= t1 / t2;  }  if (!gfxFont) {    minSpaceWidth = horizScaling * wordDefMinSpaceWidth;    maxSpaceWidth = horizScaling * wordDefMaxSpaceWidth;  } else if (gfxFont->isCIDFont()) {    //~ handle 16-bit fonts    minSpaceWidth = horizScaling * 
wordDefMinSpaceWidth;    maxSpaceWidth = horizScaling * wordDefMaxSpaceWidth;  } else {    avgWidth = 0;    n = 0;    for (i = 0; i < 256; ++i) {      w = ((Gfx8BitFont *)gfxFont)->getWidth(i);      if (w > 0) {	avgWidth += w;	++n;      }    }    avgWidth /= n;    minSpaceWidth = horizScaling * wordMinSpaceWidth * avgWidth;    maxSpaceWidth = horizScaling * wordMaxSpaceWidth * avgWidth;  }}TextFontInfo::~TextFontInfo() {}GBool TextFontInfo::matches(GfxState *state) {  double *textMat;  double t1, t2, h;  textMat = state->getTextMat();  h = state->getHorizScaling();  if ((t1 = fabs(textMat[0])) > 0.01 &&      (t2 = fabs(textMat[3])) > 0.01) {    h *= t1 / t2;  }  return state->getFont() == gfxFont &&         fabs(h - horizScaling) < 0.01;}//------------------------------------------------------------------------// TextWord//------------------------------------------------------------------------TextWord::TextWord(GfxState *state, double x0, double y0,		   TextFontInfo *fontA, double fontSizeA) {  GfxFont *gfxFont;  double x, y;  font = fontA;  fontSize = fontSizeA;  state->transform(x0, y0, &x, &y);  if ((gfxFont = font->gfxFont)) {    yMin = y - gfxFont->getAscent() * fontSize;    yMax = y - gfxFont->getDescent() * fontSize;  } else {    // this means that the PDF file draws text without a current font,    // which should never happen    yMin = y - 0.95 * fontSize;    yMax = y + 0.35 * fontSize;  }  if (yMin == yMax) {    // this is a sanity check for a case that shouldn't happen -- but    // if it does happen, we want to avoid dividing by zero later    yMin = y;    yMax = y + 1;  }  yBase = y;  text = NULL;  xRight = NULL;  len = size = 0;  spaceAfter = gFalse;  next = NULL;}TextWord::~TextWord() {  gfree(text);  gfree(xRight);}void TextWord::addChar(GfxState *state, double x, double y,		       double dx, double dy, Unicode u) {  if (len == size) {    size += 16;    text = (Unicode *)grealloc(text, size * sizeof(Unicode));    xRight = (double *)grealloc(xRight, 
size * sizeof(double));  }  text[len] = u;  if (len == 0) {    xMin = x;  }  xMax = xRight[len] = x + dx;  ++len;}// Returns true if <this> comes before <word2> in xy order.GBool TextWord::xyBefore(TextWord *word2) {  return xMin < word2->xMin ||	 (xMin == word2->xMin && yMin < word2->yMin);}// Merge another word onto the end of this one.void TextWord::merge(TextWord *word2) {  int i;  xMax = word2->xMax;  if (word2->yMin < yMin) {    yMin = word2->yMin;  }  if (word2->yMax > yMax) {    yMax = word2->yMax;  }  if (len + word2->len > size) {    size = len + word2->len;    text = (Unicode *)grealloc(text, size * sizeof(Unicode));    xRight = (double *)grealloc(xRight, size * sizeof(double));  }  for (i = 0; i < word2->len; ++i) {    text[len + i] = word2->text[i];    xRight[len + i] = word2->xRight[i];  }  len += word2->len;}//------------------------------------------------------------------------// TextLine//------------------------------------------------------------------------TextLine::TextLine() {  words = NULL;  text = NULL;  xRight = NULL;  col = NULL;  len = 0;  hyphenated = gFalse;  pageNext = NULL;  next = NULL;  flowNext = NULL;}TextLine::~TextLine() {  TextWord *w1, *w2;  for (w1 = words; w1; w1 = w2) {    w2 = w1->next;    delete w1;  }  gfree(text);  gfree(xRight);  gfree(col);}// Returns true if <this> comes before <line2> in yx order, allowing// slack for vertically overlapping lines.GBool TextLine::yxBefore(TextLine *line2) {  double dy;  dy = lineOverlapSlack * fontSize;  // non-overlapping case  if (line2->yMin > yMax - dy ||      line2->yMax < yMin + dy) {    return yMin < line2->yMin ||           (yMin == line2->yMin && xMin < line2->xMin);  }  // overlapping case  return xMin < line2->xMin;}// Merge another line's words onto the end of this line.void TextLine::merge(TextLine *line2) {  TextWord *word;  int newLen, i;  xMax = line2->xMax;  if (line2->yMin < yMin) {    yMin = line2->yMin;  }  if (line2->yMax > yMax) {    yMax = line2->yMax;  }  
xSpaceR = line2->xSpaceR;  for (word = words; word->next; word = word->next) ;  word->spaceAfter = gTrue;  word->next = line2->words;  line2->words = NULL;  newLen = len + 1 + line2->len;  text = (Unicode *)grealloc(text, newLen * sizeof(Unicode));  xRight = (double *)grealloc(xRight, newLen * sizeof(double));  text[len] = (Unicode)0x0020;  xRight[len] = line2->xMin;  for (i = 0; i < line2->len; ++i) {    text[len + 1 + i] = line2->text[i];    xRight[len + 1 + i] = line2->xRight[i];  }  len = newLen;  convertedLen += line2->convertedLen;  hyphenated = line2->hyphenated;}//------------------------------------------------------------------------// TextBlock//------------------------------------------------------------------------TextBlock::TextBlock() {  lines = NULL;  next = NULL;}TextBlock::~TextBlock() {  TextLine *l1, *l2;  for (l1 = lines; l1; l1 = l2) {    l2 = l1->next;    delete l1;  }}// Returns true if <this> comes before <blk2> in xy order, allowing// slack for vertically overlapping blocks.GBool TextBlock::yxBefore(TextBlock *blk2) {  double dy;  dy = blkOverlapSlack * lines->fontSize;  // non-overlapping case  if (blk2->yMin > yMax - dy ||      blk2->yMax < yMin + dy) {    return yMin < blk2->yMin ||           (yMin == blk2->yMin && xMin < blk2->xMin);  }  // overlapping case  return xMin < blk2->xMin;}// Merge another block's line onto the right of this one.void TextBlock::mergeRight(TextBlock *blk2) {  lines->merge(blk2->lines);  xMax = lines->xMax;  yMin = lines->yMin;  yMax = lines->yMax;  xSpaceR = lines->xSpaceR;}// Merge another block's lines onto the bottom of this block.void TextBlock::mergeBelow(TextBlock *blk2) {  TextLine *line;  if (blk2->xMin < xMin) {    xMin = blk2->xMin;  }  if (blk2->xMax > xMax) {    xMax = blk2->xMax;  }  yMax = blk2->yMax;  if (blk2->xSpaceL > xSpaceL) {    xSpaceL = blk2->xSpaceL;  }  if (blk2->xSpaceR < xSpaceR) {    xSpaceR = blk2->xSpaceR;  }  if (blk2->maxFontSize > maxFontSize) {    maxFontSize = 
blk2->maxFontSize;  }  for (line = lines; line->next; line = line->next) ;  line->next = line->flowNext = blk2->lines;  blk2->lines = NULL;}//------------------------------------------------------------------------// TextFlow//------------------------------------------------------------------------TextFlow::TextFlow() {  blocks = NULL;  next = NULL;}TextFlow::~TextFlow() {  TextBlock *b1, *b2;  for (b1 = blocks; b1; b1 = b2) {    b2 = b1->next;    delete b1;  }}//------------------------------------------------------------------------// TextPage//------------------------------------------------------------------------TextPage::TextPage(GBool rawOrderA) {  rawOrder = rawOrderA;  curWord = NULL;  font = NULL;  fontSize = 0;  nest = 0;  nTinyChars = 0;  words = wordPtr = NULL;  lines = NULL;  flows = NULL;  fonts = new GList();}TextPage::~TextPage() {  clear();  delete fonts;}void TextPage::updateFont(GfxState *state) {  GfxFont *gfxFont;  double *fm;  char *name;  int code, mCode, letterCode, anyCode;  double w;  int i;  // get the font info object  font = NULL;  for (i = 0; i < fonts->getLength(); ++i) {    font = (TextFontInfo *)fonts->get(i);    if (font->matches(state)) {      break;    }    font = NULL;  }  if (!font) {    font = new TextFontInfo(state);    fonts->append(font);  }  // adjust the font size  gfxFont = state->getFont();  fontSize = state->getTransformedFontSize();  if (gfxFont && gfxFont->getType() == fontType3) {    // This is a hack which makes it possible to deal with some Type 3    // fonts.  The problem is that it's impossible to know what the    // base coordinate system used in the font is without actually    // rendering the font.  This code tries to guess by looking at the    // width of the character 'm' (which breaks if the font is a    // subset that doesn't contain 'm').    
mCode = letterCode = anyCode = -1;    for (code = 0; code < 256; ++code) {      name = ((Gfx8BitFont *)gfxFont)->getCharName(code);      if (name && name[0] == 'm' && name[1] == '\0') {	mCode = code;      }      if (letterCode < 0 && name && name[1] == '\0' &&	  ((name[0] >= 'A' && name[0] <= 'Z') ||	   (name[0] >= 'a' && name[0] <= 'z'))) {	letterCode = code;      }      if (anyCode < 0 && name &&	  ((Gfx8BitFont *)gfxFont)->getWidth(code) > 0) {	anyCode = code;      }    }    if (mCode >= 0 &&	(w = ((Gfx8BitFont *)gfxFont)->getWidth(mCode)) > 0) {      // 0.6 is a generic average 'm' width -- yes, this is a hack      fontSize *= w / 0.6;    } else if (letterCode >= 0 &&	       (w = ((Gfx8BitFont *)gfxFont)->getWidth(letterCode)) > 0) {      // even more of a hack: 0.5 is a generic letter width      fontSize *= w / 0.5;    } else if (anyCode >= 0 &&	       (w = ((Gfx8BitFont *)gfxFont)->getWidth(anyCode)) > 0) {      // better than nothing: 0.5 is a generic character width      fontSize *= w / 0.5;    }    fm = gfxFont->getFontMatrix();    if (fm[0] != 0) {      fontSize *= fabs(fm[3] / fm[0]);    }  }}void TextPage::beginWord(GfxState *state, double x0, double y0) {  // This check is needed because Type 3 characters can contain  // text-drawing operations (when TextPage is being used via  // XOutputDev rather than TextOutputDev).  if (curWord) {    ++nest;    return;  }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -