⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 textoutputdev.cc

📁 将pdf文档转换为高质量的html文档
💻 CC
📖 第 1 页 / 共 5 页
字号:
    if (word->xMin < xMin) {      xMin = word->xMin;    }    if (word->xMax > xMax) {      xMax = word->xMax;    }    if (word->yMin < yMin) {      yMin = word->yMin;    }    if (word->yMax > yMax) {      yMax = word->yMax;    }  }}double TextLine::primaryDelta(TextLine *line) {  double delta;  delta = 0; // make gcc happy  switch (rot) {  case 0:    delta = line->xMin - xMax;    break;  case 1:    delta = line->yMin - yMax;    break;  case 2:    delta = xMin - line->xMax;    break;  case 3:    delta = yMin - line->yMax;    break;  }  return delta;}int TextLine::primaryCmp(TextLine *line) {  double cmp;  cmp = 0; // make gcc happy  switch (rot) {  case 0:    cmp = xMin - line->xMin;    break;  case 1:    cmp = yMin - line->yMin;    break;  case 2:    cmp = line->xMax - xMax;    break;  case 3:    cmp = line->yMax - yMax;    break;  }  return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;}int TextLine::secondaryCmp(TextLine *line) {  double cmp;  cmp = (rot == 0 || rot == 3) ? base - line->base : line->base - base;  return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;}int TextLine::cmpYX(TextLine *line) {  int cmp;  if ((cmp = secondaryCmp(line))) {    return cmp;  }  return primaryCmp(line);}int TextLine::cmpXY(const void *p1, const void *p2) {  TextLine *line1 = *(TextLine **)p1;  TextLine *line2 = *(TextLine **)p2;  int cmp;  if ((cmp = line1->primaryCmp(line2))) {    return cmp;  }  return line1->secondaryCmp(line2);}void TextLine::coalesce(UnicodeMap *uMap) {  TextWord *word0, *word1;  double space, delta, minSpace;  GBool isUnicode;  char buf[8];  int i, j;  if (words->next) {    // compute the inter-word space threshold    if (words->len > 1 || words->next->len > 1) {      minSpace = 0;    } else {      minSpace = words->primaryDelta(words->next);      for (word0 = words->next, word1 = word0->next;	   word1 && minSpace > 0;	   word0 = word1, word1 = word0->next) {	if (word1->len > 1) {	  minSpace = 0;	}	delta = word0->primaryDelta(word1);	if (delta < minSpace) {	  minSpace = delta;	}      }    }    if (minSpace <= 0) {      space = maxCharSpacing * words->fontSize;    } else {      space = maxWideCharSpacingMul * minSpace;    }    // merge words    word0 = words;    word1 = words->next;    while (word1) {      if (word0->primaryDelta(word1) >= space) {	word0->spaceAfter = gTrue;	word0 = word1;	word1 = word1->next;      } else if (word0->font == word1->font &&		 fabs(word0->fontSize - word1->fontSize) <		 maxWordFontSizeDelta * words->fontSize &&		 word1->charPos == word0->charPos + word0->charLen) {	word0->merge(word1);	word0->next = word1->next;	delete word1;	word1 = word0->next;      } else {	word0 = word1;	word1 = word1->next;      }    }  }  // build the line text  isUnicode = uMap ? uMap->isUnicode() : gFalse;  len = 0;  for (word1 = words; word1; word1 = word1->next) {    len += word1->len;    if (word1->spaceAfter) {      ++len;    }  }  text = (Unicode *)gmallocn(len, sizeof(Unicode));  edge = (double *)gmallocn(len + 1, sizeof(double));  i = 0;  for (word1 = words; word1; word1 = word1->next) {    for (j = 0; j < word1->len; ++j) {      text[i] = word1->text[j];      edge[i] = word1->edge[j];      ++i;    }    edge[i] = word1->edge[word1->len];    if (word1->spaceAfter) {      text[i] = (Unicode)0x0020;      ++i;    }  }  // compute convertedLen and set up the col array  col = (int *)gmallocn(len + 1, sizeof(int));  convertedLen = 0;  for (i = 0; i < len; ++i) {    col[i] = convertedLen;    if (isUnicode) {      ++convertedLen;    } else if (uMap) {      convertedLen += uMap->mapUnicode(text[i], buf, sizeof(buf));    }  }  col[len] = convertedLen;  // check for hyphen at end of line  //~ need to check for other chars used as hyphens  hyphenated = text[len - 1] == (Unicode)'-';}//------------------------------------------------------------------------// TextLineFrag//------------------------------------------------------------------------class TextLineFrag {public:  TextLine *line;		// the line object  int start, len;		// offset and length of this fragment				//   (in Unicode chars)  double xMin, xMax;		// bounding box coordinates  double yMin, yMax;  double base;			// baseline virtual coordinate  int col;			// first column  void init(TextLine *lineA, int startA, int lenA);  void computeCoords(GBool oneRot);  static int cmpYXPrimaryRot(const void *p1, const void *p2);  static int cmpYXLineRot(const void *p1, const void *p2);  static int cmpXYLineRot(const void *p1, const void *p2);};void TextLineFrag::init(TextLine *lineA, int startA, int lenA) {  line = lineA;  start = startA;  len = lenA;  col = line->col[start];}void TextLineFrag::computeCoords(GBool oneRot) {  TextBlock *blk;  double d0, d1, d2, d3, d4;  if (oneRot) {    switch (line->rot) {    case 0:      xMin = line->edge[start];      xMax = line->edge[start + len];      yMin = line->yMin;      yMax = line->yMax;      break;    case 1:      xMin = line->xMin;      xMax = line->xMax;      yMin = line->edge[start];      yMax = line->edge[start + len];      break;    case 2:      xMin = line->edge[start + len];      xMax = line->edge[start];      yMin = line->yMin;      yMax = line->yMax;      break;    case 3:      xMin = line->xMin;      xMax = line->xMax;      yMin = line->edge[start + len];      yMax = line->edge[start];      break;    }    base = line->base;  } else {    if (line->rot == 0 && line->blk->page->primaryRot == 0) {      xMin = line->edge[start];      xMax = line->edge[start + len];      yMin = line->yMin;      yMax = line->yMax;      base = line->base;    } else {      blk = line->blk;      d0 = line->edge[start];      d1 = line->edge[start + len];      d2 = d3 = d4 = 0; // make gcc happy      switch (line->rot) {      case 0:	d2 = line->yMin;	d3 = line->yMax;	d4 = line->base;	d0 = (d0 - blk->xMin) / (blk->xMax - blk->xMin);	d1 = (d1 - blk->xMin) / (blk->xMax - blk->xMin);	d2 = (d2 - blk->yMin) / (blk->yMax - blk->yMin);	d3 = (d3 - blk->yMin) / (blk->yMax - blk->yMin);	d4 = (d4 - blk->yMin) / (blk->yMax - blk->yMin);	break;      case 1:	d2 = line->xMax;	d3 = line->xMin;	d4 = line->base;	d0 = (d0 - blk->yMin) / (blk->yMax - blk->yMin);	d1 = (d1 - blk->yMin) / (blk->yMax - blk->yMin);	d2 = (blk->xMax - d2) / (blk->xMax - blk->xMin);	d3 = (blk->xMax - d3) / (blk->xMax - blk->xMin);	d4 = (blk->xMax - d4) / (blk->xMax - blk->xMin);	break;      case 2:	d2 = line->yMax;	d3 = line->yMin;	d4 = line->base;	d0 = (blk->xMax - d0) / (blk->xMax - blk->xMin);	d1 = (blk->xMax - d1) / (blk->xMax - blk->xMin);	d2 = (blk->yMax - d2) / (blk->yMax - blk->yMin);	d3 = (blk->yMax - d3) / (blk->yMax - blk->yMin);	d4 = (blk->yMax - d4) / (blk->yMax - blk->yMin);	break;      case 3:	d2 = line->xMin;	d3 = line->xMax;	d4 = line->base;	d0 = (blk->yMax - d0) / (blk->yMax - blk->yMin);	d1 = (blk->yMax - d1) / (blk->yMax - blk->yMin);	d2 = (d2 - blk->xMin) / (blk->xMax - blk->xMin);	d3 = (d3 - blk->xMin) / (blk->xMax - blk->xMin);	d4 = (d4 - blk->xMin) / (blk->xMax - blk->xMin);	break;      }      switch (line->blk->page->primaryRot) {      case 0:	xMin = blk->xMin + d0 * (blk->xMax - blk->xMin);	xMax = blk->xMin + d1 * (blk->xMax - blk->xMin);	yMin = blk->yMin + d2 * (blk->yMax - blk->yMin);	yMax = blk->yMin + d3 * (blk->yMax - blk->yMin);	base = blk->yMin + base * (blk->yMax - blk->yMin);	break;      case 1:	xMin = blk->xMax - d3 * (blk->xMax - blk->xMin);	xMax = blk->xMax - d2 * (blk->xMax - blk->xMin);	yMin = blk->yMin + d0 * (blk->yMax - blk->yMin);	yMax = blk->yMin + d1 * (blk->yMax - blk->yMin);	base = blk->xMax - d4 * (blk->xMax - blk->xMin);	break;      case 2:	xMin = blk->xMax - d1 * (blk->xMax - blk->xMin);	xMax = blk->xMax - d0 * (blk->xMax - blk->xMin);	yMin = blk->yMax - d3 * (blk->yMax - blk->yMin);	yMax = blk->yMax - d2 * (blk->yMax - blk->yMin);	base = blk->yMax - d4 * (blk->yMax - blk->yMin);	break;      case 3:	xMin = blk->xMin + d2 * (blk->xMax - blk->xMin);	xMax = blk->xMin + d3 * (blk->xMax - blk->xMin);	yMin = blk->yMax - d1 * (blk->yMax - blk->yMin);	yMax = blk->yMax - d0 * (blk->yMax - blk->yMin);	base = blk->xMin + d4 * (blk->xMax - blk->xMin);	break;      }    }  }}int TextLineFrag::cmpYXPrimaryRot(const void *p1, const void *p2) {  TextLineFrag *frag1 = (TextLineFrag *)p1;  TextLineFrag *frag2 = (TextLineFrag *)p2;  double cmp;  cmp = 0; // make gcc happy  switch (frag1->line->blk->page->primaryRot) {  case 0:    if (fabs(cmp = frag1->yMin - frag2->yMin) < 0.01) {      cmp = frag1->xMin - frag2->xMin;    }    break;  case 1:    if (fabs(cmp = frag2->xMax - frag1->xMax) < 0.01) {      cmp = frag1->yMin - frag2->yMin;    }    break;  case 2:    if (fabs(cmp = frag2->yMin - frag1->yMin) < 0.01) {      cmp = frag2->xMax - frag1->xMax;    }    break;  case 3:    if (fabs(cmp = frag1->xMax - frag2->xMax) < 0.01) {      cmp = frag2->yMax - frag1->yMax;    }    break;  }  return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;}int TextLineFrag::cmpYXLineRot(const void *p1, const void *p2) {  TextLineFrag *frag1 = (TextLineFrag *)p1;  TextLineFrag *frag2 = (TextLineFrag *)p2;  double cmp;  cmp = 0; // make gcc happy  switch (frag1->line->rot) {  case 0:    if ((cmp = frag1->yMin - frag2->yMin) == 0) {      cmp = frag1->xMin - frag2->xMin;    }    break;  case 1:    if ((cmp = frag2->xMax - frag1->xMax) == 0) {      cmp = frag1->yMin - frag2->yMin;    }    break;  case 2:    if ((cmp = frag2->yMin - frag1->yMin) == 0) {      cmp = frag2->xMax - frag1->xMax;    }    break;  case 3:    if ((cmp = frag1->xMax - frag2->xMax) == 0) {      cmp = frag2->yMax - frag1->yMax;    }    break;  }  return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;}int TextLineFrag::cmpXYLineRot(const void *p1, const void *p2) {  TextLineFrag *frag1 = (TextLineFrag *)p1;  TextLineFrag *frag2 = (TextLineFrag *)p2;  double cmp;  cmp = 0; // make gcc happy  switch (frag1->line->rot) {  case 0:    if ((cmp = frag1->xMin - frag2->xMin) == 0) {      cmp = frag1->yMin - frag2->yMin;    }    break;  case 1:    if ((cmp = frag1->yMin - frag2->yMin) == 0) {      cmp = frag2->xMax - frag1->xMax;    }    break;  case 2:    if ((cmp = frag2->xMax - frag1->xMax) == 0) {      cmp = frag2->yMin - frag1->yMin;    }    break;  case 3:    if ((cmp = frag2->yMax - frag1->yMax) == 0) {      cmp = frag1->xMax - frag2->xMax;    }    break;  }  return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;}//------------------------------------------------------------------------// TextBlock//------------------------------------------------------------------------TextBlock::TextBlock(TextPage *pageA, int rotA) {  page = pageA;  rot = rotA;  xMin = yMin = 0;  xMax = yMax = -1;  priMin = 0;  priMax = page->pageWidth;  pool = new TextPool();  lines = NULL;  curLine = NULL;  next = NULL;  stackNext = NULL;}TextBlock::~TextBlock() {  TextLine *line;  delete pool;  while (lines) {    line = lines;    lines = lines->next;    delete line;  }}void TextBlock::addWord(TextWord *word) {  pool->addWord(word);  if (xMin > xMax) {    xMin = word->xMin;    xMax = word->xMax;    yMin = word->yMin;    yMax = word->yMax;  } else {    if (word->xMin < xMin) {      xMin = word->xMin;    }    if (word->xMax > xMax) {      xMax = word->xMax;    }    if (word->yMin < yMin) {      yMin = word->yMin;    }    if (word->yMax > yMax) {      yMax = word->yMax;    }  }}void TextBlock::coalesce(UnicodeMap *uMap) {  TextWord *word0, *word1, *word2, *bestWord0, *bestWord1, *lastWord;  TextLine *line, *line0, *line1;  int poolMinBaseIdx, startBaseIdx, minBaseIdx, maxBaseIdx;  int baseIdx, bestWordBaseIdx, idx0, idx1;  double minBase, maxBase;  double fontSize, delta, priDelta, secDelta;  TextLine **lineArray;  GBool found;  int col1, col2;  int i, j, k;  // discard duplicated text (fake boldface, drop shadows)  for (idx0 = pool->minBaseIdx; idx0 <= pool->maxBaseIdx; ++idx0) {    word0 = pool->getPool(idx0);    while (word0) {      priDelta = dupMaxPriDelta * word0->fontSize;      secDelta = dupMaxSecDelta * word0->fontSize;      if (rot == 0 || rot == 3) {	maxBaseIdx = pool->getBaseIdx(word0->base + secDelta);      } else {	maxBaseIdx = pool->getBaseIdx(word0->base - secDelta);      }      found = gFalse;      word1 = word2 = NULL; // make gcc happy      for (idx1 = idx0; idx1 <= maxBaseIdx; ++idx1) {	if (idx1 == idx0) {	  word1 = word0;	  word2 = word0->next;	} else {	  word1 = NULL;	  word2 = pool->getPool(idx1);	}	for (; word2; word1 = word2, word2 = word2->next) {	  if (word2->len == word0->len &&	      !memcmp(word2->text, word0->text,

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -