📄 textoutputdev.cc

📁 source code: Convert TXT to PDF
💻 CC
📖 第 1 页 / 共 4 页
字号:
  curWord = new TextWord(state, x0, y0, font, fontSize);}void TextPage::addChar(GfxState *state, double x, double y,		       double dx, double dy,		       CharCode c, Unicode *u, int uLen) {  double x1, y1, w1, h1, dx2, dy2, sp;  int n, i;  // if the previous char was a space, addChar will have called  // endWord, so we need to start a new word  if (!curWord) {    beginWord(state, x, y);  }  // throw away chars that aren't inside the page bounds  state->transform(x, y, &x1, &y1);  if (x1 < 0 || x1 > pageWidth ||      y1 < 0 || y1 > pageHeight) {    return;  }  // subtract char and word spacing from the dx,dy values  sp = state->getCharSpace();  if (c == (CharCode)0x20) {    sp += state->getWordSpace();  }  state->textTransformDelta(sp * state->getHorizScaling(), 0, &dx2, &dy2);  dx -= dx2;  dy -= dy2;  state->transformDelta(dx, dy, &w1, &h1);  // check the tiny chars limit  if (!globalParams->getTextKeepTinyChars() &&      fabs(w1) < 3 && fabs(h1) < 3) {    if (++nTinyChars > 20000) {      return;    }  }  // break words at space character  if (uLen == 1 && u[0] == (Unicode)0x20) {    endWord();    return;  }  // large char spacing is sometimes used to move text around -- in  // this case, break text into individual chars and let the coalesce  // function deal with it later  n = curWord->len;  if (n > 0 && x1 - curWord->xRight[n-1] >                    curWord->font->minSpaceWidth * curWord->fontSize) {    // large char spacing is sometimes used to move text around    endWord();    beginWord(state, x, y);  }  // add the characters to the current word  if (uLen != 0) {    w1 /= uLen;    h1 /= uLen;  }  for (i = 0; i < uLen; ++i) {    curWord->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, u[i]);  }}void TextPage::endWord() {  // This check is needed because Type 3 characters can contain  // text-drawing operations (when TextPage is being used via  // XOutputDev rather than TextOutputDev).  
if (nest > 0) {    --nest;    return;  }  if (curWord) {    addWord(curWord);    curWord = NULL;  }}void TextPage::addWord(TextWord *word) {  TextWord *p1, *p2;  // throw away zero-length words -- they don't have valid xMin/xMax  // values, and they're useless anyway  if (word->len == 0) {    delete word;    return;  }  // insert word in xy list  if (rawOrder) {    p1 = wordPtr;    p2 = NULL;  } else {    if (wordPtr && wordPtr->xyBefore(word)) {      p1 = wordPtr;      p2 = wordPtr->next;    } else {      p1 = NULL;      p2 = words;    }    for (; p2; p1 = p2, p2 = p2->next) {      if (word->xyBefore(p2)) {	break;      }    }  }  if (p1) {    p1->next = word;  } else {    words = word;  }  word->next = p2;  wordPtr = word;}void TextPage::coalesce() {  TextWord *word0, *word1, *word2, *word3, *word4;  TextLine *line0, *line1, *line2, *line3, *line4, *lineList;  TextBlock *blk0, *blk1, *blk2, *blk3, *blk4, *blk5, *blk6;  TextBlock *yxBlocks, *blocks, *blkStack;  TextFlow *flow0, *flow1;  double sz, xLimit, minSpace, maxSpace, yLimit;  double fit1, fit2;  GBool found;  UnicodeMap *uMap;  GBool isUnicode;  char buf[8];  int col1, col2, d, i, j;#if 0 // for debugging  printf("*** initial word list ***\n");  for (word0 = words; word0; word0 = word0->next) {    printf("word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f: '",	   word0->xMin, word0->xMax, word0->yMin, word0->yMax, word0->yBase);    for (i = 0; i < word0->len; ++i) {      fputc(word0->text[i] & 0xff, stdout);    }    printf("'\n");  }  printf("\n");  fflush(stdout);#endif  //----- discard duplicated text (fake boldface, drop shadows)  word0 = words;  while (word0) {    sz = word0->fontSize;    xLimit = word0->xMin + sz * dupMaxDeltaX;    found = gFalse;    for (word1 = word0, word2 = word0->next;	 word2 && word2->xMin < xLimit;	 word1 = word2, word2 = word2->next) {      if (word2->len == word0->len &&	  !memcmp(word2->text, word0->text, word0->len * sizeof(Unicode)) &&	  fabs(word2->yMin - word0->yMin) < sz * 
dupMaxDeltaY &&	  fabs(word2->yMax - word0->yMax) < sz * dupMaxDeltaY &&	  fabs(word2->xMax - word0->xMax) < sz * dupMaxDeltaX) {	found = gTrue;	break;      }    }    if (found) {      word1->next = word2->next;      delete word2;    } else {      word0 = word0->next;    }  }#if 0 // for debugging  printf("*** words after removing duplicate text ***\n");  for (word0 = words; word0; word0 = word0->next) {    printf("word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f: '",	   word0->xMin, word0->xMax, word0->yMin, word0->yMax, word0->yBase);    for (i = 0; i < word0->len; ++i) {      fputc(word0->text[i] & 0xff, stdout);    }    printf("'\n");  }  printf("\n");  fflush(stdout);#endif  //----- merge words  word0 = words;  while (word0) {    sz = word0->fontSize;    // look for adjacent text which is part of the same word, and    // merge it into this word    xLimit = word0->xMax + sz * word0->font->minSpaceWidth;    if (rawOrder) {      word1 = word0;      word2 = word0->next;      found = word2 &&	      word2->xMin < xLimit &&	      word2->font == word0->font &&	      fabs(word2->fontSize - sz) < 0.05 &&	      fabs(word2->yBase - word0->yBase) < 0.05;    } else {      found = gFalse;      for (word1 = word0, word2 = word0->next;	   word2 && word2->xMin < xLimit;	   word1 = word2, word2 = word2->next) {	if (word2->font == word0->font &&	    fabs(word2->fontSize - sz) < 0.05 &&	    fabs(word2->yBase - word0->yBase) < 0.05) {	  found = gTrue;	  break;	}      }    }    if (found) {      word0->merge(word2);      word1->next = word2->next;      delete word2;      continue;    }    word0 = word0->next;  }#if 0 // for debugging  printf("*** after merging words ***\n");  for (word0 = words; word0; word0 = word0->next) {    printf("word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f: '",	   word0->xMin, word0->xMax, word0->yMin, word0->yMax, word0->yBase);    for (i = 0; i < word0->len; ++i) {      fputc(word0->text[i] & 0xff, stdout);    }    printf("'\n");  }  printf("\n");  
fflush(stdout);#endif  //----- assemble words into lines  uMap = globalParams->getTextEncoding();  isUnicode = uMap ? uMap->isUnicode() : gFalse;  lineList = NULL;  line0 = NULL;  while (words) {    // build a new line object    word0 = words;    words = words->next;    word0->next = NULL;    line1 = new TextLine();    line1->words = word0;    line1->xMin = word0->xMin;    line1->xMax = word0->xMax;    line1->yMin = word0->yMin;    line1->yMax = word0->yMax;    line1->yBase = word0->yBase;    line1->font = word0->font;    line1->fontSize = word0->fontSize;    line1->len = word0->len;    minSpace = line1->fontSize * word0->font->minSpaceWidth;    maxSpace = line1->fontSize * word0->font->maxSpaceWidth;    // find subsequent words in the line    while (words) {      xLimit = line1->xMax + maxSpace;      fit1 = fit2 = 0;      word3 = word4 = NULL;      if (rawOrder) {	if (words &&	    words->xMin < xLimit &&	    ((fit1 = lineFit(line1, word0, words)) >= 0)) {	  word3 = NULL;	  word4 = words;	}      } else {	for (word1 = NULL, word2 = words;	     word2 && word2->xMin < xLimit;	     word1 = word2, word2 = word2->next) {	  fit2 = lineFit(line1, word0, word2);	  if (fit2 >= 0 && (!word4 ||			    (word4 && fit2 < fit1))) {	    fit1 = fit2;	    word3 = word1;	    word4 = word2;	  }	}      }      if (word4) {	if (word3) {	  word3->next = word4->next;	} else {	  words = word4->next;	}	word0->next = word4;	word4->next = NULL;	if (word4->xMax > line1->xMax) {	  line1->xMax = word4->xMax;	}	if (word4->yMin < line1->yMin) {	  line1->yMin = word4->yMin;	}	if (word4->yMax > line1->yMax) {	  line1->yMax = word4->yMax;	}	line1->len += word4->len;	if (fit1 > minSpace) {	  word0->spaceAfter = gTrue;	  ++line1->len;	}	word0 = word4;      } else {	break;      }    }    // build the line text    line1->text = (Unicode *)gmalloc(line1->len * sizeof(Unicode));    line1->xRight = (double *)gmalloc(line1->len * sizeof(double));    line1->col = (int *)gmalloc(line1->len * sizeof(int));    i = 
0;    for (word1 = line1->words; word1; word1 = word1->next) {      for (j = 0; j < word1->len; ++j) {	line1->text[i] = word1->text[j];	line1->xRight[i] = word1->xRight[j];	++i;      }      if (word1->spaceAfter && word1->next) {	line1->text[i] = (Unicode)0x0020;	line1->xRight[i] = word1->next->xMin;	++i;      }    }    line1->convertedLen = 0;    for (j = 0; j < line1->len; ++j) {      line1->col[j] = line1->convertedLen;      if (isUnicode) {	++line1->convertedLen;      } else if (uMap) {	line1->convertedLen +=	  uMap->mapUnicode(line1->text[j], buf, sizeof(buf));      }    }    // check for hyphen at end of line    //~ need to check for other chars used as hyphens    if (line1->text[line1->len - 1] == (Unicode)'-') {      line1->hyphenated = gTrue;    }    // insert line on list    if (line0) {      line0->next = line1;    } else {      lineList = line1;    }    line0 = line1;  }  if (uMap) {    uMap->decRefCnt();  }#if 0 // for debugging  printf("*** lines in xy order ***\n");  for (line0 = lineList; line0; line0 = line0->next) {    printf("[line: x=%.2f..%.2f y=%.2f..%.2f base=%.2f len=%d]\n",	   line0->xMin, line0->xMax, line0->yMin, line0->yMax,	   line0->yBase, line0->len);    for (word0 = line0->words; word0; word0 = word0->next) {      printf("  word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f fontSz=%.2f space=%d: '",	     word0->xMin, word0->xMax, word0->yMin, word0->yMax,	     word0->yBase, word0->fontSize, word0->spaceAfter);      for (i = 0; i < word0->len; ++i) {	fputc(word0->text[i] & 0xff, stdout);      }      printf("'\n");    }  }  printf("\n");  fflush(stdout);#endif  //----- column assignment  for (line1 = lineList; line1; line1 = line1->next) {    col1 = 0;    for (line2 = lineList; line2 != line1; line2 = line2->next) {      if (line1->xMin >= line2->xMax) {	d = (int)((line1->xMin - line2->xMax) /		  (line1->font->maxSpaceWidth * line1->fontSize));	if (d > 4) {	  d = 4;	}	col2 = line2->col[0] + line2->convertedLen + d;	if (col2 > col1) {	  col1 = 
col2;	}      } else if (line1->xMin > line2->xMin) {	for (i = 0; i < line2->len && line1->xMin >= line2->xRight[i]; ++i) ;	col2 = line2->col[i];	if (col2 > col1) {	  col1 = col2;	}      }    }    for (j = 0; j < line1->len; ++j) {      line1->col[j] += col1;    }  }  //----- assemble lines into blocks  if (rawOrder) {    lines = lineList;    for (line1 = lines; line1; line1 = line1->next) {      line1->xSpaceL = 0;      line1->xSpaceR = pageWidth;    }  } else {    // sort lines into yx order    lines = NULL;    while (lineList) {      line0 = lineList;      lineList = lineList->next;      for (line1 = NULL, line2 = lines;	   line2 && !line0->yxBefore(line2);	   line1 = line2, line2 = line2->next) ;      if (line1) {	line1->next = line0;      } else {	lines = line0;      }      line0->next = line2;    }    // compute whitespace to left and right of each line    line0 = lines;    for (line1 = lines; line1; line1 = line1->next) {      // find the first vertically overlapping line      for (; line0 && line0->yMax < line1->yMin; line0 = line0->next) ;      // check each vertically overlapping line -- look for the nearest      // on each side      line1->xSpaceL = 0;      line1->xSpaceR = pageWidth;      for (line2 = line0;	   line2 && line2->yMin < line1->yMax;	   line2 = line2->next) {	if (line2->yMax > line1->yMin) {	  if (line2->xMax < line1->xMin) {	    if (line2->xMax > line1->xSpaceL) {	      line1->xSpaceL = line2->xMax;	    }	  } else if (line2->xMin > line1->xMax) {	    if (line2->xMin < line1->xSpaceR) {	      line1->xSpaceR = line2->xMin;	    }	  }	}      }    }  } // (!rawOrder)#if 0 // for debugging  printf("*** lines in yx order ***\n");  for (line0 = lines; line0; line0 = line0->next) {    printf("[line: x=%.2f..%.2f y=%.2f..%.2f base=%.2f xSpaceL=%.2f xSpaceR=%.2f len=%d]\n",	   line0->xMin, line0->xMax, line0->yMin, line0->yMax,	   line0->yBase, line0->xSpaceL, line0->xSpaceR, line0->len);    for (word0 = line0->words; word0; word0 = word0->next) {     
 printf("  word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f fontSz=%.2f space=%d: '",	     word0->xMin, word0->xMax, word0->yMin, word0->yMax,	     word0->yBase, word0->fontSize, word0->spaceAfter);      for (i = 0; i < word0->len; ++i) {	fputc(word0->text[i] & 0xff, stdout);      }      printf("'\n");    }  }  printf("\n");  fflush(stdout);#endif  lineList = lines;  yxBlocks = NULL;  blk0 = NULL;  while (lineList) {    // build a new block object    line0 = lineList;    lineList = lineList->next;    line0->next = NULL;    blk1 = new TextBlock();    blk1->lines = line0;    blk1->xMin = line0->xMin;    blk1->xMax = line0->xMax;    blk1->yMin = line0->yMin;    blk1->yMax = line0->yMax;    blk1->xSpaceL = line0->xSpaceL;    blk1->xSpaceR = line0->xSpaceR;    blk1->maxFontSize = line0->fontSize;    // find subsequent lines in the block    while (lineList) {      // look for the first horizontally overlapping line below this      // one      yLimit = line0->yMax + blkMaxSpacing * line0->fontSize;
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -