📄 textoutputdev.cc
字号:
curWord = new TextWord(state, x0, y0, font, fontSize);}void TextPage::addChar(GfxState *state, double x, double y, double dx, double dy, CharCode c, Unicode *u, int uLen) { double x1, y1, w1, h1, dx2, dy2, sp; int n, i; // if the previous char was a space, addChar will have called // endWord, so we need to start a new word if (!curWord) { beginWord(state, x, y); } // throw away chars that aren't inside the page bounds state->transform(x, y, &x1, &y1); if (x1 < 0 || x1 > pageWidth || y1 < 0 || y1 > pageHeight) { return; } // subtract char and word spacing from the dx,dy values sp = state->getCharSpace(); if (c == (CharCode)0x20) { sp += state->getWordSpace(); } state->textTransformDelta(sp * state->getHorizScaling(), 0, &dx2, &dy2); dx -= dx2; dy -= dy2; state->transformDelta(dx, dy, &w1, &h1); // check the tiny chars limit if (!globalParams->getTextKeepTinyChars() && fabs(w1) < 3 && fabs(h1) < 3) { if (++nTinyChars > 20000) { return; } } // break words at space character if (uLen == 1 && u[0] == (Unicode)0x20) { endWord(); return; } // large char spacing is sometimes used to move text around -- in // this case, break text into individual chars and let the coalesce // function deal with it later n = curWord->len; if (n > 0 && x1 - curWord->xRight[n-1] > curWord->font->minSpaceWidth * curWord->fontSize) { // large char spacing is sometimes used to move text around endWord(); beginWord(state, x, y); } // add the characters to the current word if (uLen != 0) { w1 /= uLen; h1 /= uLen; } for (i = 0; i < uLen; ++i) { curWord->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, u[i]); }}void TextPage::endWord() { // This check is needed because Type 3 characters can contain // text-drawing operations (when TextPage is being used via // XOutputDev rather than TextOutputDev). if (nest > 0) { --nest; return; } if (curWord) { addWord(curWord); curWord = NULL; }}void TextPage::addWord(TextWord *word) { TextWord *p1, *p2; // throw away zero-length words -- they don't have valid xMin/xMax // values, and they're useless anyway if (word->len == 0) { delete word; return; } // insert word in xy list if (rawOrder) { p1 = wordPtr; p2 = NULL; } else { if (wordPtr && wordPtr->xyBefore(word)) { p1 = wordPtr; p2 = wordPtr->next; } else { p1 = NULL; p2 = words; } for (; p2; p1 = p2, p2 = p2->next) { if (word->xyBefore(p2)) { break; } } } if (p1) { p1->next = word; } else { words = word; } word->next = p2; wordPtr = word;}void TextPage::coalesce() { TextWord *word0, *word1, *word2, *word3, *word4; TextLine *line0, *line1, *line2, *line3, *line4, *lineList; TextBlock *blk0, *blk1, *blk2, *blk3, *blk4, *blk5, *blk6; TextBlock *yxBlocks, *blocks, *blkStack; TextFlow *flow0, *flow1; double sz, xLimit, minSpace, maxSpace, yLimit; double fit1, fit2; GBool found; UnicodeMap *uMap; GBool isUnicode; char buf[8]; int col1, col2, d, i, j;#if 0 // for debugging printf("*** initial word list ***\n"); for (word0 = words; word0; word0 = word0->next) { printf("word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f: '", word0->xMin, word0->xMax, word0->yMin, word0->yMax, word0->yBase); for (i = 0; i < word0->len; ++i) { fputc(word0->text[i] & 0xff, stdout); } printf("'\n"); } printf("\n"); fflush(stdout);#endif //----- discard duplicated text (fake boldface, drop shadows) word0 = words; while (word0) { sz = word0->fontSize; xLimit = word0->xMin + sz * dupMaxDeltaX; found = gFalse; for (word1 = word0, word2 = word0->next; word2 && word2->xMin < xLimit; word1 = word2, word2 = word2->next) { if (word2->len == word0->len && !memcmp(word2->text, word0->text, word0->len * sizeof(Unicode)) && fabs(word2->yMin - word0->yMin) < sz * dupMaxDeltaY && fabs(word2->yMax - word0->yMax) < sz * dupMaxDeltaY && fabs(word2->xMax - word0->xMax) < sz * dupMaxDeltaX) { found = gTrue; break; } } if (found) { word1->next = word2->next; delete word2; } else { word0 = word0->next; } }#if 0 // for debugging printf("*** words after removing duplicate text ***\n"); for (word0 = words; word0; word0 = word0->next) { printf("word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f: '", word0->xMin, word0->xMax, word0->yMin, word0->yMax, word0->yBase); for (i = 0; i < word0->len; ++i) { fputc(word0->text[i] & 0xff, stdout); } printf("'\n"); } printf("\n"); fflush(stdout);#endif //----- merge words word0 = words; while (word0) { sz = word0->fontSize; // look for adjacent text which is part of the same word, and // merge it into this word xLimit = word0->xMax + sz * word0->font->minSpaceWidth; if (rawOrder) { word1 = word0; word2 = word0->next; found = word2 && word2->xMin < xLimit && word2->font == word0->font && fabs(word2->fontSize - sz) < 0.05 && fabs(word2->yBase - word0->yBase) < 0.05; } else { found = gFalse; for (word1 = word0, word2 = word0->next; word2 && word2->xMin < xLimit; word1 = word2, word2 = word2->next) { if (word2->font == word0->font && fabs(word2->fontSize - sz) < 0.05 && fabs(word2->yBase - word0->yBase) < 0.05) { found = gTrue; break; } } } if (found) { word0->merge(word2); word1->next = word2->next; delete word2; continue; } word0 = word0->next; }#if 0 // for debugging printf("*** after merging words ***\n"); for (word0 = words; word0; word0 = word0->next) { printf("word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f: '", word0->xMin, word0->xMax, word0->yMin, word0->yMax, word0->yBase); for (i = 0; i < word0->len; ++i) { fputc(word0->text[i] & 0xff, stdout); } printf("'\n"); } printf("\n"); fflush(stdout);#endif //----- assemble words into lines uMap = globalParams->getTextEncoding(); isUnicode = uMap ? uMap->isUnicode() : gFalse; lineList = NULL; line0 = NULL; while (words) { // build a new line object word0 = words; words = words->next; word0->next = NULL; line1 = new TextLine(); line1->words = word0; line1->xMin = word0->xMin; line1->xMax = word0->xMax; line1->yMin = word0->yMin; line1->yMax = word0->yMax; line1->yBase = word0->yBase; line1->font = word0->font; line1->fontSize = word0->fontSize; line1->len = word0->len; minSpace = line1->fontSize * word0->font->minSpaceWidth; maxSpace = line1->fontSize * word0->font->maxSpaceWidth; // find subsequent words in the line while (words) { xLimit = line1->xMax + maxSpace; fit1 = fit2 = 0; word3 = word4 = NULL; if (rawOrder) { if (words && words->xMin < xLimit && ((fit1 = lineFit(line1, word0, words)) >= 0)) { word3 = NULL; word4 = words; } } else { for (word1 = NULL, word2 = words; word2 && word2->xMin < xLimit; word1 = word2, word2 = word2->next) { fit2 = lineFit(line1, word0, word2); if (fit2 >= 0 && (!word4 || (word4 && fit2 < fit1))) { fit1 = fit2; word3 = word1; word4 = word2; } } } if (word4) { if (word3) { word3->next = word4->next; } else { words = word4->next; } word0->next = word4; word4->next = NULL; if (word4->xMax > line1->xMax) { line1->xMax = word4->xMax; } if (word4->yMin < line1->yMin) { line1->yMin = word4->yMin; } if (word4->yMax > line1->yMax) { line1->yMax = word4->yMax; } line1->len += word4->len; if (fit1 > minSpace) { word0->spaceAfter = gTrue; ++line1->len; } word0 = word4; } else { break; } } // build the line text line1->text = (Unicode *)gmalloc(line1->len * sizeof(Unicode)); line1->xRight = (double *)gmalloc(line1->len * sizeof(double)); line1->col = (int *)gmalloc(line1->len * sizeof(int)); i = 0; for (word1 = line1->words; word1; word1 = word1->next) { for (j = 0; j < word1->len; ++j) { line1->text[i] = word1->text[j]; line1->xRight[i] = word1->xRight[j]; ++i; } if (word1->spaceAfter && word1->next) { line1->text[i] = (Unicode)0x0020; line1->xRight[i] = word1->next->xMin; ++i; } } line1->convertedLen = 0; for (j = 0; j < line1->len; ++j) { line1->col[j] = line1->convertedLen; if (isUnicode) { ++line1->convertedLen; } else if (uMap) { line1->convertedLen += uMap->mapUnicode(line1->text[j], buf, sizeof(buf)); } } // check for hyphen at end of line //~ need to check for other chars used as hyphens if (line1->text[line1->len - 1] == (Unicode)'-') { line1->hyphenated = gTrue; } // insert line on list if (line0) { line0->next = line1; } else { lineList = line1; } line0 = line1; } if (uMap) { uMap->decRefCnt(); }#if 0 // for debugging printf("*** lines in xy order ***\n"); for (line0 = lineList; line0; line0 = line0->next) { printf("[line: x=%.2f..%.2f y=%.2f..%.2f base=%.2f len=%d]\n", line0->xMin, line0->xMax, line0->yMin, line0->yMax, line0->yBase, line0->len); for (word0 = line0->words; word0; word0 = word0->next) { printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f fontSz=%.2f space=%d: '", word0->xMin, word0->xMax, word0->yMin, word0->yMax, word0->yBase, word0->fontSize, word0->spaceAfter); for (i = 0; i < word0->len; ++i) { fputc(word0->text[i] & 0xff, stdout); } printf("'\n"); } } printf("\n"); fflush(stdout);#endif //----- column assignment for (line1 = lineList; line1; line1 = line1->next) { col1 = 0; for (line2 = lineList; line2 != line1; line2 = line2->next) { if (line1->xMin >= line2->xMax) { d = (int)((line1->xMin - line2->xMax) / (line1->font->maxSpaceWidth * line1->fontSize)); if (d > 4) { d = 4; } col2 = line2->col[0] + line2->convertedLen + d; if (col2 > col1) { col1 = col2; } } else if (line1->xMin > line2->xMin) { for (i = 0; i < line2->len && line1->xMin >= line2->xRight[i]; ++i) ; col2 = line2->col[i]; if (col2 > col1) { col1 = col2; } } } for (j = 0; j < line1->len; ++j) { line1->col[j] += col1; } } //----- assemble lines into blocks if (rawOrder) { lines = lineList; for (line1 = lines; line1; line1 = line1->next) { line1->xSpaceL = 0; line1->xSpaceR = pageWidth; } } else { // sort lines into yx order lines = NULL; while (lineList) { line0 = lineList; lineList = lineList->next; for (line1 = NULL, line2 = lines; line2 && !line0->yxBefore(line2); line1 = line2, line2 = line2->next) ; if (line1) { line1->next = line0; } else { lines = line0; } line0->next = line2; } // compute whitespace to left and right of each line line0 = lines; for (line1 = lines; line1; line1 = line1->next) { // find the first vertically overlapping line for (; line0 && line0->yMax < line1->yMin; line0 = line0->next) ; // check each vertically overlapping line -- look for the nearest // on each side line1->xSpaceL = 0; line1->xSpaceR = pageWidth; for (line2 = line0; line2 && line2->yMin < line1->yMax; line2 = line2->next) { if (line2->yMax > line1->yMin) { if (line2->xMax < line1->xMin) { if (line2->xMax > line1->xSpaceL) { line1->xSpaceL = line2->xMax; } } else if (line2->xMin > line1->xMax) { if (line2->xMin < line1->xSpaceR) { line1->xSpaceR = line2->xMin; } } } } } } // (!rawOrder)#if 0 // for debugging printf("*** lines in yx order ***\n"); for (line0 = lines; line0; line0 = line0->next) { printf("[line: x=%.2f..%.2f y=%.2f..%.2f base=%.2f xSpaceL=%.2f xSpaceR=%.2f len=%d]\n", line0->xMin, line0->xMax, line0->yMin, line0->yMax, line0->yBase, line0->xSpaceL, line0->xSpaceR, line0->len); for (word0 = line0->words; word0; word0 = word0->next) { printf(" word: x=%.2f..%.2f y=%.2f..%.2f base=%.2f fontSz=%.2f space=%d: '", word0->xMin, word0->xMax, word0->yMin, word0->yMax, word0->yBase, word0->fontSize, word0->spaceAfter); for (i = 0; i < word0->len; ++i) { fputc(word0->text[i] & 0xff, stdout); } printf("'\n"); } } printf("\n"); fflush(stdout);#endif lineList = lines; yxBlocks = NULL; blk0 = NULL; while (lineList) { // build a new block object line0 = lineList; lineList = lineList->next; line0->next = NULL; blk1 = new TextBlock(); blk1->lines = line0; blk1->xMin = line0->xMin; blk1->xMax = line0->xMax; blk1->yMin = line0->yMin; blk1->yMax = line0->yMax; blk1->xSpaceL = line0->xSpaceL; blk1->xSpaceR = line0->xSpaceR; blk1->maxFontSize = line0->fontSize; // find subsequent lines in the block while (lineList) { // look for the first horizontally overlapping line below this // one yLimit = line0->yMax + blkMaxSpacing * line0->fontSize;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -