📄 textoutputdev.cc
字号:
// expand the array if needed wordBaseIdx = (int)(word->base / textPoolStep); if (minBaseIdx > maxBaseIdx) { minBaseIdx = wordBaseIdx - 128; maxBaseIdx = wordBaseIdx + 128; pool = (TextWord **)gmallocn(maxBaseIdx - minBaseIdx + 1, sizeof(TextWord *)); for (baseIdx = minBaseIdx; baseIdx <= maxBaseIdx; ++baseIdx) { pool[baseIdx - minBaseIdx] = NULL; } } else if (wordBaseIdx < minBaseIdx) { newMinBaseIdx = wordBaseIdx - 128; newPool = (TextWord **)gmallocn(maxBaseIdx - newMinBaseIdx + 1, sizeof(TextWord *)); for (baseIdx = newMinBaseIdx; baseIdx < minBaseIdx; ++baseIdx) { newPool[baseIdx - newMinBaseIdx] = NULL; } memcpy(&newPool[minBaseIdx - newMinBaseIdx], pool, (maxBaseIdx - minBaseIdx + 1) * sizeof(TextWord *)); gfree(pool); pool = newPool; minBaseIdx = newMinBaseIdx; } else if (wordBaseIdx > maxBaseIdx) { newMaxBaseIdx = wordBaseIdx + 128; pool = (TextWord **)greallocn(pool, newMaxBaseIdx - minBaseIdx + 1, sizeof(TextWord *)); for (baseIdx = maxBaseIdx + 1; baseIdx <= newMaxBaseIdx; ++baseIdx) { pool[baseIdx - minBaseIdx] = NULL; } maxBaseIdx = newMaxBaseIdx; } // insert the new word if (cursor && wordBaseIdx == cursorBaseIdx && word->primaryCmp(cursor) > 0) { w0 = cursor; w1 = cursor->next; } else { w0 = NULL; w1 = pool[wordBaseIdx - minBaseIdx]; } for (; w1 && word->primaryCmp(w1) > 0; w0 = w1, w1 = w1->next) ; word->next = w1; if (w0) { w0->next = word; } else { pool[wordBaseIdx - minBaseIdx] = word; } cursor = word; cursorBaseIdx = wordBaseIdx;}//------------------------------------------------------------------------// TextLine//------------------------------------------------------------------------TextLine::TextLine(TextBlock *blkA, int rotA, double baseA) { blk = blkA; rot = rotA; xMin = yMin = 0; xMax = yMax = -1; base = baseA; words = lastWord = NULL; text = NULL; edge = NULL; col = NULL; len = 0; convertedLen = 0; hyphenated = gFalse; next = NULL;}TextLine::~TextLine() { TextWord *word; while (words) { word = words; words = words->next; delete word; } gfree(text); gfree(edge); gfree(col);}void TextLine::addWord(TextWord *word) { if (lastWord) { lastWord->next = word; } else { words = word; } lastWord = word; if (xMin > xMax) { xMin = word->xMin; xMax = word->xMax; yMin = word->yMin; yMax = word->yMax; } else { if (word->xMin < xMin) { xMin = word->xMin; } if (word->xMax > xMax) { xMax = word->xMax; } if (word->yMin < yMin) { yMin = word->yMin; } if (word->yMax > yMax) { yMax = word->yMax; } }}double TextLine::primaryDelta(TextLine *line) { double delta; delta = 0; // make gcc happy switch (rot) { case 0: delta = line->xMin - xMax; break; case 1: delta = line->yMin - yMax; break; case 2: delta = xMin - line->xMax; break; case 3: delta = yMin - line->yMax; break; } return delta;}int TextLine::primaryCmp(TextLine *line) { double cmp; cmp = 0; // make gcc happy switch (rot) { case 0: cmp = xMin - line->xMin; break; case 1: cmp = yMin - line->yMin; break; case 2: cmp = line->xMax - xMax; break; case 3: cmp = line->yMax - yMax; break; } return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;}int TextLine::secondaryCmp(TextLine *line) { double cmp; cmp = (rot == 0 || rot == 3) ? base - line->base : line->base - base; return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;}int TextLine::cmpYX(TextLine *line) { int cmp; if ((cmp = secondaryCmp(line))) { return cmp; } return primaryCmp(line);}int TextLine::cmpXY(const void *p1, const void *p2) { TextLine *line1 = *(TextLine **)p1; TextLine *line2 = *(TextLine **)p2; int cmp; if ((cmp = line1->primaryCmp(line2))) { return cmp; } return line1->secondaryCmp(line2);}void TextLine::coalesce(UnicodeMap *uMap) { TextWord *word0, *word1; double space, delta, minSpace; GBool isUnicode; char buf[8]; int i, j; if (words->next) { // compute the inter-word space threshold if (words->len > 1 || words->next->len > 1) { minSpace = 0; } else { minSpace = words->primaryDelta(words->next); for (word0 = words->next, word1 = word0->next; word1 && minSpace > 0; word0 = word1, word1 = word0->next) { if (word1->len > 1) { minSpace = 0; } delta = word0->primaryDelta(word1); if (delta < minSpace) { minSpace = delta; } } } if (minSpace <= 0) { space = maxCharSpacing * words->fontSize; } else { space = maxWideCharSpacingMul * minSpace; if (space > maxWideCharSpacing * words->fontSize) { space = maxWideCharSpacing * words->fontSize; } } // merge words word0 = words; word1 = words->next; while (word1) { if (word0->primaryDelta(word1) >= space) { word0->spaceAfter = gTrue; word0 = word1; word1 = word1->next; } else if (word0->font == word1->font && word0->underlined == word1->underlined && fabs(word0->fontSize - word1->fontSize) < maxWordFontSizeDelta * words->fontSize && word1->charPos == word0->charPos + word0->charLen) { word0->merge(word1); word0->next = word1->next; delete word1; word1 = word0->next; } else { word0 = word1; word1 = word1->next; } } } // build the line text isUnicode = uMap ? uMap->isUnicode() : gFalse; len = 0; for (word1 = words; word1; word1 = word1->next) { len += word1->len; if (word1->spaceAfter) { ++len; } } text = (Unicode *)gmallocn(len, sizeof(Unicode)); edge = (double *)gmallocn(len + 1, sizeof(double)); i = 0; for (word1 = words; word1; word1 = word1->next) { for (j = 0; j < word1->len; ++j) { text[i] = word1->text[j]; edge[i] = word1->edge[j]; ++i; } edge[i] = word1->edge[word1->len]; if (word1->spaceAfter) { text[i] = (Unicode)0x0020; ++i; } } // compute convertedLen and set up the col array col = (int *)gmallocn(len + 1, sizeof(int)); convertedLen = 0; for (i = 0; i < len; ++i) { col[i] = convertedLen; if (isUnicode) { ++convertedLen; } else if (uMap) { convertedLen += uMap->mapUnicode(text[i], buf, sizeof(buf)); } } col[len] = convertedLen; // check for hyphen at end of line //~ need to check for other chars used as hyphens hyphenated = text[len - 1] == (Unicode)'-';}//------------------------------------------------------------------------// TextLineFrag//------------------------------------------------------------------------class TextLineFrag {public: TextLine *line; // the line object int start, len; // offset and length of this fragment // (in Unicode chars) double xMin, xMax; // bounding box coordinates double yMin, yMax; double base; // baseline virtual coordinate int col; // first column void init(TextLine *lineA, int startA, int lenA); void computeCoords(GBool oneRot); static int cmpYXPrimaryRot(const void *p1, const void *p2); static int cmpYXLineRot(const void *p1, const void *p2); static int cmpXYLineRot(const void *p1, const void *p2); static int cmpXYColumnPrimaryRot(const void *p1, const void *p2); static int cmpXYColumnLineRot(const void *p1, const void *p2);};void TextLineFrag::init(TextLine *lineA, int startA, int lenA) { line = lineA; start = startA; len = lenA; col = line->col[start];}void TextLineFrag::computeCoords(GBool oneRot) { TextBlock *blk; double d0, d1, d2, d3, d4; if (oneRot) { switch (line->rot) { case 0: xMin = line->edge[start]; xMax = line->edge[start + len]; yMin = line->yMin; yMax = line->yMax; break; case 1: xMin = line->xMin; xMax = line->xMax; yMin = line->edge[start]; yMax = line->edge[start + len]; break; case 2: xMin = line->edge[start + len]; xMax = line->edge[start]; yMin = line->yMin; yMax = line->yMax; break; case 3: xMin = line->xMin; xMax = line->xMax; yMin = line->edge[start + len]; yMax = line->edge[start]; break; } base = line->base; } else { if (line->rot == 0 && line->blk->page->primaryRot == 0) { xMin = line->edge[start]; xMax = line->edge[start + len]; yMin = line->yMin; yMax = line->yMax; base = line->base; } else { blk = line->blk; d0 = line->edge[start]; d1 = line->edge[start + len]; d2 = d3 = d4 = 0; // make gcc happy switch (line->rot) { case 0: d2 = line->yMin; d3 = line->yMax; d4 = line->base; d0 = (d0 - blk->xMin) / (blk->xMax - blk->xMin); d1 = (d1 - blk->xMin) / (blk->xMax - blk->xMin); d2 = (d2 - blk->yMin) / (blk->yMax - blk->yMin); d3 = (d3 - blk->yMin) / (blk->yMax - blk->yMin); d4 = (d4 - blk->yMin) / (blk->yMax - blk->yMin); break; case 1: d2 = line->xMax; d3 = line->xMin; d4 = line->base; d0 = (d0 - blk->yMin) / (blk->yMax - blk->yMin); d1 = (d1 - blk->yMin) / (blk->yMax - blk->yMin); d2 = (blk->xMax - d2) / (blk->xMax - blk->xMin); d3 = (blk->xMax - d3) / (blk->xMax - blk->xMin); d4 = (blk->xMax - d4) / (blk->xMax - blk->xMin); break; case 2: d2 = line->yMax; d3 = line->yMin; d4 = line->base; d0 = (blk->xMax - d0) / (blk->xMax - blk->xMin); d1 = (blk->xMax - d1) / (blk->xMax - blk->xMin); d2 = (blk->yMax - d2) / (blk->yMax - blk->yMin); d3 = (blk->yMax - d3) / (blk->yMax - blk->yMin); d4 = (blk->yMax - d4) / (blk->yMax - blk->yMin); break; case 3: d2 = line->xMin; d3 = line->xMax; d4 = line->base; d0 = (blk->yMax - d0) / (blk->yMax - blk->yMin); d1 = (blk->yMax - d1) / (blk->yMax - blk->yMin); d2 = (d2 - blk->xMin) / (blk->xMax - blk->xMin); d3 = (d3 - blk->xMin) / (blk->xMax - blk->xMin); d4 = (d4 - blk->xMin) / (blk->xMax - blk->xMin); break; } switch (line->blk->page->primaryRot) { case 0: xMin = blk->xMin + d0 * (blk->xMax - blk->xMin); xMax = blk->xMin + d1 * (blk->xMax - blk->xMin); yMin = blk->yMin + d2 * (blk->yMax - blk->yMin); yMax = blk->yMin + d3 * (blk->yMax - blk->yMin); base = blk->yMin + base * (blk->yMax - blk->yMin); break; case 1: xMin = blk->xMax - d3 * (blk->xMax - blk->xMin); xMax = blk->xMax - d2 * (blk->xMax - blk->xMin); yMin = blk->yMin + d0 * (blk->yMax - blk->yMin); yMax = blk->yMin + d1 * (blk->yMax - blk->yMin); base = blk->xMax - d4 * (blk->xMax - blk->xMin); break; case 2: xMin = blk->xMax - d1 * (blk->xMax - blk->xMin); xMax = blk->xMax - d0 * (blk->xMax - blk->xMin); yMin = blk->yMax - d3 * (blk->yMax - blk->yMin); yMax = blk->yMax - d2 * (blk->yMax - blk->yMin); base = blk->yMax - d4 * (blk->yMax - blk->yMin); break; case 3: xMin = blk->xMin + d2 * (blk->xMax - blk->xMin); xMax = blk->xMin + d3 * (blk->xMax - blk->xMin); yMin = blk->yMax - d1 * (blk->yMax - blk->yMin); yMax = blk->yMax - d0 * (blk->yMax - blk->yMin); base = blk->xMin + d4 * (blk->xMax - blk->xMin); break; } } }}int TextLineFrag::cmpYXPrimaryRot(const void *p1, const void *p2) { TextLineFrag *frag1 = (TextLineFrag *)p1; TextLineFrag *frag2 = (TextLineFrag *)p2; double cmp; cmp = 0; // make gcc happy switch (frag1->line->blk->page->primaryRot) { case 0: if (fabs(cmp = frag1->yMin - frag2->yMin) < 0.01) { cmp = frag1->xMin - frag2->xMin; } break; case 1: if (fabs(cmp = frag2->xMax - frag1->xMax) < 0.01) { cmp = frag1->yMin - frag2->yMin; } break; case 2: if (fabs(cmp = frag2->yMin - frag1->yMin) < 0.01) { cmp = frag2->xMax - frag1->xMax; } break; case 3: if (fabs(cmp = frag1->xMax - frag2->xMax) < 0.01) { cmp = frag2->yMax - frag1->yMax; } break; } return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;}int TextLineFrag::cmpYXLineRot(const void *p1, const void *p2) { TextLineFrag *frag1 = (TextLineFrag *)p1; TextLineFrag *frag2 = (TextLineFrag *)p2; double cmp; cmp = 0; // make gcc happy switch (frag1->line->rot) { case 0: if ((cmp = frag1->yMin - frag2->yMin) == 0) { cmp = frag1->xMin - frag2->xMin; } break; case 1: if ((cmp = frag2->xMax - frag1->xMax) == 0) { cmp = frag1->yMin - frag2->yMin; } break; case 2: if ((cmp = frag2->yMin - frag1->yMin) == 0) { cmp = frag2->xMax - frag1->xMax; } break; case 3: if ((cmp = frag1->xMax - frag2->xMax) == 0) { cmp = frag2->yMax - frag1->yMax; } break; } return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;}int TextLineFrag::cmpXYLineRot(const void *p1, const void *p2) { TextLineFrag *frag1 = (TextLineFrag *)p1; TextLineFrag *frag2 = (TextLineFrag *)p2; double cmp;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -