📄 textoutputdev.cc
字号:
cmp = 0; // make gcc happy switch (frag1->line->rot) { case 0: if ((cmp = frag1->xMin - frag2->xMin) == 0) { cmp = frag1->yMin - frag2->yMin; } break; case 1: if ((cmp = frag1->yMin - frag2->yMin) == 0) { cmp = frag2->xMax - frag1->xMax; } break; case 2: if ((cmp = frag2->xMax - frag1->xMax) == 0) { cmp = frag2->yMin - frag1->yMin; } break; case 3: if ((cmp = frag2->yMax - frag1->yMax) == 0) { cmp = frag1->xMax - frag2->xMax; } break; } return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;}int TextLineFrag::cmpXYColumnPrimaryRot(const void *p1, const void *p2) { TextLineFrag *frag1 = (TextLineFrag *)p1; TextLineFrag *frag2 = (TextLineFrag *)p2; double cmp; // if columns overlap, compare y values if (frag1->col < frag2->col + (frag2->line->col[frag2->start + frag2->len] - frag2->line->col[frag2->start]) && frag2->col < frag1->col + (frag1->line->col[frag1->start + frag1->len] - frag1->line->col[frag1->start])) { cmp = 0; // make gcc happy switch (frag1->line->blk->page->primaryRot) { case 0: cmp = frag1->yMin - frag2->yMin; break; case 1: cmp = frag2->xMax - frag1->xMax; break; case 2: cmp = frag2->yMin - frag1->yMin; break; case 3: cmp = frag1->xMax - frag2->xMax; break; } return cmp < 0 ? -1 : cmp > 0 ? 1 : 0; } // otherwise, compare starting column return frag1->col - frag2->col;}int TextLineFrag::cmpXYColumnLineRot(const void *p1, const void *p2) { TextLineFrag *frag1 = (TextLineFrag *)p1; TextLineFrag *frag2 = (TextLineFrag *)p2; double cmp; // if columns overlap, compare y values if (frag1->col < frag2->col + (frag2->line->col[frag2->start + frag2->len] - frag2->line->col[frag2->start]) && frag2->col < frag1->col + (frag1->line->col[frag1->start + frag1->len] - frag1->line->col[frag1->start])) { cmp = 0; // make gcc happy switch (frag1->line->rot) { case 0: cmp = frag1->yMin - frag2->yMin; break; case 1: cmp = frag2->xMax - frag1->xMax; break; case 2: cmp = frag2->yMin - frag1->yMin; break; case 3: cmp = frag1->xMax - frag2->xMax; break; } return cmp < 0 ? -1 : cmp > 0 ? 1 : 0; } // otherwise, compare starting column return frag1->col - frag2->col;}//------------------------------------------------------------------------// TextBlock//------------------------------------------------------------------------TextBlock::TextBlock(TextPage *pageA, int rotA) { page = pageA; rot = rotA; xMin = yMin = 0; xMax = yMax = -1; priMin = 0; priMax = page->pageWidth; pool = new TextPool(); lines = NULL; curLine = NULL; next = NULL; stackNext = NULL;}TextBlock::~TextBlock() { TextLine *line; delete pool; while (lines) { line = lines; lines = lines->next; delete line; }}void TextBlock::addWord(TextWord *word) { pool->addWord(word); if (xMin > xMax) { xMin = word->xMin; xMax = word->xMax; yMin = word->yMin; yMax = word->yMax; } else { if (word->xMin < xMin) { xMin = word->xMin; } if (word->xMax > xMax) { xMax = word->xMax; } if (word->yMin < yMin) { yMin = word->yMin; } if (word->yMax > yMax) { yMax = word->yMax; } }}void TextBlock::coalesce(UnicodeMap *uMap) { TextWord *word0, *word1, *word2, *bestWord0, *bestWord1, *lastWord; TextLine *line, *line0, *line1; int poolMinBaseIdx, startBaseIdx, minBaseIdx, maxBaseIdx; int baseIdx, bestWordBaseIdx, idx0, idx1; double minBase, maxBase; double fontSize, delta, priDelta, secDelta; TextLine **lineArray; GBool found; int col1, col2; int i, j, k; // discard duplicated text (fake boldface, drop shadows) for (idx0 = pool->minBaseIdx; idx0 <= pool->maxBaseIdx; ++idx0) { word0 = pool->getPool(idx0); while (word0) { priDelta = dupMaxPriDelta * word0->fontSize; secDelta = dupMaxSecDelta * word0->fontSize; if (rot == 0 || rot == 3) { maxBaseIdx = pool->getBaseIdx(word0->base + secDelta); } else { maxBaseIdx = pool->getBaseIdx(word0->base - secDelta); } found = gFalse; word1 = word2 = NULL; // make gcc happy for (idx1 = idx0; idx1 <= maxBaseIdx; ++idx1) { if (idx1 == idx0) { word1 = word0; word2 = word0->next; } else { word1 = NULL; word2 = pool->getPool(idx1); } for (; word2; word1 = word2, word2 = word2->next) { if (word2->len == word0->len && !memcmp(word2->text, word0->text, word0->len * sizeof(Unicode))) { switch (rot) { case 0: case 2: found = fabs(word0->xMin - word2->xMin) < priDelta && fabs(word0->xMax - word2->xMax) < priDelta && fabs(word0->yMin - word2->yMin) < secDelta && fabs(word0->yMax - word2->yMax) < secDelta; break; case 1: case 3: found = fabs(word0->xMin - word2->xMin) < secDelta && fabs(word0->xMax - word2->xMax) < secDelta && fabs(word0->yMin - word2->yMin) < priDelta && fabs(word0->yMax - word2->yMax) < priDelta; break; } } if (found) { break; } } if (found) { break; } } if (found) { if (word1) { word1->next = word2->next; } else { pool->setPool(idx1, word2->next); } delete word2; } else { word0 = word0->next; } } } // build the lines curLine = NULL; poolMinBaseIdx = pool->minBaseIdx; charCount = 0; nLines = 0; while (1) { // find the first non-empty line in the pool for (; poolMinBaseIdx <= pool->maxBaseIdx && !pool->getPool(poolMinBaseIdx); ++poolMinBaseIdx) ; if (poolMinBaseIdx > pool->maxBaseIdx) { break; } // look for the left-most word in the first four lines of the // pool -- this avoids starting with a superscript word startBaseIdx = poolMinBaseIdx; for (baseIdx = poolMinBaseIdx + 1; baseIdx < poolMinBaseIdx + 4 && baseIdx <= pool->maxBaseIdx; ++baseIdx) { if (!pool->getPool(baseIdx)) { continue; } if (pool->getPool(baseIdx)->primaryCmp(pool->getPool(startBaseIdx)) < 0) { startBaseIdx = baseIdx; } } // create a new line word0 = pool->getPool(startBaseIdx); pool->setPool(startBaseIdx, word0->next); word0->next = NULL; line = new TextLine(this, word0->rot, word0->base); line->addWord(word0); lastWord = word0; // compute the search range fontSize = word0->fontSize; minBase = word0->base - maxIntraLineDelta * fontSize; maxBase = word0->base + maxIntraLineDelta * fontSize; minBaseIdx = pool->getBaseIdx(minBase); maxBaseIdx = pool->getBaseIdx(maxBase); // find the rest of the words in this line while (1) { // find the left-most word whose baseline is in the range for // this line bestWordBaseIdx = 0; bestWord0 = bestWord1 = NULL; for (baseIdx = minBaseIdx; baseIdx <= maxBaseIdx; ++baseIdx) { for (word0 = NULL, word1 = pool->getPool(baseIdx); word1; word0 = word1, word1 = word1->next) { if (word1->base >= minBase && word1->base <= maxBase && (delta = lastWord->primaryDelta(word1)) >= minCharSpacing * fontSize) { if (delta < maxWordSpacing * fontSize && (!bestWord1 || word1->primaryCmp(bestWord1) < 0)) { bestWordBaseIdx = baseIdx; bestWord0 = word0; bestWord1 = word1; } break; } } } if (!bestWord1) { break; } // remove it from the pool, and add it to the line if (bestWord0) { bestWord0->next = bestWord1->next; } else { pool->setPool(bestWordBaseIdx, bestWord1->next); } bestWord1->next = NULL; line->addWord(bestWord1); lastWord = bestWord1; } // add the line if (curLine && line->cmpYX(curLine) > 0) { line0 = curLine; line1 = curLine->next; } else { line0 = NULL; line1 = lines; } for (; line1 && line->cmpYX(line1) > 0; line0 = line1, line1 = line1->next) ; if (line0) { line0->next = line; } else { lines = line; } line->next = line1; curLine = line; line->coalesce(uMap); charCount += line->len; ++nLines; } // sort lines into xy order for column assignment lineArray = (TextLine **)gmallocn(nLines, sizeof(TextLine *)); for (line = lines, i = 0; line; line = line->next, ++i) { lineArray[i] = line; } qsort(lineArray, nLines, sizeof(TextLine *), &TextLine::cmpXY); // column assignment nColumns = 0; for (i = 0; i < nLines; ++i) { line0 = lineArray[i]; col1 = 0; for (j = 0; j < i; ++j) { line1 = lineArray[j]; if (line1->primaryDelta(line0) >= 0) { col2 = line1->col[line1->len] + 1; } else { k = 0; // make gcc happy switch (rot) { case 0: for (k = 0; k < line1->len && line0->xMin >= 0.5 * (line1->edge[k] + line1->edge[k+1]); ++k) ; break; case 1: for (k = 0; k < line1->len && line0->yMin >= 0.5 * (line1->edge[k] + line1->edge[k+1]); ++k) ; break; case 2: for (k = 0; k < line1->len && line0->xMax <= 0.5 * (line1->edge[k] + line1->edge[k+1]); ++k) ; break; case 3: for (k = 0; k < line1->len && line0->yMax <= 0.5 * (line1->edge[k] + line1->edge[k+1]); ++k) ; break; } col2 = line1->col[k]; } if (col2 > col1) { col1 = col2; } } for (k = 0; k <= line0->len; ++k) { line0->col[k] += col1; } if (line0->col[line0->len] > nColumns) { nColumns = line0->col[line0->len]; } } gfree(lineArray);}void TextBlock::updatePriMinMax(TextBlock *blk) { double newPriMin, newPriMax; GBool gotPriMin, gotPriMax; gotPriMin = gotPriMax = gFalse; newPriMin = newPriMax = 0; // make gcc happy switch (page->primaryRot) { case 0: case 2: if (blk->yMin < yMax && blk->yMax > yMin) { if (blk->xMin < xMin) { newPriMin = blk->xMax; gotPriMin = gTrue; } if (blk->xMax > xMax) { newPriMax = blk->xMin; gotPriMax = gTrue; } } break; case 1: case 3: if (blk->xMin < xMax && blk->xMax > xMin) { if (blk->yMin < yMin) { newPriMin = blk->yMax; gotPriMin = gTrue; } if (blk->yMax > yMax) { newPriMax = blk->yMin; gotPriMax = gTrue; } } break; } if (gotPriMin) { if (newPriMin > xMin) { newPriMin = xMin; } if (newPriMin > priMin) { priMin = newPriMin; } } if (gotPriMax) { if (newPriMax < xMax) { newPriMax = xMax; } if (newPriMax < priMax) { priMax = newPriMax; } }}int TextBlock::cmpXYPrimaryRot(const void *p1, const void *p2) { TextBlock *blk1 = *(TextBlock **)p1; TextBlock *blk2 = *(TextBlock **)p2; double cmp; cmp = 0; // make gcc happy switch (blk1->page->primaryRot) { case 0: if ((cmp = blk1->xMin - blk2->xMin) == 0) { cmp = blk1->yMin - blk2->yMin; } break; case 1: if ((cmp = blk1->yMin - blk2->yMin) == 0) { cmp = blk2->xMax - blk1->xMax; } break; case 2: if ((cmp = blk2->xMax - blk1->xMax) == 0) { cmp = blk2->yMin - blk1->yMin; } break; case 3: if ((cmp = blk2->yMax - blk1->yMax) == 0) { cmp = blk1->xMax - blk2->xMax; } break; } return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;}int TextBlock::cmpYXPrimaryRot(const void *p1, const void *p2) { TextBlock *blk1 = *(TextBlock **)p1; TextBlock *blk2 = *(TextBlock **)p2; double cmp; cmp = 0; // make gcc happy switch (blk1->page->primaryRot) { case 0: if ((cmp = blk1->yMin - blk2->yMin) == 0) { cmp = blk1->xMin - blk2->xMin; } break; case 1: if ((cmp = blk2->xMax - blk1->xMax) == 0) { cmp = blk1->yMin - blk2->yMin; } break; case 2: if ((cmp = blk2->yMin - blk1->yMin) == 0) { cmp = blk2->xMax - blk1->xMax; } break; case 3: if ((cmp = blk1->xMax - blk2->xMax) == 0) { cmp = blk2->yMax - blk1->yMax; } break; } return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;}int TextBlock::primaryCmp(TextBlock *blk) { double cmp; cmp = 0; // make gcc happy switch (rot) { case 0: cmp = xMin - blk->xMin; break; case 1: cmp = yMin - blk->yMin; break; case 2: cmp = blk->xMax - xMax; break; case 3: cmp = blk->yMax - yMax; break; } return cmp < 0 ? -1 : cmp > 0 ? 1 : 0;}double TextBlock::secondaryDelta(TextBlock *blk) { double delta; delta = 0; // make gcc happy switch (rot) { case 0: delta = blk->yMin - yMax; break; case 1:
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -