oldbasel.cpp
来自「一个google的OCR源码」· C++ 代码 · 共 1,690 行 · 第 1/5 页
CPP
1,690 行
* find_lesser_parts * * Average the step from the spline for the other partitions * and find the commonest partition which has a descender. **********************************************************************/voidfind_lesser_parts ( //get descendersTO_ROW * row, //row to processTBOX blobcoords[], //bounding boxesint blobcount, /*no of blobs */char partids[], /*partition of each blob */int partsizes[], /*size of each part */int partcount, /*no of partitions */int bestpart /*biggest partition */) { register int blobindex; /*index of blob */ register int partition; /*current partition */ int xcentre; /*centre of blob */ int poscount; /*count of best up step */ int negcount; /*count of best down step */ float partsteps[MAXPARTS]; /*average step to part */ float bestpos; /*best up step */ float bestneg; /*best down step */ int runlength; /*length of bad run */ int biggestrun; /*biggest bad run */ biggestrun = 0; for (partition = 0; partition < partcount; partition++) partsteps[partition] = 0.0; /*zero accumulators */ for (runlength = 0, blobindex = 0; blobindex < blobcount; blobindex++) { xcentre = (blobcoords[blobindex].left () + blobcoords[blobindex].right ()) >> 1; /*in other parts */ if (partids[blobindex] != bestpart) { runlength++; /*run of non bests */ if (runlength > biggestrun) biggestrun = runlength; partsteps[partids[blobindex]] += blobcoords[blobindex].bottom () - row->baseline.y (xcentre); } else runlength = 0; } if (biggestrun > MAXBADRUN) row->xheight = -1.0f; /*failed */ else row->xheight = 1.0f; /*success */ poscount = negcount = 0; bestpos = bestneg = 0.0; /*no step yet */ for (partition = 0; partition < partcount; partition++) { if (partition != bestpart) { //by jetsoft divide by zero possible if (partsizes[partition]==0) partsteps[partition]=0; else partsteps[partition] /= partsizes[partition]; // if (partsteps[partition] >= MINASCRISE && partsizes[partition] > poscount) { /*ascender rise */ bestpos = partsteps[partition]; /*2nd most popular */ poscount = partsizes[partition]; } if (partsteps[partition] <= -MINASCRISE && partsizes[partition] > negcount) { /*ascender rise */ bestneg = partsteps[partition]; /*2nd most popular */ negcount = partsizes[partition]; } } } /*average x-height */ partsteps[bestpart] /= blobcount; row->descdrop = bestneg;}/********************************************************************** * old_first_xheight * * Makes an x-height spline by copying the baseline and shifting it. * It estimates the x-height across the line to use as the shift. * It also finds the ascender height if it can. **********************************************************************/voidold_first_xheight ( //the wiseowl wayTO_ROW * row, /*current row */TBOX blobcoords[], /*blob bounding boxes */int initialheight, //initial guessint blobcount, /*blobs in blobcoords */QSPLINE * baseline, /*established */float jumplimit /*min ascender height */) { register int blobindex; /*current blob */ /*height statistics */ STATS heightstat (0, MAXHEIGHT); int height; /*height of blob */ int xcentre; /*centre of blob */ int lineheight; /*approx xheight */ float ascenders; /*ascender sum */ int asccount; /*no of ascenders */ float xsum; /*xheight sum */ int xcount; /*xheight count */ register float diff; /*height difference */ if (blobcount > 1) { for (blobindex = 0; blobindex < blobcount; blobindex++) { xcentre = (blobcoords[blobindex].left () + blobcoords[blobindex].right ()) / 2; /*height of blob */ height = (int) (blobcoords[blobindex].top () - baseline->y (xcentre) + 0.5); if (height > initialheight * oldbl_xhfract && height > textord_min_xheight) heightstat.add (height, 1); } if (heightstat.get_total () > 3) { lineheight = (int) heightstat.ile (0.25); if (lineheight <= 0) lineheight = (int) heightstat.ile (0.5); } else lineheight = initialheight; } else { lineheight = (int) (blobcoords[0].top () - baseline->y ((blobcoords[0].left () + blobcoords[0].right ()) / 2) + 0.5); } xsum = 0.0f; xcount = 0; for (ascenders = 0.0f, asccount = 0, blobindex = 0; blobindex < blobcount; blobindex++) { xcentre = (blobcoords[blobindex].left () + blobcoords[blobindex].right ()) / 2; diff = blobcoords[blobindex].top () - baseline->y (xcentre); /*is it ascender */ if (diff > lineheight + jumplimit) { ascenders += diff; asccount++; /*count ascenders */ } else if (diff > lineheight - jumplimit) { xsum += diff; /*mean xheight */ xcount++; } } if (xcount > 0) xsum /= xcount; /*average xheight */ else xsum = (float) lineheight; /*guess it */ row->xheight *= xsum; if (asccount > 0) row->ascrise = ascenders / asccount - xsum; else row->ascrise = 0.0f; /*had none */ if (row->xheight == 0) row->xheight = -1.0f;}/********************************************************************** * make_first_xheight * * Makes an x-height spline by copying the baseline and shifting it. * It estimates the x-height across the line to use as the shift. * It also finds the ascender height if it can. **********************************************************************/voidmake_first_xheight ( //find xheightTO_ROW * row, /*current row */TBOX blobcoords[], /*blob bounding boxes */int lineheight, //initial guessint init_lineheight, //block level guessint blobcount, /*blobs in blobcoords */QSPLINE * baseline, /*established */float jumplimit /*min ascender height */) { STATS heightstat (0, HEIGHTBUCKETS); int lefts[HEIGHTBUCKETS]; int rights[HEIGHTBUCKETS]; int modelist[MODENUM]; int blobindex; int mode_count; //blobs to count in thr int sign_bit; int mode_threshold; const int kBaselineTouch = 2; // This really should change with resolution. const int kGoodStrength = 8; // Strength of baseline-touching heights. const float kMinHeight = 0.25; // Min fraction of lineheight to use. sign_bit = row->xheight > 0 ? 1 : -1; memset(lefts, 0, HEIGHTBUCKETS * sizeof(lefts[0])); memset(rights, 0, HEIGHTBUCKETS * sizeof(rights[0])); mode_count = 0; for (blobindex = 0; blobindex < blobcount; blobindex++) { int xcenter = (blobcoords[blobindex].left () + blobcoords[blobindex].right ()) / 2; float base = baseline->y(xcenter); float bottomdiff = fabs(base - blobcoords[blobindex].bottom()); int strength = textord_ocropus_mode && bottomdiff <= kBaselineTouch ? kGoodStrength : 1; int height = static_cast<int>(blobcoords[blobindex].top () - base + 0.5); if (blobcoords[blobindex].height () > init_lineheight * kMinHeight) { if (height > lineheight * oldbl_xhfract && height > textord_min_xheight) { heightstat.add (height, strength); if (height < HEIGHTBUCKETS) { if (xcenter > rights[height]) rights[height] = xcenter; if (xcenter > 0 && (lefts[height] == 0 || xcenter < lefts[height])) lefts[height] = xcenter; } } mode_count += strength; } } mode_threshold = (int) (blobcount * 0.1); if (oldbl_dot_error_size > 1 || oldbl_xhfix) mode_threshold = (int) (mode_count * 0.1); if (textord_oldbl_debug) { tprintf ("blobcount=%d, mode_count=%d, mode_t=%d\n", blobcount, mode_count, mode_threshold); } find_top_modes(&heightstat, HEIGHTBUCKETS, modelist, MODENUM); if (textord_oldbl_debug) { for (blobindex = 0; blobindex < MODENUM; blobindex++) tprintf ("mode[%d]=%d ", blobindex, modelist[blobindex]); tprintf ("\n"); } pick_x_height(row, modelist, lefts, rights, &heightstat, mode_threshold); if (textord_oldbl_debug) tprintf ("Output xheight=%g\n", row->xheight); if (row->xheight < 0 && textord_oldbl_debug) tprintf ("warning: Row Line height < 0; %4.2f\n", row->xheight); if (sign_bit < 0) row->xheight = -row->xheight;}/********************************************************************** * find_top_modes * * Fill the input array with the indices of the top ten modes of the * input distribution. **********************************************************************/const int kMinModeFactorOcropus = 32;const int kMinModeFactor = 12;voidfind_top_modes ( //get modesSTATS * stats, //stats to hackint statnum, //no of pilesint modelist[], int modenum //no of modes to get) { int mode_count; int last_i = 0; int last_max = MAX_INT32; int i; int mode; int total_max = 0; int mode_factor = textord_ocropus_mode ? kMinModeFactorOcropus : kMinModeFactor; for (mode_count = 0; mode_count < modenum; mode_count++) { mode = 0; for (i = 0; i < statnum; i++) { if (stats->pile_count (i) > stats->pile_count (mode)) { if ((stats->pile_count (i) < last_max) || ((stats->pile_count (i) == last_max) && (i > last_i))) { mode = i; } } } last_i = mode; last_max = stats->pile_count (last_i); total_max += last_max; if (last_max <= total_max / mode_factor) mode = 0; modelist[mode_count] = mode; }}/********************************************************************** * pick_x_height * * Choose based on the height modes the best x height value. **********************************************************************/void pick_x_height(TO_ROW * row, //row to do int modelist[], int lefts[], int rights[], STATS * heightstat, int mode_threshold) { int x; int y; int z; float ratio; int found_one_bigger = FALSE; int best_x_height = 0; int best_asc = 0; int num_in_best; for (x = 0; x < MODENUM; x++) { for (y = 0; y < MODENUM; y++) { /* Check for two modes */ if (modelist[x] && modelist[y] && heightstat->pile_count (modelist[x]) > mode_threshold && (!textord_ocropus_mode || MIN(rights[modelist[x]], rights[modelist[y]]) > MAX(lefts[modelist[x]], lefts[modelist[y]]))) { ratio = (float) modelist[y] / (float) modelist[x]; if (1.2 < ratio && ratio < 1.8) { /* Two modes found */ best_x_height = modelist[x]; num_in_best = heightstat->pile_count (modelist[x]); /* Try to get one higher */ do { found_one_bigger = FALSE; for (z = 0; z < MODENUM; z++) { if (modelist[z] == best_x_height + 1 && (!textord_ocropus_mode || MIN(rights[modelist[x]], rights[modelist[y]]) > MAX(lefts[modelist[x]], lefts[modeli
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?