oldbasel.cpp

来自「一个google的OCR源码」· C++ 代码 · 共 1,690 行 · 第 1/5 页

CPP
1,690
字号
 * find_lesser_parts * * Average the step from the spline for the other partitions * and find the commonest partition which has a descender. **********************************************************************/voidfind_lesser_parts (              //get descendersTO_ROW * row,                    //row to processTBOX blobcoords[],                //bounding boxesint blobcount,                   /*no of blobs */char partids[],                  /*partition of each blob */int partsizes[],                 /*size of each part */int partcount,                   /*no of partitions */int bestpart                     /*biggest partition */) {  register int blobindex;        /*index of blob */  register int partition;        /*current partition */  int xcentre;                   /*centre of blob */  int poscount;                  /*count of best up step */  int negcount;                  /*count of best down step */  float partsteps[MAXPARTS];     /*average step to part */  float bestpos;                 /*best up step */  float bestneg;                 /*best down step */  int runlength;                 /*length of bad run */  int biggestrun;                /*biggest bad run */  biggestrun = 0;  for (partition = 0; partition < partcount; partition++)    partsteps[partition] = 0.0;  /*zero accumulators */  for (runlength = 0, blobindex = 0; blobindex < blobcount; blobindex++) {    xcentre = (blobcoords[blobindex].left ()      + blobcoords[blobindex].right ()) >> 1;                                 /*in other parts */    if (partids[blobindex] != bestpart) {      runlength++;               /*run of non bests */      if (runlength > biggestrun)        biggestrun = runlength;      partsteps[partids[blobindex]] += blobcoords[blobindex].bottom ()        - row->baseline.y (xcentre);    }    else      runlength = 0;  }  if (biggestrun > MAXBADRUN)    row->xheight = -1.0f;        /*failed */  else    row->xheight = 1.0f;         /*success */  poscount = negcount = 0;  bestpos = bestneg = 0.0;       /*no step yet */  for (partition = 0; partition < partcount; partition++) {    if (partition != bestpart) {	//by jetsoft divide by zero possible		if (partsizes[partition]==0)		partsteps[partition]=0;       else		partsteps[partition] /= partsizes[partition];	//      if (partsteps[partition] >= MINASCRISE      && partsizes[partition] > poscount) {                                 /*ascender rise */        bestpos = partsteps[partition];                                 /*2nd most popular */        poscount = partsizes[partition];      }      if (partsteps[partition] <= -MINASCRISE      && partsizes[partition] > negcount) {                                 /*ascender rise */        bestneg = partsteps[partition];                                 /*2nd most popular */        negcount = partsizes[partition];      }    }  }                                 /*average x-height */  partsteps[bestpart] /= blobcount;  row->descdrop = bestneg;}/********************************************************************** * old_first_xheight * * Makes an x-height spline by copying the baseline and shifting it. * It estimates the x-height across the line to use as the shift. * It also finds the ascender height if it can. **********************************************************************/voidold_first_xheight (              //the wiseowl wayTO_ROW * row,                    /*current row */TBOX blobcoords[],                /*blob bounding boxes */int initialheight,               //initial guessint blobcount,                   /*blobs in blobcoords */QSPLINE * baseline,              /*established */float jumplimit                  /*min ascender height */) {  register int blobindex;        /*current blob */                                 /*height statistics */  STATS heightstat (0, MAXHEIGHT);  int height;                    /*height of blob */  int xcentre;                   /*centre of blob */  int lineheight;                /*approx xheight */  float ascenders;               /*ascender sum */  int asccount;                  /*no of ascenders */  float xsum;                    /*xheight sum */  int xcount;                    /*xheight count */  register float diff;           /*height difference */  if (blobcount > 1) {    for (blobindex = 0; blobindex < blobcount; blobindex++) {      xcentre = (blobcoords[blobindex].left ()        + blobcoords[blobindex].right ()) / 2;                                 /*height of blob */      height = (int) (blobcoords[blobindex].top () - baseline->y (xcentre) + 0.5);      if (height > initialheight * oldbl_xhfract        && height > textord_min_xheight)        heightstat.add (height, 1);    }    if (heightstat.get_total () > 3) {      lineheight = (int) heightstat.ile (0.25);      if (lineheight <= 0)        lineheight = (int) heightstat.ile (0.5);    }    else      lineheight = initialheight;  }  else {    lineheight = (int) (blobcoords[0].top ()      - baseline->y ((blobcoords[0].left ()      + blobcoords[0].right ()) / 2) +      0.5);  }  xsum = 0.0f;  xcount = 0;  for (ascenders = 0.0f, asccount = 0, blobindex = 0; blobindex < blobcount;  blobindex++) {    xcentre = (blobcoords[blobindex].left ()      + blobcoords[blobindex].right ()) / 2;    diff = blobcoords[blobindex].top () - baseline->y (xcentre);                                 /*is it ascender */    if (diff > lineheight + jumplimit) {      ascenders += diff;      asccount++;                /*count ascenders */    }    else if (diff > lineheight - jumplimit) {      xsum += diff;              /*mean xheight */      xcount++;    }  }  if (xcount > 0)    xsum /= xcount;              /*average xheight */  else    xsum = (float) lineheight;   /*guess it */  row->xheight *= xsum;  if (asccount > 0)    row->ascrise = ascenders / asccount - xsum;  else    row->ascrise = 0.0f;         /*had none */  if (row->xheight == 0)    row->xheight = -1.0f;}/********************************************************************** * make_first_xheight * * Makes an x-height spline by copying the baseline and shifting it. * It estimates the x-height across the line to use as the shift. * It also finds the ascender height if it can. **********************************************************************/voidmake_first_xheight (             //find xheightTO_ROW * row,                    /*current row */TBOX blobcoords[],                /*blob bounding boxes */int lineheight,                  //initial guessint init_lineheight,             //block level guessint blobcount,                   /*blobs in blobcoords */QSPLINE * baseline,              /*established */float jumplimit                  /*min ascender height */) {  STATS heightstat (0, HEIGHTBUCKETS);  int lefts[HEIGHTBUCKETS];  int rights[HEIGHTBUCKETS];  int modelist[MODENUM];  int blobindex;  int mode_count;                //blobs to count in thr  int sign_bit;  int mode_threshold;  const int kBaselineTouch = 2;  // This really should change with resolution.  const int kGoodStrength = 8;  // Strength of baseline-touching heights.  const float kMinHeight = 0.25;  // Min fraction of lineheight to use.  sign_bit = row->xheight > 0 ? 1 : -1;  memset(lefts, 0, HEIGHTBUCKETS * sizeof(lefts[0]));  memset(rights, 0, HEIGHTBUCKETS * sizeof(rights[0]));  mode_count = 0;  for (blobindex = 0; blobindex < blobcount; blobindex++) {    int xcenter = (blobcoords[blobindex].left () +        blobcoords[blobindex].right ()) / 2;    float base = baseline->y(xcenter);    float bottomdiff = fabs(base - blobcoords[blobindex].bottom());    int strength = textord_ocropus_mode &&                   bottomdiff <= kBaselineTouch ? kGoodStrength : 1;    int height = static_cast<int>(blobcoords[blobindex].top () - base + 0.5);    if (blobcoords[blobindex].height () > init_lineheight * kMinHeight) {      if (height > lineheight * oldbl_xhfract        && height > textord_min_xheight) {        heightstat.add (height, strength);        if (height < HEIGHTBUCKETS) {          if (xcenter > rights[height])            rights[height] = xcenter;          if (xcenter > 0 && (lefts[height] == 0 || xcenter < lefts[height]))            lefts[height] = xcenter;        }      }      mode_count += strength;    }  }  mode_threshold = (int) (blobcount * 0.1);  if (oldbl_dot_error_size > 1 || oldbl_xhfix)    mode_threshold = (int) (mode_count * 0.1);  if (textord_oldbl_debug) {    tprintf ("blobcount=%d, mode_count=%d, mode_t=%d\n",      blobcount, mode_count, mode_threshold);  }  find_top_modes(&heightstat, HEIGHTBUCKETS, modelist, MODENUM);  if (textord_oldbl_debug) {    for (blobindex = 0; blobindex < MODENUM; blobindex++)      tprintf ("mode[%d]=%d ", blobindex, modelist[blobindex]);    tprintf ("\n");  }  pick_x_height(row, modelist, lefts, rights, &heightstat, mode_threshold);  if (textord_oldbl_debug)    tprintf ("Output xheight=%g\n", row->xheight);  if (row->xheight < 0 && textord_oldbl_debug)    tprintf ("warning: Row Line height < 0; %4.2f\n", row->xheight);  if (sign_bit < 0)    row->xheight = -row->xheight;}/********************************************************************** * find_top_modes * * Fill the input array with the indices of the top ten modes of the * input distribution. **********************************************************************/const int kMinModeFactorOcropus = 32;const int kMinModeFactor = 12;voidfind_top_modes (                 //get modesSTATS * stats,                   //stats to hackint statnum,                     //no of pilesint modelist[], int modenum      //no of modes to get) {  int mode_count;  int last_i = 0;  int last_max = MAX_INT32;  int i;  int mode;  int total_max = 0;  int mode_factor = textord_ocropus_mode ?                    kMinModeFactorOcropus : kMinModeFactor;  for (mode_count = 0; mode_count < modenum; mode_count++) {    mode = 0;    for (i = 0; i < statnum; i++) {      if (stats->pile_count (i) > stats->pile_count (mode)) {        if ((stats->pile_count (i) < last_max) ||        ((stats->pile_count (i) == last_max) && (i > last_i))) {          mode = i;        }      }    }    last_i = mode;    last_max = stats->pile_count (last_i);    total_max += last_max;    if (last_max <= total_max / mode_factor)      mode = 0;    modelist[mode_count] = mode;  }}/********************************************************************** * pick_x_height * * Choose based on the height modes the best x height value. **********************************************************************/void pick_x_height(TO_ROW * row,                    //row to do                   int modelist[],                   int lefts[], int rights[],                   STATS * heightstat,                   int mode_threshold) {  int x;  int y;  int z;  float ratio;  int found_one_bigger = FALSE;  int best_x_height = 0;  int best_asc = 0;  int num_in_best;  for (x = 0; x < MODENUM; x++) {    for (y = 0; y < MODENUM; y++) {      /* Check for two modes */      if (modelist[x] && modelist[y] &&          heightstat->pile_count (modelist[x]) > mode_threshold &&          (!textord_ocropus_mode ||           MIN(rights[modelist[x]], rights[modelist[y]]) >           MAX(lefts[modelist[x]], lefts[modelist[y]]))) {        ratio = (float) modelist[y] / (float) modelist[x];        if (1.2 < ratio && ratio < 1.8) {          /* Two modes found */          best_x_height = modelist[x];          num_in_best = heightstat->pile_count (modelist[x]);          /* Try to get one higher */          do {            found_one_bigger = FALSE;            for (z = 0; z < MODENUM; z++) {              if (modelist[z] == best_x_height + 1 &&                  (!textord_ocropus_mode ||                    MIN(rights[modelist[x]], rights[modelist[y]]) >                    MAX(lefts[modelist[x]], lefts[modeli

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?