makerow.cpp

来自「一个google的OCR源码」· C++ 代码 · 共 1,682 行 · 第 1/5 页

CPP
1,682
字号
 * Compute the linespacing and offset. **********************************************************************/float median_block_xheight(                  //find lines                           TO_BLOCK *block,  //block to do                           float gradient    //global skew                          ) {  TO_ROW *row;                   //current row  float result;                  //output size  float xcentre;                 //centre of blob  TO_ROW_IT row_it = block->get_rows ();  BLOBNBOX_IT blob_it;  BLOBNBOX *blob;                //current blob  float *heights;                //for choose nth  inT32 blob_count;              //blobs in block  inT32 blob_index;              //current blob  blob_count = 0;  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())    blob_count += row_it.data ()->blob_list ()->length ();  heights = (float *) alloc_mem (blob_count * sizeof (float));  if (heights == NULL)    MEMORY_OUT.error ("compute_row_stats", ABORT, NULL);  blob_index = 0;  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {    row = row_it.data ();    blob_it.set_to_list (row->blob_list ());    for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();    blob_it.forward ()) {      blob = blob_it.data ();      if (!blob->joined_to_prev ()) {        xcentre =          (blob->bounding_box ().left () +          blob->bounding_box ().right ()) / 2.0f;        heights[blob_index] =          blob->bounding_box ().top () - gradient * xcentre -          row->parallel_c ();        if (heights[blob_index] > 0)          blob_index++;      }    }  }  ASSERT_HOST (blob_index > 0);  //dont expect 0  blob_count = blob_index;  blob_index = choose_nth_item (blob_count / 2, heights, blob_count);  result = heights[blob_index];  free_mem(heights);  return result;}/********************************************************************** * compute_row_xheight * * Estimate the xheight of this row. * Compute the ascender rise and descender drop at the same time. **********************************************************************/inT32 compute_row_xheight(                   //find lines                          TO_ROW *row,       //row to do                          inT32 min_height,  //min xheight                          inT32 max_height,  //max xheight                          float gradient     //global skew                         ) {  BOOL8 in_best_pile;            //control of mode size  inT32 prev_size;               //previous size  float xcentre;                 //centre of blob  float height;                  //height of blob  BLOBNBOX_IT blob_it = row->blob_list ();  BLOBNBOX *blob;                //current blob  inT32 blob_count;              //blobs in block  inT32 x;                       //xheight index  inT32 asc;                     //ascender index  inT32 blob_index;              //current blob  inT32 mode_count;              //no of modes  inT32 best_count;              //count of best x so far  float ratio;                   //size ratio  inT32 modes[MAX_HEIGHT_MODES]; //biggest piles  STATS heights (min_height, max_height + 1);  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {    blob = blob_it.data ();    if (!blob->joined_to_prev ()) {      xcentre =        (blob->bounding_box ().left () +        blob->bounding_box ().right ()) / 2.0f;      height = blob->bounding_box ().top ();      if (textord_fix_xheight_bug)        height -= row->baseline.y (xcentre);      else        height -= gradient * xcentre + row->parallel_c ();      if (height >= min_height && height <= max_height        && (!textord_xheight_tweak || height > textord_min_xheight))        heights.add ((inT32) floor (height + 0.5), 1);    }  }  blob_index = heights.mode ();  //find mode                                 //get count of mode  blob_count = heights.pile_count (blob_index);  if (textord_debug_xheights)    tprintf ("min_height=%d, max_height=%d, mode=%d, count=%d, total=%d,%d\n",      min_height, max_height, blob_index, blob_count,      heights.get_total (), row->blob_list ()->length ());  row->ascrise = 0.0f;  row->xheight = 0.0f;  row->descdrop = 0.0f;          //undefined;  in_best_pile = FALSE;  prev_size = -MAX_INT32;  best_count = 0;  if (blob_count > 0) {                                 //get biggest ones    mode_count = compute_height_modes (&heights, min_height, max_height, modes, MAX_HEIGHT_MODES);    for (x = 0; x < mode_count - 1; x++) {      if (modes[x] != prev_size + 1)        in_best_pile = FALSE;    //had empty height      if (heights.pile_count (modes[x])        >= blob_count * textord_xheight_mode_fraction      && (in_best_pile || heights.pile_count (modes[x]) > best_count)) {        for (asc = x + 1; asc < mode_count; asc++) {          ratio = (float) modes[asc] / modes[x];          if (textord_ascx_ratio_min < ratio            && ratio < textord_ascx_ratio_max            && heights.pile_count (modes[asc])          >= blob_count * textord_ascheight_mode_fraction) {            if (heights.pile_count (modes[x]) > best_count) {              in_best_pile = TRUE;              best_count = heights.pile_count (modes[x]);            }            //                                              tprintf("X=%d, asc=%d, count=%d,  ratio=%g\n",            //                                                      modes[x],modes[asc]-modes[x],            //                                                      heights.pile_count(modes[x]),            //                                                      ratio);            prev_size = modes[x];            row->xheight = (float) modes[x];            row->ascrise = (float) (modes[asc] - modes[x]);          }        }      }    }    if (row->xheight == 0) {                                 //single mode      row->xheight = (float) blob_index;      row->ascrise = 0.0f;      if (textord_debug_xheights)        tprintf ("Single mode xheight set to %g\n", row->xheight);    }    else if (textord_debug_xheights)      tprintf ("Multi-mode xheight set to %g, asc=%g\n",          row->xheight, row->ascrise);    row->descdrop = (float) compute_row_descdrop (row, gradient);    //find descenders  }  return best_count;}/********************************************************************** * compute_row_descdrop * * Estimate the descdrop of this row. **********************************************************************/inT32 compute_row_descdrop(                //find lines                           TO_ROW *row,    //row to do                           float gradient  //global skew                          ) {  inT32 min_height = (inT32) floor (row->xheight * textord_descx_ratio_min);  inT32 max_height = (inT32) floor (row->xheight * textord_descx_ratio_max);  float xcentre;                 //centre of blob  float height;                  //height of blob  BLOBNBOX_IT blob_it = row->blob_list ();  BLOBNBOX *blob;                //current blob  inT32 blob_count;              //blobs in block  inT32 blob_index;              //current blob  STATS heights (min_height, max_height + 1);  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {    blob = blob_it.data ();    if (!blob->joined_to_prev ()) {      xcentre =        (blob->bounding_box ().left () +        blob->bounding_box ().right ()) / 2.0f;      height =        gradient * xcentre + row->parallel_c () -        blob->bounding_box ().bottom ();      if (height >= min_height && height <= max_height)        heights.add ((inT32) floor (height + 0.5), 1);    }  }  blob_index = heights.mode ();  //find mode                                 //get count of mode  blob_count = heights.pile_count (blob_index);  return blob_count > 0 ? -blob_index : 0;}/********************************************************************** * compute_height_modes * * Find the top maxmodes values in the input array and put their * indices in the output in the order in which they occurred. **********************************************************************/inT32 compute_height_modes(                   //find lines                           STATS *heights,    //stats to search                           inT32 min_height,  //bottom of range                           inT32 max_height,  //top of range                           inT32 *modes,      //output array                           inT32 maxmodes     //size of modes                          ) {  inT32 pile_count;              //no in source pile  inT32 src_count;               //no of source entries  inT32 src_index;               //current entry  inT32 least_count;             //height of smalllest  inT32 least_index;             //index of least  inT32 dest_count;              //index in modes  src_count = max_height + 1 - min_height;  dest_count = 0;  least_count = MAX_INT32;  least_index = -1;  for (src_index = 0; src_index < src_count; src_index++) {    pile_count = heights->pile_count (min_height + src_index);    if (pile_count > 0) {      if (dest_count < maxmodes) {        if (pile_count < least_count) {                                 //find smallest in array          least_count = pile_count;          least_index = dest_count;        }        modes[dest_count++] = min_height + src_index;      }      else if (pile_count >= least_count) {        while (least_index < maxmodes - 1) {          modes[least_index] = modes[least_index + 1];          //shuffle up          least_index++;        }                                 //new one on end        modes[maxmodes - 1] = min_height + src_index;        if (pile_count == least_count) {                                 //new smallest          least_index = maxmodes - 1;        }        else {          least_count = heights->pile_count (modes[0]);          least_index = 0;          for (dest_count = 1; dest_count < maxmodes; dest_count++) {            pile_count = heights->pile_count (modes[dest_count]);            if (pile_count < least_count) {                                 //find smallest              least_count = pile_count;              least_index = dest_count;            }          }        }      }    }  }  return dest_count;}/********************************************************************** * correct_row_xheight * * Adjust the xheight etc of this row if not within reasonable limits * of the average for the block. **********************************************************************/void correct_row_xheight(                //fix bad values                         TO_ROW *row,    //row to fix                         float xheight,  //average values                         float ascrise,                         float descdrop) {  if (textord_row_xheights) {    if (row->xheight <= 0)      row->xheight = xheight;    if (row->ascrise < row->xheight * (textord_ascx_ratio_min - 1)) {      if (row->xheight >= xheight * (1 - textord_xheight_error_margin)      && row->xheight <= xheight * (1 + textord_xheight_error_margin)) {        row->all_caps = FALSE;        row->ascrise = ascrise;      }      else if (row->xheight >=        (xheight + ascrise) * (1 - textord_xheight_error_margin)        && row->xheight <=      (xheight + ascrise) * (1 + textord_xheight_error_margin)) {        row->all_caps = TRUE;                                 //it was caps        row->ascrise = row->xheight - xheight;        row->xheight = xheight;      }      else {        row->all_caps = TRUE;        row->ascrise = row->xheight * ascrise / (xheight + ascrise);        row->xheight -= row->ascrise;      }    }    else      row->all_caps = FALSE;    row->ascrise = ascrise;    if (row->descdrop >= -row->xheight * (textord_ascx_ratio_min - 1))      row->descdrop = descdrop;  }  else {    if (row->xheight < xheight * (1 - textord_xheight_error_margin)      || row->xheight > xheight * (1 + textord_xheight_error_margin))      row->xheight = xheight;    //set to average    row->all_caps = row->ascrise <= 0;    if (row->ascrise < ascrise * (1 - textord_xheight_error_margin)      || row->ascrise > ascrise * (1 + textord_xheight_error_margin))      row->ascrise = ascrise;    //set to average    if (row->descdrop < descdrop * (1 - textord_xheight_error_margin)      || row->descdrop > descdrop * (1 + textord_xheight_error_margin))      row->descdrop = descdrop;  //set to average  }}/********************************************************************** * separate_underlines * * Test wide objects for being potential underlines. If they are then * put them in a separate list in the block. **********************************************************************/void separate_underlines(                  //make rough chars                         TO_BLOCK *block,  //block to do                         float gradient,   //skew angle                         FCOORD rotation,  //inverse landscape                         BOOL8 testing_on  //correct orientation                

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?