makerow.cpp

来自「一个google的OCR源码」· C++ 代码 · 共 1,682 行 · 第 1/5 页

CPP
1,682
字号
  //get real membership  if (block->get_rows ()->length () == 0)    return;  fit_parallel_rows(block,                    gradient,                    rotation,                    block_edge,                    textord_show_expanded_rows &&testing_on);  if (!textord_new_initial_xheight)    compute_row_stats(block, textord_show_expanded_rows &&testing_on);  row_it.move_to_last ();  do {    row = row_it.data ();    y_max = row->max_y ();       //get current limits    y_min = row->min_y ();    y_bottom = row->intercept () - block->line_size * textord_expansion_factor *      textord_merge_desc;    y_top = row->intercept () + block->line_size * textord_expansion_factor *      (textord_merge_x + textord_merge_asc);    if (y_min > y_bottom) {      //expansion allowed      if (textord_show_expanded_rows && testing_on)        tprintf("Expanding bottom of row at %f from %f to %f\n",                row->intercept(), y_min, y_bottom);                                 //expandable      swallowed_row = TRUE;      while (swallowed_row && !row_it.at_last ()) {        swallowed_row = FALSE;                                 //get next one        test_row = row_it.data_relative (1);                                 //overlaps space        if (test_row->max_y () > y_bottom) {          if (test_row->min_y () > y_bottom) {            if (textord_show_expanded_rows && testing_on)              tprintf("Eating row below at %f\n", test_row->intercept());            row_it.forward ();#ifndef GRAPHICS_DISABLED            if (textord_show_expanded_rows && testing_on)              plot_parallel_row(test_row,                                gradient,                                block_edge,                                ScrollView::WHITE,                                rotation);#endif            blob_it.set_to_list (row->blob_list ());            blob_it.add_list_after (test_row->blob_list ());                                 //swallow complete row            delete row_it.extract ();            row_it.backward ();            swallowed_row = TRUE;          }          else if (test_row->max_y () < y_min) {                                 //shorter limit            y_bottom = test_row->max_y ();            if (textord_show_expanded_rows && testing_on)              tprintf("Truncating limit to %f due to touching row at %f\n",                      y_bottom, test_row->intercept());          }          else {            y_bottom = y_min;    //can't expand it            if (textord_show_expanded_rows && testing_on)              tprintf("Not expanding limit beyond %f due to touching row at %f\n",                      y_bottom, test_row->intercept());          }        }      }      y_min = y_bottom;          //expand it    }    if (y_max < y_top) {         //expansion allowed      if (textord_show_expanded_rows && testing_on)        tprintf("Expanding top of row at %f from %f to %f\n",                row->intercept(), y_max, y_top);      swallowed_row = TRUE;      while (swallowed_row && !row_it.at_first ()) {        swallowed_row = FALSE;                                 //get one above        test_row = row_it.data_relative (-1);        if (test_row->min_y () < y_top) {          if (test_row->max_y () < y_top) {            if (textord_show_expanded_rows && testing_on)              tprintf("Eating row above at %f\n", test_row->intercept());            row_it.backward ();            blob_it.set_to_list (row->blob_list ());#ifndef GRAPHICS_DISABLED            if (textord_show_expanded_rows && testing_on)              plot_parallel_row(test_row,                                gradient,                                block_edge,                                ScrollView::WHITE,                                rotation);#endif            blob_it.add_list_after (test_row->blob_list ());                                 //swallow complete row            delete row_it.extract ();            row_it.forward ();            swallowed_row = TRUE;          }          else if (test_row->min_y () < y_max) {                                 //shorter limit            y_top = test_row->min_y ();            if (textord_show_expanded_rows && testing_on)              tprintf("Truncating limit to %f due to touching row at %f\n",                      y_top, test_row->intercept());          }          else {            y_top = y_max;       //can't expand it            if (textord_show_expanded_rows && testing_on)              tprintf("Not expanding limit beyond %f due to touching row at %f\n",                      y_top, test_row->intercept());          }        }      }      y_max = y_top;    }                                 //new limits    row->set_limits (y_min, y_max);    row_it.backward ();  }  while (!row_it.at_last ());}/********************************************************************** * adjust_row_limits * * Change the limits of rows to suit the default fractions. **********************************************************************/void adjust_row_limits(                 //tidy limits                       TO_BLOCK *block  //block to do                      ) {  TO_ROW *row;                   //current row  float size;                    //size of row  float ymax;                    //top of row  float ymin;                    //bottom of row  TO_ROW_IT row_it = block->get_rows ();  if (textord_show_expanded_rows)    tprintf("Adjusting row limits for block(%d,%d)\n",            block->block->bounding_box().left(),            block->block->bounding_box().top());  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {    row = row_it.data ();    size = row->max_y () - row->min_y ();    if (textord_show_expanded_rows)      tprintf("Row at %f has min %f, max %f, size %f\n",              row->intercept(), row->min_y(), row->max_y(), size);    size /= textord_merge_x + textord_merge_asc + textord_merge_desc;    ymax = size * (textord_merge_x + textord_merge_asc);    ymin = -size * textord_merge_desc;    row->set_limits (row->intercept () + ymin, row->intercept () + ymax);    row->merged = FALSE;  }}/********************************************************************** * compute_row_stats * * Compute the linespacing and offset. **********************************************************************/void compute_row_stats(                  //find lines                       TO_BLOCK *block,  //block to do                       BOOL8 testing_on  //correct orientation                      ) {  inT32 row_index;               //of median  TO_ROW *row;                   //current row  TO_ROW *prev_row;              //previous row  float iqr;                     //inter quartile range  TO_ROW_IT row_it = block->get_rows ();                                 //number of rows  inT16 rowcount = row_it.length ();  TO_ROW **rows;                 //for choose nth  rows = (TO_ROW **) alloc_mem (rowcount * sizeof (TO_ROW *));  if (rows == NULL)    MEMORY_OUT.error ("compute_row_stats", ABORT, NULL);  rowcount = 0;  prev_row = NULL;  row_it.move_to_last ();        //start at bottom  do {    row = row_it.data ();    if (prev_row != NULL) {      rows[rowcount++] = prev_row;      prev_row->spacing = row->intercept () - prev_row->intercept ();      if (testing_on)        tprintf ("Row at %g yields spacing of %g\n",          row->intercept (), prev_row->spacing);    }    prev_row = row;    row_it.backward ();  }  while (!row_it.at_last ());  block->key_row = prev_row;  block->baseline_offset =    fmod (prev_row->parallel_c (), block->line_spacing);  if (testing_on)    tprintf ("Blob based spacing=(%g,%g), offset=%g",      block->line_size, block->line_spacing, block->baseline_offset);  if (rowcount > 0) {    row_index = choose_nth_item (rowcount * 3 / 4, rows, rowcount,      sizeof (TO_ROW *), row_spacing_order);    iqr = rows[row_index]->spacing;    row_index = choose_nth_item (rowcount / 4, rows, rowcount,      sizeof (TO_ROW *), row_spacing_order);    iqr -= rows[row_index]->spacing;    row_index = choose_nth_item (rowcount / 2, rows, rowcount,      sizeof (TO_ROW *), row_spacing_order);    block->key_row = rows[row_index];    if (testing_on)      tprintf (" row based=%g(%g)", rows[row_index]->spacing, iqr);    if (rowcount > 2    && iqr < rows[row_index]->spacing * textord_linespace_iqrlimit) {      if (!textord_new_initial_xheight) {        if (rows[row_index]->spacing < block->line_spacing          && rows[row_index]->spacing > block->line_size)          //within range          block->line_size = rows[row_index]->spacing;        //spacing=size        else if (rows[row_index]->spacing > block->line_spacing)          block->line_size = block->line_spacing;        //too big so use max      }      else {        if (rows[row_index]->spacing < block->line_spacing)          block->line_size = rows[row_index]->spacing;        else          block->line_size = block->line_spacing;        //too big so use max      }      if (block->line_size < textord_min_xheight)        block->line_size = (float) textord_min_xheight;      block->line_spacing = rows[row_index]->spacing;      block->max_blob_size =        block->line_spacing * textord_excess_blobsize;    }    block->baseline_offset = fmod (rows[row_index]->intercept (),      block->line_spacing);  }  if (testing_on)    tprintf ("\nEstimate line size=%g, spacing=%g, offset=%g\n",      block->line_size, block->line_spacing, block->baseline_offset);  free_mem(rows);}/********************************************************************** * compute_block_xheight * * Compute the xheight of the individual rows, then correlate them * and interpret ascenderless lines, correcting xheights. **********************************************************************/void compute_block_xheight(                  //find lines                           TO_BLOCK *block,  //block to do                           float gradient    //global skew                          ) {  TO_ROW *row;                   //current row  int xh_count, desc_count;      //no of samples  float block_median;            //median blob size  int asc_count, cap_count;  inT32 min_size, max_size;      //limits on xheight  inT32 evidence;                //no of samples on row  float xh_sum, desc_sum;        //for averages  float asc_sum, cap_sum;  TO_ROW_IT row_it = block->get_rows ();  STATS row_heights;             //block evidence  if (row_it.empty ())    return;                      //no rows  block_median = median_block_xheight (block, gradient);  block_median *= 2;  if (block_median < block->line_size)    block_median = block->line_size;  //      tprintf("Block median=%g, linesize=%g\n",  //              block_median,block->line_size);  max_size = (inT32) ceil (block_median);  min_size = (inT32) floor (block_median * textord_minxh);  row_heights.set_range (min_size, max_size + 1);  xh_count = desc_count = asc_count = cap_count = 0;  xh_sum = desc_sum = asc_sum = cap_sum = 0.0f;  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {    row = row_it.data ();    evidence = compute_row_xheight (row, min_size, max_size, gradient);    if (row->xheight > 0 && row->ascrise > 0) {      row_heights.add ((inT32) row->xheight, evidence);      xh_count += evidence;      asc_sum += row->ascrise;      asc_count++;    }    else if (row->xheight > 0) {      cap_sum += row->xheight;   //assume just caps      cap_count++;    }    if (row->descdrop != 0) {      desc_sum += row->descdrop;      desc_count++;    }  }  if (xh_count > 0) {                                 //median    xh_sum = row_heights.ile (0.5);    asc_sum /= asc_count;  }  else if (cap_count > 0) {    cap_sum /= cap_count;        //must assume caps    xh_sum =      cap_sum * textord_merge_x / (textord_merge_x + textord_merge_asc);    asc_sum =      cap_sum * textord_merge_asc / (textord_merge_x + textord_merge_asc);  }  else {                                 //default sizes    xh_sum = block_median * textord_merge_x;    asc_sum = block_median * textord_merge_asc;  }  if (desc_count > 0) {    desc_sum /= desc_count;  }  else {    desc_sum = xh_sum * textord_merge_desc / textord_merge_x;  }  //      tprintf("Block average x height=%g, count=%d, asc=%g/%d, desc=%g/%d,cap=%g/%d\n",  //              xh_sum,xh_count,asc_sum,asc_count,desc_sum,desc_count,  //              cap_sum,cap_count);  if (xh_sum < textord_min_xheight)    xh_sum = (float) textord_min_xheight;  block->xheight = xh_sum;  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {    correct_row_xheight (row_it.data (), xh_sum, asc_sum, desc_sum);  }}/********************************************************************** * median_block_xheight *

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?