⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 tospace.cpp

📁 一些OCR的相关资料,希望对研究OCR的朋友有所帮助。
💻 CPP
📖 第 1 页 / 共 5 页
字号:
                                 //iterator  BLOBNBOX_IT blob_it = row->blob_list ();  STATS all_gap_stats (0, MAXSPACING);  STATS cert_space_gap_stats (0, MAXSPACING);  STATS all_space_gap_stats (0, MAXSPACING);  STATS small_gap_stats (0, MAXSPACING);  BOX blob_box;  BOX prev_blob_box;  INT16 gap_width;  INT16 real_space_threshold = 0;  INT16 max = 0;  INT16 index;  INT16 large_gap_count = 0;  BOOL8 suspected_table;  INT32 max_max_nonspace;        //upper bound  BOOL8 good_block_space_estimate = block_space_gap_width > 0;  INT32 end_of_row;  INT32 row_length = 0;  float sane_space;  INT32 sane_threshold;  /* Collect first pass stats for row */  if (!good_block_space_estimate)    block_space_gap_width = INT16 (floor (row->xheight / 2));  if (!row->blob_list ()->empty ()) {    if (tosp_threshold_bias1 > 0)      real_space_threshold =        block_non_space_gap_width +        INT16 (floor (0.5 +        tosp_threshold_bias1 * (block_space_gap_width -        block_non_space_gap_width)));    else      real_space_threshold =     //Old TO method        (block_space_gap_width + block_non_space_gap_width) / 2;    blob_it.set_to_list (row->blob_list ());    blob_it.mark_cycle_pt ();    end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();    if (tosp_use_pre_chopping)      blob_box = box_next_pre_chopped (&blob_it);    else if (tosp_stats_use_xht_gaps)      blob_box = reduced_box_next (row, &blob_it);    else      blob_box = box_next (&blob_it);    row_length = end_of_row - blob_box.left ();    prev_blob_box = blob_box;    while (!blob_it.cycled_list ()) {      if (tosp_use_pre_chopping)        blob_box = box_next_pre_chopped (&blob_it);      else if (tosp_stats_use_xht_gaps)        blob_box = reduced_box_next (row, &blob_it);      else        blob_box = box_next (&blob_it);      gap_width = blob_box.left () - prev_blob_box.right ();      if (ignore_big_gap (row, row_length, gapmap,        prev_blob_box.right (), blob_box.left ()))        large_gap_count++;      
else {        if (gap_width >= real_space_threshold) {          if (!tosp_row_use_cert_spaces ||            (gap_width > tosp_fuzzy_space_factor2 * row->xheight) ||            ((gap_width > tosp_fuzzy_space_factor1 * row->xheight)            && (!tosp_narrow_blobs_not_cert            || (!narrow_blob (row, prev_blob_box)            && !narrow_blob (row, blob_box))))            || (wide_blob (row, prev_blob_box)            && wide_blob (row, blob_box)))            cert_space_gap_stats.add (gap_width, 1);          all_space_gap_stats.add (gap_width, 1);        }        else          small_gap_stats.add (gap_width, 1);        all_gap_stats.add (gap_width, 1);      }      prev_blob_box = blob_box;    }  }  suspected_table = (large_gap_count > 1) ||    ((large_gap_count > 0) &&    (all_gap_stats.get_total () <= tosp_few_samples));  /* Now determine row kern size, space size and threshold */  if ((cert_space_gap_stats.get_total () >=    tosp_enough_space_samples_for_median) ||    ((suspected_table ||    all_gap_stats.get_total () <= tosp_short_row) &&    cert_space_gap_stats.get_total () > 0))    old_to_method(row,                  &all_gap_stats,                  &cert_space_gap_stats,                  &small_gap_stats,                  block_space_gap_width,                  block_non_space_gap_width);  else {    if (!tosp_recovery_isolated_row_stats ||      !isolated_row_stats (row, gapmap, &all_gap_stats, suspected_table,    block_idx, row_idx)) {      if (tosp_row_use_cert_spaces && (tosp_debug_level > 5))        tprintf ("B:%d R:%d -- Inadequate certain spaces.\n",          block_idx, row_idx);      if (tosp_row_use_cert_spaces1 && good_block_space_estimate) {                                 //Use block default        row->space_size = block_space_gap_width;        if (all_gap_stats.get_total () > tosp_redo_kern_limit)          row->kern_size = all_gap_stats.median ();        else          row->kern_size = block_non_space_gap_width;        row->space_threshold =    
      INT32 (floor ((row->space_size + row->kern_size) / 2));      }      else        old_to_method(row,                      &all_gap_stats,                      &all_space_gap_stats,                      &small_gap_stats,                      block_space_gap_width,                      block_non_space_gap_width);    }  }  if (tosp_improve_thresh && !suspected_table)    improve_row_threshold(row, &all_gap_stats);  /* Now lets try to be careful not to do anything silly with tables when we  are ignoring big gaps*/  if (tosp_sanity_method == 0) {    if (suspected_table &&    (row->space_size < tosp_table_kn_sp_ratio * row->kern_size)) {      if (tosp_debug_level > 0)        tprintf ("B:%d R:%d -- DONT BELIEVE SPACE %3.2f %d %3.2f.\n",          block_idx, row_idx,          row->kern_size, row->space_threshold, row->space_size);      row->space_threshold =        (INT32) (tosp_table_kn_sp_ratio * row->kern_size);      row->space_size = MAX (row->space_threshold + 1, row->xheight);    }  }  else if (tosp_sanity_method == 1) {    sane_space = row->space_size;    /* NEVER let space size get too close to kern size */    if ((row->space_size < tosp_min_sane_kn_sp * MAX (row->kern_size, 2.5))      || ((row->space_size - row->kern_size) <    (tosp_silly_kn_sp_gap * row->xheight))) {      if (good_block_space_estimate &&        (block_space_gap_width >= tosp_min_sane_kn_sp * row->kern_size))        sane_space = block_space_gap_width;      else        sane_space =          MAX (tosp_min_sane_kn_sp * MAX (row->kern_size, 2.5),          row->xheight / 2);      if (tosp_debug_level > 0)        tprintf          ("B:%d R:%d -- DONT BELIEVE SPACE %3.2f %d %3.2f -> %3.2f.\n",          block_idx, row_idx, row->kern_size, row->space_threshold,          row->space_size, sane_space);      row->space_size = sane_space;      row->space_threshold =        INT32 (floor ((row->space_size + row->kern_size) / 2));    }    /* NEVER let threshold get VERY far away from kern */    sane_threshold = 
INT32 (floor (tosp_max_sane_kn_thresh *      MAX (row->kern_size, 2.5)));    if (row->space_threshold > sane_threshold) {      if (tosp_debug_level > 0)        tprintf ("B:%d R:%d -- DONT BELIEVE THRESH %3.2f %d %3.2f->%d.\n",          block_idx, row_idx,          row->kern_size,          row->space_threshold, row->space_size, sane_threshold);      row->space_threshold = sane_threshold;      if (row->space_size <= sane_threshold)        row->space_size = row->space_threshold + 1.0f;    }    /* Beware of tables - there may be NO spaces */    if (suspected_table) {      sane_space = MAX (tosp_table_kn_sp_ratio * row->kern_size,        tosp_table_xht_sp_ratio * row->xheight);      sane_threshold = INT32 (floor ((sane_space + row->kern_size) / 2));      if ((row->space_size < sane_space) ||      (row->space_threshold < sane_threshold)) {        if (tosp_debug_level > 0)          tprintf ("B:%d R:%d -- SUSPECT NO SPACES %3.2f %d %3.2f.\n",            block_idx, row_idx,            row->kern_size,            row->space_threshold, row->space_size);                                 //the minimum sane value        row->space_threshold = (INT32) sane_space;        row->space_size = MAX (row->space_threshold + 1, row->xheight);      }    }  }  /* Now lets try to put some error limits on the threshold */  if (tosp_old_to_method) {    /* Old textord made a space if gap >= threshold */                                 //NO FUZZY SPACES YET    row->max_nonspace = row->space_threshold;                                 //NO FUZZY SPACES       YET    row->min_space = row->space_threshold + 1;  }  else {    /* Any gap greater than 0.6 x-ht is bound to be a space (isn't it:-) */    row->min_space =      MIN (INT32 (ceil (tosp_fuzzy_space_factor * row->xheight)),      INT32 (row->space_size));    if (row->min_space <= row->space_threshold)                                 //Dont be silly      row->min_space = row->space_threshold + 1;    /*    Lets try to guess the max certain kern gap by 
looking at the cluster of    kerns for the row. The row is proportional so the kerns should cluster    tightly at the bottom of the distribution. We also expect most gaps to be    kerns. Find the maximum of the kern piles between 0 and twice the kern    estimate. Piles before the first one with less than 1/10 the maximum    number of samples can be taken as certain kerns.      Of course, there are some cases where the kern peak and space peaks merge,      so we will put an UPPER limit on the max certain kern gap of some fraction      below the threshold.    */    max_max_nonspace = INT32 ((row->space_threshold + row->kern_size) / 2);                                 //default    row->max_nonspace = max_max_nonspace;    for (index = 0; index <= max_max_nonspace; index++) {      if (all_gap_stats.pile_count (index) > max)        max = all_gap_stats.pile_count (index);      if ((index > row->kern_size) &&      (all_gap_stats.pile_count (index) < 0.1 * max)) {        row->max_nonspace = index;        break;      }    }  }  /* Yet another algorithm - simpler this time - just choose a fraction of the  threshold to space range */  if ((tosp_fuzzy_sp_fraction > 0) &&    (row->space_size > row->space_threshold))    row->min_space = MAX (row->min_space,      (INT32) ceil (row->space_threshold +      tosp_fuzzy_sp_fraction *      (row->space_size -      row->space_threshold)));  /* Ensure that ANY space less than some multiplier times the kern size is  fuzzy.  In tables there is a risk of erroneously setting a small space size  when there are no real spaces. Sometimes tables have text squashed into  columns so that the kn->sp ratio is small anyway - this means that we cant  use this to force a wider separation - hence we rely on context to join any  dubious breaks. 
*/  if ((tosp_table_fuzzy_kn_sp_ratio > 0) &&    (suspected_table || tosp_fuzzy_limit_all))    row->min_space = MAX (row->min_space,      (INT32) ceil (tosp_table_fuzzy_kn_sp_ratio *      row->kern_size));  if ((tosp_fuzzy_kn_fraction > 0) && (row->kern_size < row->space_threshold))    row->max_nonspace = (INT32) floor (0.5 + row->kern_size +      tosp_fuzzy_kn_fraction *      (row->space_threshold -      row->kern_size));  if (row->max_nonspace > row->space_threshold)                                 //Dont be silly    row->max_nonspace = row->space_threshold;  if (tosp_debug_level > 5)    tprintf      ("B:%d R:%d L:%d-- Kn:%d Sp:%d Thr:%d -- Kn:%3.2f (%d) Thr:%d (%d) Sp:%3.2f\n",      block_idx, row_idx, row_length, block_non_space_gap_width,      block_space_gap_width, real_space_threshold, row->kern_size,      row->max_nonspace, row->space_threshold, row->min_space,      row->space_size);}void old_to_method(                                 //estimate for block                   TO_ROW *row,                   STATS *all_gap_stats,                   STATS *space_gap_stats,                   STATS *small_gap_stats,                   INT16 block_space_gap_width,                   INT16 block_non_space_gap_width  //estimate for block                  ) {  /* Old to condition was > 2 */  if (space_gap_stats->get_total () >= tosp_enough_space_samples_for_median) {  //Adequate samples    /* Set space size to median of spaces BUT limits it if it seems wildly out */    row->space_size = space_gap_stats->median ();    if (row->space_size > block_space_gap_width * 1.5) {      if (tosp_old_to_bug_fix)        row->space_size = block_space_gap_width * 1.5;      else                                 //BUG??? 
should be *1.5        row->space_size = block_space_gap_width;    }    if (row->space_size < (block_non_space_gap_width * 2) + 1)      row->space_size = (block_non_space_gap_width * 2) + 1;  }                                 //Only 1 or 2 samples  else if (space_gap_stats->get_total () >= 1) {                                 //hence mean not median    row->space_size = space_gap_stats->mean ();    if (row->space_size > block_space_gap_width * 1.5) {      if (tosp_old_to_bug_fix)        row->space_size = block_space_gap_width * 1.5;      else                                 //BUG??? should be *1.5        row->space_size = block_space_gap_width;    }    if (row->space_size < (block_non_space_gap_width * 3) + 1)      row->space_size = (block_non_space_gap_width * 3) + 1;  }  else                                 //Use block default    row->space_size = block_space_gap_width;  if ((tosp_only_small_gaps_for_kern) &&    (small_gap_stats->get_total () > tosp_redo_kern_limit))    row->kern_size = small_gap_stats->median ();  else if (all_gap_stats->get_total () > tosp_redo_kern_limit)    row->kern_size = all_gap_stats->median ();  else                                 //old TO -SAME FOR ALL ROWS    row->kern_size = block_non_space_gap_width;  if (tosp_threshold_bias2 > 0)    row->space_threshold =      INT32 (floor (0.5 + row->kern_size +      tosp_threshold_bias2 * (row->space_size -      row->kern_size)));  else    /*      NOTE old text ord uses (space_size + kern_size + 1)/2  as the threshold    and holds this in a float. The use is with a >= test    NEW textord uses an integer threshold and a > test    It comes to the same thing.      (Though there is a difference in that old textor has integer space_size      and kern_size.)    
*/    row->space_threshold =      INT32 (floor ((row->space_size + row->kern_size) / 2));}/************************************************************************* * isolated_row_stats() * Set values for min_space, max_non_space based on row stats only *************************************************************************/BOOL8 isolated_row_stats(TO_ROW *row,                         GAPMAP *gapmap,                         STATS *all_gap_stats,                         BOOL8 suspected_table,                         INT16 block_idx,                         INT16 row_idx) {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -