⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 tospace.cpp

📁 一OCR的相关资料。.希望对研究OCR的朋友有所帮助.
💻 CPP
📖 第 1 页 / 共 5 页
字号:
  float kern_estimate;  float crude_threshold_estimate;  INT16 small_gaps_count;  INT16 total;                                 //iterator  BLOBNBOX_IT blob_it = row->blob_list ();  STATS cert_space_gap_stats (0, MAXSPACING);  STATS all_space_gap_stats (0, MAXSPACING);  STATS small_gap_stats (0, MAXSPACING);  BOX blob_box;  BOX prev_blob_box;  INT16 gap_width;  INT32 end_of_row;  INT32 row_length;  kern_estimate = all_gap_stats->median ();  crude_threshold_estimate = MAX (tosp_init_guess_kn_mult * kern_estimate,    tosp_init_guess_xht_mult * row->xheight);  small_gaps_count = stats_count_under (all_gap_stats,    (INT16)    ceil (crude_threshold_estimate));  total = all_gap_stats->get_total ();  if ((total <= tosp_redo_kern_limit) ||    ((small_gaps_count / (float) total) < tosp_enough_small_gaps) ||  (total - small_gaps_count < 1)) {    if (tosp_debug_level > 5)      tprintf ("B:%d R:%d -- Cant do isolated row stats.\n",        block_idx, row_idx);    return FALSE;  }  blob_it.set_to_list (row->blob_list ());  blob_it.mark_cycle_pt ();  end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();  if (tosp_use_pre_chopping)    blob_box = box_next_pre_chopped (&blob_it);  else if (tosp_stats_use_xht_gaps)    blob_box = reduced_box_next (row, &blob_it);  else    blob_box = box_next (&blob_it);  row_length = end_of_row - blob_box.left ();  prev_blob_box = blob_box;  while (!blob_it.cycled_list ()) {    if (tosp_use_pre_chopping)      blob_box = box_next_pre_chopped (&blob_it);    else if (tosp_stats_use_xht_gaps)      blob_box = reduced_box_next (row, &blob_it);    else      blob_box = box_next (&blob_it);    gap_width = blob_box.left () - prev_blob_box.right ();    if (!ignore_big_gap (row, row_length, gapmap,      prev_blob_box.right (), blob_box.left ()) &&    (gap_width > crude_threshold_estimate)) {      if ((gap_width > tosp_fuzzy_space_factor2 * row->xheight) ||        ((gap_width > tosp_fuzzy_space_factor1 * row->xheight) &&        (!tosp_narrow_blobs_not_cert ||        (!narrow_blob (row, prev_blob_box) &&        !narrow_blob (row, blob_box)))) ||        (wide_blob (row, prev_blob_box) && wide_blob (row, blob_box)))        cert_space_gap_stats.add (gap_width, 1);      all_space_gap_stats.add (gap_width, 1);    }    if (gap_width < crude_threshold_estimate)      small_gap_stats.add (gap_width, 1);    prev_blob_box = blob_box;  }  if (cert_space_gap_stats.get_total () >=    tosp_enough_space_samples_for_median)                                 //median    row->space_size = cert_space_gap_stats.median ();  else if (suspected_table && (cert_space_gap_stats.get_total () > 0))                                 //to avoid spaced    row->space_size = cert_space_gap_stats.mean ();  //      1's in tables  else if (all_space_gap_stats.get_total () >=    tosp_enough_space_samples_for_median)                                 //median    row->space_size = all_space_gap_stats.median ();  else    row->space_size = all_space_gap_stats.mean ();  if (tosp_only_small_gaps_for_kern)    row->kern_size = small_gap_stats.median ();  else    row->kern_size = all_gap_stats->median ();  row->space_threshold =    INT32 (floor ((row->space_size + row->kern_size) / 2));  /* Sanity check */  if ((row->kern_size >= row->space_threshold) ||    (row->space_threshold >= row->space_size) ||  (row->space_threshold <= 0)) {    if (tosp_debug_level > 0)      tprintf ("B:%d R:%d -- Isolated row stats SANITY FAILURE: %f %d %f\n",        block_idx, row_idx,        row->kern_size, row->space_threshold, row->space_size);    row->kern_size = 0.0f;    row->space_threshold = 0;    row->space_size = 0.0f;    return FALSE;  }  if (tosp_debug_level > 5)    tprintf ("B:%d R:%d -- Isolated row stats: %f %d %f\n",      block_idx, row_idx,      row->kern_size, row->space_threshold, row->space_size);  return TRUE;}INT16 stats_count_under(STATS *stats, INT16 threshold) {  INT16 index;  INT16 total = 0;  for (index = 0; index < threshold; index++)    total += stats->pile_count (index);  return total;}/************************************************************************* * improve_row_threshold() *    Try to recognise a "normal line" - *           > 25 gaps *     &&    space > 3 * kn  && space > 10 *              (I.e. reasonably large space and kn:sp ratio) *     &&    > 3/4 # gaps < kn + (sp - kn)/3 *              (I.e. most gaps are well away from space estimate) *     &&    a gap of max( 3, (sp - kn)/3 ) empty histogram positions is found *           somewhere in the histogram between kn and sp *     THEN set the threshold and fuzzy limits to this gap - ie NO fuzzies *          NO!!!!! the bristol line has "11" with a gap of 12 between the 1's!!! *          try moving the default threshold to within this band but leave the *          fuzzy limit calculation as at present. *************************************************************************/void improve_row_threshold(TO_ROW *row, STATS *all_gap_stats) {  float sp = row->space_size;  float kn = row->kern_size;  INT16 reqd_zero_width = 0;  INT16 zero_width = 0;  INT16 zero_start = 0;  INT16 index = 0;  if (tosp_debug_level > 10)    tprintf ("Improve row threshold 0");  if ((all_gap_stats->get_total () <= 25) ||    (sp <= 10) ||    (sp <= 3 * kn) ||    (stats_count_under (all_gap_stats,    (INT16) ceil (kn + (sp - kn) / 3 + 0.5)) <    (0.75 * all_gap_stats->get_total ())))    return;  if (tosp_debug_level > 10)    tprintf (" 1");  /*  Look for the first region of all 0's in the histogram which is wider than  max( 3, (sp - kn)/3 ) and starts between kn and sp. If found, and current  threshold is not within it, move the threshold so that is is just inside it.  */  reqd_zero_width = (INT16) floor ((sp - kn) / 3 + 0.5);  if (reqd_zero_width < 3)    reqd_zero_width = 3;  for (index = INT16 (ceil (kn)); index < INT16 (floor (sp)); index++) {    if (all_gap_stats->pile_count (index) == 0) {      if (zero_width == 0)        zero_start = index;      zero_width++;    }    else {      if (zero_width >= reqd_zero_width)        break;      else {        zero_width = 0;      }    }  }  index--;  if (tosp_debug_level > 10)    tprintf (" reqd_z_width: %d found %d 0's, starting %d; thresh: %d/n",      reqd_zero_width, zero_width, zero_start, row->space_threshold);  if ((zero_width < reqd_zero_width) ||    ((row->space_threshold >= zero_start) &&    (row->space_threshold <= index)))    return;  if (tosp_debug_level > 10)    tprintf (" 2");  if (row->space_threshold < zero_start) {    if (tosp_debug_level > 5)      tprintf        ("Improve row kn:%5.2f sp:%5.2f 0's: %d -> %d  thresh:%d -> %d\n",        kn, sp, zero_start, index, row->space_threshold, zero_start);    row->space_threshold = zero_start;  }  if (row->space_threshold > index) {    if (tosp_debug_level > 5)      tprintf        ("Improve row kn:%5.2f sp:%5.2f 0's: %d -> %d  thresh:%d -> %d\n",        kn, sp, zero_start, index, row->space_threshold, index);    row->space_threshold = index;  }}/********************************************************************** * make_prop_words * * Convert a TO_BLOCK to a BLOCK. **********************************************************************/ROW *make_prop_words(                 //find lines                     TO_ROW *row,     //row to make                     FCOORD rotation  //for drawing                    ) {  BOOL8 bol;                     //start of line  /* prev_ values are for start of word being built. non prev_ values are for  the gap between the word being built and the next one. */  BOOL8 prev_fuzzy_sp;           //probably space  BOOL8 prev_fuzzy_non;          //probably not  UINT8 prev_blanks;             //in front of word  BOOL8 fuzzy_sp;                //probably space  BOOL8 fuzzy_non;               //probably not  UINT8 blanks;                  //in front of word  ROW *real_row;                 //output row  OUTLINE_IT out_it;             //outlines  C_OUTLINE_IT cout_it;  PBLOB_LIST blobs;              //blobs in word  C_BLOB_LIST cblobs;  PBLOB_IT blob_it = &blobs;     //iterator  C_BLOB_IT cblob_it = &cblobs;  WERD_LIST words;  WERD_IT word_it;               //new words  WERD *word;                    //new word  WERD_IT rep_char_it;           //repeated char words  INT32 next_rep_char_word_right = MAX_INT32;  float repetition_spacing;      //gap between repetitions  INT32 xstarts[2];              //row ends  double coeffs[3];              //quadratic  INT32 prev_x;                  //end of prev blob  BLOBNBOX *bblob;               //current blob  BOX blob_box;                  //bounding box  BLOBNBOX_IT box_it;            //iterator  BOX prev_blob_box;  BOX next_blob_box;  INT16 prev_gap = MAX_INT16;  INT16 current_gap = MAX_INT16;  INT16 next_gap = MAX_INT16;  INT16 prev_within_xht_gap = MAX_INT16;  INT16 current_within_xht_gap = MAX_INT16;  INT16 next_within_xht_gap = MAX_INT16;  INT16 word_count = 0;  static INT16 row_count = 0;  row_count++;  rep_char_it.set_to_list (&(row->rep_words));  if (!rep_char_it.empty ()) {    next_rep_char_word_right =      rep_char_it.data ()->bounding_box ().right ();  }  prev_x = -MAX_INT16;  blob_it.set_to_list (&blobs);  cblob_it.set_to_list (&cblobs);  box_it.set_to_list (row->blob_list ());  word_it.set_to_list (&words);  bol = TRUE;  prev_blanks = 0;  prev_fuzzy_sp = FALSE;  prev_fuzzy_non = FALSE;  if (!box_it.empty ()) {    xstarts[0] = box_it.data ()->bounding_box ().left ();    if (xstarts[0] > next_rep_char_word_right) {      /* We need to insert a repeated char word at the start of the row */      word = rep_char_it.extract ();      word_it.add_after_then_move (word);      /* Set spaces before repeated char word */      word->set_flag (W_BOL, TRUE);      bol = FALSE;      word->set_blanks (0);                                 //NO uncertainty      word->set_flag (W_FUZZY_SP, FALSE);      word->set_flag (W_FUZZY_NON, FALSE);      xstarts[0] = word->bounding_box ().left ();      /* Set spaces after repeated char word (and leave current word set) */      repetition_spacing = find_mean_blob_spacing (word);      current_gap = box_it.data ()->bounding_box ().left () -        next_rep_char_word_right;      current_within_xht_gap = current_gap;      if (current_gap > tosp_rep_space * repetition_spacing) {        prev_blanks = (UINT8) floor (current_gap / row->space_size);        if (prev_blanks < 1)          prev_blanks = 1;      }      else        prev_blanks = 0;      if (tosp_debug_level > 5)        tprintf ("Repch wd at BOL(%d, %d). rep spacing %5.2f;  Rgap:%d  ",          box_it.data ()->bounding_box ().left (),          box_it.data ()->bounding_box ().bottom (),          repetition_spacing, current_gap);      prev_fuzzy_sp = FALSE;      prev_fuzzy_non = FALSE;      if (rep_char_it.empty ()) {        next_rep_char_word_right = MAX_INT32;      }      else {        rep_char_it.forward ();        next_rep_char_word_right =          rep_char_it.data ()->bounding_box ().right ();      }    }    peek_at_next_gap(row,                     box_it,                     next_blob_box,                     next_gap,                     next_within_xht_gap);    do {      bblob = box_it.data ();      blob_box = bblob->bounding_box ();      if (bblob->joined_to_prev ()) {        if (bblob->blob () != NULL) {          out_it.set_to_list (blob_it.data ()->out_list ());          out_it.move_to_last ();          out_it.add_list_after (bblob->blob ()->out_list ());          delete bblob->blob ();        }        else if (bblob->cblob () != NULL) {          cout_it.set_to_list (cblob_it.data ()->out_list ());          cout_it.move_to_last ();          cout_it.add_list_after (bblob->cblob ()->out_list ());          delete bblob->cblob ();        }      }      else {        if (bblob->blob () != NULL)          blob_it.add_after_then_move (bblob->blob ());        else if (bblob->cblob () != NULL)          cblob_it.add_after_then_move (bblob->cblob ());        prev_x = blob_box.right ();      }      box_it.forward ();         //next one      bblob = box_it.data ();      blob_box = bblob->bounding_box ();      if (!bblob->joined_to_prev () &&      (bblob->blob () != NULL || bblob->cblob () != NULL)) {        /* Real Blob - not multiple outlines or pre-chopped */        prev_gap = current_gap;        prev_within_xht_gap = current_within_xht_gap;        prev_blob_box = next_blob_box;        current_gap = next_gap;        current_within_xht_gap = next_within_xht_gap;        peek_at_next_gap(row,                         box_it,                         next_blob_box,                         next_gap,                         next_within_xht_gap);        if ((blob_box.left () > next_rep_char_word_right) ||          (!tosp_only_use_xht_gaps &&

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -