⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fixspace.cpp

📁 一些OCR的相关资料，希望对研究OCR的朋友有所帮助。
💻 CPP
📖 第 1 页 / 共 3 页
字号:
        for (i = 0, prev_char_punct = FALSE; i < word_len; i++) {          current_char_punct =            punct_chars.contains (word->best_choice->string ()[i]);          if (prev_char_punct || (current_char_punct && (i > 0)))            total_score++;          prev_char_punct = current_char_punct;        }      }      prev_char_digit = digit_or_numeric_punct (word,        word->best_choice->        string ()[word_len - 1]);      prev_char_1 =        ((word_done        && (word->best_choice->string ()[word_len - 1] == '1'))        || (!word_done        && STRING (conflict_set_I_l_1).contains (word->best_choice->        string ()[word_len -        1])));    }    /* Find next word */    do    word_res_it.forward ();    while (word_res_it.data ()->part_of_combo);  }  while (!word_res_it.at_first ());  total_score += prev_word_score;  if (prev_word_done)    done_word_count++;  if (done_word_count == word_count)    return PERFECT_WERDS;  else    return total_score;}BOOL8 digit_or_numeric_punct(WERD_RES *word, char ch) {   return (isdigit (ch) ||    (fixsp_numeric_fix &&    (word->best_choice->permuter () == NUMBER_PERM) &&    STRING (numeric_punctuation).contains (ch)));}/************************************************************************* * transform_to_next_perm() * Examines the current word list to find the smallest word gap size. Then walks * the word list closing any gaps of this size by either inserted new * combination words, or extending existing ones. * * The routine COULD be limited to stop it building words longer than N blobs. * * If there are no more gaps then it DELETES the entire list and returns the * empty list to cause termination. 
*************************************************************************/void transform_to_next_perm(WERD_RES_LIST &words) {   WERD_RES_IT word_it(&words);   WERD_RES_IT prev_word_it(&words);   WERD_RES *word;  WERD_RES *prev_word;  WERD_RES *combo;  WERD *copy_word;  INT16 prev_right = -1;  BOX box;  INT16 gap;  INT16 min_gap = MAX_INT16;  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {    word = word_it.data ();    if (!word->part_of_combo) {      box = word->word->bounding_box ();      if (prev_right >= 0) {        gap = box.left () - prev_right;        if (gap < min_gap)          min_gap = gap;      }      prev_right = box.right ();    }  }  if (min_gap < MAX_INT16) {    prev_right = -1;             //back to start    word_it.set_to_list (&words);    for (;                       //cant use cycle pt due to inserted combos at start of list    (prev_right < 0) || !word_it.at_first (); word_it.forward ()) {      word = word_it.data ();      if (!word->part_of_combo) {        box = word->word->bounding_box ();        if (prev_right >= 0) {          gap = box.left () - prev_right;          if (gap <= min_gap) {            prev_word = prev_word_it.data ();            if (prev_word->combination)              combo = prev_word;            else {              /* Make a new combination and insert before the first word being joined */              copy_word = new WERD;              *copy_word = *(prev_word->word);              //deep copy              combo = new WERD_RES (copy_word);              combo->combination = TRUE;              prev_word->part_of_combo = TRUE;              prev_word_it.add_before_then_move (combo);            }            combo->word->set_flag (W_EOL, word->word->flag (W_EOL));            if (word->combination) {              combo->word->join_on (word->word);              //Move blbs to combo                                 //old combo no longer needed              delete word_it.extract ();            }            
else {                                 //Cpy current wd to combo              combo->copy_on (word);              word->part_of_combo = TRUE;            }            combo->done = FALSE;            if (combo->outword != NULL) {              delete combo->outword;              delete combo->best_choice;              delete combo->raw_choice;              combo->outword = NULL;              combo->best_choice = NULL;              combo->raw_choice = NULL;            }          }          else                                 //catch up              prev_word_it = word_it;        }        prev_right = box.right ();      }    }  }  else    words.clear ();              //signal termination}void dump_words(WERD_RES_LIST &perm, INT16 score, INT16 mode, BOOL8 improved) {   WERD_RES_IT word_res_it(&perm);   static STRING initial_str;  if (debug_fix_space_level > 0) {    if (mode == 1) {      initial_str = "";      for (word_res_it.mark_cycle_pt ();      !word_res_it.cycled_list (); word_res_it.forward ()) {        if (!word_res_it.data ()->part_of_combo) {          initial_str += word_res_it.data ()->best_choice->string ();          initial_str += ' ';        }      }    }    #ifndef SECURE_NAMES    if (debug_fix_space_level > 1) {      switch (mode) {        case 1:          tprintf ("EXTRACTED (%d): \"", score);          break;        case 2:          tprintf ("TESTED (%d): \"", score);          break;        case 3:          tprintf ("RETURNED (%d): \"", score);          break;      }      for (word_res_it.mark_cycle_pt ();      !word_res_it.cycled_list (); word_res_it.forward ()) {        if (!word_res_it.data ()->part_of_combo)          tprintf ("%s/%1d ",            word_res_it.data ()->best_choice->string ().            
string (),            (int) word_res_it.data ()->best_choice->permuter ());      }      tprintf ("\"\n");    }    else if (improved) {      tprintf ("FIX SPACING \"%s\" => \"", initial_str.string ());      for (word_res_it.mark_cycle_pt ();      !word_res_it.cycled_list (); word_res_it.forward ()) {        if (!word_res_it.data ()->part_of_combo)          tprintf ("%s/%1d ",            word_res_it.data ()->best_choice->string ().            string (),            (int) word_res_it.data ()->best_choice->permuter ());      }      tprintf ("\"\n");    }    #endif  }}/************************************************************************* * uniformly_spaced() * Return true if one of the following are true: *    - All inter-char gaps are the same width *	- The largest gap is no larger than twice the mean/median of the others *	- The largest gap is < 64/5 = 13 and all others are <= 0 * **** REMEMBER - WE'RE NOW WORKING WITH A BLN WERD !!! *************************************************************************/BOOL8 uniformly_spaced(  //sensible word                       WERD_RES *word) {  PBLOB_IT blob_it;  BOX box;  INT16 prev_right = -MAX_INT16;  INT16 gap;  INT16 max_gap = -MAX_INT16;  INT16 max_gap_count = 0;  STATS gap_stats (0, MAXSPACING);  BOOL8 result;  const ROW *row = word->denorm.row ();  float max_non_space;  float normalised_max_nonspace;  INT16 i = 0;  STRING punct_chars = "\"`',.:;";  blob_it.set_to_list (word->outword->blob_list ());  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {    box = blob_it.data ()->bounding_box ();    if ((prev_right > -MAX_INT16) &&      (!fixsp_ignore_punct ||      (!punct_chars.contains (word->best_choice->string ()[i - 1]) &&    !punct_chars.contains (word->best_choice->string ()[i])))) {      gap = box.left () - prev_right;      if (gap < max_gap)        gap_stats.add (gap, 1);      else if (gap == max_gap)        max_gap_count++;      else {        if (max_gap_count > 0)          
gap_stats.add (max_gap, max_gap_count);        max_gap = gap;        max_gap_count = 1;      }    }    prev_right = box.right ();    i++;  }  max_non_space = (row->space () + 3 * row->kern ()) / 4;  normalised_max_nonspace = max_non_space * bln_x_height / row->x_height ();  result = ((gap_stats.get_total () == 0) ||    (max_gap <= normalised_max_nonspace) ||    ((gap_stats.get_total () > 2) &&    (max_gap <= 2 * gap_stats.median ())) ||    ((gap_stats.get_total () <= 2) &&    (max_gap <= 2 * gap_stats.mean ())));  #ifndef SECURE_NAMES  if ((debug_fix_space_level > 1)) {    if (result)      tprintf        ("ACCEPT SPACING FOR: \"%s\" norm_maxnon = %f max=%d maxcount=%d total=%d mean=%f median=%f\n",        word->best_choice->string ().string (), normalised_max_nonspace,        max_gap, max_gap_count, gap_stats.get_total (), gap_stats.mean (),        gap_stats.median ());    else      tprintf        ("REJECT SPACING FOR: \"%s\" norm_maxnon = %f max=%d maxcount=%d total=%d mean=%f median=%f\n",        word->best_choice->string ().string (), normalised_max_nonspace,        max_gap, max_gap_count, gap_stats.get_total (), gap_stats.mean (),        gap_stats.median ());  }  #endif  return result;}BOOL8 fixspace_thinks_word_done(WERD_RES *word) {   if (word->done)    return TRUE;  /*    Use all the standard pass 2 conditions for mode 5 in set_done() in    reject.c BUT DONT REJECT IF THE WERD IS AMBIGUOUS - FOR SPACING WE DONT    CARE WHETHER WE HAVE of/at on/an etc.  
*/  if ((fixsp_done_mode > 0) &&    (word->tess_accepted ||    ((fixsp_done_mode == 2) &&    (word->reject_map.reject_count () == 0)) ||    (fixsp_done_mode == 3)) &&    (strchr (word->best_choice->string ().string (), ' ') == NULL) &&    ((word->best_choice->permuter () == SYSTEM_DAWG_PERM) ||    (word->best_choice->permuter () == FREQ_DAWG_PERM) ||    (word->best_choice->permuter () == USER_DAWG_PERM) ||    (word->best_choice->permuter () == NUMBER_PERM)))    return TRUE;  else    return FALSE;}/************************************************************************* * fix_sp_fp_word() * Test the current word to see if it can be split by deleting noise blobs. If * so, do the buisiness. * Return with the iterator pointing to the same place if the word is unchanged, * or the last of the replacement words. *************************************************************************/void fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row) {   WERD_RES *word_res;  WERD_RES_LIST sub_word_list;  WERD_RES_IT sub_word_list_it(&sub_word_list);   INT16 blob_index;  INT16 new_length;  float junk;  word_res = word_res_it.data ();  if (!fixsp_check_for_fp_noise_space ||    word_res->word->flag (W_REP_CHAR) ||    word_res->combination ||    word_res->part_of_combo || !word_res->word->flag (W_DONT_CHOP))    return;  blob_index = worst_noise_blob (word_res, &junk);  if (blob_index < 0)    return;  #ifndef SECURE_NAMES  if (debug_fix_space_level > 1) {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -