⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 docqual.cpp

📁 一OCR的相关资料。希望对研究OCR的朋友有所帮助。
💻 CPP
📖 第 1 页 / 共 4 页
字号:
          if (longest_upper_run_len < upper_string_count)            longest_upper_run_len = upper_string_count;          if (last_char == *str) {            alpha_repetition_count++;            if (longest_alpha_repetition_count < alpha_repetition_count) {              longest_alpha_repetition_count = alpha_repetition_count;            }          }          else {            last_char = *str;            alpha_repetition_count = 1;          }          break;        case FIRST_NUM:          isolated_digits++;        default:          state = FIRST_UPPER;          last_char = *str;          alpha_repetition_count = 1;          upper_string_count = 1;          break;      }    }    else if (islower (*str)) {      total_alpha_count++;      switch (state) {        case SUBSEQUENT_LOWER:        case FIRST_LOWER:          state = SUBSEQUENT_LOWER;          lower_string_count++;          if (longest_lower_run_len < lower_string_count)            longest_lower_run_len = lower_string_count;          if (last_char == *str) {            alpha_repetition_count++;            if (longest_alpha_repetition_count < alpha_repetition_count) {              longest_alpha_repetition_count = alpha_repetition_count;            }          }          else {            last_char = *str;            alpha_repetition_count = 1;          }          break;        case FIRST_NUM:          isolated_digits++;        default:          state = FIRST_LOWER;          last_char = *str;          alpha_repetition_count = 1;          lower_string_count = 1;          break;      }    }    else if (isdigit (*str)) {      total_digit_count++;      switch (state) {        case FIRST_NUM:          state = SUBSEQUENT_NUM;        case SUBSEQUENT_NUM:          break;        case FIRST_UPPER:        case FIRST_LOWER:          isolated_alphas++;        default:          state = FIRST_NUM;          break;      }    }    else {      if (*str == ' ')        tess_rejs++;      else        bad_char_count++;      switch 
(state) {        case FIRST_NUM:          isolated_digits++;          break;        case FIRST_UPPER:        case FIRST_LOWER:          isolated_alphas++;        default:          break;      }      state = JUNK;    }  }  switch (state) {    case FIRST_NUM:      isolated_digits++;      break;    case FIRST_UPPER:    case FIRST_LOWER:      isolated_alphas++;    default:      break;  }  if (crunch_include_numerals) {    total_alpha_count += total_digit_count - isolated_digits;  }  if (crunch_leave_ok_strings &&    (len >= 4) &&    (2 * (total_alpha_count - isolated_alphas) > len) &&  (longest_alpha_repetition_count < crunch_long_repetitions)) {    if ((crunch_accept_ok &&      (acceptable_word_string (str) != AC_UNACCEPTABLE)) ||      (longest_lower_run_len > crunch_leave_lc_strings) ||      (longest_upper_run_len > crunch_leave_uc_strings))      return G_NEVER_CRUNCH;  }  if ((word->reject_map.length () > 1) &&    (strpbrk (str, " ") == NULL) &&    ((word->best_choice->permuter () == SYSTEM_DAWG_PERM) ||    (word->best_choice->permuter () == FREQ_DAWG_PERM) ||    (word->best_choice->permuter () == USER_DAWG_PERM) ||    (word->best_choice->permuter () == NUMBER_PERM) ||    (acceptable_word_string (str) != AC_UNACCEPTABLE) || ok_dict_word))    return G_OK;  ok_chars = len - bad_char_count - isolated_digits -    isolated_alphas - tess_rejs;  if (crunch_debug > 3) {    tprintf ("garbage_word: \"%s\"\n",      word->best_choice->string ().string ());    tprintf ("LEN: %d  bad: %d  iso_N: %d  iso_A: %d  rej: %d\n",      len,      bad_char_count, isolated_digits, isolated_alphas, tess_rejs);  }  if ((bad_char_count == 0) &&    (tess_rejs == 0) &&    ((len > isolated_digits + isolated_alphas) || (len <= 2)))    return G_OK;  if ((tess_rejs > ok_chars) ||    ((tess_rejs > 0) && ((bad_char_count + tess_rejs) * 2 > len)))    return G_TERRIBLE;  if (len > 4) {    dodgy_chars = 2 * tess_rejs + bad_char_count +      isolated_digits + isolated_alphas;    if ((dodgy_chars > 5) || 
((dodgy_chars / (float) len) > 0.5))      return G_DODGY;    else      return G_OK;  }  else {    dodgy_chars = 2 * tess_rejs + bad_char_count;    if (((len == 4) && (dodgy_chars > 2)) ||      ((len == 3) && (dodgy_chars > 2)) || (dodgy_chars >= len))      return G_DODGY;    else      return G_OK;  }}/************************************************************************* * word_deletable() *     DELETE WERDS AT ENDS OF ROWS IF *        Word is crunched && *        ( string length = 0                                          OR *          > 50% of chars are "|" (before merging)							OR *          certainty < -10                                            OR *          rating /char > 60                                          OR *          TOP of word is more than 0.5 xht BELOW baseline            OR *          BOTTOM of word is more than 0.5 xht ABOVE xht              OR *          length of word < 3xht                                      OR *          height of word < 0.7 xht                                   OR *          height of word > 3.0 xht                                   OR *          >75% of the outline BBs have longest dimension < 0.5xht *************************************************************************/CRUNCH_MODE word_deletable(WERD_RES *word, INT16 &delete_mode) {   int word_len = word->reject_map.length ();  float rating_per_ch;  BOX box;                       //BB of word  if (word->unlv_crunch_mode == CR_NONE) {    delete_mode = 0;    return CR_NONE;  }  if (word_len == 0) {    delete_mode = 1;    return CR_DELETE;  }  box = word->outword->bounding_box ();  if (box.height () < crunch_del_min_ht * bln_x_height) {    delete_mode = 4;    return CR_DELETE;  }  if (noise_outlines (word->outword)) {    delete_mode = 5;    return CR_DELETE;  }  if ((failure_count (word) * 1.5) > word_len) {    delete_mode = 2;    return CR_LOOSE_SPACE;  }  if (word->best_choice->certainty () < crunch_del_cert) {    delete_mode = 7;    return CR_LOOSE_SPACE;  
}  rating_per_ch = word->best_choice->rating () / word_len;  if (rating_per_ch > crunch_del_rating) {    delete_mode = 8;    return CR_LOOSE_SPACE;  }  if (box.top () < bln_baseline_offset - crunch_del_low_word * bln_x_height) {    delete_mode = 9;    return CR_LOOSE_SPACE;  }  if (box.bottom () >  bln_baseline_offset + crunch_del_high_word * bln_x_height) {    delete_mode = 10;    return CR_LOOSE_SPACE;  }  if (box.height () > crunch_del_max_ht * bln_x_height) {    delete_mode = 11;    return CR_LOOSE_SPACE;  }  if (box.width () < crunch_del_min_width * bln_x_height) {    delete_mode = 3;    return CR_LOOSE_SPACE;  }  delete_mode = 0;  return CR_NONE;}INT16 failure_count(WERD_RES *word) {   char *str = (char *) word->best_choice->string ().string ();  int tess_rejs = 0;  for (; *str != '\0'; str++) {    if (*str == ' ')      tess_rejs++;  }  return tess_rejs;}BOOL8 noise_outlines(WERD *word) {   PBLOB_IT blob_it;  OUTLINE_IT outline_it;  BOX box;                       //BB of outline  INT16 outline_count = 0;  INT16 small_outline_count = 0;  INT16 max_dimension;  float small_limit = bln_x_height * crunch_small_outlines_size;  blob_it.set_to_list (word->blob_list ());  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {    outline_it.set_to_list (blob_it.data ()->out_list ());    for (outline_it.mark_cycle_pt ();    !outline_it.cycled_list (); outline_it.forward ()) {      outline_count++;      box = outline_it.data ()->bounding_box ();      if (box.height () > box.width ())        max_dimension = box.height ();      else        max_dimension = box.width ();      if (max_dimension < small_limit)        small_outline_count++;    }  }  return (small_outline_count >= outline_count);}/************************************************************************* * insert_rej_cblobs() * Put rejected word blobs back into the outword. * NOTE!!! AFTER THIS THE CHOICES LIST WILL NOT HAVE THE CORRECT NUMBER * OF ELEMENTS. 
*************************************************************************/void insert_rej_cblobs(  //word to do                       WERD_RES *word) {  PBLOB_IT blob_it;              //blob iterator  PBLOB_IT rej_blob_it;  const STRING *wordstr;  int old_len;  int rej_len;  char new_str[512];  REJMAP new_map;  int i = 0;                     //new_str index  int j = 0;                     //old_str index  int new_len;  gblob_sort_list (word->outword->rej_blob_list (), TRUE);  rej_blob_it.set_to_list (word->outword->rej_blob_list ());  if (rej_blob_it.empty ())    return;  rej_len = rej_blob_it.length ();  blob_it.set_to_list (word->outword->blob_list ());  wordstr = &(word->best_choice->string ());  old_len = wordstr->length ();  ASSERT_HOST (word->reject_map.length () == old_len);  ASSERT_HOST (blob_it.length () == old_len);  if ((old_len + rej_len) > 511)    return;                      //Word is garbage anyway prevent abort  new_map.initialise (old_len + rej_len);  while (!rej_blob_it.empty ()) {    if ((j >= old_len) ||      (rej_blob_it.data ()->bounding_box ().left () <=    blob_it.data ()->bounding_box ().left ())) {      /* Insert reject blob */      if (j >= old_len)        blob_it.add_to_end (rej_blob_it.extract ());      else        blob_it.add_before_stay_put (rej_blob_it.extract ());      if (!rej_blob_it.empty ())        rej_blob_it.forward ();      new_str[i] = ' ';      new_map[i].setrej_rej_cblob ();      i++;    }    else {      new_str[i] = (*wordstr)[j];      new_map[i] = word->reject_map[j];      i++;      j++;      blob_it.forward ();    }  }  /* Add any extra normal blobs to strings */  while (j < wordstr->length ()) {    new_str[i] = (*wordstr)[j];    new_map[i] = word->reject_map[j];    i++;    j++;  }  new_str[i] = '\0';  /*    tprintf(          "\nOld len %d; New len %d; New str \"%s\"; New map \"%s\"\n",          old_len, i, new_str, new_map );  */  ASSERT_HOST (i == blob_it.length ());  ASSERT_HOST (i == old_len + rej_len);  
word->reject_map = new_map;  *((STRING *) wordstr) = new_str;  new_len = strlen (word->best_choice->string ().string ());  ASSERT_HOST (word->reject_map.length () == new_len);  ASSERT_HOST (word->outword->blob_list ()->length () == new_len);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -