📄 fixspace.cpp

📁 一ＯＣＲ的相关资料。希望对研究ＯＣＲ的朋友有所帮助。
💻 CPP
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
    tprintf ("FP fixspace working on \"%s\"\n",      word_res->best_choice->string ().string ());  }  #endif  gblob_sort_list ((PBLOB_LIST *) word_res->word->rej_cblob_list (), FALSE);  sub_word_list_it.add_after_stay_put (word_res_it.extract ());  fix_noisy_space_list(sub_word_list, row);   new_length = sub_word_list.length ();  word_res_it.add_list_before (&sub_word_list);  for (; (!word_res_it.at_last () && (new_length > 1)); new_length--) {    word_res_it.forward ();  }}void fix_noisy_space_list(WERD_RES_LIST &best_perm, ROW *row) {   INT16 best_score;  WERD_RES_IT best_perm_it(&best_perm);   WERD_RES_LIST current_perm;  WERD_RES_IT current_perm_it(&current_perm);   WERD_RES *old_word_res;  WERD_RES *new_word_res;  INT16 current_score;  BOOL8 improved = FALSE;                                 //default score  best_score = fp_eval_word_spacing (best_perm);  dump_words (best_perm, best_score, 1, improved);  new_word_res = new WERD_RES;  old_word_res = best_perm_it.data ();                                 //Kludge to force deep copy  old_word_res->combination = TRUE;  *new_word_res = *old_word_res; //deep copy                                 //Undo kludge  old_word_res->combination = FALSE;                                 //Undo kludge  new_word_res->combination = FALSE;  current_perm_it.add_to_end (new_word_res);  break_noisiest_blob_word(current_perm);   while ((best_score != PERFECT_WERDS) && !current_perm.empty ()) {    match_current_words(current_perm, row);     current_score = fp_eval_word_spacing (current_perm);    dump_words (current_perm, current_score, 2, improved);    if (current_score > best_score) {      best_perm.clear ();      best_perm.deep_copy (&current_perm);      best_score = current_score;      improved = TRUE;    }    if (current_score < PERFECT_WERDS)      break_noisiest_blob_word(current_perm);   }  dump_words (best_perm, best_score, 3, improved);}/************************************************************************* * 
break_noisiest_blob_word() * Find the word with the blob which looks like the worst noise. * Break the word into two, deleting the noise blob. *************************************************************************/void break_noisiest_blob_word(WERD_RES_LIST &words) {   WERD_RES_IT word_it(&words);   WERD_RES_IT worst_word_it;  float worst_noise_score = 9999;  int worst_blob_index = -1;     //noisiest blb of noisiest wd  int blob_index;                //of wds noisiest blb  float noise_score;             //of wds noisiest blb  WERD_RES *word_res;  C_BLOB_IT blob_it;  C_BLOB_IT rej_cblob_it;  C_BLOB_LIST new_blob_list;  C_BLOB_IT new_blob_it;  C_BLOB_IT new_rej_cblob_it;  WERD *new_word;  INT16 start_of_noise_blob;  INT16 i;  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {    blob_index = worst_noise_blob (word_it.data (), &noise_score);    if ((blob_index > -1) && (worst_noise_score > noise_score)) {      worst_noise_score = noise_score;      worst_blob_index = blob_index;      worst_word_it = word_it;    }  }  if (worst_blob_index < 0) {    words.clear ();              //signal termination    return;  }  /* Now split the worst_word_it */  word_res = worst_word_it.data ();  /* Move blobs before noise blob to a new bloblist */  new_blob_it.set_to_list (&new_blob_list);  blob_it.set_to_list (word_res->word->cblob_list ());  for (i = 0; i < worst_blob_index; i++, blob_it.forward ()) {    new_blob_it.add_after_then_move (blob_it.extract ());  }  start_of_noise_blob = blob_it.data ()->bounding_box ().left ();  delete blob_it.extract ();     //throw out noise blb  new_word = new WERD (&new_blob_list, word_res->word);  new_word->set_flag (W_EOL, FALSE);  word_res->word->set_flag (W_BOL, FALSE);  word_res->word->set_blanks (1);//After break  new_rej_cblob_it.set_to_list (new_word->rej_cblob_list ());  rej_cblob_it.set_to_list (word_res->word->rej_cblob_list ());  for (;    (!rej_cblob_it.empty () &&    (rej_cblob_it.data ()->bounding_box 
().left () <  start_of_noise_blob)); rej_cblob_it.forward ()) {    new_rej_cblob_it.add_after_then_move (rej_cblob_it.extract ());  }  worst_word_it.add_before_then_move (new WERD_RES (new_word));  word_res->done = FALSE;  if (word_res->outword != NULL) {    delete word_res->outword;    delete word_res->best_choice;    delete word_res->raw_choice;    word_res->outword = NULL;    word_res->best_choice = NULL;    word_res->raw_choice = NULL;  }}INT16 worst_noise_blob(WERD_RES *word_res, float *worst_noise_score) {   PBLOB_IT blob_it;  INT16 blob_count;  float noise_score[512];  int i;  int min_noise_blob;            //1st contender  int max_noise_blob;            //last contender  int non_noise_count;  int worst_noise_blob;          //Worst blob  float small_limit = bln_x_height * fixsp_small_outlines_size;  float non_noise_limit = bln_x_height * 0.8;  blob_it.set_to_list (word_res->outword->blob_list ());  //normalised  blob_count = blob_it.length ();  ASSERT_HOST (blob_count <= 512);  if (blob_count < 5)    return -1;                   //too short to split  /* Get the noise scores for all blobs */  #ifndef SECURE_NAMES  if (debug_fix_space_level > 5)    tprintf ("FP fixspace Noise metrics for \"%s\": ",      word_res->best_choice->string ().string ());  #endif  for (i = 0; i < blob_count; i++, blob_it.forward ()) {    if (word_res->reject_map[i].accepted ())      noise_score[i] = non_noise_limit;    else      noise_score[i] = blob_noise_score (blob_it.data ());    if (debug_fix_space_level > 5)      tprintf ("%1.1f ", noise_score[i]);  }  if (debug_fix_space_level > 5)    tprintf ("\n");  /* Now find the worst one which is far enough away from the end of the word */  non_noise_count = 0;  for (i = 0;  (i < blob_count) && (non_noise_count < fixsp_non_noise_limit); i++) {    if (noise_score[i] >= non_noise_limit)      non_noise_count++;  }  if (non_noise_count < fixsp_non_noise_limit)    return -1;  min_noise_blob = i;  non_noise_count = 0;  for (i = blob_count - 1;  
(i >= 0) && (non_noise_count < fixsp_non_noise_limit); i--) {    if (noise_score[i] >= non_noise_limit)      non_noise_count++;  }  if (non_noise_count < fixsp_non_noise_limit)    return -1;  max_noise_blob = i;  if (min_noise_blob > max_noise_blob)    return -1;  *worst_noise_score = small_limit;  worst_noise_blob = -1;  for (i = min_noise_blob; i <= max_noise_blob; i++) {    if (noise_score[i] < *worst_noise_score) {      worst_noise_blob = i;      *worst_noise_score = noise_score[i];    }  }  return worst_noise_blob;}float blob_noise_score(PBLOB *blob) {   OUTLINE_IT outline_it;  BOX box;                       //BB of outline  INT16 outline_count = 0;  INT16 max_dimension;  INT16 largest_outline_dimension = 0;  outline_it.set_to_list (blob->out_list ());  for (outline_it.mark_cycle_pt ();  !outline_it.cycled_list (); outline_it.forward ()) {    outline_count++;    box = outline_it.data ()->bounding_box ();    if (box.height () > box.width ())      max_dimension = box.height ();    else      max_dimension = box.width ();    if (largest_outline_dimension < max_dimension)      largest_outline_dimension = max_dimension;  }  if (fixsp_noise_score_fixing) {    if (outline_count > 5)                                 //penalise LOTS of blobs      largest_outline_dimension *= 2;    box = blob->bounding_box ();    if ((box.bottom () > bln_baseline_offset * 4) ||      (box.top () < bln_baseline_offset / 2))                                 //Lax blob is if high or low      largest_outline_dimension /= 2;  }  return largest_outline_dimension;}void fixspace_dbg(WERD_RES *word) {   BOX box = word->word->bounding_box ();  BOOL8 show_map_detail = FALSE;  INT16 i;  box.print ();  #ifndef SECURE_NAMES  tprintf (" \"%s\" ", word->best_choice->string ().string ());  tprintf ("Blob count: %d (word); %d/%d (outword)\n",    word->word->gblob_list ()->length (),    word->outword->gblob_list ()->length (),    word->outword->rej_blob_list ()->length ());  word->reject_map.print (debug_fp); 
 tprintf ("\n");  if (show_map_detail) {    tprintf ("\"%s\"\n", word->best_choice->string ().string ());    for (i = 0; word->best_choice->string ()[i] != '\0'; i++) {      tprintf ("**** \"%c\" ****\n", word->best_choice->string ()[i]);      word->reject_map[i].full_print (debug_fp);    }  }  tprintf ("Tess Accepted: %s\n", word->tess_accepted ? "TRUE" : "FALSE");  tprintf ("Done flag: %s\n\n", word->done ? "TRUE" : "FALSE");  #endif}/************************************************************************* * fp_eval_word_spacing() * Evaluation function for fixed pitch word lists. * * Basically, count the number of "nice" characters - those which are in tess * acceptable words or in dict words and are not rejected. * Penalise any potential noise chars *************************************************************************/INT16 fp_eval_word_spacing(WERD_RES_LIST &word_res_list) {   WERD_RES_IT word_it(&word_res_list);   WERD_RES *word;  PBLOB_IT blob_it;  INT16 word_length;  INT16 score = 0;  INT16 i;  const char *chs;  float small_limit = bln_x_height * fixsp_small_outlines_size;  if (!fixsp_fp_eval)    return (eval_word_spacing (word_res_list));  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {    word = word_it.data ();    word_length = word->reject_map.length ();    chs = word->best_choice->string ().string ();    if ((word->done ||      word->tess_accepted) ||      (word->best_choice->permuter () == SYSTEM_DAWG_PERM) ||      (word->best_choice->permuter () == FREQ_DAWG_PERM) ||      (word->best_choice->permuter () == USER_DAWG_PERM) ||    (safe_dict_word (chs) > 0)) {      blob_it.set_to_list (word->outword->blob_list ());      for (i = 0; i < word_length; i++, blob_it.forward ()) {        if ((chs[i] == ' ') ||          (blob_noise_score (blob_it.data ()) < small_limit))          score -= 1;            //penalise possibly erroneous non-space        else if (word->reject_map[i].accepted ())          score++;      }    }  }  if 
(score < 0)    score = 0;  return score;}
上一页 1 23
💿 文件大小 2763 K
👤 上传用户 danlong
📂 所属分类其他书籍
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -