control.cpp

来自「一个google的OCR源码」· C++ 代码 · 共 1,843 行 · 第 1/5 页

CPP
1,843
字号
  /* Pass 2 */  page_res_it.restart_page ();  word_index = 0;  while (!tessedit_test_adaption && page_res_it.word () != NULL) {    set_global_loc_code(LOC_PASS2);    word_index++;    if (monitor != NULL) {      monitor->ocr_alive = TRUE;      monitor->progress = 80 + 10 * word_index / word_count;      if ((monitor->end_time != 0 && clock() > monitor->end_time) ||          (monitor->cancel != NULL && (*monitor->cancel)(monitor->cancel_this,                                                         dict_words)))        return;    }//changed by jetsoft//specific to its needs to extract one word when need	if (target_word_box)	{		TBOX current_word_box=page_res_it.word ()->word->bounding_box();		FCOORD center_pt((current_word_box.right()+current_word_box.left())/2,(current_word_box.bottom()+current_word_box.top())/2);		if (!target_word_box->contains(center_pt))		{			page_res_it.forward ();			continue;		}	}//end jetsoft    classify_word_pass2 (page_res_it.word (), page_res_it.row ()->row);    if (tessedit_em_adaption_mode > 0)      collect_ems_for_adaption (page_res_it.word (),        &em_clusters, &ems_waiting);    if (tessedit_cluster_adapt_after_pass2      && tessedit_cluster_adaption_mode != 0)      collect_characters_for_adaption (page_res_it.word (),        &char_clusters, &chars_waiting);    page_res_it.forward ();  }  /* Another pass */  set_global_loc_code(LOC_FUZZY_SPACE);  if (!tessedit_test_adaption && tessedit_fix_fuzzy_spaces    && !tessedit_word_for_word)    fix_fuzzy_spaces(monitor, word_count, page_res);  if (!tessedit_test_adaption && tessedit_em_adaption_mode != 0)                                 // Initially ems only    print_em_stats(&em_clusters, &ems_waiting);  /* Pass 3 - used for checking confusion sets */  page_res_it.restart_page ();  word_index = 0;  while (!tessedit_test_adaption && page_res_it.word () != NULL) {    set_global_loc_code(LOC_MM_ADAPT);    word_index++;    if (monitor != NULL) {      monitor->ocr_alive = TRUE;      monitor->progress = 95 + 5 * word_index / word_count;    }    check_debug_pt (page_res_it.word (), 70);    /* Use good matches to sort out confusions *///changed by jetsoft//specific to its needs to extract one word when need	if (target_word_box)	{		TBOX current_word_box=page_res_it.word ()->word->bounding_box();		FCOORD center_pt((current_word_box.right()+current_word_box.left())/2,(current_word_box.bottom()+current_word_box.top())/2);		if (!target_word_box->contains(center_pt))		{			page_res_it.forward ();			continue;		}	}// end jetsoft    if (tessedit_em_adaption_mode != 0)      adapt_to_good_ems (page_res_it.word (), &em_clusters, &ems_waiting);    if (tessedit_cluster_adapt_after_pass2      && tessedit_cluster_adaption_mode != 0)      adapt_to_good_samples (page_res_it.word (),        &char_clusters, &chars_waiting);    if (tessedit_reject_fullstops      && strchr (page_res_it.word ()->best_choice->string ().string (),      '.') != NULL)      reject_all_fullstops (page_res_it.word ());    else if (tessedit_reject_suspect_fullstops      && strchr (page_res_it.word ()->best_choice->string ().      string (), '.') != NULL)      reject_suspect_fullstops (page_res_it.word ());    page_res_it.rej_stat_word ();    chars_in_word = page_res_it.word ()->reject_map.length ();    rejects_in_word = page_res_it.word ()->reject_map.reject_count ();    blob_quality = word_blob_quality (page_res_it.word (),      page_res_it.row ()->row);    doc_blob_quality += blob_quality;    outline_errs = word_outline_errs (page_res_it.word ());    doc_outline_errs += outline_errs;    word_char_quality (page_res_it.word (),      page_res_it.row ()->row,      &all_char_quality, &accepted_all_char_quality);    doc_char_quality += all_char_quality;    uinT8 permuter_type = page_res_it.word ()->best_choice->permuter ();    if ((permuter_type == SYSTEM_DAWG_PERM) ||      (permuter_type == FREQ_DAWG_PERM) ||    (permuter_type == USER_DAWG_PERM)) {      good_char_count += chars_in_word - rejects_in_word;      doc_good_char_quality += accepted_all_char_quality;    }    check_debug_pt (page_res_it.word (), 80);    if (tessedit_reject_bad_qual_wds &&      (blob_quality == 0) && (outline_errs >= chars_in_word))      page_res_it.word ()->reject_map.rej_word_bad_quality ();    check_debug_pt (page_res_it.word (), 90);    page_res_it.forward ();  }  page_res_it.restart_page ();  while (!tessedit_test_adaption  && tessedit_cluster_adapt_after_pass3 && page_res_it.word () != NULL) {    if (monitor != NULL)      monitor->ocr_alive = TRUE;//changed by jetsoft//specific to its needs to extract one word when need	if (target_word_box)	{		TBOX current_word_box=page_res_it.word ()->word->bounding_box();		FCOORD center_pt((current_word_box.right()+current_word_box.left())/2,(current_word_box.bottom()+current_word_box.top())/2);		if (!target_word_box->contains(center_pt))		{			page_res_it.forward ();			continue;		}	}//end jetsoft    if (tessedit_cluster_adaption_mode != 0)      adapt_to_good_samples (page_res_it.word (),        &char_clusters, &chars_waiting);    page_res_it.forward ();  }  #ifndef SECURE_NAMES  if (tessedit_debug_quality_metrics) {    tprintf      ("QUALITY: num_chs= %d  num_rejs= %d %5.3f blob_qual= %d %5.3f outline_errs= %d %5.3f char_qual= %d %5.3f good_ch_qual= %d %5.3f\n",      page_res->char_count, page_res->rej_count,      page_res->rej_count / (float) page_res->char_count, doc_blob_quality,      doc_blob_quality / (float) page_res->char_count, doc_outline_errs,      doc_outline_errs / (float) page_res->char_count, doc_char_quality,      doc_char_quality / (float) page_res->char_count,      doc_good_char_quality,      good_char_count >      0 ? doc_good_char_quality / (float) good_char_count : 0.0);  }  #endif  BOOL8 good_quality_doc =    (page_res->rej_count / (float) page_res->char_count <= quality_rej_pc)    &&    (doc_blob_quality / (float) page_res->char_count >= quality_blob_pc) &&    (doc_outline_errs / (float) page_res->char_count <= quality_outline_pc) &&    (doc_char_quality / (float) page_res->char_count >= quality_char_pc);  /* Do whole document or whole block rejection pass*/  if (!tessedit_test_adaption) {    set_global_loc_code(LOC_DOC_BLK_REJ);    quality_based_rejection(page_res_it, good_quality_doc);  }  font_recognition_pass(page_res_it);  /* Write results pass */  set_global_loc_code(LOC_WRITE_RESULTS);  // This is now redundant, but retained commented so show how to obtain  // bounding boxes and style information.////changed by jetsoft//needed for dll to output memory structure  if ((dopasses == 0 || dopasses == 2) && (monitor || tessedit_write_unlv))	output_pass(page_res_it, ocr_char_space() > 0, target_word_box);// end jetsoft}/********************************************************************** * classify_word_pass1 * * Baseline normalize the word and pass it to Tess. **********************************************************************/void classify_word_pass1(                 //recog one word                         WERD_RES *word,  //word to do                         ROW *row,                         BOOL8 cluster_adapt,                         CHAR_SAMPLES_LIST *char_clusters,                         CHAR_SAMPLE_LIST *chars_waiting) {  WERD *bln_word;                //baseline norm copy                                 //detailed results  BLOB_CHOICE_LIST_CLIST local_blob_choices;  BLOB_CHOICE_LIST_CLIST *blob_choices;  BOOL8 adapt_ok;  const char *rejmap;  inT16 index;  STRING mapstr = "";  char *match_string;  char word_string[1024];  if (save_best_choices)    blob_choices = new BLOB_CHOICE_LIST_CLIST();  else    blob_choices = &local_blob_choices;  if (matcher_fp != NULL) {    fgets (word_string, 1023, correct_fp);    if ((match_string = strchr (word_string, '\r')) != NULL)      *match_string = '\0';    if ((match_string = strchr (word_string, '\n')) != NULL)      *match_string = '\0';    if (word_string[0] != '\0') {      word->word->set_text (word_string);      word_answer = (char *) word->word->text ();    }    else      word_answer = NULL;  }  check_debug_pt (word, 0);  matcher_pass = 0;  bln_word = make_bln_copy (word->word, row, word->x_height, &word->denorm);  word->best_choice = tess_segment_pass1 (bln_word, &word->denorm,    tess_default_matcher,    word->raw_choice, blob_choices,    word->outword);  /*     Test for TESS screw up on word. Recog_word has already ensured that the     choice list, outword blob lists and best_choice string are the same     length. A TESS screw up is indicated by a blank filled or 0 length string.   */  if ((word->best_choice->lengths ().length () == 0) ||    (strspn (word->best_choice->string ().string (), " ") ==  word->best_choice->string ().length ())) {    word->done = FALSE;          //Try again on pass2 - adaption may help    word->tess_failed = TRUE;    word->reject_map.initialise (word->best_choice->lengths ().length ());    word->reject_map.rej_word_tess_failure ();  }  else {    word->tess_failed = FALSE;    if ((word->best_choice->lengths ().length () !=      word->outword->blob_list ()->length ()) ||    (word->best_choice->lengths ().length () != blob_choices->length ())) {      tprintf        ("ASSERT FAIL String:\"%s\"; Strlen=%d; #Blobs=%d; #Choices=%d\n",        word->best_choice->string ().string (),        word->best_choice->lengths ().length (),        word->outword->blob_list ()->length (), blob_choices->length ());    }    ASSERT_HOST (word->best_choice->lengths ().length () ==      word->outword->blob_list ()->length ());    ASSERT_HOST (word->best_choice->lengths ().length () ==      blob_choices->length ());    /*       The adaption step used to be here. It has been moved to after       make_reject_map so that we know whether the word will be accepted in the       first pass or not.   This move will PREVENT adaption to words containing       double quotes because the word will not be identical to what tess thinks       its best choice is. (See CurrentBestChoiceIs in       danj/microfeatures/stopper.c which is used by AdaptableWord in       danj/microfeatures/adaptmatch.c)     */    if (word->word->flag (W_REP_CHAR)) {      fix_rep_char(word);    }    else {      fix_quotes (word->best_choice,      //turn to double        word->outword, blob_choices);      if (tessedit_fix_hyphens)                                 //turn 2 to 1        fix_hyphens (word->best_choice, word->outword, blob_choices);      record_certainty (word->best_choice->certainty (), 1);      //accounting      word->tess_accepted = tess_acceptable_word (word->best_choice,        word->raw_choice);      word->tess_would_adapt = tess_adaptable_word (word->outword,        word->best_choice,        word->raw_choice);                                 // Also sets word->done flag      make_reject_map (word, blob_choices, row, 1);      adapt_ok = word_adaptable (word, tessedit_tess_adaption_mode);      if (cluster_adapt)        adapt_to_good_samples(word, char_clusters, chars_waiting);      if (adapt_ok || tessedit_tess_adapt_to_rejmap) {        if (!tessedit_tess_adapt_to_rejmap)          rejmap = NULL;        else {          ASSERT_HOST (word->reject_map.length () ==            word->best_choice->lengths ().length ());          for (index = 0; index < word->reject_map.length (); index++) {            if (adapt_ok || word->reject_map[index].accepted ())              mapstr += '1';            else              mapstr += '0';          }          rejmap = mapstr.string ();        }                                 //adapt to it        tess_adapter (word->outword, &word->denorm,                      *word->best_choice,                      *word->raw_choice, rejmap);      }      if (tessedit_enable_doc_dict)        tess_add_doc_word (word->best_choice);      set_word_fonts(word, blob_choices);    }  }#if 0  if (tessedit_print_text) {    write_cooked_text (bln_word, word->best_choice->string (),      word->done, FALSE, stdout);  }#endif  delete bln_word;  // Save best choices in the WERD_CHOICE if needed  if (blob_choices != &local_blob_choices)    word->best_choice->set_blob_choices(blob_choices);  else    blob_choices->deep_clear();}/********************************************************************** * classify_word_pass2 * * Control what to do with the word in pass 2 **********************************************************************/void classify_word_pass2(  //word to do

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?