⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 control.cpp

📁 一OCR的相关资料。希望对研究OCR的朋友有所帮助。
💻 CPP
📖 第 1 页 / 共 4 页
字号:
      reject_suspect_fullstops (page_res_it.word ());    page_res_it.rej_stat_word ();    chars_in_word = page_res_it.word ()->reject_map.length ();    rejects_in_word = page_res_it.word ()->reject_map.reject_count ();    blob_quality = word_blob_quality (page_res_it.word (),      page_res_it.row ()->row);    doc_blob_quality += blob_quality;    outline_errs = word_outline_errs (page_res_it.word ());    doc_outline_errs += outline_errs;    word_char_quality (page_res_it.word (),      page_res_it.row ()->row,      &all_char_quality, &accepted_all_char_quality);    doc_char_quality += all_char_quality;    permuter_type = page_res_it.word ()->best_choice->permuter ();    if ((permuter_type == SYSTEM_DAWG_PERM) ||      (permuter_type == FREQ_DAWG_PERM) ||    (permuter_type == USER_DAWG_PERM)) {      good_char_count += chars_in_word - rejects_in_word;      doc_good_char_quality += accepted_all_char_quality;    }    check_debug_pt (page_res_it.word (), 80);    if (tessedit_reject_bad_qual_wds &&      (blob_quality == 0) && (outline_errs >= chars_in_word))      page_res_it.word ()->reject_map.rej_word_bad_quality ();    check_debug_pt (page_res_it.word (), 90);    page_res_it.forward ();  }  page_res_it.restart_page ();  while (!tessedit_test_adaption  && tessedit_cluster_adapt_after_pass3 && page_res_it.word () != NULL) {    if (monitor != NULL)      monitor->ocr_alive = TRUE;    if (tessedit_cluster_adaption_mode != 0)      adapt_to_good_samples (page_res_it.word (),        &char_clusters, &chars_waiting);    page_res_it.forward ();  }  #ifndef SECURE_NAMES  if (tessedit_debug_quality_metrics) {    tprintf      ("QUALITY: num_chs= %d  num_rejs= %d %5.3f blob_qual= %d %5.3f outline_errs= %d %5.3f char_qual= %d %5.3f good_ch_qual= %d %5.3f\n",      page_res->char_count, page_res->rej_count,      page_res->rej_count / (float) page_res->char_count, doc_blob_quality,      doc_blob_quality / (float) page_res->char_count, doc_outline_errs,      doc_outline_errs / (float) 
page_res->char_count, doc_char_quality,      doc_char_quality / (float) page_res->char_count,      doc_good_char_quality,      good_char_count >      0 ? doc_good_char_quality / (float) good_char_count : 0.0);  }  #endif  good_quality_doc =    (page_res->rej_count / (float) page_res->char_count <= quality_rej_pc)    &&    (doc_blob_quality / (float) page_res->char_count >= quality_blob_pc) &&    (doc_outline_errs / (float) page_res->char_count <= quality_outline_pc) &&    (doc_char_quality / (float) page_res->char_count >= quality_char_pc);  /* Do whole document or whole block rejection pass*/  if (!tessedit_test_adaption) {    set_global_loc_code(LOC_DOC_BLK_REJ);    quality_based_rejection(page_res_it, good_quality_doc);  }  font_recognition_pass(page_res_it);  /* Write results pass */  set_global_loc_code(LOC_WRITE_RESULTS);  // This is now redundant, but retained commented so show how to obtain  // bounding boxes and style information.  // output_pass (page_res_it, false);}/********************************************************************** * classify_word_pass1 * * Baseline normalize the word and pass it to Tess. 
**********************************************************************/void classify_word_pass1(                 //recog one word                         WERD_RES *word,  //word to do                         ROW *row,                         BOOL8 cluster_adapt,                         CHAR_SAMPLES_LIST *char_clusters,                         CHAR_SAMPLE_LIST *chars_waiting) {  WERD *bln_word;                //baseline norm copy                                 //detailed results  BLOB_CHOICE_LIST_CLIST blob_choices;  BOOL8 adapt_ok;  const char *rejmap;  INT16 index;  STRING mapstr = "";  char *match_string;  char word_string[1024];  if (matcher_fp != NULL) {    fgets (word_string, 1023, correct_fp);    if ((match_string = strchr (word_string, '\r')) != NULL)      *match_string = '\0';    if ((match_string = strchr (word_string, '\n')) != NULL)      *match_string = '\0';    if (word_string[0] != '\0') {      word->word->set_text (word_string);      word_answer = (char *) word->word->text ();    }    else      word_answer = NULL;  }  check_debug_pt (word, 0);  matcher_pass = 0;  bln_word = make_bln_copy (word->word, row, row->x_height (), &word->denorm);  word->best_choice = tess_segment_pass1 (bln_word, &word->denorm,    tess_default_matcher,    word->raw_choice, &blob_choices,    word->outword);  /*     Test for TESS screw up on word. Recog_word has already ensured that the     choice list, outword blob lists and best_choice string are the same     length. A TESS screw up is indicated by a blank filled or 0 length string.   
*/  if ((word->best_choice->string ().length () == 0) ||    (strspn (word->best_choice->string ().string (), " ") ==  word->best_choice->string ().length ())) {    word->done = FALSE;          //Try again on pass2 - adaption may help    word->tess_failed = TRUE;    word->reject_map.initialise (word->best_choice->string ().length ());    word->reject_map.rej_word_tess_failure ();  }  else {    word->tess_failed = FALSE;    if ((word->best_choice->string ().length () !=      word->outword->blob_list ()->length ()) ||    (word->best_choice->string ().length () != blob_choices.length ())) {      tprintf        ("ASSERT FAIL String:\"%s\"; Strlen=%d; #Blobs=%d; #Choices=%d\n",        word->best_choice->string ().string (),        word->best_choice->string ().length (),        word->outword->blob_list ()->length (), blob_choices.length ());    }    ASSERT_HOST (word->best_choice->string ().length () ==      word->outword->blob_list ()->length ());    ASSERT_HOST (word->best_choice->string ().length () ==      blob_choices.length ());    /*       The adaption step used to be here. It has been moved to after       make_reject_map so that we know whether the word will be accepted in the       first pass or not.   This move will PREVENT adaption to words containing       double quotes because the word will not be identical to what tess thinks       its best choice is. 
(See CurrentBestChoiceIs in       danj/microfeatures/stopper.c which is used by AdaptableWord in       danj/microfeatures/adaptmatch.c)     */    if (word->word->flag (W_REP_CHAR)) {      fix_rep_char(word);    }    else {      fix_quotes ((char *) word->best_choice->string ().string (),      //turn to double        word->outword, &blob_choices);      if (tessedit_fix_hyphens)                                 //turn 2 to 1        fix_hyphens ((char *) word->best_choice->string ().string (), word->outword, &blob_choices);      record_certainty (word->best_choice->certainty (), 1);      //accounting      word->tess_accepted = tess_acceptable_word (word->best_choice,        word->raw_choice);      word->tess_would_adapt = tess_adaptable_word (word->outword,        word->best_choice,        word->raw_choice);                                 // Also sets word->done flag      make_reject_map (word, &blob_choices, row, 1);      adapt_ok = word_adaptable (word, tessedit_tess_adaption_mode);      if (cluster_adapt)        adapt_to_good_samples(word, char_clusters, chars_waiting);      if (adapt_ok || tessedit_tess_adapt_to_rejmap) {        if (!tessedit_tess_adapt_to_rejmap)          rejmap = NULL;        else {          ASSERT_HOST (word->reject_map.length () ==            word->best_choice->string ().length ());          for (index = 0; index < word->reject_map.length (); index++) {            if (adapt_ok || word->reject_map[index].accepted ())              mapstr += '1';            else              mapstr += '0';          }          rejmap = mapstr.string ();        }                                 //adapt to it        tess_adapter (word->outword, &word->denorm, word->best_choice->string ().string (), word->raw_choice->string ().string (), rejmap);      }      if (tessedit_enable_doc_dict)        tess_add_doc_word (word->best_choice);      set_word_fonts(word, &blob_choices);    }  }  if (tessedit_print_text) {    write_cooked_text (bln_word, word->best_choice->string 
(),      word->done, FALSE, stdout);  }  delete bln_word;  blob_choices.deep_clear ();}/********************************************************************** * classify_word_pass2 * * Control what to do with the word in pass 2 **********************************************************************/void classify_word_pass2(  //word to do                         WERD_RES *word,                         ROW *row) {  BOOL8 done_this_pass = FALSE;  WERD_RES new_x_ht_word (word->word);  float new_x_ht = 0.0;  INT16 old_xht_reject_count;  INT16 new_xht_reject_count;  INT16 old_xht_accept_count;  INT16 new_xht_accept_count;  BOOL8 accept_new_x_ht = FALSE;  INT16 old_chs_in_wd;  INT16 new_chs_in_wd;  INT16 old_word_quality;  INT16 new_word_quality;  INT16 dummy;  set_global_subloc_code(SUBLOC_NORM);  check_debug_pt (word, 30);  if (!word->done ||    tessedit_training_tess ||  tessedit_training_wiseowl || tessedit_dump_choices) {    word->x_height = row->x_height ();    word->caps_height = 0.0;    if (word->outword != NULL) {      delete word->outword;      //get rid of junk      delete word->best_choice;      delete word->raw_choice;    }    match_word_pass2 (word, row, row->x_height ());    done_this_pass = TRUE;    check_debug_pt (word, 40);  }  if (!word->tess_failed && !word->word->flag (W_REP_CHAR)) {    set_global_subloc_code(SUBLOC_FIX_XHT);    if ((tessedit_xht_fiddles_on_done_wds || !word->done) &&      (tessedit_xht_fiddles_on_no_rej_wds ||    (word->reject_map.reject_count () > 0))) {      if ((x_ht_check_word_occ >= 2) && word_occ_first)        check_block_occ(word);      if (tessedit_redo_xheight)        re_estimate_x_ht(word, &new_x_ht);      if (((x_ht_check_word_occ >= 2) && !word_occ_first) ||        ((x_ht_check_word_occ >= 1) && (new_x_ht > 0)))        check_block_occ(word);    }    if (new_x_ht > 0) {      old_chs_in_wd = word->reject_map.length ();      /* Re-estimated x_ht error suggests a rematch is worthwhile. 
*/      new_x_ht_word.x_height = new_x_ht;      new_x_ht_word.caps_height = 0.0;      match_word_pass2 (&new_x_ht_word, row, new_x_ht_word.x_height);      if (!new_x_ht_word.tess_failed) {        if ((x_ht_check_word_occ >= 1) && word_occ_first)          check_block_occ(&new_x_ht_word);        re_estimate_x_ht(&new_x_ht_word, &new_x_ht);        if ((x_ht_check_word_occ >= 1) && !word_occ_first)          check_block_occ(&new_x_ht_word);        old_xht_reject_count = word->reject_map.reject_count ();        old_xht_accept_count = old_chs_in_wd - old_xht_reject_count;        new_xht_reject_count = new_x_ht_word.reject_map.reject_count ();        new_chs_in_wd = new_x_ht_word.reject_map.length ();        new_xht_accept_count = new_chs_in_wd - new_xht_reject_count;        accept_new_x_ht =          ((new_xht_accept_count > old_xht_accept_count) ||          ((new_xht_accept_count == old_xht_accept_count) &&          (new_xht_accept_count > 0))) &&          (!new_x_ht_word.guessed_x_ht ||          !new_x_ht_word.guessed_caps_ht);        if (accept_new_x_ht && x_ht_quality_check) {          word_char_quality(word, row, &old_word_quality, &dummy);          word_char_quality(&new_x_ht_word, row, &new_word_quality, &dummy);          if (old_word_quality > new_word_quality)            accept_new_x_ht = FALSE;        }        if (accept_new_x_ht && (x_ht_stringency > 0)) {          accept_new_x_ht =            (count_alphanums (&new_x_ht_word) > x_ht_stringency);          if (!accept_new_x_ht && rej_use_xht) {            if (debug_x_ht_level >= 1)              tprintf                ("Failed stringency test so reject original word\n");            word->reject_map.rej_word_xht_fixup ();          }        }        #ifndef SECURE_NAMES        if (debug_x_ht_level >= 1) {          tprintf ("New XHT Match:: %s ",            word->best_choice->string ().string ());          word->reject_map.print (debug_fp);          tprintf (" -> %s ",            new_x_ht_word.best_choice->string 
().string ());          new_x_ht_word.reject_map.print (debug_fp);          tprintf (" %s->%s %s %s\n",            word->guessed_x_ht ? "GUESS" : "CERT",            new_x_ht_word.guessed_x_ht ? "GUESS" : "CERT",            new_x_ht > 0.1 ? "STILL DOUBT" : "OK",            accept_new_x_ht ? "ACCEPTED" : "");        }        #endif      }      if (accept_new_x_ht) {        /*           The new x_ht is deemed superior so put the final results in the real word           and destroy the old results         */        delete word->outword;    //get rid of junk        word->outword = new_x_ht_word.outword;        word->denorm = new_x_ht_word.denorm;        delete word->best_choice;        word->best_choice = new_x_ht_word.best_choice;        delete word->raw_choice;        word->raw_choice = new_x_ht_word.raw_choice;        word->reject_map = new_x_ht_word.reject_map;        word->done = new_x_ht_word.done;        done_this_pass = TRUE;      }      else {      /*         The new x_ht is no better, so destroy the copy word and put any uncertain         x or cap ht estimate back to default. (I.e. dont blame me if its bad!)         Conditionally, use any ammended block occ chars.       
*/                                 //get rid of junk        delete new_x_ht_word.outword;        delete new_x_ht_word.best_choice;        delete new_x_ht_word.raw_choice;      }                                 //to keep new destructor happy      new_x_ht_word.outword = NULL;                                 //to keep new destructor happy      new_x_ht_word.best_choice = NULL;                                 //to keep new destructor happy      new_x_ht_word.raw_choice = NULL;      if (rej_mostly_reject_mode == 2) {        reject_mostly_rejects(word);        tprintf ("Rejecting mostly rejects on %s ",          word->best_choice->string ().string ());      }    }    set_global_subloc_code(SUBLOC_NORM);    if (done_this_pass && !word->done && tessedit_save_stats)      SaveBadWord (word->best_choice->string ().string (),        word->best_choice->certainty ());    record_certainty (word->best_choice->certainty (), 2);    //accounting  }#ifndef GRAPHICS_DISABLED  if (tessedit_draw_outwords) {    if (fx_win == NO_WINDOW)      create_fx_win();    clear_fx_win();    word->outword->plot (fx_win);    make_picture_current(fx_win);  }#endif  set_global_subloc_code(SUBLOC_NORM);  if (tessedit_print_text) {    write_cooked_text (word->outword, word->best_choice->string (),      word->done, done_this_pass, stdout);  }  check_debug_pt (word, 50);}/********************************************************************** * match_word_pass2 * * Baseline normalize the word and pass it to Tess. **********************************************************************/void match_word_pass2(                 //recog one word                      WERD_RES *word,  //word to do                      ROW *row,                      float x_height) {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -