control.cpp

来自「一个google的OCR源码」· C++ 代码 · 共 1,843 行 · 第 1/5 页

CPP
1,843
字号
                         WERD_RES *word,                         ROW *row) {  BOOL8 done_this_pass = FALSE;  WERD_RES new_x_ht_word (word->word);  float new_x_ht = 0.0;  inT16 old_xht_reject_count;  inT16 new_xht_reject_count;  inT16 old_xht_accept_count;  inT16 new_xht_accept_count;  BOOL8 accept_new_x_ht = FALSE;  inT16 old_chs_in_wd;  inT16 new_chs_in_wd;  inT16 old_word_quality;  inT16 new_word_quality;  inT16 dummy;  set_global_subloc_code(SUBLOC_NORM);  check_debug_pt (word, 30);  if (!word->done ||    tessedit_training_tess ||  tessedit_training_wiseowl || tessedit_dump_choices) {    word->caps_height = 0.0;    if (word->x_height == 0.0f)      word->x_height = row->x_height();    if (word->outword != NULL) {      delete word->outword;      //get rid of junk      delete word->best_choice;      delete word->raw_choice;    }    match_word_pass2 (word, row, word->x_height);    done_this_pass = TRUE;    check_debug_pt (word, 40);  }  if (!word->tess_failed && !word->word->flag (W_REP_CHAR)) {    set_global_subloc_code(SUBLOC_FIX_XHT);    if ((tessedit_xht_fiddles_on_done_wds || !word->done) &&      (tessedit_xht_fiddles_on_no_rej_wds ||    (word->reject_map.reject_count () > 0))) {      if ((x_ht_check_word_occ >= 2) && word_occ_first)        check_block_occ(word);      if (tessedit_redo_xheight)        re_estimate_x_ht(word, &new_x_ht);      if (((x_ht_check_word_occ >= 2) && !word_occ_first) ||        ((x_ht_check_word_occ >= 1) && (new_x_ht > 0)))        check_block_occ(word);    }    if (new_x_ht > 0) {      old_chs_in_wd = word->reject_map.length ();      /* Re-estimated x_ht error suggests a rematch is worthwhile. */      new_x_ht_word.x_height = new_x_ht;      new_x_ht_word.caps_height = 0.0;      match_word_pass2 (&new_x_ht_word, row, new_x_ht_word.x_height);      if (!new_x_ht_word.tess_failed) {        if ((x_ht_check_word_occ >= 1) && word_occ_first)          check_block_occ(&new_x_ht_word);        re_estimate_x_ht(&new_x_ht_word, &new_x_ht);        if ((x_ht_check_word_occ >= 1) && !word_occ_first)          check_block_occ(&new_x_ht_word);        old_xht_reject_count = word->reject_map.reject_count ();        old_xht_accept_count = old_chs_in_wd - old_xht_reject_count;        new_xht_reject_count = new_x_ht_word.reject_map.reject_count ();        new_chs_in_wd = new_x_ht_word.reject_map.length ();        new_xht_accept_count = new_chs_in_wd - new_xht_reject_count;        accept_new_x_ht =          ((new_xht_accept_count > old_xht_accept_count) ||          ((new_xht_accept_count == old_xht_accept_count) &&          (new_xht_accept_count > 0))) &&          (!new_x_ht_word.guessed_x_ht ||          !new_x_ht_word.guessed_caps_ht);        if (accept_new_x_ht && x_ht_quality_check) {          word_char_quality(word, row, &old_word_quality, &dummy);          word_char_quality(&new_x_ht_word, row, &new_word_quality, &dummy);          if (old_word_quality > new_word_quality)            accept_new_x_ht = FALSE;        }        if (accept_new_x_ht && (x_ht_stringency > 0)) {          accept_new_x_ht =            (count_alphanums (&new_x_ht_word) > x_ht_stringency);          if (!accept_new_x_ht && rej_use_xht) {            if (debug_x_ht_level >= 1)              tprintf                ("Failed stringency test so reject original word\n");            word->reject_map.rej_word_xht_fixup ();          }        }        #ifndef SECURE_NAMES        if (debug_x_ht_level >= 1) {          tprintf ("New XHT Match:: %s ",            word->best_choice->string ().string ());          word->reject_map.print (debug_fp);          tprintf (" -> %s ",            new_x_ht_word.best_choice->string ().string ());          new_x_ht_word.reject_map.print (debug_fp);          tprintf (" %s->%s %s %s\n",            word->guessed_x_ht ? "GUESS" : "CERT",            new_x_ht_word.guessed_x_ht ? "GUESS" : "CERT",            new_x_ht > 0.1 ? "STILL DOUBT" : "OK",            accept_new_x_ht ? "ACCEPTED" : "");        }        #endif      }      if (accept_new_x_ht) {        /*           The new x_ht is deemed superior so put the final results in the real word           and destroy the old results         */        delete word->outword;    //get rid of junk        word->outword = new_x_ht_word.outword;        word->denorm = new_x_ht_word.denorm;        delete word->best_choice;        word->best_choice = new_x_ht_word.best_choice;        delete word->raw_choice;        word->raw_choice = new_x_ht_word.raw_choice;        word->reject_map = new_x_ht_word.reject_map;        word->done = new_x_ht_word.done;        done_this_pass = TRUE;      }      else {      /*         The new x_ht is no better, so destroy the copy word and put any uncertain         x or cap ht estimate back to default. (I.e. dont blame me if its bad!)         Conditionally, use any ammended block occ chars.       */                                 //get rid of junk        delete new_x_ht_word.outword;        delete new_x_ht_word.best_choice;        delete new_x_ht_word.raw_choice;      }                                 //to keep new destructor happy      new_x_ht_word.outword = NULL;                                 //to keep new destructor happy      new_x_ht_word.best_choice = NULL;                                 //to keep new destructor happy      new_x_ht_word.raw_choice = NULL;      if (rej_mostly_reject_mode == 2) {        reject_mostly_rejects(word);        tprintf ("Rejecting mostly rejects on %s ",          word->best_choice->string ().string ());      }    }    set_global_subloc_code(SUBLOC_NORM);    if (done_this_pass && !word->done && tessedit_save_stats)      SaveBadWord (word->best_choice->string ().string (),        word->best_choice->certainty ());    record_certainty (word->best_choice->certainty (), 2);    //accounting  }#ifndef GRAPHICS_DISABLED  if (tessedit_draw_outwords) {    if (fx_win == NULL)      create_fx_win();    clear_fx_win();    word->outword->plot (fx_win);    TBOX wbox = word->outword->bounding_box();    fx_win->ZoomToRectangle(wbox.left(), wbox.top(),                            wbox.right(), wbox.bottom());    //make_picture_current(fx_win);    ScrollView::Update();  }#endif  set_global_subloc_code(SUBLOC_NORM);#if 0  if (tessedit_print_text) {    write_cooked_text (word->outword, word->best_choice->string (),      word->done, done_this_pass, stdout);  }#endif  check_debug_pt (word, 50);}/********************************************************************** * match_word_pass2 * * Baseline normalize the word and pass it to Tess. **********************************************************************/void match_word_pass2(                 //recog one word                      WERD_RES *word,  //word to do                      ROW *row,                      float x_height) {  WERD *bln_word;                //baseline norm copy                                 //detailed results  BLOB_CHOICE_LIST_CLIST local_blob_choices;  BLOB_CHOICE_LIST_CLIST *blob_choices;  if (save_best_choices)    blob_choices = new BLOB_CHOICE_LIST_CLIST();  else    blob_choices = &local_blob_choices;  set_global_subsubloc_code(SUBSUBLOC_OTHER);  if (matcher_fp != NULL) {    word_answer = (char *) word->word->text ();    if (word_answer != NULL && word_answer[0] == '\0')      word_answer = NULL;  }  matcher_pass = 0;  bln_word = make_bln_copy (word->word, row, x_height, &word->denorm);  set_global_subsubloc_code(SUBSUBLOC_TESS);  if (tessedit_training_tess)    word->best_choice = correct_segment_pass2 (bln_word,      &word->denorm,      tess_default_matcher,      tess_training_tester,      word->raw_choice,      blob_choices, word->outword);  else if (tessedit_dump_choices)    word->best_choice = test_segment_pass2 (bln_word,        &word->denorm,        tess_default_matcher,        choice_dump_tester,        word->raw_choice,        blob_choices, word->outword);  //      else if (tessedit_training_wiseowl)  //              best_choice=correct_segment_pass2( word, &denorm,  //                                                                                                        tess_default_matcher,wo_learn,  //                                                                                                        raw_choice,blob_choices,outword);  //      else if (tessedit_matcher_is_wiseowl)  //              best_choice=tess_segment_pass2( word, &denorm, wo_classify,  //                                                                                                raw_choice, blob_choices, outword);  else {    word->best_choice = tess_segment_pass2 (bln_word, &word->denorm,      tess_default_matcher,      word->raw_choice, blob_choices,      word->outword);  }  set_global_subsubloc_code(SUBSUBLOC_OTHER);  /*     Test for TESS screw up on word. Recog_word has already ensured that the     choice list, outword blob lists and best_choice string are the same     length. A TESS screw up is indicated by a blank filled or 0 length string.   */  if ((word->best_choice->string ().length () == 0) ||    (strspn (word->best_choice->string ().string (), " ") ==  word->best_choice->string ().length ())) {    word->tess_failed = TRUE;    word->reject_map.initialise (word->best_choice->string ().length ());    word->reject_map.rej_word_tess_failure ();    //              tprintf("Empty word produced\n");  }  else {    if ((word->best_choice->lengths ().length () !=      word->outword->blob_list ()->length ()) ||    (word->best_choice->lengths ().length () != blob_choices->length ())) {      tprintf        ("ASSERT FAIL String:\"%s\"; Strlen=%d; #Blobs=%d; #Choices=%d\n",        word->best_choice->string ().string (),        word->best_choice->lengths ().length (),        word->outword->blob_list ()->length (), blob_choices->length ());    }    ASSERT_HOST (word->best_choice->lengths ().length () ==      word->outword->blob_list ()->length ());    ASSERT_HOST (word->best_choice->lengths ().length () ==      blob_choices->length ());    word->tess_failed = FALSE;    if (word->word->flag (W_REP_CHAR)) {      fix_rep_char(word);    }    else {      fix_quotes (word->best_choice,        word->outword, blob_choices);      if (tessedit_fix_hyphens)        fix_hyphens (word->best_choice,          word->outword, blob_choices);      /* Dont trust fix_quotes! - though I think I've fixed the bug */      if ((word->best_choice->lengths ().length () !=           word->outword->blob_list ()->length ()) ||          (word->best_choice->lengths ().length () !=           blob_choices->length ())) {        #ifndef SECURE_NAMES        tprintf          ("POST FIX_QUOTES FAIL String:\"%s\"; Strlen=%d; #Blobs=%d; #Choices=%d\n",           word->best_choice->string ().string (),           word->best_choice->lengths ().length (),           word->outword->blob_list ()->length (),           blob_choices->length ());        #endif      }      ASSERT_HOST (word->best_choice->lengths ().length () ==        word->outword->blob_list ()->length ());      ASSERT_HOST (word->best_choice->lengths ().length () ==        blob_choices->length ());      word->tess_accepted = tess_acceptable_word (word->best_choice,        word->raw_choice);      make_reject_map (word, blob_choices, row, 2);    }  }  // Save best choices in the WERD_CHOICE if needed  if (blob_choices != &local_blob_choices)    word->best_choice->set_blob_choices(blob_choices);  else    blob_choices->deep_clear();  delete bln_word;  assert (word->raw_choice != NULL);}/************************************************************************* * fix_rep_char() * The word is a repeated char. Find the repeated char character. Make a reject * string which rejects any char other than the voted char. Set the word to done * to stop rematching it. * *************************************************************************/void fix_rep_char(                //Repeated char word                  WERD_RES *word  //word to do                 ) {  struct REP_CH  {    char ch[UNICHAR_LEN + 1];    int count;  };  REP_CH *rep_ch;                //array of char counts  int word_len;  int rep_ch_count = 0;          //how many unique chs  const char *word_str;          //the repeated chs  int i, j;  int offset;  int total = 0;  int max = 0;  char *maxch = NULL;              //Most common char  word_str = word->best_choice->string ().string ();  word_len = word->best_choice->lengths ().length ();;  rep_ch = (REP_CH *) alloc_mem (word_len * sizeof (REP_CH));  for (i = 0, offset = 0; i < word_len;       offset += word->best_choice->lengths()[i++]) {    for (j = 0; j < rep_ch_count &&             strncmp(rep_ch[j].ch, word_str + offset,                     word->best_choice->lengths()[i]) != 0; j++);    if (j < rep_ch_count)      rep_ch[j].count++;    else {      strncpy(rep_ch[rep_ch_count].ch, word_str + offset,              word->best_choice->lengths()[i]);      rep_ch[rep_ch_count].ch[word->best_choice->lengths()[i]] = '\0';      rep_ch[rep_ch_count].count = 1;      rep_ch_count++;    }  }  for (j = 0; j < rep_ch_count; j++) {    total += rep_ch[j].count;    if ((rep_ch[j].count > max) && (*rep_ch[j].ch != ' ')) {      max = rep_ch[j].count;      maxch = rep_ch[j].ch;    }

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?