control.cpp
来自「一个google的OCR源码」· C++ 代码 · 共 1,843 行 · 第 1/5 页
CPP
1,843 行
WERD_RES *word, ROW *row) { BOOL8 done_this_pass = FALSE; WERD_RES new_x_ht_word (word->word); float new_x_ht = 0.0; inT16 old_xht_reject_count; inT16 new_xht_reject_count; inT16 old_xht_accept_count; inT16 new_xht_accept_count; BOOL8 accept_new_x_ht = FALSE; inT16 old_chs_in_wd; inT16 new_chs_in_wd; inT16 old_word_quality; inT16 new_word_quality; inT16 dummy; set_global_subloc_code(SUBLOC_NORM); check_debug_pt (word, 30); if (!word->done || tessedit_training_tess || tessedit_training_wiseowl || tessedit_dump_choices) { word->caps_height = 0.0; if (word->x_height == 0.0f) word->x_height = row->x_height(); if (word->outword != NULL) { delete word->outword; //get rid of junk delete word->best_choice; delete word->raw_choice; } match_word_pass2 (word, row, word->x_height); done_this_pass = TRUE; check_debug_pt (word, 40); } if (!word->tess_failed && !word->word->flag (W_REP_CHAR)) { set_global_subloc_code(SUBLOC_FIX_XHT); if ((tessedit_xht_fiddles_on_done_wds || !word->done) && (tessedit_xht_fiddles_on_no_rej_wds || (word->reject_map.reject_count () > 0))) { if ((x_ht_check_word_occ >= 2) && word_occ_first) check_block_occ(word); if (tessedit_redo_xheight) re_estimate_x_ht(word, &new_x_ht); if (((x_ht_check_word_occ >= 2) && !word_occ_first) || ((x_ht_check_word_occ >= 1) && (new_x_ht > 0))) check_block_occ(word); } if (new_x_ht > 0) { old_chs_in_wd = word->reject_map.length (); /* Re-estimated x_ht error suggests a rematch is worthwhile. */ new_x_ht_word.x_height = new_x_ht; new_x_ht_word.caps_height = 0.0; match_word_pass2 (&new_x_ht_word, row, new_x_ht_word.x_height); if (!new_x_ht_word.tess_failed) { if ((x_ht_check_word_occ >= 1) && word_occ_first) check_block_occ(&new_x_ht_word); re_estimate_x_ht(&new_x_ht_word, &new_x_ht); if ((x_ht_check_word_occ >= 1) && !word_occ_first) check_block_occ(&new_x_ht_word); old_xht_reject_count = word->reject_map.reject_count (); old_xht_accept_count = old_chs_in_wd - old_xht_reject_count; new_xht_reject_count = new_x_ht_word.reject_map.reject_count (); new_chs_in_wd = new_x_ht_word.reject_map.length (); new_xht_accept_count = new_chs_in_wd - new_xht_reject_count; accept_new_x_ht = ((new_xht_accept_count > old_xht_accept_count) || ((new_xht_accept_count == old_xht_accept_count) && (new_xht_accept_count > 0))) && (!new_x_ht_word.guessed_x_ht || !new_x_ht_word.guessed_caps_ht); if (accept_new_x_ht && x_ht_quality_check) { word_char_quality(word, row, &old_word_quality, &dummy); word_char_quality(&new_x_ht_word, row, &new_word_quality, &dummy); if (old_word_quality > new_word_quality) accept_new_x_ht = FALSE; } if (accept_new_x_ht && (x_ht_stringency > 0)) { accept_new_x_ht = (count_alphanums (&new_x_ht_word) > x_ht_stringency); if (!accept_new_x_ht && rej_use_xht) { if (debug_x_ht_level >= 1) tprintf ("Failed stringency test so reject original word\n"); word->reject_map.rej_word_xht_fixup (); } } #ifndef SECURE_NAMES if (debug_x_ht_level >= 1) { tprintf ("New XHT Match:: %s ", word->best_choice->string ().string ()); word->reject_map.print (debug_fp); tprintf (" -> %s ", new_x_ht_word.best_choice->string ().string ()); new_x_ht_word.reject_map.print (debug_fp); tprintf (" %s->%s %s %s\n", word->guessed_x_ht ? "GUESS" : "CERT", new_x_ht_word.guessed_x_ht ? "GUESS" : "CERT", new_x_ht > 0.1 ? "STILL DOUBT" : "OK", accept_new_x_ht ? "ACCEPTED" : ""); } #endif } if (accept_new_x_ht) { /* The new x_ht is deemed superior so put the final results in the real word and destroy the old results */ delete word->outword; //get rid of junk word->outword = new_x_ht_word.outword; word->denorm = new_x_ht_word.denorm; delete word->best_choice; word->best_choice = new_x_ht_word.best_choice; delete word->raw_choice; word->raw_choice = new_x_ht_word.raw_choice; word->reject_map = new_x_ht_word.reject_map; word->done = new_x_ht_word.done; done_this_pass = TRUE; } else { /* The new x_ht is no better, so destroy the copy word and put any uncertain x or cap ht estimate back to default. (I.e. dont blame me if its bad!) Conditionally, use any ammended block occ chars. */ //get rid of junk delete new_x_ht_word.outword; delete new_x_ht_word.best_choice; delete new_x_ht_word.raw_choice; } //to keep new destructor happy new_x_ht_word.outword = NULL; //to keep new destructor happy new_x_ht_word.best_choice = NULL; //to keep new destructor happy new_x_ht_word.raw_choice = NULL; if (rej_mostly_reject_mode == 2) { reject_mostly_rejects(word); tprintf ("Rejecting mostly rejects on %s ", word->best_choice->string ().string ()); } } set_global_subloc_code(SUBLOC_NORM); if (done_this_pass && !word->done && tessedit_save_stats) SaveBadWord (word->best_choice->string ().string (), word->best_choice->certainty ()); record_certainty (word->best_choice->certainty (), 2); //accounting }#ifndef GRAPHICS_DISABLED if (tessedit_draw_outwords) { if (fx_win == NULL) create_fx_win(); clear_fx_win(); word->outword->plot (fx_win); TBOX wbox = word->outword->bounding_box(); fx_win->ZoomToRectangle(wbox.left(), wbox.top(), wbox.right(), wbox.bottom()); //make_picture_current(fx_win); ScrollView::Update(); }#endif set_global_subloc_code(SUBLOC_NORM);#if 0 if (tessedit_print_text) { write_cooked_text (word->outword, word->best_choice->string (), word->done, done_this_pass, stdout); }#endif check_debug_pt (word, 50);}/********************************************************************** * match_word_pass2 * * Baseline normalize the word and pass it to Tess. **********************************************************************/void match_word_pass2( //recog one word WERD_RES *word, //word to do ROW *row, float x_height) { WERD *bln_word; //baseline norm copy //detailed results BLOB_CHOICE_LIST_CLIST local_blob_choices; BLOB_CHOICE_LIST_CLIST *blob_choices; if (save_best_choices) blob_choices = new BLOB_CHOICE_LIST_CLIST(); else blob_choices = &local_blob_choices; set_global_subsubloc_code(SUBSUBLOC_OTHER); if (matcher_fp != NULL) { word_answer = (char *) word->word->text (); if (word_answer != NULL && word_answer[0] == '\0') word_answer = NULL; } matcher_pass = 0; bln_word = make_bln_copy (word->word, row, x_height, &word->denorm); set_global_subsubloc_code(SUBSUBLOC_TESS); if (tessedit_training_tess) word->best_choice = correct_segment_pass2 (bln_word, &word->denorm, tess_default_matcher, tess_training_tester, word->raw_choice, blob_choices, word->outword); else if (tessedit_dump_choices) word->best_choice = test_segment_pass2 (bln_word, &word->denorm, tess_default_matcher, choice_dump_tester, word->raw_choice, blob_choices, word->outword); // else if (tessedit_training_wiseowl) // best_choice=correct_segment_pass2( word, &denorm, // tess_default_matcher,wo_learn, // raw_choice,blob_choices,outword); // else if (tessedit_matcher_is_wiseowl) // best_choice=tess_segment_pass2( word, &denorm, wo_classify, // raw_choice, blob_choices, outword); else { word->best_choice = tess_segment_pass2 (bln_word, &word->denorm, tess_default_matcher, word->raw_choice, blob_choices, word->outword); } set_global_subsubloc_code(SUBSUBLOC_OTHER); /* Test for TESS screw up on word. Recog_word has already ensured that the choice list, outword blob lists and best_choice string are the same length. A TESS screw up is indicated by a blank filled or 0 length string. */ if ((word->best_choice->string ().length () == 0) || (strspn (word->best_choice->string ().string (), " ") == word->best_choice->string ().length ())) { word->tess_failed = TRUE; word->reject_map.initialise (word->best_choice->string ().length ()); word->reject_map.rej_word_tess_failure (); // tprintf("Empty word produced\n"); } else { if ((word->best_choice->lengths ().length () != word->outword->blob_list ()->length ()) || (word->best_choice->lengths ().length () != blob_choices->length ())) { tprintf ("ASSERT FAIL String:\"%s\"; Strlen=%d; #Blobs=%d; #Choices=%d\n", word->best_choice->string ().string (), word->best_choice->lengths ().length (), word->outword->blob_list ()->length (), blob_choices->length ()); } ASSERT_HOST (word->best_choice->lengths ().length () == word->outword->blob_list ()->length ()); ASSERT_HOST (word->best_choice->lengths ().length () == blob_choices->length ()); word->tess_failed = FALSE; if (word->word->flag (W_REP_CHAR)) { fix_rep_char(word); } else { fix_quotes (word->best_choice, word->outword, blob_choices); if (tessedit_fix_hyphens) fix_hyphens (word->best_choice, word->outword, blob_choices); /* Dont trust fix_quotes! - though I think I've fixed the bug */ if ((word->best_choice->lengths ().length () != word->outword->blob_list ()->length ()) || (word->best_choice->lengths ().length () != blob_choices->length ())) { #ifndef SECURE_NAMES tprintf ("POST FIX_QUOTES FAIL String:\"%s\"; Strlen=%d; #Blobs=%d; #Choices=%d\n", word->best_choice->string ().string (), word->best_choice->lengths ().length (), word->outword->blob_list ()->length (), blob_choices->length ()); #endif } ASSERT_HOST (word->best_choice->lengths ().length () == word->outword->blob_list ()->length ()); ASSERT_HOST (word->best_choice->lengths ().length () == blob_choices->length ()); word->tess_accepted = tess_acceptable_word (word->best_choice, word->raw_choice); make_reject_map (word, blob_choices, row, 2); } } // Save best choices in the WERD_CHOICE if needed if (blob_choices != &local_blob_choices) word->best_choice->set_blob_choices(blob_choices); else blob_choices->deep_clear(); delete bln_word; assert (word->raw_choice != NULL);}/************************************************************************* * fix_rep_char() * The word is a repeated char. Find the repeated char character. Make a reject * string which rejects any char other than the voted char. Set the word to done * to stop rematching it. * *************************************************************************/void fix_rep_char( //Repeated char word WERD_RES *word //word to do ) { struct REP_CH { char ch[UNICHAR_LEN + 1]; int count; }; REP_CH *rep_ch; //array of char counts int word_len; int rep_ch_count = 0; //how many unique chs const char *word_str; //the repeated chs int i, j; int offset; int total = 0; int max = 0; char *maxch = NULL; //Most common char word_str = word->best_choice->string ().string (); word_len = word->best_choice->lengths ().length ();; rep_ch = (REP_CH *) alloc_mem (word_len * sizeof (REP_CH)); for (i = 0, offset = 0; i < word_len; offset += word->best_choice->lengths()[i++]) { for (j = 0; j < rep_ch_count && strncmp(rep_ch[j].ch, word_str + offset, word->best_choice->lengths()[i]) != 0; j++); if (j < rep_ch_count) rep_ch[j].count++; else { strncpy(rep_ch[rep_ch_count].ch, word_str + offset, word->best_choice->lengths()[i]); rep_ch[rep_ch_count].ch[word->best_choice->lengths()[i]] = '\0'; rep_ch[rep_ch_count].count = 1; rep_ch_count++; } } for (j = 0; j < rep_ch_count; j++) { total += rep_ch[j].count; if ((rep_ch[j].count > max) && (*rep_ch[j].ch != ' ')) { max = rep_ch[j].count; maxch = rep_ch[j].ch; }
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?