control.cpp
来自「一个google的OCR源码」· C++ 代码 · 共 1,843 行 · 第 1/5 页
CPP
1,843 行
return FALSE; tessedit_rejection_debug.set_value (FALSE); debug_x_ht_level.set_value (0); tessedit_cluster_debug.set_value (FALSE); nn_debug.set_value (FALSE); nn_reject_debug.set_value (FALSE); if (word->word->bounding_box ().contains (FCOORD (test_pt_x, test_pt_y))) { if (location < 0) return TRUE; //For breakpoint use tessedit_rejection_debug.set_value (TRUE); debug_x_ht_level.set_value (20); tessedit_cluster_debug.set_value (TRUE); nn_debug.set_value (TRUE); nn_reject_debug.set_value (TRUE); tprintf ("\n\nTESTWD::"); switch (location) { case 0: tprintf ("classify_word_pass1 start\n"); word->word->print (debug_fp); break; case 10: tprintf ("make_reject_map: initial map"); break; case 20: tprintf ("make_reject_map: after NN"); break; case 30: tprintf ("classify_word_pass2 - START"); break; case 40: tprintf ("classify_word_pass2 - Pre Xht"); break; case 50: tprintf ("classify_word_pass2 - END"); show_map_detail = TRUE; break; case 60: tprintf ("fixspace"); break; case 70: tprintf ("MM pass START"); break; case 80: tprintf ("MM pass END"); break; case 90: tprintf ("After Poor quality rejection"); break; case 100: tprintf ("unrej_good_quality_words - START"); break; case 110: tprintf ("unrej_good_quality_words - END"); break; case 120: tprintf ("Write results pass"); show_map_detail = TRUE; break; } tprintf (" \"%s\" ", word->best_choice->string ().string ()); word->reject_map.print (debug_fp); tprintf ("\n"); if (show_map_detail) { tprintf ("\"%s\"\n", word->best_choice->string ().string ()); for (i = 0; word->best_choice->string ()[i] != '\0'; i++) { tprintf ("**** \"%c\" ****\n", word->best_choice->string ()[i]); word->reject_map[i].full_print (debug_fp); } } tprintf ("Tess Accepted: %s\n", word->tess_accepted ? "TRUE" : "FALSE"); tprintf ("Done flag: %s\n\n", word->done ? "TRUE" : "FALSE"); return TRUE; } else #endif return FALSE;}/********************************************************************** * set_word_fonts * * Get the fonts for the word. 
**********************************************************************/
/*
 * Walks the per-character blob-choice lists in step with the characters of
 * word->best_choice.  For each character, the first blob choice whose
 * unichar() text equals that character supplies a config() value; the low
 * five bits of that value index three lookup tables that yield an italic
 * vote, a bold vote and a font-family index.
 *
 *   word          in/out: italic and bold are reset and then accumulated;
 *                 font1/font1_count and font2/font2_count receive the two
 *                 most frequent font-family indices and their counts.
 *   blob_choices  one list of candidate classifications per character.
 *
 * A config() of -1 contributes no votes, and a font_table entry of -1 is not
 * counted in the font statistics.
 */
void set_word_fonts(WERD_RES *word, BLOB_CHOICE_LIST_CLIST *blob_choices) {
  char choice_char[UNICHAR_LEN + 1];             // current character, NUL-terminated
  BLOB_CHOICE_LIST_C_IT char_it = blob_choices;  // walks the characters
  BLOB_CHOICE_IT choice_it;                      // walks one character's choices
  STATS fonts(0, 32);                            // font-family histogram

  // All three tables are indexed by (config & 31).
  // Bit 0 selects the sign of the italic vote.
  static inT8 italic_table[32] = {
    1, -1, 1, -1, 1, -1, 1, -1,
    1, -1, 1, -1, 1, -1, 1, -1,
    1, -1, 1, -1, 1, -1, 1, -1,
    1, -1, 1, -1, 1, -1, 1, -1
  };
  // Bit 1 selects the sign of the bold vote.
  static inT8 bold_table[32] = {
    1, 1, -1, -1, 1, 1, -1, -1,
    1, 1, -1, -1, 1, 1, -1, -1,
    1, 1, -1, -1, 1, 1, -1, -1,
    1, 1, -1, -1, 1, 1, -1, -1
  };
  // Bits 2..4 select the font-family index; -1 means "do not count".
  static inT8 font_table[32] = {
    2, 2, 2, 2, -1, -1, -1, -1,
    0, 0, 0, 0, 1, 1, 1, 1,
    3, 3, 3, 3, 4, 4, 4, 4,
    5, 5, 5, 5, 2, 2, 2, 2
  };

  inT32 index = 0;   // character index into lengths()
  inT32 offset = 0;  // byte offset of the character within string()

  word->italic = 0;
  word->bold = 0;

  for (char_it.mark_cycle_pt(); !char_it.cycled_list(); char_it.forward()) {
    // Byte length of the current character.
    const int char_len = word->best_choice->lengths()[index];

    // Copy the character's bytes out of the word and terminate the copy.
    strncpy(choice_char,
            word->best_choice->string().string() + offset,
            char_len);
    choice_char[char_len] = '\0';

    choice_it.set_to_list(char_it.data());
    for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
         choice_it.forward()) {
      if (strcmp(choice_it.data()->unichar(), choice_char) != 0)
        continue;                    // not the chosen character; keep looking

      inT8 config = choice_it.data()->config();
      if (tessedit_debug_fonts)
        tprintf("%s(%d=%d%c%c)",
                choice_char, config, (config & 31) >> 2,
                config & 2 ? 'N' : 'B', config & 1 ? 'N' : 'I');

      if (config != -1) {
        config &= 31;
        word->italic += italic_table[config];
        word->bold += bold_table[config];
        if (font_table[config] != -1)
          fonts.add(font_table[config], 1);
      }
      break;                         // only the first matching choice votes
    }

    // Step to the next character of the word.
    offset += char_len;
    index++;
  }

  // Pull the most frequent and then the second most frequent font family.
  find_modal_font(&fonts, &word->font1, &word->font1_count);
  find_modal_font(&fonts, &word->font2, &word->font2_count);
  if (tessedit_debug_fonts)
    tprintf("\n");
}

/**********************************************************************
 * font_recognition_pass
 *
 * Smooth the fonts for the document.
**********************************************************************/void font_recognition_pass( //good chars in word PAGE_RES_IT &page_res_it) { inT32 length; //of word inT32 count; //of a feature inT8 doc_font; //modal font inT8 doc_font_count; //modal font inT32 doc_italic; //total italics inT32 doc_bold; //total bolds ROW_RES *row = NULL; //current row WERD_RES *word; //current word STATS fonts (0, 32); //font counters STATS doc_fonts (0, 32); //font counters doc_italic = 0; doc_bold = 0; page_res_it.restart_page (); while (page_res_it.word () != NULL) { if (row != page_res_it.row ()) { if (row != NULL) { find_modal_font (&fonts, &row->font1, &row->font1_count); find_modal_font (&fonts, &row->font2, &row->font2_count); } row = page_res_it.row (); //current row fonts.clear (); //clear counters row->italic = 0; row->bold = 0; } word = page_res_it.word (); row->italic += word->italic; row->bold += word->bold; fonts.add (word->font1, word->font1_count); fonts.add (word->font2, word->font2_count); doc_italic += word->italic; doc_bold += word->bold; doc_fonts.add (word->font1, word->font1_count); doc_fonts.add (word->font2, word->font2_count); page_res_it.forward (); } if (row != NULL) { find_modal_font (&fonts, &row->font1, &row->font1_count); find_modal_font (&fonts, &row->font2, &row->font2_count); } find_modal_font(&doc_fonts, &doc_font, &doc_font_count); /* row=NULL; page_res_it.restart_page(); while (page_res_it.word() != NULL) { if (row!=page_res_it.row()) { row2=row; row=page_res_it.row(); if (row->font1_count<MIN_FONT_ROW_COUNT) { fonts.clear(); italic=0; bold=0; add_in_one_row(row,&fonts,&italic,&bold); if (row2!=NULL) { hdiff=row->row->x_height()-row2->row->x_height(); if (hdiff<0) hdiff=-hdiff; if (hdiff<MAX_XHEIGHT_DIFF) add_in_one_row(row2,&fonts,&italic,&bold); } do page_res_it.forward(); while (page_res_it.row()==row); row2=page_res_it.row(); if (row2!=NULL) { hdiff=row->row->x_height()-row2->row->x_height(); if (hdiff<0) hdiff=-hdiff; if 
(hdiff<MAX_XHEIGHT_DIFF) add_in_one_row(row2,&fonts,&italic,&bold); } row->italic=italic; row->bold=bold; find_modal_font(&fonts,&row->font1,&row->font1_count); find_modal_font(&fonts,&row->font2,&row->font2_count); } else page_res_it.forward(); } else page_res_it.forward(); }*/ page_res_it.restart_page (); while (page_res_it.word () != NULL) { row = page_res_it.row (); //current row word = page_res_it.word (); length = word->best_choice->string ().length (); count = word->italic; if (count < 0) count = -count; if (!(count == length || (length > 3 && count >= length * 3 / 4))) word->italic = doc_italic > 0 ? 1 : -1; count = word->bold; if (count < 0) count = -count; if (!(count == length || (length > 3 && count >= length * 3 / 4))) word->bold = doc_bold > 0 ? 1 : -1; count = word->font1_count; if (!(count == length || (length > 3 && count >= length * 3 / 4))) { word->font1 = doc_font; word->font1_count = doc_font_count; } page_res_it.forward (); }}/********************************************************************** * add_in_one_row * * Add into the stats for one row. **********************************************************************/void add_in_one_row( //good chars in word ROW_RES *row, //current row STATS *fonts, //font stats inT8 *italic, //output count inT8 *bold //output count ) { WERD_RES *word; //current word WERD_RES_IT word_it = &row->word_res_list; for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) { word = word_it.data (); *italic += word->italic; *bold += word->bold; if (word->font1_count > 0) fonts->add (word->font1, word->font1_count); if (word->font2_count > 0) fonts->add (word->font2, word->font2_count); }}/********************************************************************** * find_modal_font * * Find the modal font and remove from the stats. 
**********************************************************************/
/*
 * Reports the most frequent font in *fonts and then subtracts the reported
 * count from that font's pile, so that an immediate second call reports the
 * next most frequent font.
 *
 *   fonts       in/out: font histogram; the reported count is removed.
 *   font_out    out: modal font index, or -1 when the histogram is empty.
 *   font_count  out: count for that font, capped at MAX_INT8; 0 when the
 *               histogram is empty.
 */
void find_modal_font(STATS *fonts, inT8 *font_out, inT8 *font_count) {
  // Empty histogram: report "no font".
  if (!(fonts->get_total() > 0)) {
    *font_out = -1;
    *font_count = 0;
    return;
  }

  const inT8 modal = (inT8) fonts->mode();
  *font_out = modal;

  // The pile count is 32-bit; saturate it to fit the 8-bit output.
  const inT32 pile = fonts->pile_count(modal);
  if (pile < MAX_INT8)
    *font_count = pile;
  else
    *font_count = MAX_INT8;

  // Remove what was reported so the next call finds the runner-up.
  fonts->add(modal, -*font_count);
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?