📄 control.cpp
字号:
else { /* Allow "'s" in NON hyphenated lower case words */ if ((s[i] == '\'') && (s[i + 1] == 's')) i += 2; } if (upper_count > 0) word_type = AC_INITIAL_CAP; else word_type = AC_LOWER_CASE; } /* Up to two different, constrained trailing punctuation chars */ if ((s[i] != '\0') && (STRING (chs_trailing_punct1).contains (s[i]))) i++; if ((s[i] != '\0') && (s[i - 1] != s[i]) && (STRING (chs_trailing_punct2).contains (s[i]))) i++; if (s[i] != '\0') word_type = AC_UNACCEPTABLE; not_a_word: if (word_type == AC_UNACCEPTABLE) { /* Look for abbreviation string */ i = 0; if (isupper (s[0])) { word_type = AC_UC_ABBREV; while ((s[i] != '\0') && isupper (s[i]) && (s[i + 1] == '.')) i += 2; } else if (islower (s[0])) { word_type = AC_LC_ABBREV; while ((s[i] != '\0') && islower (s[i]) && (s[i + 1] == '.')) i += 2; } if (s[i] != '\0') word_type = AC_UNACCEPTABLE; } return word_type;}/* DEBUGGING ROUTINE */BOOL8 check_debug_pt(WERD_RES *word, int location) { BOOL8 show_map_detail = FALSE; INT16 i; #ifndef SECURE_NAMES if (!test_pt) return FALSE; tessedit_rejection_debug.set_value (FALSE); debug_x_ht_level.set_value (0); tessedit_cluster_debug.set_value (FALSE); nn_debug.set_value (FALSE); nn_reject_debug.set_value (FALSE); if (word->word->bounding_box ().contains (FCOORD (test_pt_x, test_pt_y))) { if (location < 0) return TRUE; //For breakpoint use tessedit_rejection_debug.set_value (TRUE); debug_x_ht_level.set_value (20); tessedit_cluster_debug.set_value (TRUE); nn_debug.set_value (TRUE); nn_reject_debug.set_value (TRUE); tprintf ("\n\nTESTWD::"); switch (location) { case 0: tprintf ("classify_word_pass1 start\n"); word->word->print (debug_fp); break; case 10: tprintf ("make_reject_map: initial map"); break; case 20: tprintf ("make_reject_map: after NN"); break; case 30: tprintf ("classify_word_pass2 - START"); break; case 40: tprintf ("classify_word_pass2 - Pre Xht"); break; case 50: tprintf ("classify_word_pass2 - END"); show_map_detail = TRUE; break; case 60: tprintf 
("fixspace"); break; case 70: tprintf ("MM pass START"); break; case 80: tprintf ("MM pass END"); break; case 90: tprintf ("After Poor quality rejection"); break; case 100: tprintf ("unrej_good_quality_words - START"); break; case 110: tprintf ("unrej_good_quality_words - END"); break; case 120: tprintf ("Write results pass"); show_map_detail = TRUE; break; } tprintf (" \"%s\" ", word->best_choice->string ().string ()); word->reject_map.print (debug_fp); tprintf ("\n"); if (show_map_detail) { tprintf ("\"%s\"\n", word->best_choice->string ().string ()); for (i = 0; word->best_choice->string ()[i] != '\0'; i++) { tprintf ("**** \"%c\" ****\n", word->best_choice->string ()[i]); word->reject_map[i].full_print (debug_fp); } } tprintf ("Tess Accepted: %s\n", word->tess_accepted ? "TRUE" : "FALSE"); tprintf ("Done flag: %s\n\n", word->done ? "TRUE" : "FALSE"); return TRUE; } else #endif return FALSE;}/********************************************************************** * set_word_fonts * * Get the fonts for the word. 
**********************************************************************/
/*
 * For every character position the blob choice whose char_class matches
 * the corresponding character of word->best_choice is looked up and its
 * config value is read.  A config of -1 contributes nothing.  Otherwise
 * the low five bits of the config index three lookup tables:
 *   italic_table  +1 when bit 0 is clear, -1 when it is set
 *   bold_table    +1 when bit 1 is clear, -1 when it is set
 *   font_table    font number selected by bits 2-4, or -1 for "do not count"
 * The table values are summed into word->italic and word->bold and the
 * font numbers are histogrammed; the two most frequent fonts are then
 * written to word->font1/font1_count and word->font2/font2_count.
 */
void set_word_fonts(                 //collect font votes
                    WERD_RES *word,  //word to update
                                     //per-character classifier results
                    BLOB_CHOICE_LIST_CLIST *blob_choices) {
  INT32 char_index;              //position in best choice
  char wanted_char;              //best choice char at position
  INT8 raw_config;               //config of matching choice
  INT8 table_slot;               //low 5 bits of config
                                 //walks the per-character lists
  BLOB_CHOICE_LIST_C_IT char_it = blob_choices;
  BLOB_CHOICE_IT choice_it;      //walks one character's choices
  STATS fonts (0, 32);           //font histogram
  static INT8 italic_table[32] = {
    1, -1, 1, -1, 1, -1, 1, -1,
    1, -1, 1, -1, 1, -1, 1, -1,
    1, -1, 1, -1, 1, -1, 1, -1,
    1, -1, 1, -1, 1, -1, 1, -1
  };
  static INT8 bold_table[32] = {
    1, 1, -1, -1, 1, 1, -1, -1,
    1, 1, -1, -1, 1, 1, -1, -1,
    1, 1, -1, -1, 1, 1, -1, -1,
    1, 1, -1, -1, 1, 1, -1, -1
  };
  static INT8 font_table[32] = {
    2, 2, 2, 2, -1, -1, -1, -1,
    0, 0, 0, 0, 1, 1, 1, 1,
    3, 3, 3, 3, 4, 4, 4, 4,
    5, 5, 5, 5, 2, 2, 2, 2
  };

  word->italic = 0;
  word->bold = 0;

  char_index = 0;
  for (char_it.mark_cycle_pt (); !char_it.cycled_list (); char_it.forward ()) {
    wanted_char = word->best_choice->string ()[char_index];
    choice_it.set_to_list (char_it.data ());

    for (choice_it.mark_cycle_pt (); !choice_it.cycled_list ();
         choice_it.forward ()) {
      if (choice_it.data ()->char_class () != wanted_char)
        continue;                //not the chosen character

      raw_config = choice_it.data ()->config ();
      if (tessedit_debug_fonts)
        tprintf ("%c(%d=%d%c%c)", wanted_char, raw_config,
                 (raw_config & 31) >> 2,
                 (raw_config & 2) ? 'N' : 'B',
                 (raw_config & 1) ? 'N' : 'I');

      if (raw_config != -1) {
        table_slot = raw_config & 31;
        word->italic += italic_table[table_slot];
        word->bold += bold_table[table_slot];
        if (font_table[table_slot] != -1)
          fonts.add (font_table[table_slot], 1);
      }
      break;                     //only the first match counts
    }

    char_index++;
  }

  /* Each call takes the current mode out of the histogram, so the
     second call yields the runner-up. */
  find_modal_font (&fonts, &word->font1, &word->font1_count);
  find_modal_font (&fonts, &word->font2, &word->font2_count);

  if (tessedit_debug_fonts)
    tprintf ("\n");

  /* Disabled second pass, retained from the original source:
  if (word->font1_count>0)
  {
    for (char_it.mark_cycle_pt(),index=0;
         !char_it.cycled_list();char_it.forward(),index++)
    {
      choice_char=word->best_choice->string()[index];
      choice_it.set_to_list(char_it.data());
      for (choice_it.mark_cycle_pt();!choice_it.cycled_list();choice_it.forward())
      {
        if (choice_it.data()->char_class()==choice_char)
        {
          config=choice_it.data()->config();
          if (config!=-1 && font_table[config&31]==word->font1)
          {
            word->italic+=italic_table[config];
            word->bold+=bold_table[config];
          }
          break;
        }
      }
    }
  }*/
}


/**********************************************************************
 * font_recognition_pass
 *
 * Smooth the fonts for the document.
**********************************************************************/void font_recognition_pass( //good chars in word PAGE_RES_IT &page_res_it) { INT32 length; //of word INT32 count; //of a feature INT8 doc_font; //modal font INT8 doc_font_count; //modal font INT32 doc_italic; //total italics INT32 doc_bold; //total bolds ROW_RES *row = NULL; //current row WERD_RES *word; //current word STATS fonts (0, 32); //font counters STATS doc_fonts (0, 32); //font counters doc_italic = 0; doc_bold = 0; page_res_it.restart_page (); while (page_res_it.word () != NULL) { if (row != page_res_it.row ()) { if (row != NULL) { find_modal_font (&fonts, &row->font1, &row->font1_count); find_modal_font (&fonts, &row->font2, &row->font2_count); } row = page_res_it.row (); //current row fonts.clear (); //clear counters row->italic = 0; row->bold = 0; } word = page_res_it.word (); row->italic += word->italic; row->bold += word->bold; fonts.add (word->font1, word->font1_count); fonts.add (word->font2, word->font2_count); doc_italic += word->italic; doc_bold += word->bold; doc_fonts.add (word->font1, word->font1_count); doc_fonts.add (word->font2, word->font2_count); page_res_it.forward (); } if (row != NULL) { find_modal_font (&fonts, &row->font1, &row->font1_count); find_modal_font (&fonts, &row->font2, &row->font2_count); } find_modal_font(&doc_fonts, &doc_font, &doc_font_count); /* row=NULL; page_res_it.restart_page(); while (page_res_it.word() != NULL) { if (row!=page_res_it.row()) { row2=row; row=page_res_it.row(); if (row->font1_count<MIN_FONT_ROW_COUNT) { fonts.clear(); italic=0; bold=0; add_in_one_row(row,&fonts,&italic,&bold); if (row2!=NULL) { hdiff=row->row->x_height()-row2->row->x_height(); if (hdiff<0) hdiff=-hdiff; if (hdiff<MAX_XHEIGHT_DIFF) add_in_one_row(row2,&fonts,&italic,&bold); } do page_res_it.forward(); while (page_res_it.row()==row); row2=page_res_it.row(); if (row2!=NULL) { hdiff=row->row->x_height()-row2->row->x_height(); if (hdiff<0) hdiff=-hdiff; if 
(hdiff<MAX_XHEIGHT_DIFF) add_in_one_row(row2,&fonts,&italic,&bold); } row->italic=italic; row->bold=bold; find_modal_font(&fonts,&row->font1,&row->font1_count); find_modal_font(&fonts,&row->font2,&row->font2_count); } else page_res_it.forward(); } else page_res_it.forward(); }*/ page_res_it.restart_page (); while (page_res_it.word () != NULL) { row = page_res_it.row (); //current row word = page_res_it.word (); length = word->best_choice->string ().length (); count = word->italic; if (count < 0) count = -count; if (!(count == length || length > 3 && count >= length * 3 / 4)) word->italic = doc_italic > 0 ? 1 : -1; count = word->bold; if (count < 0) count = -count; if (!(count == length || length > 3 && count >= length * 3 / 4)) word->bold = doc_bold > 0 ? 1 : -1; count = word->font1_count; if (!(count == length || length > 3 && count >= length * 3 / 4)) { word->font1 = doc_font; word->font1_count = doc_font_count; } page_res_it.forward (); }}/********************************************************************** * add_in_one_row * * Add into the stats for one row. **********************************************************************/void add_in_one_row( //good chars in word ROW_RES *row, //current row STATS *fonts, //font stats INT8 *italic, //output count INT8 *bold //output count ) { WERD_RES *word; //current word WERD_RES_IT word_it = &row->word_res_list; for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) { word = word_it.data (); *italic += word->italic; *bold += word->bold; if (word->font1_count > 0) fonts->add (word->font1, word->font1_count); if (word->font2_count > 0) fonts->add (word->font2, word->font2_count); }}/********************************************************************** * find_modal_font * * Find the modal font and remove from the stats. 
**********************************************************************/
/*
 * When the histogram holds at least one sample, *font_out receives its
 * mode and *font_count receives that font's pile count, clamped to
 * MAX_INT8 so that it fits the INT8 output.  The font's whole pile is
 * then subtracted from the histogram, so that an immediately following
 * call reports the next most frequent font.
 *
 * When the histogram is empty, *font_out is set to -1 and *font_count
 * to 0, and the histogram is left untouched.
 */
void find_modal_font(                   //take mode out of stats
                     STATS *fonts,      //font stats (modified)
                     INT8 *font_out,    //output font
                     INT8 *font_count   //output count
                    ) {
  INT8 font;                     //font index
  INT32 count;                   //pile count

  if (fonts->get_total () > 0) {
    font = (INT8) fonts->mode ();
    *font_out = font;
    count = fonts->pile_count (font);
    *font_count = count < MAX_INT8 ? count : MAX_INT8;
    /* Subtract the full pile, not the clamped value reported to the
       caller: removing only MAX_INT8 samples would leave the remainder
       of a large pile behind, and the next call could then return this
       same font again as the "second" modal font. */
    fonts->add (font, -count);
  }
  else {
    *font_out = -1;
    *font_count = 0;
  }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -