📄 adaptions.cpp
字号:
CHAR_SAMPLES_LIST *char_clusters, CHAR_SAMPLE_LIST *chars_waiting) { PBLOB_LIST *blobs = word->outword->blob_list (); PBLOB_IT blob_it(blobs); INT16 i; CHAR_SAMPLE *sample; CHAR_SAMPLES_IT c_it = char_clusters; CHAR_SAMPLE_IT cw_it = chars_waiting; float score; float best_score; char best_char; CHAR_SAMPLES *best_cluster; PIXROW_LIST *pixrow_list; PIXROW_IT pixrow_it; IMAGELINE *imlines; // lines of the image BOX pix_box; // box of imlines // extent WERD copy_outword; // copy to denorm BOX b_box; PBLOB_IT copy_blob_it; PIXROW *pixrow = NULL; static INT32 word_number = 0;#ifndef GRAPHICS_DISABLED WINDOW demo_win = NULL;#endif INT32 resolution = page_image.get_res (); word_number++; if (tessedit_test_cluster_input) return; if (word->word->bounding_box ().height () > resolution / 3) return; if (char_clusters->length () == 0) { #ifndef SECURE_NAMES if (tessedit_cluster_debug) tprintf ("No clusters to use for adaption\n"); #endif return; } if (!cw_it.empty ()) { complete_clustering(char_clusters, chars_waiting); print_em_stats(char_clusters, chars_waiting); } if ((!word_adaptable (word, tessedit_cluster_adaption_mode) && word->reject_map.reject_count () != 0) || tessedit_mm_use_rejmap) { if (tessedit_cluster_debug) { tprintf ("\nChecking: \"%s\" MAP ", word->best_choice->string ().string ()); word->reject_map.print (debug_fp); tprintf ("\n"); } copy_outword = *(word->outword); copy_outword.baseline_denormalise (&word->denorm); copy_blob_it.set_to_list (copy_outword.blob_list ()); char_clip_word(©_outword, page_image, pixrow_list, imlines, pix_box); pixrow_it.set_to_list (pixrow_list); pixrow_it.move_to_first (); // For debugging only b_box = copy_outword.bounding_box (); pixrow = pixrow_it.data (); blob_it.move_to_first (); copy_blob_it.move_to_first (); for (i = 0; word->best_choice->string ()[i] != '\0'; i++, pixrow_it.forward (), blob_it.forward (), copy_blob_it.forward ()) { if (word->reject_map[i].recoverable () || (tessedit_mm_all_rejects && word->reject_map[i].rejected ())) { BOX copy_box = copy_blob_it.data ()->bounding_box (); if (tessedit_cluster_debug) tprintf ("Sample %c to check found in %s, index %d\n", word->best_choice->string ()[i], word->best_choice->string ().string (), i); if (tessedit_demo_adaption) tprintf ("Sample %c to check found in %s (%d), index %d\n", word->best_choice->string ()[i], word->best_choice->string ().string (), word_number, i); sample = clip_sample (pixrow_it.data (), imlines, pix_box, copy_outword.flag (W_INVERSE), word->best_choice->string ()[i]); if (sample == NULL) { //Clip failed tprintf ("Unable to clip sample from %s, index %d\n", word->best_choice->string ().string (), i); #ifndef SECURE_NAMES if (tessedit_cluster_debug) tprintf ("Sample rejected (no sample)\n"); #endif word->reject_map[i].setrej_mm_reject (); continue; } best_score = MAX_INT32; best_char = '\0'; best_cluster = NULL; for (c_it.mark_cycle_pt (); !c_it.cycled_list (); c_it.forward ()) { if (c_it.data ()->character () != '\0') { score = c_it.data ()->match_score (sample); if (score < best_score) { best_cluster = c_it.data (); best_score = score; best_char = c_it.data ()->character (); } } } if (best_score > tessedit_cluster_t1) { #ifndef SECURE_NAMES if (tessedit_cluster_debug) tprintf ("Sample rejected (score %f)\n", best_score); if (tessedit_demo_adaption) tprintf ("Sample rejected (score %f)\n", best_score); #endif word->reject_map[i].setrej_mm_reject (); } else { if (word->best_choice->string ()[i] == best_char) { #ifndef SECURE_NAMES if (tessedit_cluster_debug) tprintf ("Sample accepted (score %f)\n", best_score); if (tessedit_demo_adaption) tprintf ("Sample accepted (score %f)\n", best_score); #endif if (tessedit_test_adaption) word->reject_map[i].setrej_minimal_rej_accept (); else word->reject_map[i].setrej_mm_accept (); } else { #ifndef SECURE_NAMES if (tessedit_cluster_debug) tprintf ("Sample rejected (char %c, score %f)\n", best_char, best_score); if (tessedit_demo_adaption) tprintf ("Sample rejected (char %c, score %f)\n", best_char, best_score); #endif word->reject_map[i].setrej_mm_reject (); } } if (tessedit_demo_adaption) { if (strcmp (imagebasename.string (), tessedit_demo_file.string ()) != 0 || word_number == tessedit_demo_word1 || word_number == tessedit_demo_word2) {#ifndef GRAPHICS_DISABLED demo_win = display_clip_image(©_outword, page_image, pixrow_list, pix_box);#endif demo_word = word_number; best_cluster->match_score (sample); demo_word = 0; } } } } delete[]imlines; // Free array of imlines delete pixrow_list; if (tessedit_cluster_debug) { tprintf ("\nFinal: \"%s\" MAP ", word->best_choice->string ().string ()); word->reject_map.print (debug_fp); tprintf ("\n"); } }}void print_em_stats(CHAR_SAMPLES_LIST *char_clusters, CHAR_SAMPLE_LIST *chars_waiting) { CHAR_SAMPLES_IT c_it = char_clusters; if (!tessedit_cluster_debug) return; #ifndef SECURE_NAMES tprintf ("There are %d clusters and %d samples waiting\n", char_clusters->length (), chars_waiting->length ()); for (c_it.mark_cycle_pt (); !c_it.cycled_list (); c_it.forward ()) c_it.data ()->print (debug_fp); #endif tprintf ("\n");}CHAR_SAMPLE *clip_sample( //lines of the image PIXROW *pixrow, IMAGELINE *imlines, BOX pix_box, //box of imlines extent BOOL8 white_on_black, char c) { BOX b_box = pixrow->bounding_box (); float baseline_pos = 0; INT32 resolution = page_image.get_res (); if (!b_box.null_box ()) { ASSERT_HOST (b_box.width () < page_image.get_xsize () && b_box.height () < page_image.get_ysize ()); if (b_box.width () > resolution || b_box.height () > resolution) { tprintf ("clip sample: sample too big (%d x %d)\n", b_box.width (), b_box.height ()); return NULL; } IMAGE *image = new (IMAGE); if (image->create (b_box.width (), b_box.height (), 1) == -1) { tprintf ("clip sample: create image failed (%d x %d)\n", b_box.width (), b_box.height ()); delete image; return NULL; } if (!white_on_black) invert_image(image); // Set background to white pixrow->char_clip_image (imlines, pix_box, NULL, *image, baseline_pos); if (white_on_black) invert_image(image); //invert white on black for scaling &NN return new CHAR_SAMPLE (image, c); } else return NULL;}#ifndef GRAPHICS_DISABLEDvoid display_cluster_prototypes(CHAR_SAMPLES_LIST *char_clusters) { INT16 proto_number = 0; CHAR_SAMPLES_IT c_it = char_clusters; char title[WINDOWNAMESIZE]; for (c_it.mark_cycle_pt (); !c_it.cycled_list (); c_it.forward ()) { proto_number++; #ifndef SECURE_NAMES tprintf ("Displaying proto number %d\n", proto_number); #endif if (c_it.data ()->prototype () != NULL) { sprintf (title, "Proto - %d", proto_number); display_image (c_it.data ()->prototype ()->make_image (), title, (proto_number - 1) * 400, 0, FALSE); } }}#endif// *********************************************************************// Simplistic routines to test the effect of rejecting ems and fullstops// *********************************************************************void reject_all_ems(WERD_RES *word) { INT16 i; for (i = 0; word->best_choice->string ()[i] != '\0'; i++) { if (word->best_choice->string ()[i] == 'm') // reject all ems word->reject_map[i].setrej_mm_reject (); }}void reject_all_fullstops(WERD_RES *word) { INT16 i; for (i = 0; word->best_choice->string ()[i] != '\0'; i++) { if (word->best_choice->string ()[i] == '.') // reject all fullstops word->reject_map[i].setrej_mm_reject (); }}void reject_suspect_ems(WERD_RES *word) { INT16 i; if (!word_adaptable (word, tessedit_cluster_adaption_mode)) for (i = 0; word->best_choice->string ()[i] != '\0'; i++) { if (word->best_choice->string ()[i] == 'm' && suspect_em (word, i)) // reject all ems word->reject_map[i].setrej_mm_reject (); }}void reject_suspect_fullstops(WERD_RES *word) { INT16 i; for (i = 0; word->best_choice->string ()[i] != '\0'; i++) { if (word->best_choice->string ()[i] == '.' && suspect_fullstop (word, i)) // reject all commas word->reject_map[i].setrej_mm_reject (); }}BOOL8 suspect_em(WERD_RES *word, INT16 index) { PBLOB_LIST *blobs = word->outword->blob_list (); PBLOB_IT blob_it(blobs); INT16 j; for (j = 0; j < index; j++) blob_it.forward (); return (blob_it.data ()->out_list ()->length () != 1);}BOOL8 suspect_fullstop(WERD_RES *word, INT16 i) { float aspect_ratio; PBLOB_LIST *blobs = word->outword->blob_list (); PBLOB_IT blob_it(blobs); INT16 j; BOX box; INT16 width; INT16 height; for (j = 0; j < i; j++) blob_it.forward (); box = blob_it.data ()->bounding_box (); width = box.width (); height = box.height (); aspect_ratio = ((width > height) ? ((float) width) / height : ((float) height) / width); return (aspect_ratio > tessed_fullstop_aspect_ratio);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -