📄 fixxht.cpp
字号:
est_x_ht = x_ht_fraction_of_caps_ht * max_blob_ht; est_caps_ht = max_blob_ht; #ifndef SECURE_NAMES if (debug_x_ht_level >= 20) tprintf ("Mode20:M: Trial XHT:%f CAP:%f\n", est_x_ht, est_caps_ht); #endif } else { no_comment = TRUE; if (debug_x_ht_level >= 20) tprintf ("Mode20:N: No comment as nothing else matched\n"); } } } /* Sanity check - reject word if fails */ if (!no_comment && ((est_x_ht > 2 * bln_x_height) || (est_x_ht / word_res->denorm.scale () <= min_sane_x_ht_pixels) || (est_caps_ht <= est_x_ht) || (est_caps_ht >= 2.5 * est_x_ht))) { no_comment = TRUE; if (!trial && rej_use_xht) { if (debug_x_ht_level >= 2) { tprintf ("Sanity check rejecting %s ", word_str); word_res->reject_map.print (debug_fp); tprintf ("\n"); } word_res->reject_map.rej_word_xht_fixup (); } if (debug_x_ht_level >= 20) tprintf ("Mode20:O: No comment as nothing else matched\n"); } if (no_comment || trial) { word_res->x_height = bln_x_height / word_res->denorm.scale (); word_res->guessed_x_ht = TRUE; word_res->caps_height = (bln_x_height / x_ht_fraction_of_caps_ht) / word_res->denorm.scale (); word_res->guessed_caps_ht = TRUE; /* Reject ambigs in the current word if we are uncertain and: there are rejects OR there is only one char which is an ambig OR there is conflict between the case of the ambigs even though there is no height separation Eg "Ms" recognised from "MS" */ if (rej_trial_ambigs && ((word_res->reject_map.reject_count () > 0) || (word_res->reject_map.length () == 1) || ((x_ht_ambigs > 0) && (caps_ht_ambigs > 0)))) { #ifndef SECURE_NAMES if (debug_x_ht_level >= 2) { tprintf ("TRIAL Rej Ambigs %s ", word_str); word_res->reject_map.print (debug_fp); } #endif reject_ambigs(word_res); if (debug_x_ht_level >= 2) { tprintf (" "); word_res->reject_map.print (debug_fp); tprintf ("\n"); } } } else { word_res->x_height = est_x_ht / word_res->denorm.scale (); word_res->guessed_x_ht = !est_x_ht_certain; word_res->caps_height = est_caps_ht / word_res->denorm.scale (); word_res->guessed_caps_ht = !est_caps_ht_certain; } if (!no_comment && (fabs (est_x_ht - bln_x_height) > x_ht_ok_variation)) *trial_x_ht = est_x_ht / word_res->denorm.scale (); else *trial_x_ht = 0.0; #ifndef SECURE_NAMES if (((*trial_x_ht > 0) && (debug_x_ht_level >= 3)) || (debug_x_ht_level >= 5)) { tprintf ("%s ", word_str); word_res->reject_map.print (debug_fp); tprintf (" X:%0.2f Cps:%0.2f Mxht:%0.2f RJ MxHt:%d MxAr:%d Rematch:%c\n", est_x_ht, est_caps_ht, max_blob_ht, rej_blobs_max_height, rej_blobs_max_area, *trial_x_ht > 0 ? '*' : ' '); } #endif}/************************************************************************* * check_block_occ() * Checks word for coarse block occupancy, rejecting more chars and flipping * case of case ambiguous chars as required. *************************************************************************/void check_block_occ(WERD_RES *word_res) { PBLOB_IT blob_it; STRING new_string; REJMAP new_map = word_res->reject_map; WERD_CHOICE *new_choice; const char *word_str = word_res->best_choice->string ().string (); INT16 i; INT16 reject_count = 0; char confirmed_char; float x_ht; float caps_ht; if (word_res->x_height > 0) x_ht = word_res->x_height * word_res->denorm.scale (); else x_ht = bln_x_height; if (word_res->caps_height > 0) caps_ht = word_res->caps_height * word_res->denorm.scale (); else caps_ht = x_ht / x_ht_fraction_of_caps_ht; blob_it.set_to_list (word_res->outword->blob_list ()); for (blob_it.mark_cycle_pt (), i = 0; !blob_it.cycled_list (); blob_it.forward (), i++) { new_string += word_str[i]; //default copy if (word_res->reject_map[i].accepted ()) { confirmed_char = check_blob_occ (word_str[i], blob_it.data ()->bounding_box (). top () - bln_baseline_offset, x_ht, caps_ht); if (confirmed_char == '\0') { if (rej_use_check_block_occ) { new_map[i].setrej_xht_fixup (); reject_count++; } } else new_string[i] = confirmed_char; } } if ((reject_count > 0) || (new_string != word_str)) { if (debug_x_ht_level >= 2) { tprintf ("Shape Verification: %s ", word_str); word_res->reject_map.print (debug_fp); tprintf (" -> %s ", new_string.string ()); new_map.print (debug_fp); tprintf ("\n"); } new_choice = new WERD_CHOICE (new_string.string (), word_res->best_choice->rating (), word_res->best_choice->certainty (), word_res->best_choice->permuter ()); delete word_res->best_choice; word_res->best_choice = new_choice; word_res->reject_map = new_map; }}/************************************************************************* * check_blob_occ() * * Checks blob for position relative to position above baseline * Returns 0 for reject, or (possibly case shifted) confirmed char *************************************************************************/char check_blob_occ(char proposed_char, INT16 blob_ht_above_baseline, float x_ht, float caps_ht) { BOOL8 blob_definite_x_ht; BOOL8 blob_definite_caps_ht; float acceptable_variation; acceptable_variation = (caps_ht - x_ht) * x_ht_variation; /* ??? REJECT if expected descender and nothing significantly below BL */ /* ??? REJECT if expected ascender and nothing significantly above x-ht */ /* IF AMBIG_CAPS_X_CHS IF blob is definitely an ascender ( > xht + xht err )AND char is an x-ht char THEN flip case IF blob is defintiely an x-ht ( <= xht + xht err ) AND char is an ascender char THEN flip case */ blob_definite_x_ht = blob_ht_above_baseline <= x_ht + acceptable_variation; blob_definite_caps_ht = blob_ht_above_baseline >= caps_ht - acceptable_variation; if (STRING (chs_ambig_caps_x).contains (proposed_char)) { if ((!blob_definite_x_ht && !blob_definite_caps_ht) || (proposed_char == '0' && !blob_definite_caps_ht) || (proposed_char == 'o' && !blob_definite_x_ht)) return '\0'; else if (blob_definite_caps_ht && STRING (chs_x_ht).contains (proposed_char)) { if (x_ht_case_flip) //flip to upper case return (char) toupper (proposed_char); else return '\0'; } else if (blob_definite_x_ht && !STRING (chs_x_ht).contains (proposed_char)) { if (x_ht_case_flip) //flip to lower case return (char) tolower (proposed_char); else return '\0'; } } else if ((STRING (chs_non_ambig_x_ht).contains (proposed_char) && !blob_definite_x_ht) || (STRING (chs_non_ambig_caps_ht).contains (proposed_char) && !blob_definite_caps_ht)) return '\0'; return proposed_char;}float estimate_from_stats(STATS &stats) { if (stats.get_total () <= 0) return 0.0; else if (stats.get_total () >= 3) return stats.ile (0.5); //median else return stats.mean ();}void improve_estimate(WERD_RES *word_res, float &est_x_ht, float &est_caps_ht, STATS &x_ht, STATS &caps_ht) { PBLOB_IT blob_it; INT16 blob_ht_above_baseline; const char *word_str; INT16 i; BOX blob_box; //blob bounding box char confirmed_char; float new_val; /* IMPROVE estimates here - if good estimates, and case ambig chars, rescan blobs to fix case ambig blobs, re-estimate hts ??? maybe always do it after deciding x-height */ blob_it.set_to_list (word_res->outword->blob_list ()); word_str = word_res->best_choice->string ().string (); for (blob_it.mark_cycle_pt (), i = 0; !blob_it.cycled_list (); blob_it.forward (), i++) { if ((STRING (chs_ambig_caps_x).contains (word_str[i])) && (!dodgy_blob (blob_it.data ()))) { blob_box = blob_it.data ()->bounding_box (); blob_ht_above_baseline = blob_box.top () - bln_baseline_offset; confirmed_char = check_blob_occ (word_str[i], blob_ht_above_baseline, est_x_ht, est_caps_ht); if (confirmed_char != '\0') if (STRING (chs_x_ht).contains (confirmed_char)) x_ht.add (blob_ht_above_baseline, 1); else caps_ht.add (blob_ht_above_baseline, 1); } } new_val = estimate_from_stats (x_ht); if (new_val > 0) est_x_ht = new_val; new_val = estimate_from_stats (caps_ht); if (new_val > 0) est_caps_ht = new_val;}void reject_ambigs( //rej any accepted xht ambig chars WERD_RES *word) { const char *word_str; int i = 0; word_str = word->best_choice->string ().string (); while (*word_str != '\0') { if (STRING (chs_ambig_caps_x).contains (*word_str)) word->reject_map[i].setrej_xht_fixup (); word_str++; i++; }}void est_ambigs( //xht ambig ht stats WERD_RES *word_res, STATS &stats, float *ambig_lc_x_est, //xht est float *ambig_uc_caps_est //caps est ) { float x_ht_ok_variation; STATS short_ambigs (0, 300); STATS tall_ambigs (0, 300); PBLOB_IT blob_it; BOX blob_box; //blob bounding box INT16 blob_ht_above_baseline; const char *word_str; INT16 i; float min; //min ambig ch ht float max; //max ambig ch ht float short_limit; // for lower case float tall_limit; // for upper case x_ht_ok_variation = (bln_x_height / x_ht_fraction_of_caps_ht - bln_x_height) * x_ht_variation; if (stats.get_total () == 0) { *ambig_lc_x_est = 0; *ambig_uc_caps_est = 0; } else { min = stats.ile (0.0); max = stats.ile (0.99999); if ((max - min) < x_ht_ok_variation) { *ambig_lc_x_est = *ambig_uc_caps_est = stats.mean (); //close enough } else { /* Try reclustering into lower and upper case chars */ short_limit = min + (max - min) * x_ht_variation; tall_limit = max - (max - min) * x_ht_variation; word_str = word_res->best_choice->string ().string (); blob_it.set_to_list (word_res->outword->blob_list ()); for (blob_it.mark_cycle_pt (), i = 0; !blob_it.cycled_list (); blob_it.forward (), i++) { if (word_res->reject_map[i].accepted () && STRING (chs_ambig_caps_x).contains (word_str[i]) && (!dodgy_blob (blob_it.data ()))) { blob_box = blob_it.data ()->bounding_box (); blob_ht_above_baseline = blob_box.top () - bln_baseline_offset; if (blob_ht_above_baseline <= short_limit) short_ambigs.add (blob_ht_above_baseline, 1); else if (blob_ht_above_baseline >= tall_limit) tall_ambigs.add (blob_ht_above_baseline, 1); } } *ambig_lc_x_est = short_ambigs.mean (); *ambig_uc_caps_est = tall_ambigs.mean (); /* Cop out if we havent got sensible clusters. */ if (*ambig_uc_caps_est - *ambig_lc_x_est <= x_ht_ok_variation) *ambig_lc_x_est = *ambig_uc_caps_est = stats.mean (); //close enough } }}/************************************************************************* * dodgy_blob() * Returns true if the blob has more than one outline, one above the other. * These are dodgy as the top blob could be noise, causing the bounding box xht * to be misleading *************************************************************************/BOOL8 dodgy_blob(PBLOB *blob) { OUTLINE_IT outline_it = blob->out_list (); INT16 highest_bottom = -MAX_INT16; INT16 lowest_top = MAX_INT16; BOX outline_box; if (x_ht_include_dodgy_blobs) return FALSE; //no blob is ever dodgy for (outline_it.mark_cycle_pt (); !outline_it.cycled_list (); outline_it.forward ()) { outline_box = outline_it.data ()->bounding_box (); if (lowest_top > outline_box.top ()) lowest_top = outline_box.top (); if (highest_bottom < outline_box.bottom ()) highest_bottom = outline_box.bottom (); } return highest_bottom >= lowest_top;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -