📄 fixspace.cpp
字号:
for (i = 0, prev_char_punct = FALSE; i < word_len; i++) { current_char_punct = punct_chars.contains (word->best_choice->string ()[i]); if (prev_char_punct || (current_char_punct && (i > 0))) total_score++; prev_char_punct = current_char_punct; } } prev_char_digit = digit_or_numeric_punct (word, word->best_choice-> string ()[word_len - 1]); prev_char_1 = ((word_done && (word->best_choice->string ()[word_len - 1] == '1')) || (!word_done && STRING (conflict_set_I_l_1).contains (word->best_choice-> string ()[word_len - 1]))); } /* Find next word */ do word_res_it.forward (); while (word_res_it.data ()->part_of_combo); } while (!word_res_it.at_first ()); total_score += prev_word_score; if (prev_word_done) done_word_count++; if (done_word_count == word_count) return PERFECT_WERDS; else return total_score;}BOOL8 digit_or_numeric_punct(WERD_RES *word, char ch) { return (isdigit (ch) || (fixsp_numeric_fix && (word->best_choice->permuter () == NUMBER_PERM) && STRING (numeric_punctuation).contains (ch)));}/************************************************************************* * transform_to_next_perm() * Examines the current word list to find the smallest word gap size. Then walks * the word list closing any gaps of this size by either inserted new * combination words, or extending existing ones. * * The routine COULD be limited to stop it building words longer than N blobs. * * If there are no more gaps then it DELETES the entire list and returns the * empty list to cause termination. *************************************************************************/void transform_to_next_perm(WERD_RES_LIST &words) { WERD_RES_IT word_it(&words); WERD_RES_IT prev_word_it(&words); WERD_RES *word; WERD_RES *prev_word; WERD_RES *combo; WERD *copy_word; INT16 prev_right = -1; BOX box; INT16 gap; INT16 min_gap = MAX_INT16; for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) { word = word_it.data (); if (!word->part_of_combo) { box = word->word->bounding_box (); if (prev_right >= 0) { gap = box.left () - prev_right; if (gap < min_gap) min_gap = gap; } prev_right = box.right (); } } if (min_gap < MAX_INT16) { prev_right = -1; //back to start word_it.set_to_list (&words); for (; //cant use cycle pt due to inserted combos at start of list (prev_right < 0) || !word_it.at_first (); word_it.forward ()) { word = word_it.data (); if (!word->part_of_combo) { box = word->word->bounding_box (); if (prev_right >= 0) { gap = box.left () - prev_right; if (gap <= min_gap) { prev_word = prev_word_it.data (); if (prev_word->combination) combo = prev_word; else { /* Make a new combination and insert before the first word being joined */ copy_word = new WERD; *copy_word = *(prev_word->word); //deep copy combo = new WERD_RES (copy_word); combo->combination = TRUE; prev_word->part_of_combo = TRUE; prev_word_it.add_before_then_move (combo); } combo->word->set_flag (W_EOL, word->word->flag (W_EOL)); if (word->combination) { combo->word->join_on (word->word); //Move blbs to combo //old combo no longer needed delete word_it.extract (); } else { //Cpy current wd to combo combo->copy_on (word); word->part_of_combo = TRUE; } combo->done = FALSE; if (combo->outword != NULL) { delete combo->outword; delete combo->best_choice; delete combo->raw_choice; combo->outword = NULL; combo->best_choice = NULL; combo->raw_choice = NULL; } } else //catch up prev_word_it = word_it; } prev_right = box.right (); } } } else words.clear (); //signal termination}void dump_words(WERD_RES_LIST &perm, INT16 score, INT16 mode, BOOL8 improved) { WERD_RES_IT word_res_it(&perm); static STRING initial_str; if (debug_fix_space_level > 0) { if (mode == 1) { initial_str = ""; for (word_res_it.mark_cycle_pt (); !word_res_it.cycled_list (); word_res_it.forward ()) { if (!word_res_it.data ()->part_of_combo) { initial_str += word_res_it.data ()->best_choice->string (); initial_str += ' '; } } } #ifndef SECURE_NAMES if (debug_fix_space_level > 1) { switch (mode) { case 1: tprintf ("EXTRACTED (%d): \"", score); break; case 2: tprintf ("TESTED (%d): \"", score); break; case 3: tprintf ("RETURNED (%d): \"", score); break; } for (word_res_it.mark_cycle_pt (); !word_res_it.cycled_list (); word_res_it.forward ()) { if (!word_res_it.data ()->part_of_combo) tprintf ("%s/%1d ", word_res_it.data ()->best_choice->string (). string (), (int) word_res_it.data ()->best_choice->permuter ()); } tprintf ("\"\n"); } else if (improved) { tprintf ("FIX SPACING \"%s\" => \"", initial_str.string ()); for (word_res_it.mark_cycle_pt (); !word_res_it.cycled_list (); word_res_it.forward ()) { if (!word_res_it.data ()->part_of_combo) tprintf ("%s/%1d ", word_res_it.data ()->best_choice->string (). string (), (int) word_res_it.data ()->best_choice->permuter ()); } tprintf ("\"\n"); } #endif }}/************************************************************************* * uniformly_spaced() * Return true if one of the following are true: * - All inter-char gaps are the same width * - The largest gap is no larger than twice the mean/median of the others * - The largest gap is < 64/5 = 13 and all others are <= 0 * **** REMEMBER - WE'RE NOW WORKING WITH A BLN WERD !!! *************************************************************************/BOOL8 uniformly_spaced( //sensible word WERD_RES *word) { PBLOB_IT blob_it; BOX box; INT16 prev_right = -MAX_INT16; INT16 gap; INT16 max_gap = -MAX_INT16; INT16 max_gap_count = 0; STATS gap_stats (0, MAXSPACING); BOOL8 result; const ROW *row = word->denorm.row (); float max_non_space; float normalised_max_nonspace; INT16 i = 0; STRING punct_chars = "\"`',.:;"; blob_it.set_to_list (word->outword->blob_list ()); for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { box = blob_it.data ()->bounding_box (); if ((prev_right > -MAX_INT16) && (!fixsp_ignore_punct || (!punct_chars.contains (word->best_choice->string ()[i - 1]) && !punct_chars.contains (word->best_choice->string ()[i])))) { gap = box.left () - prev_right; if (gap < max_gap) gap_stats.add (gap, 1); else if (gap == max_gap) max_gap_count++; else { if (max_gap_count > 0) gap_stats.add (max_gap, max_gap_count); max_gap = gap; max_gap_count = 1; } } prev_right = box.right (); i++; } max_non_space = (row->space () + 3 * row->kern ()) / 4; normalised_max_nonspace = max_non_space * bln_x_height / row->x_height (); result = ((gap_stats.get_total () == 0) || (max_gap <= normalised_max_nonspace) || ((gap_stats.get_total () > 2) && (max_gap <= 2 * gap_stats.median ())) || ((gap_stats.get_total () <= 2) && (max_gap <= 2 * gap_stats.mean ()))); #ifndef SECURE_NAMES if ((debug_fix_space_level > 1)) { if (result) tprintf ("ACCEPT SPACING FOR: \"%s\" norm_maxnon = %f max=%d maxcount=%d total=%d mean=%f median=%f\n", word->best_choice->string ().string (), normalised_max_nonspace, max_gap, max_gap_count, gap_stats.get_total (), gap_stats.mean (), gap_stats.median ()); else tprintf ("REJECT SPACING FOR: \"%s\" norm_maxnon = %f max=%d maxcount=%d total=%d mean=%f median=%f\n", word->best_choice->string ().string (), normalised_max_nonspace, max_gap, max_gap_count, gap_stats.get_total (), gap_stats.mean (), gap_stats.median ()); } #endif return result;}BOOL8 fixspace_thinks_word_done(WERD_RES *word) { if (word->done) return TRUE; /* Use all the standard pass 2 conditions for mode 5 in set_done() in reject.c BUT DONT REJECT IF THE WERD IS AMBIGUOUS - FOR SPACING WE DONT CARE WHETHER WE HAVE of/at on/an etc. */ if ((fixsp_done_mode > 0) && (word->tess_accepted || ((fixsp_done_mode == 2) && (word->reject_map.reject_count () == 0)) || (fixsp_done_mode == 3)) && (strchr (word->best_choice->string ().string (), ' ') == NULL) && ((word->best_choice->permuter () == SYSTEM_DAWG_PERM) || (word->best_choice->permuter () == FREQ_DAWG_PERM) || (word->best_choice->permuter () == USER_DAWG_PERM) || (word->best_choice->permuter () == NUMBER_PERM))) return TRUE; else return FALSE;}/************************************************************************* * fix_sp_fp_word() * Test the current word to see if it can be split by deleting noise blobs. If * so, do the buisiness. * Return with the iterator pointing to the same place if the word is unchanged, * or the last of the replacement words. *************************************************************************/void fix_sp_fp_word(WERD_RES_IT &word_res_it, ROW *row) { WERD_RES *word_res; WERD_RES_LIST sub_word_list; WERD_RES_IT sub_word_list_it(&sub_word_list); INT16 blob_index; INT16 new_length; float junk; word_res = word_res_it.data (); if (!fixsp_check_for_fp_noise_space || word_res->word->flag (W_REP_CHAR) || word_res->combination || word_res->part_of_combo || !word_res->word->flag (W_DONT_CHOP)) return; blob_index = worst_noise_blob (word_res, &junk); if (blob_index < 0) return; #ifndef SECURE_NAMES if (debug_fix_space_level > 1) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -