📄 docqual.cpp
字号:
if (longest_upper_run_len < upper_string_count) longest_upper_run_len = upper_string_count; if (last_char == *str) { alpha_repetition_count++; if (longest_alpha_repetition_count < alpha_repetition_count) { longest_alpha_repetition_count = alpha_repetition_count; } } else { last_char = *str; alpha_repetition_count = 1; } break; case FIRST_NUM: isolated_digits++; default: state = FIRST_UPPER; last_char = *str; alpha_repetition_count = 1; upper_string_count = 1; break; } } else if (islower (*str)) { total_alpha_count++; switch (state) { case SUBSEQUENT_LOWER: case FIRST_LOWER: state = SUBSEQUENT_LOWER; lower_string_count++; if (longest_lower_run_len < lower_string_count) longest_lower_run_len = lower_string_count; if (last_char == *str) { alpha_repetition_count++; if (longest_alpha_repetition_count < alpha_repetition_count) { longest_alpha_repetition_count = alpha_repetition_count; } } else { last_char = *str; alpha_repetition_count = 1; } break; case FIRST_NUM: isolated_digits++; default: state = FIRST_LOWER; last_char = *str; alpha_repetition_count = 1; lower_string_count = 1; break; } } else if (isdigit (*str)) { total_digit_count++; switch (state) { case FIRST_NUM: state = SUBSEQUENT_NUM; case SUBSEQUENT_NUM: break; case FIRST_UPPER: case FIRST_LOWER: isolated_alphas++; default: state = FIRST_NUM; break; } } else { if (*str == ' ') tess_rejs++; else bad_char_count++; switch (state) { case FIRST_NUM: isolated_digits++; break; case FIRST_UPPER: case FIRST_LOWER: isolated_alphas++; default: break; } state = JUNK; } } switch (state) { case FIRST_NUM: isolated_digits++; break; case FIRST_UPPER: case FIRST_LOWER: isolated_alphas++; default: break; } if (crunch_include_numerals) { total_alpha_count += total_digit_count - isolated_digits; } if (crunch_leave_ok_strings && (len >= 4) && (2 * (total_alpha_count - isolated_alphas) > len) && (longest_alpha_repetition_count < crunch_long_repetitions)) { if ((crunch_accept_ok && (acceptable_word_string (str) != AC_UNACCEPTABLE)) || (longest_lower_run_len > crunch_leave_lc_strings) || (longest_upper_run_len > crunch_leave_uc_strings)) return G_NEVER_CRUNCH; } if ((word->reject_map.length () > 1) && (strpbrk (str, " ") == NULL) && ((word->best_choice->permuter () == SYSTEM_DAWG_PERM) || (word->best_choice->permuter () == FREQ_DAWG_PERM) || (word->best_choice->permuter () == USER_DAWG_PERM) || (word->best_choice->permuter () == NUMBER_PERM) || (acceptable_word_string (str) != AC_UNACCEPTABLE) || ok_dict_word)) return G_OK; ok_chars = len - bad_char_count - isolated_digits - isolated_alphas - tess_rejs; if (crunch_debug > 3) { tprintf ("garbage_word: \"%s\"\n", word->best_choice->string ().string ()); tprintf ("LEN: %d bad: %d iso_N: %d iso_A: %d rej: %d\n", len, bad_char_count, isolated_digits, isolated_alphas, tess_rejs); } if ((bad_char_count == 0) && (tess_rejs == 0) && ((len > isolated_digits + isolated_alphas) || (len <= 2))) return G_OK; if ((tess_rejs > ok_chars) || ((tess_rejs > 0) && ((bad_char_count + tess_rejs) * 2 > len))) return G_TERRIBLE; if (len > 4) { dodgy_chars = 2 * tess_rejs + bad_char_count + isolated_digits + isolated_alphas; if ((dodgy_chars > 5) || ((dodgy_chars / (float) len) > 0.5)) return G_DODGY; else return G_OK; } else { dodgy_chars = 2 * tess_rejs + bad_char_count; if (((len == 4) && (dodgy_chars > 2)) || ((len == 3) && (dodgy_chars > 2)) || (dodgy_chars >= len)) return G_DODGY; else return G_OK; }}/************************************************************************* * word_deletable() * DELETE WERDS AT ENDS OF ROWS IF * Word is crunched && * ( string length = 0 OR * > 50% of chars are "|" (before merging) OR * certainty < -10 OR * rating /char > 60 OR * TOP of word is more than 0.5 xht BELOW baseline OR * BOTTOM of word is more than 0.5 xht ABOVE xht OR * length of word < 3xht OR * height of word < 0.7 xht OR * height of word > 3.0 xht OR * >75% of the outline BBs have longest dimension < 0.5xht *************************************************************************/CRUNCH_MODE word_deletable(WERD_RES *word, INT16 &delete_mode) { int word_len = word->reject_map.length (); float rating_per_ch; BOX box; //BB of word if (word->unlv_crunch_mode == CR_NONE) { delete_mode = 0; return CR_NONE; } if (word_len == 0) { delete_mode = 1; return CR_DELETE; } box = word->outword->bounding_box (); if (box.height () < crunch_del_min_ht * bln_x_height) { delete_mode = 4; return CR_DELETE; } if (noise_outlines (word->outword)) { delete_mode = 5; return CR_DELETE; } if ((failure_count (word) * 1.5) > word_len) { delete_mode = 2; return CR_LOOSE_SPACE; } if (word->best_choice->certainty () < crunch_del_cert) { delete_mode = 7; return CR_LOOSE_SPACE; } rating_per_ch = word->best_choice->rating () / word_len; if (rating_per_ch > crunch_del_rating) { delete_mode = 8; return CR_LOOSE_SPACE; } if (box.top () < bln_baseline_offset - crunch_del_low_word * bln_x_height) { delete_mode = 9; return CR_LOOSE_SPACE; } if (box.bottom () > bln_baseline_offset + crunch_del_high_word * bln_x_height) { delete_mode = 10; return CR_LOOSE_SPACE; } if (box.height () > crunch_del_max_ht * bln_x_height) { delete_mode = 11; return CR_LOOSE_SPACE; } if (box.width () < crunch_del_min_width * bln_x_height) { delete_mode = 3; return CR_LOOSE_SPACE; } delete_mode = 0; return CR_NONE;}INT16 failure_count(WERD_RES *word) { char *str = (char *) word->best_choice->string ().string (); int tess_rejs = 0; for (; *str != '\0'; str++) { if (*str == ' ') tess_rejs++; } return tess_rejs;}BOOL8 noise_outlines(WERD *word) { PBLOB_IT blob_it; OUTLINE_IT outline_it; BOX box; //BB of outline INT16 outline_count = 0; INT16 small_outline_count = 0; INT16 max_dimension; float small_limit = bln_x_height * crunch_small_outlines_size; blob_it.set_to_list (word->blob_list ()); for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { outline_it.set_to_list (blob_it.data ()->out_list ()); for (outline_it.mark_cycle_pt (); !outline_it.cycled_list (); outline_it.forward ()) { outline_count++; box = outline_it.data ()->bounding_box (); if (box.height () > box.width ()) max_dimension = box.height (); else max_dimension = box.width (); if (max_dimension < small_limit) small_outline_count++; } } return (small_outline_count >= outline_count);}/************************************************************************* * insert_rej_cblobs() * Put rejected word blobs back into the outword. * NOTE!!! AFTER THIS THE CHOICES LIST WILL NOT HAVE THE CORRECT NUMBER * OF ELEMENTS. *************************************************************************/void insert_rej_cblobs( //word to do WERD_RES *word) { PBLOB_IT blob_it; //blob iterator PBLOB_IT rej_blob_it; const STRING *wordstr; int old_len; int rej_len; char new_str[512]; REJMAP new_map; int i = 0; //new_str index int j = 0; //old_str index int new_len; gblob_sort_list (word->outword->rej_blob_list (), TRUE); rej_blob_it.set_to_list (word->outword->rej_blob_list ()); if (rej_blob_it.empty ()) return; rej_len = rej_blob_it.length (); blob_it.set_to_list (word->outword->blob_list ()); wordstr = &(word->best_choice->string ()); old_len = wordstr->length (); ASSERT_HOST (word->reject_map.length () == old_len); ASSERT_HOST (blob_it.length () == old_len); if ((old_len + rej_len) > 511) return; //Word is garbage anyway prevent abort new_map.initialise (old_len + rej_len); while (!rej_blob_it.empty ()) { if ((j >= old_len) || (rej_blob_it.data ()->bounding_box ().left () <= blob_it.data ()->bounding_box ().left ())) { /* Insert reject blob */ if (j >= old_len) blob_it.add_to_end (rej_blob_it.extract ()); else blob_it.add_before_stay_put (rej_blob_it.extract ()); if (!rej_blob_it.empty ()) rej_blob_it.forward (); new_str[i] = ' '; new_map[i].setrej_rej_cblob (); i++; } else { new_str[i] = (*wordstr)[j]; new_map[i] = word->reject_map[j]; i++; j++; blob_it.forward (); } } /* Add any extra normal blobs to strings */ while (j < wordstr->length ()) { new_str[i] = (*wordstr)[j]; new_map[i] = word->reject_map[j]; i++; j++; } new_str[i] = '\0'; /* tprintf( "\nOld len %d; New len %d; New str \"%s\"; New map \"%s\"\n", old_len, i, new_str, new_map ); */ ASSERT_HOST (i == blob_it.length ()); ASSERT_HOST (i == old_len + rej_len); word->reject_map = new_map; *((STRING *) wordstr) = new_str; new_len = strlen (word->best_choice->string ().string ()); ASSERT_HOST (word->reject_map.length () == new_len); ASSERT_HOST (word->outword->blob_list ()->length () == new_len);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -