📄 permute.cpp
字号:
} else { free_choice(result_1); result_1 = result_2; } } result_2 = number_permute_and_select (char_choices, rating_limit); if (class_probability (result_1) < class_probability (result_2) || class_string (result_2) == NULL) { free_choice(result_2); } else { free_choice(result_1); result_1 = result_2; } result_2 = permute_compound_words (char_choices, rating_limit); if (!result_2 || class_probability (result_1) < class_probability (result_2) || class_string (result_2) == NULL) { free_choice(result_2); } else { free_choice(result_1); result_1 = result_2; } return (result_1);}
/* NOTE(review): the statements above are the end of a function whose
 * beginning lies before this section (presumably permute_all -- confirm).
 * They are left exactly as found. */

/**********************************************************************
 * permute_characters
 *
 * Permute these characters together according to each of the different
 * permuters that are enabled.
 *
 * char_choices  per-character choice lists, handed on to permute_all.
 * limit         rating limit, handed on to permute_all.
 * best_choice   in/out: overwritten (via clone_choice) when the new
 *               result has a smaller class_probability than it holds.
 * raw_choice    handed on to permute_all unchanged.
 *
 * The choice returned by permute_all is always released here with
 * free_choice, whether or not it was copied into best_choice.
 **********************************************************************/
void permute_characters(CHOICES_LIST char_choices,
                        float limit,
                        A_CHOICE *best_choice,
                        A_CHOICE *raw_choice) {
  A_CHOICE *this_choice;

  permutation_count++;           /* Global counter */

  this_choice = permute_all (char_choices, limit, raw_choice);

  /* Keep the new result only when it exists and its rating is
   * strictly smaller than the best one seen so far. */
  if (this_choice &&
      class_probability (this_choice) < class_probability (best_choice)) {
    clone_choice(best_choice, this_choice);
  }
  /* Called even when this_choice is NULL; presumably free_choice
   * tolerates NULL -- TODO confirm. */
  free_choice(this_choice);

  /* Optional trace of the current best string, rating and certainty. */
  if (display_ratings)
    cprintf ("permute_characters: %-15s %4.2f %4.2f\n",
             class_string (best_choice),
             class_probability (best_choice), class_certainty (best_choice));
}


/**********************************************************************
 * permute_compound_words
 *
 * Split the word at every position whose top choice starts with '-'
 * or '/', permute each piece between separators with permute_subword,
 * and join the pieces and separators into one COMPOUND_PERM choice.
 * Returns NULL when no separator split was made or the rating limit
 * was exceeded.  When there are more than MAX_WERD_LENGTH characters a
 * choice with a NULL string and a MAX_FLOAT32 rating is returned.
**********************************************************************/
/*
 * permute_compound_words
 *
 * Builds a compound-word choice by cutting the word at separators.
 * A position counts as a separator when the string of its first choice
 * starts with '-' or '/' and it lies past the start of the current
 * subword.  Each piece in front of a separator is permuted with
 * permute_subword, which appends its text to the local buffer and
 * accumulates rating and certainty; the separator itself is then
 * appended with its own rating and certainty.
 *
 * Returns:
 *   - a new NO_PERM choice with a NULL string and MAX_FLOAT32 rating
 *     when there are more than MAX_WERD_LENGTH character positions;
 *   - NULL when no split happened, nothing follows the last separator,
 *     or the accumulated rating is not within rating_limit;
 *   - otherwise a new COMPOUND_PERM choice for the joined word.
 */
A_CHOICE *permute_compound_words(CHOICES_LIST character_choices,
                                 float rating_limit) {
  char compound[MAX_WERD_LENGTH + 1];
  float total_rating = 0;
  float worst_certainty = 10000;
  int subword_start = 0;
  int pos;

  compound[0] = '\0';

  if (array_count (character_choices) > MAX_WERD_LENGTH)
    return new_choice (NULL, MAX_FLOAT32, -MAX_FLOAT32, -1, NO_PERM);

  array_loop(character_choices, pos) {
    A_CHOICE *top =
      (A_CHOICE *) first ((CHOICES) array_value (character_choices, pos));
    char *text = class_string (top);
    char lead = (text == NULL) ? '\0' : *text;
    int is_separator = (lead == '-' || lead == '/');

    if (is_separator && pos > subword_start) {
      if (compound_debug)
        cprintf ("Hyphenated word found\n");

      /* Permute everything between the previous separator and this one. */
      permute_subword (character_choices, rating_limit,
                       subword_start, pos - 1,
                       compound, &total_rating, &worst_certainty);

      if (total_rating > rating_limit)
        break;

      /* Append the separator itself and account for its scores. */
      subword_start = pos + 1;
      strcat (compound, class_string (top));
      total_rating += class_probability (top);
      worst_certainty = min (class_certainty (top), worst_certainty);
    }
  }

  /* Nothing to return unless at least one split was made, characters
   * remain after the last separator, and the rating is within limit. */
  if (subword_start <= 0 || subword_start >= pos ||
      !(total_rating <= rating_limit))
    return NULL;

  /* Permute the trailing piece and wrap the whole word up. */
  permute_subword (character_choices, rating_limit,
                   subword_start, pos - 1,
                   compound, &total_rating, &worst_certainty);

  return new_choice (compound, total_rating, worst_certainty, -1,
                     COMPOUND_PERM);
}


/**********************************************************************
 * permute_subword
 *
 * Permute a part of a compound word.  This subword is bounded by
 * hyphens and the start and end of the word.  Call the standard word
 * permute function on a set of choices covering only part of the
 * original word.  When it is done, reclaim the memory that was used in
 * the exercise.
**********************************************************************/
/*
 * permute_subword
 *
 * character_choices  choice lists for the whole word.
 * rating_limit       limit handed on to permute_all.
 * start, end         inclusive index range of the subword.
 * word               in/out: NUL-terminated buffer that the subword
 *                    text is appended to with strcat.  No length is
 *                    passed, so the caller must provide enough room.
 * rating             in/out: the subword rating is added on success;
 *                    set to MAX_FLOAT32 on failure.
 * certainty          in/out: lowered to the subword certainty when
 *                    that is smaller.
 *
 * Positions in the range whose best string starts with '-' or '/' are
 * appended to word directly; all other positions are collected into a
 * temporary list that is permuted with permute_all.  Failure means that
 * the temporary list is empty or permute_all gave no usable string.
 * Choice accumulation is disabled for the duration of the call.
 */
void permute_subword(CHOICES_LIST character_choices,
                     float rating_limit,
                     int start,
                     int end,
                     char *word,
                     float *rating,
                     float *certainty) {
  A_CHOICE raw_choice;
  A_CHOICE *subword_choice = NULL;
  CHOICES_LIST subchoices;
  int permuted = 0;
  int idx;

  DisableChoiceAccum();

  /* Scratch raw choice for permute_all; only its string is reclaimed. */
  raw_choice.string = NULL;
  raw_choice.rating = MAX_INT16;
  raw_choice.certainty = -MAX_INT16;

  subchoices = new_choice_list ();
  for (idx = start; idx <= end; idx++) {
    CHOICES position = (CHOICES) array_value (character_choices, idx);
    char *top = best_string (position);
    char lead = (top == NULL) ? '\0' : *top;

    if (lead == '-' || lead == '/') {
      const char *separator = best_string (position);
      strcat (word, separator);
    } else {
      subchoices = array_push (subchoices, position);
    }
  }

  if (array_count (subchoices)) {
    if (compound_debug)
      dawg_debug = TRUE;
    subword_choice = permute_all (subchoices, rating_limit, &raw_choice);
    if (compound_debug)
      dawg_debug = FALSE;

    if (subword_choice && class_string (subword_choice)) {
      strcat (word, class_string (subword_choice));
      *rating += class_probability (subword_choice);
      *certainty = min (class_certainty (subword_choice), *certainty);
      permuted = 1;
    }
  }

  /* Both failure cases report the same sentinel rating. */
  if (!permuted)
    *rating = MAX_FLOAT32;

  free_choice_list(subchoices);
  if (subword_choice)
    free_choice(subword_choice);

  if (compound_debug && *rating < MAX_FLOAT32) {
    cprintf ("Subword permuted = %s, %5.2f, %5.2f\n\n",
             word, *rating, *certainty);
  }
  if (raw_choice.string)
    strfree(raw_choice.string);

  EnableChoiceAccum();
}


/**********************************************************************
 * permute_top_choice
 *
 * Return the top choice for each character as the choice for the word.
 * In addition a choice is created for the best lower and upper case
 * non-words.  In each character position the best lower (or upper) case
 * character is substituted for the best overall character.
**********************************************************************/
/*
 * permute_top_choice
 *
 * character_choices  one choice list per character position.
 * rating_limit       give up (return NULL) once the summed top-choice
 *                    rating exceeds this.
 * raw_choice         in/out: when the top-choice word has a smaller
 *                    rating than raw_choice holds, its string is freed
 *                    and it is overwritten with the top-choice word
 *                    (TOP_CHOICE_PERM) and logged via LogNewRawChoice.
 * any_alpha          optional out: FALSE on entry, TRUE once any
 *                    examined candidate is alphabetic.  May be NULL.
 *
 * Returns a new choice holding whichever of the top-choice, lower-case
 * and upper-case words has the smallest rating after adjust_non_word,
 * or NULL when there are more than MAX_PERM_LENGTH positions, the
 * rating limit is exceeded, or a position has no first choice.
 *
 * Changes from the previous revision:
 *   - the NULL test on the first choice now happens before it is
 *     dereferenced (it used to run after the fields had been read);
 *   - the "giving up" diagnostic no longer writes word[x + 2], which
 *     was one byte past the buffer when x == MAX_PERM_LENGTH - 1;
 *   - look-ahead reads of the next two positions tolerate a missing
 *     first choice;
 *   - <ctype.h> predicates get their argument as unsigned char, since
 *     passing a negative plain char is undefined behaviour.
 */
A_CHOICE *permute_top_choice(CHOICES_LIST character_choices,
                             float rating_limit,
                             A_CHOICE *raw_choice,
                             BOOL8 *any_alpha) {
  CHOICES char_list;
  A_CHOICE *first_choice;
  A_CHOICE *best_choice;
  A_CHOICE *other_choice;
  char *ptr;
  char first_char;               //first choice
  char second_char;              //second choice
  char third_char;               //third choice
  char prev_char = '\0';         //prev in word
  char next_char = '\0';         //next in word
  char next_next_char = '\0';    //after next next in word

  char word[MAX_PERM_LENGTH + 1];
  char capital_word[MAX_PERM_LENGTH + 1];
  char lower_word[MAX_PERM_LENGTH + 1];

  int x;
  BOOL8 char_alpha;
  float rating = 0;
  float upper_rating = 0;
  float lower_rating = 0;
  float first_rating = 0;

  float certainty = 10000;
  float upper_certainty = 10000;
  float lower_certainty = 10000;

  float certainties[MAX_PERM_LENGTH + 1];
  float lower_certainties[MAX_PERM_LENGTH + 1];
  float upper_certainties[MAX_PERM_LENGTH + 1];

  CHOICES this_char;
  char ch;
  INT8 lower_done;
  INT8 upper_done;

  if (any_alpha != NULL)
    *any_alpha = FALSE;

  if (array_count (character_choices) > MAX_PERM_LENGTH) {
    return (NULL);
  }

  array_loop(character_choices, x) {
    /* Look one and two positions ahead; choose_il1 needs that context.
     * A missing or empty string is represented by a blank. */
    if (x + 1 < array_count (character_choices)) {
      char_list = (CHOICES) array_value (character_choices, x + 1);
      first_choice = (A_CHOICE *) first (char_list);
      ptr = first_choice != NULL ? class_string (first_choice) : NULL;
      next_char = (ptr != NULL && *ptr != '\0') ? *ptr : ' ';
    }
    else
      next_char = '\0';

    if (x + 2 < array_count (character_choices)) {
      char_list = (CHOICES) array_value (character_choices, x + 2);
      first_choice = (A_CHOICE *) first (char_list);
      ptr = first_choice != NULL ? class_string (first_choice) : NULL;
      next_next_char = (ptr != NULL && *ptr != '\0') ? *ptr : ' ';
    }
    else
      next_next_char = '\0';

    char_list = (CHOICES) array_value (character_choices, x);
    first_choice = (A_CHOICE *) first (char_list);

    /* Bail out before touching any field of a missing first choice.
     * The '$' marks the failing position; x + 1 <= MAX_PERM_LENGTH,
     * so both writes stay inside word. */
    if (first_choice == NULL) {
      cprintf ("Permuter giving up due to null choices list");
      word[x] = '$';
      word[x + 1] = '\0';
      cprintf (" word=%s\n", word);
      return (NULL);
    }

    ptr = class_string (first_choice);
    word[x] = (ptr != NULL && *ptr != '\0') ? *ptr : ' ';

    /* All three words start from the top choice at this position. */
    lower_word[x] = word[x];
    capital_word[x] = word[x];
    first_char = word[x];
    first_rating = class_probability (first_choice);
    upper_rating += class_probability (first_choice);
    lower_rating += class_probability (first_choice);
    lower_certainty = min (class_certainty (first_choice), lower_certainty);
    upper_certainty = min (class_certainty (first_choice), upper_certainty);

    certainties[x] = class_certainty (first_choice);
    lower_certainties[x] = class_certainty (first_choice);
    upper_certainties[x] = class_certainty (first_choice);

    lower_done = FALSE;
    upper_done = FALSE;
    char_alpha = FALSE;
    second_char = '\0';
    third_char = '\0';
    iterate_list(this_char, char_list) {
      ptr = best_string (this_char);
      ch = ptr != NULL ? *ptr : '\0';

      /* An 'l' followed by '1' / 'I' candidates with exactly the same
       * rating as the top choice is ambiguous; let choose_il1 decide
       * from the neighbouring characters.  The tied candidates are
       * consumed by advancing this_char past them. */
      if (ch == 'l' && rest (this_char) != NULL
          && best_probability (rest (this_char)) == first_rating) {
        ptr = best_string (rest (this_char));
        if (ptr != NULL && (*ptr == '1' || *ptr == 'I')) {
          second_char = *ptr;
          this_char = rest (this_char);
          if (rest (this_char) != NULL
              && best_probability (rest (this_char)) == first_rating) {
            ptr = best_string (rest (this_char));
            if (ptr != NULL && (*ptr == '1' || *ptr == 'I')) {
              third_char = *ptr;
              this_char = rest (this_char);
            }
          }
          ch = choose_il1 (first_char, second_char, third_char,
                           prev_char, next_char, next_next_char);
          if (ch != 'l' && word[x] == 'l') {
            word[x] = ch;
            lower_word[x] = ch;
            capital_word[x] = ch;
          }
        }
      }

      /* Find lower case (an upper-case letter is also accepted in the
       * first position of the word). */
      if (!lower_done
          && (islower ((unsigned char) ch)
              || (isupper ((unsigned char) ch) && x == 0))) {
        lower_word[x] = ch;
        lower_rating += best_probability (this_char);
        lower_rating -= class_probability (first_choice);
        lower_certainty = min (best_certainty (this_char), lower_certainty);
        lower_certainties[x] = best_certainty (this_char);
        lower_done = TRUE;
      }

      /* Find upper case */
      if (!upper_done && isupper ((unsigned char) ch)) {
        capital_word[x] = ch;
        upper_rating += best_probability (this_char);
        upper_rating -= class_probability (first_choice);
        upper_certainty = min (best_certainty (this_char), upper_certainty);
        upper_certainties[x] = best_certainty (this_char);
        upper_done = TRUE;
      }

      if (!char_alpha && isalpha ((unsigned char) ch))
        char_alpha = TRUE;

      if (lower_done && upper_done)
        break;
    }

    if (char_alpha && any_alpha != NULL)
      *any_alpha = TRUE;

    rating += class_probability (first_choice);
    if (rating > rating_limit)
      return (NULL);

    certainty = min (class_certainty (first_choice), certainty);
    prev_char = word[x];
  }

  /* x now equals the number of positions; terminate all three words. */
  lower_word[x] = '\0';
  capital_word[x] = '\0';
  word[x] = '\0';

  /* Record the top-choice word as the raw choice when it rates better. */
  if (rating < class_probability (raw_choice)) {
    if (class_string (raw_choice))
      strfree (class_string (raw_choice));

    class_probability (raw_choice) = rating;
    class_certainty (raw_choice) = certainty;
    class_string (raw_choice) = strsave (word);
    class_permuter (raw_choice) = TOP_CHOICE_PERM;

    LogNewRawChoice (raw_choice, 1.0, certainties);
  }

  /* Score the three candidates and keep the one with the lowest rating. */
  best_choice = new_choice (word, rating, certainty, -1, TOP_CHOICE_PERM);
  adjust_non_word(best_choice, certainties);

  other_choice = new_choice (lower_word, lower_rating, lower_certainty,
                             -1, LOWER_CASE_PERM);
  adjust_non_word(other_choice, lower_certainties);
  if (class_probability (best_choice) > class_probability (other_choice)) {
    clone_choice(best_choice, other_choice);
  }
  free_choice(other_choice);

  other_choice = new_choice (capital_word, upper_rating, upper_certainty,
                             -1, UPPER_CASE_PERM);
  adjust_non_word(other_choice, upper_certainties);
  if (class_probability (best_choice) > class_probability (other_choice)) {
    clone_choice(best_choice, other_choice);
  }
  free_choice(other_choice);

  return (best_choice);
}


/**********************************************************************
 * choose_il1
 *
 * Choose between the candidate il1 chars.
**********************************************************************/char choose_il1(char first_char, //first choice char second_char, //second choice char third_char, //third choice char prev_char, //prev in word char next_char, //next in word char next_next_char) { //after next next in word INT32 type1; //1/I/l type of first choice INT32 type2; //1/I/l type of second choice INT32 type3; //1/I/l type of third choice if (first_char == 'l' && second_char != '\0') { if (second_char == 'I' && (isupper (prev_char) && !islower (next_char) && !isdigit (next_char) || isupper (next_char) && !islower (prev_char) && !isdigit (prev_char))) first_char = second_char; //override else if (second_char == '1' || third_char == '1') { if (isdigit (next_char) || isdigit (prev_char) || next_char == 'l' && isdigit (next_next_char)) { first_char = '1'; } else if (!islower (prev_char) && (!islower (next_char) || next_char == 's' && next_next_char == 't')) { if ((prev_char != '\'' && prev_char != '`' || next_char != '\0') && (next_char != '\'' && next_char != '`' || prev_char != '\0')) { first_char = '1'; } } } if (first_char == 'l' && next_char != '\0' && !isalpha (prev_char)) { type1 = 2; if (second_char == '1') type2 = 0; else if (second_char == 'I') type2 = 1; else if (second_char == 'l') type2 = 2; else type2 = type1; if (third_char == '1') type3 = 0; else if (third_char == 'I') type3 = 1; else if (third_char == 'l') type3 = 2; else type3 = type1; if (bigram_counts[next_char][type2] > bigram_counts[next_char][type1]) { first_char = second_char; type1 = type2; } if (bigram_counts[next_char][type3] > bigram_counts[next_char][type1]) { first_char = third_char; } } } return first_char;}/********************************************************************** * permute_words * * Permute all the characters together using the dawg to prune all * but the valid words. 
**********************************************************************/A_CHOICE *permute_words(CHOICES_LIST char_choices, float rating_limit) { A_CHOICE *best_choice; int hyphen_len; best_choice = new_choice (NULL, rating_limit, -MAX_FLOAT32, -1, NO_PERM); hyphen_len = hyphen_string != NULL ? strlen (hyphen_string) : 0; if (hyphen_len + array_count (char_choices) > MAX_WERD_LENGTH) { class_probability (best_choice) = MAX_FLOAT32; } else { dawg_permute_and_select ("system words:", word_dawg, SYSTEM_DAWG_PERM, char_choices, best_choice, TRUE); dawg_permute_and_select ("document_words", document_words, DOC_DAWG_PERM, char_choices, best_choice, FALSE); dawg_permute_and_select ("user words", user_words, USER_DAWG_PERM, char_choices, best_choice, FALSE); case_sensative = FALSE; } return (best_choice);}/********************************************************************** * valid_word * * Check all the DAWGs to see if this word is in any of them. **********************************************************************/int valid_word(const char *string) { int result = NO_PERM; if (word_in_dawg (word_dawg, string)) result = SYSTEM_DAWG_PERM; else { if (word_in_dawg (document_words, string)) result = DOC_DAWG_PERM; else if (word_in_dawg (user_words, string)) result = USER_DAWG_PERM; case_sensative = FALSE; } return (result);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -