permnum.cpp
来自「一个google的OCR源码」· C++ 代码 · 共 523 行 · 第 1/2 页
CPP
523 行
free_choice (first_node (*result)); pop_off(*result); } else { *limit = best_probability (*result); break; } } } } else { JOIN_ON (*result, number_permute (state, choices, char_index + 1, limit, word, unichar_lengths, unichar_offsets, rating, certainty, certainty_array)); } } } else { if (number_debug) cprintf ("pruned word (%s, rating=%4.2f, limit=%4.2f)\n", word, rating, *limit); }}/********************************************************************** * init_permute * * Initialize anything that needs to be set up for the permute * functions. **********************************************************************/void init_permnum() { make_good_number(); make_ok_number(); make_number_debug(); make_number_depth();}/********************************************************************** * number_character_type * * Decide which type of a character (with regard to the numeric state * table) we are looking at. **********************************************************************/int number_character_type( //current state const char* ch, int length, int state) { if (unicharset.get_isalpha (ch, length)) {#if 0 if (state < 4 && strchr (allowed_char_strs[0], lower_char) != NULL) return 5; else if (state == 4 && strchr (allowed_char_strs[1], lower_char) != NULL) return 6; else if (state == 5 && strchr (allowed_char_strs[2], lower_char) != NULL) return 7;#endif return 3; } else if (unicharset.get_isdigit (ch, length)) return (1); else if (length == 1 && isoperator (*ch)) return (2); else if (length == 1 && istrailing (*ch)) return (4); else if (length == 1 && isleading (*ch)) return (0); else return (-1);}/********************************************************************** * number_state_change * * Execute a state transition according to the state table and * additional rules. 
**********************************************************************/int number_state_change(int state, //current state const char *word, //current char const char *lengths) { //length of current char int char_type; //type of char int new_state; //state to return int old_state = state >> kStateShift; int repeats = state & kRepeatMask;#if 0 int index; char copy_word[4]; //tolowered chars#endif char_type = number_character_type (word, *lengths, old_state); if (char_type == -1) return -1; new_state = number_state_table[old_state][char_type]; if (new_state == old_state) { ++repeats; if (repeats >= kMaxRepeats[old_state]) return -1; } else { repeats = 0; } if (new_state >= 0) return (new_state << kStateShift) | repeats; if (new_state == -99) return -1; //now check to see if the last state-3 chars in the word //make an allowable word. For now only 3 letter words //are allowed if (old_state != 6) return -1; //only 3 letters now#if 0 copy_word[0] = tolower (word[-3]); copy_word[1] = tolower (word[-2]); copy_word[2] = tolower (word[-1]); copy_word[3] = '\0'; for (index = 0; allowed_alpha_strs[index] != NULL; index++) { if (strcmp (copy_word, allowed_alpha_strs[index]) == 0) return (-new_state) << kStateShift; }#endif return -1; //not a good word}/********************************************************************** * number_permute * * Permute all the valid string that match the 'grammar' of numbers. * The valid syntax for numbers is encoded in a state table. The * permuter uses this state table to enumerate all the string that * can be produced using the input choices. 
**********************************************************************/
// For the character position char_index, hands at most number_depth of
// the listed choices to append_number_choices, which accumulates its
// output in the returned list.  Every other argument is forwarded to
// append_number_choices unchanged.
//
// Returns NIL when char_index is not below array_count(choices) or when
// nothing was appended.
CHOICES number_permute(int state,
                       CHOICES_LIST choices,
                       int char_index,
                       float *limit,
                       char *word,
                       char unichar_lengths[],
                       int unichar_offsets[],
                       float rating,
                       float certainty,
                       float *certainty_array) {
  CHOICES permutations = NIL;

  if (number_debug) {
    cprintf ("number_permute (state=%d, char_index=%d, limit=%4.2f, ",
             state, char_index, *limit);
    cprintf ("word=%s, rating=%4.2f, certainty=%4.2f)\n",
             word, rating, certainty);
  }

  if (char_index < array_count (choices)) {
    CHOICES candidate;
    int tried = 0;

    iterate_list (candidate, (CHOICES) array_index (choices, char_index)) {
      // Only the first number_depth entries are expanded; later entries
      // are still walked but ignored.
      if (tried < number_depth) {
        append_number_choices (state, word, unichar_lengths,
                               unichar_offsets, choices, char_index,
                               (A_CHOICE *) first_node (candidate),
                               limit, rating, certainty,
                               certainty_array, &permutations);
      }
      ++tried;
    }
  }

  if (permutations && number_debug == 1)
    print_choices ("number_permute:", permutations);

  return permutations;
}

/**********************************************************************
 * number_permute_and_select
 *
 * Permute all the possible valid numbers and adjust their ratings.
 * Save the best rating.
**********************************************************************/A_CHOICE *number_permute_and_select(CHOICES_LIST char_choices, float rating_limit) { CHOICES result = NIL; char word[UNICHAR_LEN * MAX_WERD_LENGTH + 1]; char unichar_lengths[MAX_WERD_LENGTH + 1]; int unichar_offsets[MAX_WERD_LENGTH + 1]; float certainty_array[MAX_WERD_LENGTH + 1]; float rating = rating_limit; A_CHOICE *best_choice; best_choice = new_choice (NULL, NULL, MAXFLOAT, -MAXFLOAT, -1, NO_PERM); if (array_count (char_choices) <= MAX_WERD_LENGTH) { word[0] = '\0'; unichar_lengths[0] = 0; unichar_offsets[0] = 0; result = number_permute (0, char_choices, 0, &rating, word, unichar_lengths, unichar_offsets, 0.0, 0.0, certainty_array); if (display_ratings && result) print_choices ("number_permuter", result); while (result != NIL) { if (best_probability (result) < class_probability (best_choice)) { clone_choice (best_choice, first_node (result)); } free_choice (first_node (result)); pop_off(result); } } return (best_choice);}/********************************************************************** * pure_number * * Check to see if this string is a pure number (one that does not end * with alphabetic characters). **********************************************************************/int pure_number(const char *string, const char *lengths) { int x; int offset; x = strlen (lengths) - 1; offset = strlen (string) - lengths[x]; for (;x >= 0; offset -= lengths[--x]) { if (unicharset.get_isdigit (string + offset, lengths[x])) { return (TRUE); } else if (unicharset.get_isalpha (string + offset, lengths[x])) return (FALSE); } return (FALSE);}/********************************************************************** * valid_number * * Check this string to see if it is a valid number. Return TRUE if * it is. 
**********************************************************************/
// string  - the bytes of the word
// lengths - NUL-terminated array giving the byte length of each unichar
//
// Feeds each unichar through number_state_change starting from state 0.
// Returns FALSE as soon as a transition yields -1; otherwise the result
// is non-zero only when digits strictly outnumber the other unichars.
int valid_number(const char *string, const char *lengths) {
  const int total = strlen (lengths);
  int digit_count = 0;
  int packed_state = 0;
  int byte_pos = 0;

  for (int i = 0; i < total; ++i) {
    packed_state = number_state_change (packed_state,
                                        string + byte_pos,
                                        lengths + i);
    if (packed_state == -1)
      return (FALSE);

    if (unicharset.get_isdigit (string + byte_pos, lengths[i]))
      ++digit_count;

    byte_pos += lengths[i];      // advance to the next unichar's first byte
  }

  return digit_count > total - digit_count;
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?