permute.cpp

来自「一个google的OCR源码」· C++ 代码 · 共 1,706 行 · 第 1/3 页

CPP
1,706
字号
  },  {                              //0xa0=.    0, 0, 0  },  {                              //0xa1=.    0, 0, 0  },  {                              //0xa2=.    0, 0, 0  },  {                              //0xa3=.    0, 0, 0  },  {                              //0xa4=.    0, 0, 0  },  {                              //0xa5=.    0, 0, 0  },  {                              //0xa6=.    0, 0, 0  },  {                              //0xa7=.    0, 0, 0  },  {                              //0xa8=.    0, 0, 0  },  {                              //0xa9=.    0, 0, 0  },  {                              //0xaa=.    0, 0, 0  },  {                              //0xab=.    0, 0, 0  },  {                              //0xac=.    0, 0, 0  },  {                              //0xad=.    0, 0, 0  },  {                              //0xae=.    0, 0, 0  },  {                              //0xaf=.    0, 0, 0  },  {                              //0xb0=.    0, 0, 0  },  {                              //0xb1=.    0, 0, 0  },  {                              //0xb2=.    0, 0, 0  },  {                              //0xb3=.    0, 0, 0  },  {                              //0xb4=.    0, 0, 0  },  {                              //0xb5=.    0, 0, 0  },  {                              //0xb6=.    0, 0, 0  },  {                              //0xb7=.    0, 0, 0  },  {                              //0xb8=.    0, 0, 0  },  {                              //0xb9=.    0, 0, 0  },  {                              //0xba=.    0, 0, 0  },  {                              //0xbb=.    0, 0, 0  },  {                              //0xbc=.    0, 0, 0  },  {                              //0xbd=.    0, 0, 0  },  {                              //0xbe=.    0, 0, 0  },  {                              //0xbf=.    0, 0, 0  },  {                              //0xc0=.    0, 0, 0  },  {                              //0xc1=.    0, 0, 0  },  {                              //0xc2=.    
0, 0, 0  },  {                              //0xc3=.    0, 0, 0  },  {                              //0xc4=.    0, 0, 0  },  {                              //0xc5=.    0, 0, 0  },  {                              //0xc6=.    0, 0, 0  },  {                              //0xc7=.    0, 0, 0  },  {                              //0xc8=.    0, 0, 0  },  {                              //0xc9=.    0, 0, 0  },  {                              //0xca=.    0, 0, 0  },  {                              //0xcb=.    0, 0, 0  },  {                              //0xcc=.    0, 0, 0  },  {                              //0xcd=.    0, 0, 0  },  {                              //0xce=.    0, 0, 0  },  {                              //0xcf=.    0, 0, 0  },  {                              //0xd0=.    0, 0, 0  },  {                              //0xd1=.    0, 0, 0  },  {                              //0xd2=.    0, 0, 0  },  {                              //0xd3=.    0, 0, 0  },  {                              //0xd4=.    0, 0, 0  },  {                              //0xd5=.    0, 0, 0  },  {                              //0xd6=.    0, 0, 0  },  {                              //0xd7=.    0, 0, 0  },  {                              //0xd8=.    0, 0, 0  },  {                              //0xd9=.    0, 0, 0  },  {                              //0xda=.    0, 0, 0  },  {                              //0xdb=.    0, 0, 0  },  {                              //0xdc=.    0, 0, 0  },  {                              //0xdd=.    0, 0, 0  },  {                              //0xde=.    0, 0, 0  },  {                              //0xdf=.    0, 0, 0  },  {                              //0xe0=.    0, 0, 0  },  {                              //0xe1=.    0, 0, 0  },  {                              //0xe2=.    0, 0, 0  },  {                              //0xe3=.    0, 0, 0  },  {                              //0xe4=.    0, 0, 0  },  {                              //0xe5=.    
0, 0, 0  },  {                              //0xe6=.    0, 0, 0  },  {                              //0xe7=.    0, 0, 0  },  {                              //0xe8=.    0, 0, 0  },  {                              //0xe9=.    0, 0, 0  },  {                              //0xea=.    0, 0, 0  },  {                              //0xeb=.    0, 0, 0  },  {                              //0xec=.    0, 0, 0  },  {                              //0xed=.    0, 0, 0  },  {                              //0xee=.    0, 0, 0  },  {                              //0xef=.    0, 0, 0  },  {                              //0xf0=.    0, 0, 0  },  {                              //0xf1=.    0, 0, 0  },  {                              //0xf2=.    0, 0, 0  },  {                              //0xf3=.    0, 0, 0  },  {                              //0xf4=.    0, 0, 0  },  {                              //0xf5=.    0, 0, 0  },  {                              //0xf6=.    0, 0, 0  },  {                              //0xf7=.    0, 0, 0  },  {                              //0xf8=.    0, 0, 0  },  {                              //0xf9=.    0, 0, 0  },  {                              //0xfa=.    0, 0, 0  },  {                              //0xfb=.    0, 0, 0  },  {                              //0xfc=.    0, 0, 0  },  {                              //0xfd=.    0, 0, 0  },  {                              //0xfe=.    0, 0, 0  },  {                              //0xff=.    
0, 0, 0  },};#endif//extern "C" double permuter_pending_threshold;                                 /* Similarity matcher values */#define SIM_CERTAINTY_SCALE  -10.0                                 /* Similarity matcher values */#define SIM_CERTAINTY_OFFSET -10.0                                 /* Worst E*L product to stop on */#define SIMILARITY_FLOOR     100.0/*----------------------------------------------------------------------              F u n c t i o n s----------------------------------------------------------------------*//********************************************************************** * good_choice * * Return TRUE if a good answer is found for the unknown blob rating. **********************************************************************/int good_choice(A_CHOICE *choice) {  register float certainty;  if (choice == NULL)    return (FALSE);  if (similarity_enable) {    if ((class_probability (choice) + 1) * class_certainty (choice) >      SIMILARITY_FLOOR)      return (FALSE);    certainty =      SIM_CERTAINTY_OFFSET +      class_probability (choice) * SIM_CERTAINTY_SCALE;  }  else {    certainty = class_certainty (choice);  }  if (certainty > certainty_threshold) {    return (TRUE);  }  else {    return (FALSE);  }}/********************************************************************** * add_document_word * * Add a word found on this document to the document specific * dictionary. **********************************************************************/void add_document_word(A_CHOICE *best_choice) {  char filename[CHARS_PER_LINE];  FILE *doc_word_file;  char *string;  char *lengths;  int stringlen;                 //length of word  string = class_string (best_choice);  lengths = class_lengths (best_choice);  stringlen = strlen (lengths);  // Skip if using external dictionary.  
if (letter_is_okay != &def_letter_is_okay) return;  if (!doc_dict_enable    || valid_word (string) || CurrentWordAmbig () || stringlen < 2)    return;  if (!good_choice (best_choice) || stringlen == 2) {    if (class_certainty (best_choice) < permuter_pending_threshold)      return;    if (!word_in_dawg (pending_words, string)) {      if (stringlen > 2 ||          (stringlen >= 2 && unicharset.get_isupper (string, lengths[0]) &&           unicharset.get_isupper (string + lengths[0], lengths[1])))        add_word_to_dawg(pending_words,                         string,                         MAX_DOC_EDGES,                         RESERVED_DOC_EDGES);      return;    }  }  if (save_doc_words) {    strcpy(filename, imagefile);    strcat (filename, ".doc");    doc_word_file = open_file (filename, "a");    fprintf (doc_word_file, "%s\n", string);    fclose(doc_word_file);  }  add_word_to_dawg(document_words, string, MAX_DOC_EDGES, RESERVED_DOC_EDGES);}/********************************************************************** * adjust_non_word * * Assign an adjusted value to a string that is a non-word.  The value * that this word choice has is based on case and punctuation rules. 
**********************************************************************/
// best_choice:  choice whose class_probability is rescaled in place.
// certainties:  passed through unchanged to LogNewWordChoice.
//
// RATING_PAD is added before scaling and removed afterwards.  The scale
// factor is non_word when both case_ok() and punctuation_ok() accept the
// string, otherwise garbage.  With adjust_debug set, the word, the value
// before and after, the factor used, and the failed checks ("C" for
// case, "P" for punctuation) are printed.
void adjust_non_word (A_CHOICE * best_choice, float certainties[]) {
  char *this_word;
  char *word_lengths;
  float adjust_factor;

  if (adjust_debug)
    cprintf ("%s %4.2f ",
      class_string (best_choice), class_probability (best_choice));

  this_word = class_string (best_choice);
  word_lengths = class_lengths (best_choice);

  class_probability (best_choice) += RATING_PAD;
  if (case_ok (this_word, word_lengths)
      && punctuation_ok (this_word, word_lengths) != -1) {
    class_probability (best_choice) *= non_word;
    adjust_factor = non_word;
    if (adjust_debug)
      cprintf (", %4.2f ", non_word);
  }
  else {
    class_probability (best_choice) *= garbage;
    adjust_factor = garbage;
    if (adjust_debug) {
      // Report which of the two checks rejected the string.
      if (!case_ok (this_word, word_lengths))
        cprintf (", C");
      if (punctuation_ok (this_word, word_lengths) == -1)
        cprintf (", P");
      cprintf (", %4.2f ", garbage);
    }
  }
  class_probability (best_choice) -= RATING_PAD;

  LogNewWordChoice(best_choice, adjust_factor, certainties);

  if (adjust_debug)
    cprintf (" --> %4.2f\n", class_probability (best_choice));
}


/**********************************************************************
 * init_permute
 *
 * Initialize anything that needs to be set up for the permute
 * functions.
**********************************************************************/void init_permute_vars() {  make_adjust_debug();  make_compound_debug();  make_non_word();  make_garbage();  make_doc_words();  make_doc_dict();  init_permdawg_vars();  init_permnum();}void init_permute() {  if (word_dawg != NULL)    end_permute();  init_permdawg();  STRING name;  name = language_data_path_prefix;  name += "word-dawg";  word_dawg = read_squished_dawg(name.string());  document_words =    (EDGE_ARRAY) memalloc (sizeof (EDGE_RECORD) * MAX_DOC_EDGES);  initialize_dawg(document_words, MAX_DOC_EDGES);  pending_words =    (EDGE_ARRAY) memalloc (sizeof (EDGE_RECORD) * MAX_DOC_EDGES);  initialize_dawg(pending_words, MAX_DOC_EDGES);  user_words = (EDGE_ARRAY) memalloc (sizeof (EDGE_RECORD) * MAX_USER_EDGES);  name = language_data_path_prefix;  name += "user-words";  read_word_list(name.string(), user_words, MAX_USER_EDGES, USER_RESERVED_EDGES);}void end_permute() {  if (word_dawg == NULL)    return;  // Not safe to call twice.  memfree(word_dawg);  word_dawg = NULL;  memfree(document_words);  document_words =  NULL;  memfree(pending_words);  pending_words = NULL;  memfree(user_words);  user_words = NULL;  end_permdawg();}/********************************************************************** * permute_all * * Permute all the characters together using all of the different types * of permuters/selectors available.  Each of the characters must have * a non-NIL choice list. 
**********************************************************************/
// char_choices:  per-character choice lists to combine.
// rating_limit:  passed to every permuter.
// raw_choice:    passed to permute_top_choice().
//
// Returns the n-gram permuter's result when ngram_permuter_activated is
// set.  Otherwise returns NULL if the top-choice permuter produced
// nothing, the top choice alone if permute_only_top is set, and else the
// survivor of the top-choice, word, number and compound-word permuters.
// A challenger replaces the current best unless it is NULL, has a NULL
// class_string, or the current best's class_probability is lower than
// the challenger's.  The loser is released with free_choice().
A_CHOICE *permute_all(CHOICES_LIST char_choices,
                      float rating_limit,
                      A_CHOICE *raw_choice) {
  A_CHOICE *result_1;
  A_CHOICE *result_2 = NULL;
  BOOL8 any_alpha;

  result_1 = permute_top_choice (char_choices, rating_limit, raw_choice,
    &any_alpha);

  if (ngram_permuter_activated) {
    A_CHOICE *ngram_result =
      ngram_permute_and_select(char_choices, rating_limit, word_dawg);
    // The top-choice result is not returned on this path; release it
    // here so it is not leaked.
    if (result_1 != NULL)
      free_choice(result_1);
    return ngram_result;
  }

  if (result_1 == NULL)
    return (NULL);
  if (permute_only_top)
    return result_1;

  if (any_alpha && array_count (char_choices) <= MAX_WERD_LENGTH) {
    result_2 = permute_words (char_choices, rating_limit);
    // Same NULL-safe test as the compound-word step below.
    if (!result_2 ||
      class_probability (result_1) < class_probability (result_2)
    || class_string (result_2) == NULL) {
      free_choice(result_2);
    }
    else {
      free_choice(result_1);
      result_1 = result_2;
    }
  }

  result_2 = number_permute_and_select (char_choices, rating_limit);
  // Same NULL-safe test as the compound-word step below.
  if (!result_2 ||
    class_probability (result_1) < class_probability (result_2)
  || class_string (result_2) == NULL) {
    free_choice(result_2);
  }
  else {
    free_choice(result_1);
    result_1 = result_2;
  }

  result_2 = permute_compound_words (char_choices, rating_limit);
  if (!result_2 ||
    class_probability (result_1) < class_probability (result_2)
  || class_string (result_2) == NULL) {
    free_choice(result_2);
  }
  else {
    free_choice(result_1);
    result_1 = result_2;
  }

  return (result_1);
}


/**********************************************************************
 * permute_characters
 *
 * Permute these characters together according to each of the different
 * permuters that are enabled.
**********************************************************************/void permute_characters(CHOICES_LIST char_choices,                        float limit,                        A_CHOICE *best_choice,                        A_CHOICE *raw_choice) {  A_CHOICE *this_choice;  permutation_count++;           /* Global counter */  this_choice = permute_all (char_choices, limit, raw_choice);  if (this_choice &&  class_probability (this_choice) < class_probability (best_choice)) {    clone_choice(best_choice, this_choice);  }

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?