📄 permute.cpp

📁 一ＯＣＲ的相关资料。．希望对研究ＯＣＲ的朋友有所帮助．
💻 CPP
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
    }
    else {
      free_choice(result_1);
      result_1 = result_2;
    }
  }
  /* (Tail of a function whose beginning is outside this view.)
   * Each candidate below competes with result_1: the candidate is freed
   * when result_1 has the lower class_probability or the candidate has no
   * string; otherwise result_1 is freed and the candidate takes its place. */
  result_2 = number_permute_and_select (char_choices, rating_limit);
  if (class_probability (result_1) < class_probability (result_2)
  || class_string (result_2) == NULL) {
    free_choice(result_2);
  }
  else {
    free_choice(result_1);
    result_1 = result_2;
  }
  /* permute_compound_words may return NULL, hence the extra !result_2 test
   * ahead of the dereferencing comparisons. */
  result_2 = permute_compound_words (char_choices, rating_limit);
  if (!result_2 ||
    class_probability (result_1) < class_probability (result_2)
  || class_string (result_2) == NULL) {
    free_choice(result_2);
  }
  else {
    free_choice(result_1);
    result_1 = result_2;
  }
  return (result_1);
}

/**********************************************************************
 * permute_characters
 *
 * Permute these characters together according to each of the different
 * permuters that are enabled.
 *
 * char_choices  choices list handed unchanged to permute_all
 * limit         rating limit handed unchanged to permute_all
 * best_choice   in/out: overwritten (via clone_choice) when permute_all
 *               returns a choice with a lower class_probability
 * raw_choice    handed unchanged to permute_all
 **********************************************************************/
void permute_characters(CHOICES_LIST char_choices,
                        float limit,
                        A_CHOICE *best_choice,
                        A_CHOICE *raw_choice) {
  A_CHOICE *this_choice;

  permutation_count++;           /* Global counter */

  this_choice = permute_all (char_choices, limit, raw_choice);

  /* Lower class_probability wins; a NULL result leaves best_choice as is. */
  if (this_choice &&
  class_probability (this_choice) < class_probability (best_choice)) {
    clone_choice(best_choice, this_choice);
  }
  /* this_choice is released here in every case; when it won, best_choice
   * was already updated through clone_choice above. */
  free_choice(this_choice);

  if (display_ratings)
    cprintf ("permute_characters:   %-15s %4.2f %4.2f\n",
      class_string (best_choice),
      class_probability (best_choice), class_certainty (best_choice));
}

/**********************************************************************
 * permute_compound_words
 *
 * Split the word wherever the top choice of a character is '-' or '/',
 * permute each piece between separators with permute_subword, and join
 * the pieces and separators into one COMPOUND_PERM choice.  Returns NULL
 * unless a separator was found, text follows the last separator and the
 * accumulated rating stays within rating_limit.
**********************************************************************/
/*
 * permute_compound_words
 *
 * character_choices  one choices list per character position
 * rating_limit       abandon the word once the summed rating exceeds this
 *
 * A position is a separator when the first character of its top choice
 * is '-' or '/' and at least one character precedes it in the current
 * piece.  Words with more than MAX_WERD_LENGTH positions are not
 * attempted; they yield a NO_PERM choice with a NULL string and a
 * MAX_FLOAT32 rating.
 */
A_CHOICE *permute_compound_words(CHOICES_LIST character_choices,
                                 float rating_limit) {
  char word[MAX_WERD_LENGTH + 1];
  A_CHOICE *compound = NULL;     /* result; stays NULL if nothing is built */
  A_CHOICE *top;                 /* top choice at the current position */
  char *top_string;
  char top_char;
  float rating = 0;
  float certainty = 10000;
  int piece_start = 0;           /* index where the current piece begins */
  int x;

  word[0] = '\0';

  if (array_count (character_choices) > MAX_WERD_LENGTH)
    return (new_choice (NULL, MAX_FLOAT32, -MAX_FLOAT32, -1, NO_PERM));

  array_loop(character_choices, x) {
    top = (A_CHOICE *) first ((CHOICES) array_value (character_choices, x));
    top_string = class_string (top);
    top_char = '\0';
    if (top_string != NULL)
      top_char = *top_string;

    /* Only a '-' or '/' that follows at least one character ends a piece. */
    if (x <= piece_start || (top_char != '-' && top_char != '/'))
      continue;

    if (compound_debug)
      cprintf ("Hyphenated word found\n");

    /* Permute the piece that ends just before this separator. */
    permute_subword (character_choices, rating_limit,
                     piece_start, x - 1, word, &rating, &certainty);
    if (rating > rating_limit)
      break;

    /* Append the separator itself and fold in its scores. */
    piece_start = x + 1;
    strcat (word, class_string (top));
    rating += class_probability (top);
    certainty = min (class_certainty (top), certainty);
  }

  /* Nothing to return unless a separator was seen, characters remain
   * after the last one, and the rating is still within the limit. */
  if (piece_start <= 0 || piece_start >= x || !(rating <= rating_limit))
    return (compound);

  permute_subword (character_choices, rating_limit,
                   piece_start, x - 1, word, &rating, &certainty);
  compound = new_choice (word, rating, certainty, -1, COMPOUND_PERM);
  return (compound);
}

/**********************************************************************
 * permute_subword
 *
 * Permute a part of a compound word.  This subword is bounded by hyphens
 * and the start and end of the word.  Call the standard word permute
 * function on a set of choices covering only part of the original
 * word.  When it is done reclaim the memory that was used in the
 * exercise.
**********************************************************************/
/*
 * permute_subword
 *
 * character_choices  choices for the whole word
 * rating_limit       limit handed to permute_all
 * start, end         inclusive range of positions forming this subword
 * word               in/out: the subword text is appended with strcat;
 *                    the caller must provide enough room
 * rating             in/out: increased by the subword rating, or set to
 *                    MAX_FLOAT32 when no usable permutation exists
 * certainty          in/out: lowered to the subword certainty if smaller
 *
 * Choice accumulation is disabled on entry and re-enabled on exit.
 */
void permute_subword(CHOICES_LIST character_choices,
                     float rating_limit,
                     int start,
                     int end,
                     char *word,
                     float *rating,
                     float *certainty) {
  A_CHOICE scratch_raw;          /* throw-away raw choice for permute_all */
  A_CHOICE *permuted = NULL;
  CHOICES_LIST piece;
  CHOICES position;
  char *lead_string;
  char lead;
  int pos;

  DisableChoiceAccum();

  scratch_raw.string = NULL;
  scratch_raw.rating = MAX_INT16;
  scratch_raw.certainty = -MAX_INT16;

  /* Collect the positions to permute; a position whose top choice starts
   * with '-' or '/' is copied straight into the output word instead. */
  piece = new_choice_list ();
  for (pos = start; pos <= end; pos++) {
    position = (CHOICES) array_value (character_choices, pos);
    lead_string = best_string (position);
    lead = '\0';
    if (lead_string != NULL)
      lead = *lead_string;

    switch (lead) {
      case '-':
      case '/':
        strcat (word, best_string(position));
        break;
      default:
        piece = array_push (piece, position);
        break;
    }
  }

  if (array_count (piece)) {
    if (compound_debug)
      dawg_debug = TRUE;
    permuted = permute_all (piece, rating_limit, &scratch_raw);
    if (compound_debug)
      dawg_debug = FALSE;
  }

  /* permuted is still NULL when there was nothing to permute, so both
   * failure cases end up marking the rating as unusable. */
  if (permuted && class_string (permuted)) {
    strcat (word, class_string (permuted));
    *rating += class_probability (permuted);
    *certainty = min (class_certainty (permuted), *certainty);
  }
  else {
    *rating = MAX_FLOAT32;
  }

  free_choice_list(piece);
  if (permuted)
    free_choice(permuted);

  if (compound_debug && *rating < MAX_FLOAT32) {
    cprintf ("Subword permuted = %s, %5.2f, %5.2f\n\n",
      word, *rating, *certainty);
  }

  if (scratch_raw.string)
    strfree(scratch_raw.string);

  EnableChoiceAccum();
}

/**********************************************************************
 * permute_top_choice
 *
 * Return the top choice for each character as the choice for the word.
* In addition a choice is created for the best lower and upper case * non-words.  In each character position the best lower (or upper) case * character is substituted for the best overall character. **********************************************************************/A_CHOICE *permute_top_choice(CHOICES_LIST character_choices,                             float rating_limit,                             A_CHOICE *raw_choice,                             BOOL8 *any_alpha) {  CHOICES char_list;  A_CHOICE *first_choice;  A_CHOICE *best_choice;  A_CHOICE *other_choice;  char *ptr;  char first_char;               //first choice  char second_char;              //second choice  char third_char;               //third choice  char prev_char = '\0';         //prev in word  char next_char = '\0';         //next in word  char next_next_char = '\0';    //after next next in word  char word[MAX_PERM_LENGTH + 1];  char capital_word[MAX_PERM_LENGTH + 1];  char lower_word[MAX_PERM_LENGTH + 1];  int x;  BOOL8 char_alpha;  float rating = 0;  float upper_rating = 0;  float lower_rating = 0;  float first_rating = 0;  float certainty = 10000;  float upper_certainty = 10000;  float lower_certainty = 10000;  float certainties[MAX_PERM_LENGTH + 1];  float lower_certainties[MAX_PERM_LENGTH + 1];  float upper_certainties[MAX_PERM_LENGTH + 1];  register CHOICES this_char;  register char ch;  register INT8 lower_done;  register INT8 upper_done;  if (any_alpha != NULL)    *any_alpha = FALSE;  if (array_count (character_choices) > MAX_PERM_LENGTH) {    return (NULL);  }  array_loop(character_choices, x) {    if (x + 1 < array_count (character_choices)) {      char_list = (CHOICES) array_value (character_choices, x + 1);      first_choice = (A_CHOICE *) first (char_list);      ptr = class_string (first_choice);      next_char = (ptr != NULL && *ptr != '\0') ? 
*ptr : ' ';    }    else      next_char = '\0';    if (x + 2 < array_count (character_choices)) {      char_list = (CHOICES) array_value (character_choices, x + 2);      first_choice = (A_CHOICE *) first (char_list);      ptr = class_string (first_choice);      next_next_char = (ptr != NULL && *ptr != '\0') ? *ptr : ' ';    }    else      next_next_char = '\0';    char_list = (CHOICES) array_value (character_choices, x);    first_choice = (A_CHOICE *) first (char_list);    ptr = class_string (first_choice);    word[x] = (ptr != NULL && *ptr != '\0') ? *ptr : ' ';    lower_word[x] = word[x];    capital_word[x] = word[x];    first_char = word[x];    first_rating = class_probability (first_choice);    upper_rating += class_probability (first_choice);    lower_rating += class_probability (first_choice);    lower_certainty = min (class_certainty (first_choice), lower_certainty);    upper_certainty = min (class_certainty (first_choice), upper_certainty);    certainties[x] = class_certainty (first_choice);    lower_certainties[x] = class_certainty (first_choice);    upper_certainties[x] = class_certainty (first_choice);    lower_done = FALSE;    upper_done = FALSE;    char_alpha = FALSE;    second_char = '\0';    third_char = '\0';    iterate_list(this_char, char_list) {      ptr = best_string (this_char);      ch = ptr != NULL ? 
*ptr : '\0';      if (ch == 'l' && rest (this_char) != NULL      && best_probability (rest (this_char)) == first_rating) {        ptr = best_string (rest (this_char));        if (ptr != NULL && (*ptr == '1' || *ptr == 'I')) {          second_char = *ptr;          this_char = rest (this_char);          if (rest (this_char) != NULL          && best_probability (rest (this_char)) == first_rating) {            ptr = best_string (rest (this_char));            if (ptr != NULL && (*ptr == '1' || *ptr == 'I')) {              third_char = *ptr;              this_char = rest (this_char);            }          }          ch = choose_il1 (first_char, second_char, third_char,            prev_char, next_char, next_next_char);          if (ch != 'l' && word[x] == 'l') {            word[x] = ch;            lower_word[x] = ch;            capital_word[x] = ch;          }        }      }      /* Find lower case */      if (!lower_done && (islower (ch) || (isupper (ch) && x == 0))) {        lower_word[x] = ch;        lower_rating += best_probability (this_char);        lower_rating -= class_probability (first_choice);        lower_certainty = min (best_certainty (this_char), lower_certainty);        lower_certainties[x] = best_certainty (this_char);        lower_done = TRUE;      }      /* Find upper case */      if (!upper_done && isupper (ch)) {        capital_word[x] = ch;        upper_rating += best_probability (this_char);        upper_rating -= class_probability (first_choice);        upper_certainty = min (best_certainty (this_char), upper_certainty);        upper_certainties[x] = best_certainty (this_char);        upper_done = TRUE;      }      if (!char_alpha && isalpha (ch))        char_alpha = TRUE;      if (lower_done && upper_done)        break;    }    if (char_alpha && any_alpha != NULL)      *any_alpha = TRUE;    if (first_choice == NULL) {      cprintf ("Permuter giving up due to null choices list");      word[x + 1] = '$';      word[x + 2] = '\0';      cprintf (" 
word=%s\n", word);      return (NULL);    }    rating += class_probability (first_choice);    if (rating > rating_limit)      return (NULL);    certainty = min (class_certainty (first_choice), certainty);    prev_char = word[x];  }  lower_word[x] = '\0';  capital_word[x] = '\0';  word[x] = '\0';  if (rating < class_probability (raw_choice)) {    if (class_string (raw_choice))      strfree (class_string (raw_choice));    class_probability (raw_choice) = rating;    class_certainty (raw_choice) = certainty;    class_string (raw_choice) = strsave (word);    class_permuter (raw_choice) = TOP_CHOICE_PERM;    LogNewRawChoice (raw_choice, 1.0, certainties);  }  best_choice = new_choice (word, rating, certainty, -1, TOP_CHOICE_PERM);  adjust_non_word(best_choice, certainties);  other_choice = new_choice (lower_word, lower_rating, lower_certainty,    -1, LOWER_CASE_PERM);  adjust_non_word(other_choice, lower_certainties);  if (class_probability (best_choice) > class_probability (other_choice)) {    clone_choice(best_choice, other_choice);  }  free_choice(other_choice);  other_choice = new_choice (capital_word, upper_rating, upper_certainty,    -1, UPPER_CASE_PERM);  adjust_non_word(other_choice, upper_certainties);  if (class_probability (best_choice) > class_probability (other_choice)) {    clone_choice(best_choice, other_choice);  }  free_choice(other_choice);  return (best_choice);}/********************************************************************** * choose_il1 * * Choose between the candidate il1 chars. 
**********************************************************************/char choose_il1(char first_char,        //first choice                char second_char,       //second choice                char third_char,        //third choice                char prev_char,         //prev in word                char next_char,         //next in word                char next_next_char) {  //after next next in word  INT32 type1;                   //1/I/l type of first choice  INT32 type2;                   //1/I/l type of second choice  INT32 type3;                   //1/I/l type of third choice  if (first_char == 'l' && second_char != '\0') {    if (second_char == 'I'      && (isupper (prev_char) && !islower (next_char)      && !isdigit (next_char) || isupper (next_char)      && !islower (prev_char) && !isdigit (prev_char)))      first_char = second_char;  //override    else if (second_char == '1' || third_char == '1') {      if (isdigit (next_char) || isdigit (prev_char)      || next_char == 'l' && isdigit (next_next_char)) {        first_char = '1';      }      else if (!islower (prev_char)        && (!islower (next_char) || next_char == 's'      && next_next_char == 't')) {        if ((prev_char != '\'' && prev_char != '`' || next_char != '\0')          && (next_char != '\'' && next_char != '`'        || prev_char != '\0')) {          first_char = '1';        }      }    }    if (first_char == 'l' && next_char != '\0' && !isalpha (prev_char)) {      type1 = 2;      if (second_char == '1')        type2 = 0;      else if (second_char == 'I')        type2 = 1;      else if (second_char == 'l')        type2 = 2;      else        type2 = type1;      if (third_char == '1')        type3 = 0;      else if (third_char == 'I')        type3 = 1;      else if (third_char == 'l')        type3 = 2;      else        type3 = type1;      if (bigram_counts[next_char][type2] >      bigram_counts[next_char][type1]) {        first_char = second_char;        type1 = type2;      }      if 
(bigram_counts[next_char][type3] >      bigram_counts[next_char][type1]) {        first_char = third_char;      }    }  }  return first_char;}/********************************************************************** * permute_words * * Permute all the characters together using the dawg to prune all * but the valid words. **********************************************************************/A_CHOICE *permute_words(CHOICES_LIST char_choices, float rating_limit) {  A_CHOICE *best_choice;  int hyphen_len;  best_choice = new_choice (NULL, rating_limit, -MAX_FLOAT32, -1, NO_PERM);  hyphen_len = hyphen_string != NULL ? strlen (hyphen_string) : 0;  if (hyphen_len + array_count (char_choices) > MAX_WERD_LENGTH) {    class_probability (best_choice) = MAX_FLOAT32;  }  else {    dawg_permute_and_select ("system words:", word_dawg, SYSTEM_DAWG_PERM,      char_choices, best_choice, TRUE);    dawg_permute_and_select ("document_words", document_words,      DOC_DAWG_PERM, char_choices, best_choice,      FALSE);    dawg_permute_and_select ("user words", user_words, USER_DAWG_PERM,      char_choices, best_choice, FALSE);    case_sensative = FALSE;  }  return (best_choice);}/********************************************************************** * valid_word * * Check all the DAWGs to see if this word is in any of them. **********************************************************************/int valid_word(const char *string) {  int result = NO_PERM;  if (word_in_dawg (word_dawg, string))    result = SYSTEM_DAWG_PERM;  else {    if (word_in_dawg (document_words, string))      result = DOC_DAWG_PERM;    else if (word_in_dawg (user_words, string))      result = USER_DAWG_PERM;    case_sensative = FALSE;  }  return (result);}
上一页 1 23
💿 文件大小 2763 K
👤 上传用户 danlong
📂 所属分类其他书籍
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -