⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 chopper.cpp

📁 一些OCR的相关资料，希望对研究OCR的朋友有所帮助。
💻 CPP
📖 第 1 页 / 共 2 页
字号:
    }    if (!found_em[1] &&      ((seam->split2 == NULL) ||    is_split_outline (outline, seam->split2))) {      found_em[1] = TRUE;    }    if (!found_em[2] &&      ((seam->split3 == NULL) ||    is_split_outline (outline, seam->split3))) {      found_em[2] = TRUE;    }    last_outline = outline;  }  if (!found_em[0] || !found_em[1] || !found_em[2])    return (FALSE);  else    return (TRUE);}/********************************************************************** * chop_word_main * * Classify the blobs in this word and permute the results.  Find the * worst blob in the word and chop it up.  Continue this process until * a good answer has been found or all the blobs have been chopped up * enough.  Return the word level ratings. **********************************************************************/CHOICES_LIST chop_word_main(register TWERD *word,                            int fx,                            A_CHOICE *best_choice,                            A_CHOICE *raw_choice,                            BOOL8 tester,                            BOOL8 trainer) {  TBLOB *pblob;  TBLOB *blob;  CHOICES_LIST char_choices;  int index;  int did_chopping;  float rating_limit = 1000.0;  STATE state;  SEAMS seam_list = NULL;  CHOICES match_result;  MATRIX ratings = NULL;  DANGERR fixpt;                 /*dangerous ambig */  INT32 state_count;             //no of states  INT32 bit_count;               //no of bits  static STATE best_state;  static STATE chop_states[64];  //in between states  state_count = 0;  set_null_choice(best_choice);  set_null_choice(raw_choice);  char_choices = new_choice_list ();  did_chopping = 0;  for (blob = word->blobs, pblob = NULL, index = 0; blob != NULL;  blob = blob->next, index++) {    match_result =      (CHOICES) classify_blob (pblob, blob, blob->next, NULL, fx,      "chop_word:", Green, &chop_states[0],      &best_state, matcher_pass, index);    char_choices = array_push (char_choices, match_result);    pblob = blob;  }  bit_count = index - 
1;  permute_characters(char_choices, rating_limit, best_choice, raw_choice);  set_n_ones (&state, array_count (char_choices) - 1);  if (matcher_fp != NULL) {    if (matcher_pass == 0) {      bits_in_states = bit_count;      chop_states[state_count] = state;    }    state_count++;  }  if (!AcceptableChoice (char_choices, best_choice, raw_choice, &fixpt)    || (tester || trainer)  && strcmp (word->correct, class_string (best_choice))) {    did_chopping = 1;    if (first_pass)      words_chopped1++;    else      words_chopped2++;    seam_list = start_seam_list (word->blobs);    if (chop_enable)      improve_by_chopping(word,                          &char_choices,                          fx,                          &state,                          best_choice,                          raw_choice,                          &seam_list,                          &fixpt,                          chop_states,                          &state_count,                          &best_state,                          matcher_pass);    if (chop_debug)      print_seams ("Final seam list:", seam_list);    if (enable_assoc &&      !AcceptableChoice (char_choices, best_choice, raw_choice, NULL)      || (tester || trainer)    && strcmp (word->correct, class_string (best_choice))) {      ratings = word_associator (word->blobs, seam_list, &state, fx,        best_choice, raw_choice, word->correct,        /*0, */ &fixpt,        &best_state, matcher_pass);    }    bits_in_states = bit_count + state_count - 1;  }  if (ratings != NULL)    free_matrix(ratings);  if (did_chopping || tester || trainer)    char_choices = rebuild_current_state (word->blobs, seam_list, &state,      char_choices, fx);  if (seam_list != NULL)    free_seam_list(seam_list);  if (matcher_fp != NULL) {    best_state = state;  }  FilterWordChoices();  return char_choices;}/********************************************************************** * improve_by_chopping * * Start with the current word of blobs and its 
classification.  Find * the worst blobs and try to divide them up to improve the ratings. * As long as ratings are produced by the new blob splitting.  When * all the splitting has been accomplished all the ratings memory is * reclaimed. **********************************************************************/void improve_by_chopping(register TWERD *word,                         CHOICES_LIST *char_choices,                         int fx,                         STATE *best_state,                         A_CHOICE *best_choice,                         A_CHOICE *raw_choice,                         SEAMS *seam_list,                         DANGERR *fixpt,                         STATE *chop_states,                         INT32 *state_count,                         STATE *correct_state,                         INT32 pass) {  INT32 blob_number;  INT32 index;                   //to states  CHOICES_LIST choices = *char_choices;  float old_best;  int fixpt_valid = 1;  static INT32 old_count;        //from pass1  do {                                 /* Improvement loop */    if (!fixpt_valid)      fixpt->index = -1;    old_best = class_probability (best_choice);    choices = improve_one_blob (word, *char_choices, fx,      &blob_number, seam_list, fixpt,      chop_states + *state_count, correct_state,      pass);    if (choices != NULL) {      LogNewSplit(blob_number);      permute_characters (choices,        class_probability (best_choice),        best_choice, raw_choice);      *char_choices = choices;      if (old_best > class_probability (best_choice)) {        set_n_ones (best_state, array_count (*char_choices) - 1);        fixpt_valid = 1;      }      else {        insert_new_chunk (best_state, blob_number,          array_count (*char_choices) - 2);        fixpt_valid = 0;      }      if (*state_count > 0) {        if (pass == 0) {          for (index = 0; index < *state_count; index++)            insert_new_chunk (&chop_states[index], blob_number,              
array_count (*char_choices) - 2);          set_n_ones (&chop_states[index],            array_count (*char_choices) - 1);        }        (*state_count)++;      }      if (chop_debug)        print_state ("best state = ",          best_state, count_blobs (word->blobs) - 1);      if (first_pass)        chops_performed1++;      else        chops_performed2++;    }  }  while (choices &&    !AcceptableChoice (*char_choices, best_choice, raw_choice, fixpt) &&    !blob_skip && array_count (*char_choices) < MAX_NUM_CHUNKS);  if (pass == 0)    old_count = *state_count;  else {    if (old_count != *state_count)      fprintf (matcher_fp,        "Mis-matched state counts, " INT32FORMAT " pass1, "        INT32FORMAT " pass2\n", old_count, *state_count);  }  if (!fixpt_valid)    fixpt->index = -1;}/********************************************************************** * select_blob_to_split * * These are the results of the last classification.  Find a likely * place to apply splits. **********************************************************************/INT16 select_blob_to_split(CHOICES_LIST char_choices, float rating_ceiling) {  CHOICES this_choice;  int x;  float worst = -MAX_FLOAT32;  int worst_index = -1;  if (chop_debug)    if (rating_ceiling < MAX_FLOAT32)      cprintf ("rating_ceiling = %8.4f\n", rating_ceiling);  else    cprintf ("rating_ceiling = No Limit\n");  for_each_choice(char_choices, x) {    this_choice = (CHOICES) array_value (char_choices, x);    if (this_choice == NIL) {      return (x);    }    else {      if (best_probability (this_choice) > worst &&        best_probability (this_choice) < rating_ceiling &&      best_certainty (this_choice) < tessedit_certainty_threshold) {        worst_index = x;        worst = best_probability (this_choice);      }    }  }  if (chop_debug)    cprintf ("blob_number = %4d\n", worst_index);  return (worst_index);}/********************************************************************** * start_seam_list * * Initialize a list of 
seams that match the original number of blobs * present in the starting segmentation.  Each of the seams created * by this routine have location information only. **********************************************************************/SEAMS start_seam_list(TBLOB *blobs) {  TBLOB *blob;  SEAMS seam_list;  TPOINT topleft;  TPOINT botright;  int location;  /* Seam slot per char */  seam_list = new_seam_list ();  for (blob = blobs; blob->next != NULL; blob = blob->next) {    blob_bounding_box(blob, &topleft, &botright);    location = botright.x;    blob_bounding_box (blob->next, &topleft, &botright);    location += topleft.x;    location /= 2;    seam_list = add_seam (seam_list,      new_seam (0.0, location, NULL, NULL, NULL));  }  return (seam_list);}/********************************************************************** * total_containment * * Check to see if one of these outlines is totally contained within * the bounding box of the other. **********************************************************************/INT16 total_containment(TBLOB *blob1, TBLOB *blob2) {  TPOINT topleft1;  TPOINT botright1;  TPOINT topleft2;  TPOINT botright2;  blob_bounding_box(blob1, &topleft1, &botright1);  blob_bounding_box(blob2, &topleft2, &botright2);  return (bounds_inside (topleft1, botright1, topleft2, botright2) ||    bounds_inside (topleft2, botright2, topleft1, botright1));}/********************************************************************** * word_associator * * Reassociate and classify the blobs in a word.  Continue this process * until a good answer is found or all the possibilities have been tried. 
**********************************************************************/
/*
 * Builds a CHUNKS_RECORD around the blobs and seams, derives one weight
 * per chunk from the first choice on the diagonal of the ratings
 * matrix, and runs best_first_search on the record.
 *
 * blobs        chunk list; also the input to record_piece_ratings and
 *              blobs_widths
 * seams        split list; array_count (seams) + 1 chunks are weighted
 * state, fixpt, best_choice, raw_choice, best_state, pass
 *              forwarded unchanged to best_first_search
 * fxid         stored in the record's fx field
 * correct      not referenced anywhere in this function
 *
 * Returns the ratings matrix made by record_piece_ratings.  The two
 * width arrays are released here, the matrix is not; chop_word_main
 * passes it to free_matrix.
 *
 * NOTE(review): class_certainty is used as a divisor and blob_weights
 * is indexed up to array_count (seams); neither is range-checked here.
 * Confirm the callers guarantee a non-zero certainty and a large enough
 * BLOB_WEIGHTS.
 */
MATRIX word_associator(TBLOB *blobs,
                       SEAMS seams,
                       STATE *state,
                       int fxid,
                       A_CHOICE *best_choice,
                       A_CHOICE *raw_choice,
                       char *correct,
                       DANGERR *fixpt,
                       STATE *best_state,
                       INT32 pass) {
  CHUNKS_RECORD chunks_record;
  BLOB_WEIGHTS blob_weights;
  A_CHOICE *diag_choice;
  int chunk_total = array_count (seams) + 1;
  int chunk = 0;

  chunks_record.chunks = blobs;
  chunks_record.splits = seams;
  chunks_record.ratings = record_piece_ratings (blobs);
  chunks_record.char_widths = blobs_widths (blobs);
  chunks_record.chunk_widths = blobs_widths (blobs);
  chunks_record.fx = fxid;

  /* Save chunk weights */
  while (chunk < chunk_total) {
    diag_choice =
      (A_CHOICE *) first (matrix_get (chunks_record.ratings, chunk, chunk));
    blob_weights[chunk] =
      -(INT16) (10 * class_probability (diag_choice) /
                class_certainty (diag_choice));
    chunk++;
  }
  chunks_record.weights = blob_weights;

  if (chop_debug) {
    print_matrix (chunks_record.ratings);
  }

  best_first_search(&chunks_record,
                    best_choice,
                    raw_choice,
                    state,
                    fixpt,
                    best_state,
                    pass);

  free_widths (chunks_record.chunk_widths);
  free_widths (chunks_record.char_widths);
  return chunks_record.ratings;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -