📄 chopper.cpp
字号:
} if (!found_em[1] && ((seam->split2 == NULL) || is_split_outline (outline, seam->split2))) { found_em[1] = TRUE; } if (!found_em[2] && ((seam->split3 == NULL) || is_split_outline (outline, seam->split3))) { found_em[2] = TRUE; } last_outline = outline; } if (!found_em[0] || !found_em[1] || !found_em[2]) return (FALSE); else return (TRUE);}/********************************************************************** * chop_word_main * * Classify the blobs in this word and permute the results. Find the * worst blob in the word and chop it up. Continue this process until * a good answer has been found or all the blobs have been chopped up * enough. Return the word level ratings. **********************************************************************/CHOICES_LIST chop_word_main(register TWERD *word, int fx, A_CHOICE *best_choice, A_CHOICE *raw_choice, BOOL8 tester, BOOL8 trainer) { TBLOB *pblob; TBLOB *blob; CHOICES_LIST char_choices; int index; int did_chopping; float rating_limit = 1000.0; STATE state; SEAMS seam_list = NULL; CHOICES match_result; MATRIX ratings = NULL; DANGERR fixpt; /*dangerous ambig */ INT32 state_count; //no of states INT32 bit_count; //no of bits static STATE best_state; static STATE chop_states[64]; //in between states state_count = 0; set_null_choice(best_choice); set_null_choice(raw_choice); char_choices = new_choice_list (); did_chopping = 0; for (blob = word->blobs, pblob = NULL, index = 0; blob != NULL; blob = blob->next, index++) { match_result = (CHOICES) classify_blob (pblob, blob, blob->next, NULL, fx, "chop_word:", Green, &chop_states[0], &best_state, matcher_pass, index); char_choices = array_push (char_choices, match_result); pblob = blob; } bit_count = index - 1; permute_characters(char_choices, rating_limit, best_choice, raw_choice); set_n_ones (&state, array_count (char_choices) - 1); if (matcher_fp != NULL) { if (matcher_pass == 0) { bits_in_states = bit_count; chop_states[state_count] = state; } state_count++; } if (!AcceptableChoice 
(char_choices, best_choice, raw_choice, &fixpt) || (tester || trainer) && strcmp (word->correct, class_string (best_choice))) { did_chopping = 1; if (first_pass) words_chopped1++; else words_chopped2++; seam_list = start_seam_list (word->blobs); if (chop_enable) improve_by_chopping(word, &char_choices, fx, &state, best_choice, raw_choice, &seam_list, &fixpt, chop_states, &state_count, &best_state, matcher_pass); if (chop_debug) print_seams ("Final seam list:", seam_list); if (enable_assoc && !AcceptableChoice (char_choices, best_choice, raw_choice, NULL) || (tester || trainer) && strcmp (word->correct, class_string (best_choice))) { ratings = word_associator (word->blobs, seam_list, &state, fx, best_choice, raw_choice, word->correct, /*0, */ &fixpt, &best_state, matcher_pass); } bits_in_states = bit_count + state_count - 1; } if (ratings != NULL) free_matrix(ratings); if (did_chopping || tester || trainer) char_choices = rebuild_current_state (word->blobs, seam_list, &state, char_choices, fx); if (seam_list != NULL) free_seam_list(seam_list); if (matcher_fp != NULL) { best_state = state; } FilterWordChoices(); return char_choices;}/********************************************************************** * improve_by_chopping * * Start with the current word of blobs and its classification. Find * the worst blobs and try to divide them up to improve the ratings. * As long as ratings are produced by the new blob splitting. When * all the splitting has been accomplished all the ratings memory is * reclaimed. 
**********************************************************************/void improve_by_chopping(register TWERD *word, CHOICES_LIST *char_choices, int fx, STATE *best_state, A_CHOICE *best_choice, A_CHOICE *raw_choice, SEAMS *seam_list, DANGERR *fixpt, STATE *chop_states, INT32 *state_count, STATE *correct_state, INT32 pass) { INT32 blob_number; INT32 index; //to states CHOICES_LIST choices = *char_choices; float old_best; int fixpt_valid = 1; static INT32 old_count; //from pass1 do { /* Improvement loop */ if (!fixpt_valid) fixpt->index = -1; old_best = class_probability (best_choice); choices = improve_one_blob (word, *char_choices, fx, &blob_number, seam_list, fixpt, chop_states + *state_count, correct_state, pass); if (choices != NULL) { LogNewSplit(blob_number); permute_characters (choices, class_probability (best_choice), best_choice, raw_choice); *char_choices = choices; if (old_best > class_probability (best_choice)) { set_n_ones (best_state, array_count (*char_choices) - 1); fixpt_valid = 1; } else { insert_new_chunk (best_state, blob_number, array_count (*char_choices) - 2); fixpt_valid = 0; } if (*state_count > 0) { if (pass == 0) { for (index = 0; index < *state_count; index++) insert_new_chunk (&chop_states[index], blob_number, array_count (*char_choices) - 2); set_n_ones (&chop_states[index], array_count (*char_choices) - 1); } (*state_count)++; } if (chop_debug) print_state ("best state = ", best_state, count_blobs (word->blobs) - 1); if (first_pass) chops_performed1++; else chops_performed2++; } } while (choices && !AcceptableChoice (*char_choices, best_choice, raw_choice, fixpt) && !blob_skip && array_count (*char_choices) < MAX_NUM_CHUNKS); if (pass == 0) old_count = *state_count; else { if (old_count != *state_count) fprintf (matcher_fp, "Mis-matched state counts, " INT32FORMAT " pass1, " INT32FORMAT " pass2\n", old_count, *state_count); } if (!fixpt_valid) fixpt->index = -1;}/********************************************************************** 
* select_blob_to_split
 *
 * These are the results of the last classification.  Find a likely
 * place to apply splits.
 **********************************************************************/
/*
 * Returns the index of the first entry of char_choices that is NIL.
 * Otherwise returns the index of the entry with the largest
 * best_probability that is still below rating_ceiling and whose
 * best_certainty is below tessedit_certainty_threshold, or -1 when no
 * entry qualifies.
 */
INT16 select_blob_to_split(CHOICES_LIST char_choices, float rating_ceiling) {
  CHOICES entry;
  int pos;
  int candidate = -1;
  float candidate_rating = -MAX_FLOAT32;

  if (chop_debug) {
    /* Braces make explicit that the else belongs to the inner test. */
    if (rating_ceiling < MAX_FLOAT32) {
      cprintf ("rating_ceiling = %8.4f\n", rating_ceiling);
    }
    else {
      cprintf ("rating_ceiling = No Limit\n");
    }
  }

  for_each_choice(char_choices, pos) {
    entry = (CHOICES) array_value (char_choices, pos);
    if (entry == NIL) {
      return (pos);
    }
    if (best_probability (entry) > candidate_rating &&
        best_probability (entry) < rating_ceiling &&
        best_certainty (entry) < tessedit_certainty_threshold) {
      candidate = pos;
      candidate_rating = best_probability (entry);
    }
  }

  if (chop_debug) {
    cprintf ("blob_number = %4d\n", candidate);
  }

  return (candidate);
}


/**********************************************************************
 * start_seam_list
 *
 * Build the initial seam list for a chain of blobs: one seam for each
 * pair of adjacent blobs.  Each seam carries location information
 * only; its location is the midpoint between the right edge of the
 * left blob's bounding box and the left edge of the right blob's
 * bounding box.
 *
 * blobs    head of the blob chain; may be NULL
 * returns  the new seam list; for an empty or single-blob chain this
 *          is whatever new_seam_list () produces
 **********************************************************************/
SEAMS start_seam_list(TBLOB *blobs) {
  TBLOB *left;
  SEAMS seam_list;
  TPOINT topleft;
  TPOINT botright;
  int location;

  seam_list = new_seam_list ();

  /* The left != NULL test guards against an empty chain; without it
     the first evaluation of left->next dereferences a null pointer. */
  for (left = blobs; left != NULL && left->next != NULL; left = left->next) {
    blob_bounding_box(left, &topleft, &botright);
    location = botright.x;
    blob_bounding_box (left->next, &topleft, &botright);
    location += topleft.x;
    location /= 2;

    /* Seam slot per char */
    seam_list = add_seam (seam_list,
                          new_seam (0.0, location, NULL, NULL, NULL));
  }

  return (seam_list);
}


/**********************************************************************
 * total_containment
 *
 * Check to see if one of these outlines is totally contained within
 * the bounding box of the other.
**********************************************************************/
/*
 * Returns non-zero when bounds_inside reports that the bounding box
 * of blob1 lies inside that of blob2, or the other way round.
 */
INT16 total_containment(TBLOB *blob1, TBLOB *blob2) {
  TPOINT first_topleft;
  TPOINT first_botright;
  TPOINT second_topleft;
  TPOINT second_botright;

  blob_bounding_box(blob1, &first_topleft, &first_botright);
  blob_bounding_box(blob2, &second_topleft, &second_botright);

  if (bounds_inside (first_topleft, first_botright,
                     second_topleft, second_botright)) {
    return 1;
  }
  if (bounds_inside (second_topleft, second_botright,
                     first_topleft, first_botright)) {
    return 1;
  }
  return 0;
}


/**********************************************************************
 * word_associator
 *
 * Reassociate and classify the blobs in a word: fill in a chunks
 * record for the blobs and seams, derive one weight per chunk from
 * the diagonal of the ratings matrix, and run best_first_search on
 * it.
 *
 * blobs, seams             the chunks and the splits between them
 * state, fixpt,
 * best_state, pass,
 * best_choice, raw_choice  handed on unchanged to best_first_search
 * fxid                     stored in the chunks record
 * correct                  not used in this function
 * returns                  the ratings matrix built by
 *                          record_piece_ratings; the two width lists
 *                          are freed here, the matrix is not
 **********************************************************************/
MATRIX word_associator(TBLOB *blobs,
                       SEAMS seams,
                       STATE *state,
                       int fxid,
                       A_CHOICE *best_choice,
                       A_CHOICE *raw_choice,
                       char *correct,
                       DANGERR *fixpt,
                       STATE *best_state,
                       INT32 pass) {
  CHUNKS_RECORD chunks_record;
  BLOB_WEIGHTS blob_weights;
  A_CHOICE *diag_choice;
  int chunk;
  int num_chunks = array_count (seams) + 1;

  chunks_record.chunks = blobs;
  chunks_record.splits = seams;
  chunks_record.fx = fxid;
  chunks_record.ratings = record_piece_ratings (blobs);
  chunks_record.char_widths = blobs_widths (blobs);
  chunks_record.chunk_widths = blobs_widths (blobs);

  /* Save chunk weights */
  /* NOTE(review): nothing here checks num_chunks against the capacity
     of BLOB_WEIGHTS, that diag_choice is non-NULL, or that
     class_certainty is non-zero before dividing -- confirm the callers
     guarantee these. */
  chunk = 0;
  while (chunk < num_chunks) {
    diag_choice =
      (A_CHOICE *) first (matrix_get (chunks_record.ratings, chunk, chunk));
    blob_weights[chunk] = -(INT16) (10 * class_probability (diag_choice) /
                                    class_certainty (diag_choice));
    chunk++;
  }
  /* blob_weights is a local; the record only refers to it for the
     duration of this call. */
  chunks_record.weights = blob_weights;

  if (chop_debug) {
    print_matrix (chunks_record.ratings);
  }

  best_first_search(&chunks_record,
                    best_choice,
                    raw_choice,
                    state,
                    fixpt,
                    best_state,
                    pass);

  free_widths (chunks_record.chunk_widths);
  free_widths (chunks_record.char_widths);

  return chunks_record.ratings;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -