applybox.cpp
来自「一个google的OCR源码」· C++ 代码 · 共 892 行 · 第 1/3 页
CPP
892 行
if (gblob_bounding_box (blob, polyg). overlap (box)) { outline_it. set_to_list (gblob_out_list (blob, polyg)); for (outline_it.mark_cycle_pt (); !outline_it.cycled_list (); outline_it.forward ()) { outline = outline_it.data (); if (goutline_bounding_box (outline, polyg).major_overlap (box)) { if ((row_to_process == NULL) || (row_to_process == row)) { row_to_process = row; row_id_to_process = row_id; } else /* RETURN ERROR Box overlaps blobs in more than one row */ return NULL; } } } } } } } } } } return row_to_process;}inT16 resegment_box( // ROW *row, TBOX box, UNICHAR_ID uch_id, inT16 block_id, inT16 row_id, inT16 boxfile_lineno, inT16 boxfile_charno) { WERD_IT word_it; WERD *word; WERD *new_word = NULL; BOOL8 polyg = false; PBLOB_IT blob_it; PBLOB_IT new_blob_it; PBLOB *blob; PBLOB *new_blob; OUTLINE_IT outline_it; OUTLINE_LIST dummy; // Just to initialize new_outline_it. OUTLINE_IT new_outline_it = &dummy; OUTLINE *outline; TBOX new_word_box; float word_x_centre; float baseline; inT16 error_count = 0; //number of chars lost word_it.set_to_list (row->word_list ()); for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) { word = word_it.data (); polyg = word->flag (W_POLYGON); if (word->bounding_box ().overlap (box)) { blob_it.set_to_list (word->gblob_list ()); for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { blob = blob_it.data (); if (gblob_bounding_box (blob, polyg).overlap (box)) { outline_it.set_to_list (gblob_out_list (blob, polyg)); for (outline_it.mark_cycle_pt (); !outline_it.cycled_list (); outline_it.forward ()) { outline = outline_it.data (); if (goutline_bounding_box (outline, polyg). major_overlap (box)) { if (strlen (word->text ()) > 0) { if (error_count == 0) { error_count = 1; if (applybox_debug > 4) report_failed_box (boxfile_lineno, boxfile_charno, box, unicharset_boxes.id_to_unichar(uch_id), "FAILURE! box overlaps blob in labelled word"); } if (applybox_debug > 4) tprintf ("APPLY_BOXES: ALSO ignoring corrupted char blk:%d row:%d \"%s\"\n", block_id, row_id, word_it.data ()->text ()); word_it.data ()->set_text (""); //UN label it error_count++; } if (error_count == 0) { if (new_word == NULL) { /* Make a new word with a single blob */ new_word = word->shallow_copy (); new_word->set_text (unicharset_boxes.id_to_unichar(uch_id)); if (polyg) new_blob = new PBLOB; else new_blob = (PBLOB *) new C_BLOB; new_blob_it.set_to_list (new_word-> gblob_list ()); new_blob_it.add_to_end (new_blob); new_outline_it. set_to_list (gblob_out_list (new_blob, polyg)); } new_outline_it.add_to_end (outline_it. extract ()); //move blob } } } //no outlines in blob if (outline_it.empty ()) //so delete blob delete blob_it.extract (); } } if (blob_it.empty ()) //no blobs in word //so delete word delete word_it.extract (); } } if (error_count > 0) return error_count; if (new_word != NULL) { gblob_sort_list (new_word->gblob_list (), polyg); word_it.add_to_end (new_word); new_word_box = new_word->bounding_box (); word_x_centre = (new_word_box.left () + new_word_box.right ()) / 2.0f; baseline = row->base_line (word_x_centre);#if 0 if (strlen(unicharset_boxes.id_to_unichar(uch_id)) == 1) { if (STRING (chs_caps_ht).contains (unicharset_boxes.id_to_unichar(uch_id)[0]) && (new_word_box.top () < baseline + (1 + applybox_error_band) * row->x_height ())) { report_failed_box (boxfile_lineno, boxfile_charno, box, unicharset_boxes.id_to_unichar(uch_id), "FAILURE! caps-ht char didn't ascend"); new_word->set_text (""); return 1; } if (STRING (chs_odd_top).contains (unicharset_boxes.id_to_unichar(uch_id)[0]) && (new_word_box.top () < baseline + (1 - applybox_error_band) * row->x_height ())) { report_failed_box (boxfile_lineno, boxfile_charno, box, unicharset_boxes.id_to_unichar(uch_id), "FAILURE! Odd top char below xht"); new_word->set_text (""); return 1; } if (STRING (chs_x_ht).contains (unicharset_boxes.id_to_unichar(uch_id)[0]) && ((new_word_box.top () > baseline + (1 + applybox_error_band) * row->x_height ()) || (new_word_box.top () < baseline + (1 - applybox_error_band) * row->x_height ()))) { report_failed_box (boxfile_lineno, boxfile_charno, box, unicharset_boxes.id_to_unichar(uch_id), "FAILURE! x-ht char didn't have top near xht"); new_word->set_text (""); return 1; } if (STRING (chs_non_ambig_bl).contains (unicharset_boxes.id_to_unichar(uch_id)[0]) && ((new_word_box.bottom () < baseline - applybox_error_band * row->x_height ()) || (new_word_box.bottom () > baseline + applybox_error_band * row->x_height ()))) { report_failed_box (boxfile_lineno, boxfile_charno, box, unicharset_boxes.id_to_unichar(uch_id), "FAILURE! non ambig BL char didnt have bottom near baseline"); new_word->set_text (""); return 1; } if (STRING (chs_odd_bot).contains (unicharset_boxes.id_to_unichar(uch_id)[0]) && (new_word_box.bottom () > baseline + applybox_error_band * row->x_height ())) { report_failed_box (boxfile_lineno, boxfile_charno, box, unicharset_boxes.id_to_unichar(uch_id), "FAILURE! Odd bottom char above baseline"); new_word->set_text (""); return 1; } if (STRING (chs_desc).contains (unicharset_boxes.id_to_unichar(uch_id)[0]) && (new_word_box.bottom () > baseline - applybox_error_band * row->x_height ())) { report_failed_box (boxfile_lineno, boxfile_charno, box, unicharset_boxes.id_to_unichar(uch_id), "FAILURE! Descender doesn't descend"); new_word->set_text (""); return 1; } }#endif return 0; } else { report_failed_box (boxfile_lineno, boxfile_charno, box, unicharset_boxes.id_to_unichar(uch_id), "FAILURE! Couldn't find any blobs"); return 1; }}/************************************************************************* * tidy_up() * - report >1 block * - sort the words in each row. * - report any rows with no labelled words. * - report any remaining unlabelled words * - report total labelled words * *************************************************************************/void tidy_up( // BLOCK_LIST *block_list, //real blocks inT16 &ok_char_count, inT16 &ok_row_count, inT16 &unlabelled_words, inT16 *tgt_char_counts, inT16 &rebalance_count, UNICHAR_ID *min_uch_id, inT16 &min_samples, inT16 &final_labelled_blob_count) { BLOCK_IT block_it(block_list); ROW_IT row_it; ROW *row; WERD_IT word_it; WERD *word; WERD *duplicate_word; inT16 block_idx = 0; inT16 row_idx; inT16 all_row_idx = 0; BOOL8 row_ok; BOOL8 rebalance_needed = FALSE; //No. of unique labelled samples inT16 labelled_char_counts[MAX_NUM_CLASSES]; inT16 i; UNICHAR_ID uch_id; UNICHAR_ID prev_uch_id = -1; BOOL8 at_dupe_of_prev_word; ROW *prev_row = NULL; inT16 left; inT16 prev_left = -1; for (i = 0; i < MAX_NUM_CLASSES; i++) labelled_char_counts[i] = 0; ok_char_count = 0; ok_row_count = 0; unlabelled_words = 0; if ((applybox_debug > 4) && (block_it.length () != 1)) tprintf ("APPLY_BOXES: More than one block??\n"); for (block_it.mark_cycle_pt (); !block_it.cycled_list (); block_it.forward ()) { block_idx++; row_idx = 0; row_ok = FALSE; row_it.set_to_list (block_it.data ()->row_list ()); for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { row_idx++; all_row_idx++; row = row_it.data (); word_it.set_to_list (row->word_list ()); word_it.sort (word_comparator); for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) { word = word_it.data (); if (strlen (word->text ()) == 0) { unlabelled_words++; if (applybox_debug > 4) { tprintf ("APPLY_BOXES: Unlabelled word blk:%d row:%d allrows:%d\n", block_idx, row_idx, all_row_idx); } } else { if (word->gblob_list ()->length () != 1) tprintf ("APPLY_BOXES: FATALITY - MULTIBLOB Labelled word blk:%d row:%d allrows:%d\n", block_idx, row_idx, all_row_idx); ok_char_count++; labelled_char_counts[unicharset_boxes.unichar_to_id(word->text ())]++; row_ok = TRUE; } } if ((applybox_debug > 4) && (!row_ok)) { tprintf ("APPLY_BOXES: Row with no labelled words blk:%d row:%d allrows:%d\n", block_idx, row_idx, all_row_idx);
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?