applybox.cpp

来自「一个google的OCR源码」· C++ 代码 · 共 892 行 · 第 1/3 页

CPP
892
字号
                if (gblob_bounding_box (blob, polyg).                overlap (box)) {                  outline_it.                    set_to_list (gblob_out_list                    (blob, polyg));                  for (outline_it.mark_cycle_pt ();                    !outline_it.cycled_list ();                  outline_it.forward ()) {                    outline = outline_it.data ();                    if (goutline_bounding_box                    (outline, polyg).major_overlap (box)) {                      if ((row_to_process == NULL) ||                      (row_to_process == row)) {                        row_to_process = row;                        row_id_to_process = row_id;                      }                      else                        /* RETURN ERROR Box overlaps blobs in more than one row  */                        return NULL;                    }                  }                }              }            }          }        }      }    }  }  return row_to_process;}inT16 resegment_box(  //                    ROW *row,                    TBOX box,                    UNICHAR_ID uch_id,                    inT16 block_id,                    inT16 row_id,                    inT16 boxfile_lineno,                    inT16 boxfile_charno) {  WERD_IT word_it;  WERD *word;  WERD *new_word = NULL;  BOOL8 polyg = false;  PBLOB_IT blob_it;  PBLOB_IT new_blob_it;  PBLOB *blob;  PBLOB *new_blob;  OUTLINE_IT outline_it;  OUTLINE_LIST dummy;  // Just to initialize new_outline_it.  OUTLINE_IT new_outline_it = &dummy;  OUTLINE *outline;  TBOX new_word_box;  float word_x_centre;  float baseline;  inT16 error_count = 0;         //number of chars lost  word_it.set_to_list (row->word_list ());  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {    word = word_it.data ();    polyg = word->flag (W_POLYGON);    if (word->bounding_box ().overlap (box)) {      blob_it.set_to_list (word->gblob_list ());      for (blob_it.mark_cycle_pt ();      !blob_it.cycled_list (); blob_it.forward ()) {        blob = blob_it.data ();        if (gblob_bounding_box (blob, polyg).overlap (box)) {          outline_it.set_to_list (gblob_out_list (blob, polyg));          for (outline_it.mark_cycle_pt ();          !outline_it.cycled_list (); outline_it.forward ()) {            outline = outline_it.data ();            if (goutline_bounding_box (outline, polyg).            major_overlap (box)) {              if (strlen (word->text ()) > 0) {                if (error_count == 0) {                  error_count = 1;                  if (applybox_debug > 4)                    report_failed_box (boxfile_lineno,                      boxfile_charno,                      box, unicharset_boxes.id_to_unichar(uch_id),                      "FAILURE! box overlaps blob in labelled word");                }                if (applybox_debug > 4)                  tprintf                    ("APPLY_BOXES: ALSO ignoring corrupted char blk:%d row:%d \"%s\"\n",                    block_id, row_id,                    word_it.data ()->text ());                word_it.data ()->set_text ("");                //UN label it                error_count++;              }              if (error_count == 0) {                if (new_word == NULL) {                                 /* Make a new word with a single blob */                  new_word = word->shallow_copy ();                  new_word->set_text (unicharset_boxes.id_to_unichar(uch_id));                  if (polyg)                    new_blob = new PBLOB;                  else                    new_blob = (PBLOB *) new C_BLOB;                  new_blob_it.set_to_list (new_word->                    gblob_list ());                  new_blob_it.add_to_end (new_blob);                  new_outline_it.                    set_to_list (gblob_out_list                    (new_blob, polyg));                }                new_outline_it.add_to_end (outline_it.                  extract ());                //move blob              }            }          }                                 //no outlines in blob          if (outline_it.empty ())                                 //so delete blob            delete blob_it.extract ();        }      }      if (blob_it.empty ())      //no blobs in word                                 //so delete word          delete word_it.extract ();    }  }  if (error_count > 0)    return error_count;  if (new_word != NULL) {    gblob_sort_list (new_word->gblob_list (), polyg);    word_it.add_to_end (new_word);    new_word_box = new_word->bounding_box ();    word_x_centre = (new_word_box.left () + new_word_box.right ()) / 2.0f;    baseline = row->base_line (word_x_centre);#if 0    if (strlen(unicharset_boxes.id_to_unichar(uch_id)) == 1) {      if (STRING (chs_caps_ht).contains (unicharset_boxes.id_to_unichar(uch_id)[0]) &&          (new_word_box.top () <           baseline + (1 + applybox_error_band) * row->x_height ())) {        report_failed_box (boxfile_lineno, boxfile_charno, box,                           unicharset_boxes.id_to_unichar(uch_id),                           "FAILURE! caps-ht char didn't ascend");        new_word->set_text ("");        return 1;      }      if (STRING (chs_odd_top).contains (unicharset_boxes.id_to_unichar(uch_id)[0]) &&          (new_word_box.top () <           baseline + (1 - applybox_error_band) * row->x_height ())) {        report_failed_box (boxfile_lineno, boxfile_charno, box,                           unicharset_boxes.id_to_unichar(uch_id),                           "FAILURE! Odd top char below xht");        new_word->set_text ("");        return 1;      }      if (STRING (chs_x_ht).contains (unicharset_boxes.id_to_unichar(uch_id)[0]) &&          ((new_word_box.top () >            baseline + (1 + applybox_error_band) * row->x_height ()) ||           (new_word_box.top () <            baseline + (1 - applybox_error_band) * row->x_height ()))) {        report_failed_box (boxfile_lineno, boxfile_charno, box,                           unicharset_boxes.id_to_unichar(uch_id),                           "FAILURE! x-ht char didn't have top near xht");        new_word->set_text ("");        return 1;      }      if (STRING (chs_non_ambig_bl).contains          (unicharset_boxes.id_to_unichar(uch_id)[0]) &&          ((new_word_box.bottom () <            baseline - applybox_error_band * row->x_height ()) ||           (new_word_box.bottom () >            baseline + applybox_error_band * row->x_height ()))) {        report_failed_box (boxfile_lineno, boxfile_charno, box,                           unicharset_boxes.id_to_unichar(uch_id),                           "FAILURE! non ambig BL char didnt have bottom near baseline");        new_word->set_text ("");        return 1;      }      if (STRING (chs_odd_bot).contains (unicharset_boxes.id_to_unichar(uch_id)[0]) &&          (new_word_box.bottom () >           baseline + applybox_error_band * row->x_height ())) {        report_failed_box (boxfile_lineno, boxfile_charno, box,                           unicharset_boxes.id_to_unichar(uch_id),                           "FAILURE! Odd bottom char above baseline");        new_word->set_text ("");        return 1;      }      if (STRING (chs_desc).contains (unicharset_boxes.id_to_unichar(uch_id)[0]) &&          (new_word_box.bottom () >           baseline - applybox_error_band * row->x_height ())) {        report_failed_box (boxfile_lineno, boxfile_charno, box,                           unicharset_boxes.id_to_unichar(uch_id),        "FAILURE! Descender doesn't descend");        new_word->set_text ("");        return 1;      }    }#endif    return 0;  }  else {    report_failed_box (boxfile_lineno, boxfile_charno, box,                       unicharset_boxes.id_to_unichar(uch_id),                       "FAILURE! Couldn't find any blobs");    return 1;  }}/************************************************************************* * tidy_up() *   - report >1 block *   - sort the words in each row. *   - report any rows with no labelled words. *   - report any remaining unlabelled words *		- report total labelled words * *************************************************************************/void tidy_up(                         //             BLOCK_LIST *block_list,  //real blocks             inT16 &ok_char_count,             inT16 &ok_row_count,             inT16 &unlabelled_words,             inT16 *tgt_char_counts,             inT16 &rebalance_count,             UNICHAR_ID *min_uch_id,             inT16 &min_samples,             inT16 &final_labelled_blob_count) {  BLOCK_IT block_it(block_list);  ROW_IT row_it;  ROW *row;  WERD_IT word_it;  WERD *word;  WERD *duplicate_word;  inT16 block_idx = 0;  inT16 row_idx;  inT16 all_row_idx = 0;  BOOL8 row_ok;  BOOL8 rebalance_needed = FALSE;                                 //No. of unique labelled samples  inT16 labelled_char_counts[MAX_NUM_CLASSES];  inT16 i;  UNICHAR_ID uch_id;  UNICHAR_ID prev_uch_id = -1;  BOOL8 at_dupe_of_prev_word;  ROW *prev_row = NULL;  inT16 left;  inT16 prev_left = -1;  for (i = 0; i < MAX_NUM_CLASSES; i++)    labelled_char_counts[i] = 0;  ok_char_count = 0;  ok_row_count = 0;  unlabelled_words = 0;  if ((applybox_debug > 4) && (block_it.length () != 1))    tprintf ("APPLY_BOXES: More than one block??\n");  for (block_it.mark_cycle_pt ();  !block_it.cycled_list (); block_it.forward ()) {    block_idx++;    row_idx = 0;    row_ok = FALSE;    row_it.set_to_list (block_it.data ()->row_list ());    for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {      row_idx++;      all_row_idx++;      row = row_it.data ();      word_it.set_to_list (row->word_list ());      word_it.sort (word_comparator);      for (word_it.mark_cycle_pt ();      !word_it.cycled_list (); word_it.forward ()) {        word = word_it.data ();        if (strlen (word->text ()) == 0) {          unlabelled_words++;          if (applybox_debug > 4) {            tprintf              ("APPLY_BOXES: Unlabelled word blk:%d row:%d allrows:%d\n",              block_idx, row_idx, all_row_idx);          }        }        else {          if (word->gblob_list ()->length () != 1)            tprintf              ("APPLY_BOXES: FATALITY - MULTIBLOB Labelled word blk:%d row:%d allrows:%d\n",              block_idx, row_idx, all_row_idx);          ok_char_count++;          labelled_char_counts[unicharset_boxes.unichar_to_id(word->text ())]++;          row_ok = TRUE;        }      }      if ((applybox_debug > 4) && (!row_ok)) {        tprintf          ("APPLY_BOXES: Row with no labelled words blk:%d row:%d allrows:%d\n",          block_idx, row_idx, all_row_idx);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?