⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 applybox.cpp

📁 一OCR的相关资料。希望对研究OCR的朋友有所帮助。
💻 CPP
📖 第 1 页 / 共 2 页
字号:
      return 1;    }    if (STRING (chs_x_ht).contains (ch[0]) &&      ((new_word_box.top () >      baseline + (1 + applybox_error_band) * row->x_height ()) ||      (new_word_box.top () <    baseline + (1 - applybox_error_band) * row->x_height ()))) {      report_failed_box (boxfile_lineno, boxfile_charno, box, ch,        "FAILURE! x-ht char didn't have top near xht");      new_word->set_text ("");      return 1;    }    if (STRING (chs_non_ambig_bl).contains (ch[0]) &&      ((new_word_box.bottom () <      baseline - applybox_error_band * row->x_height ()) ||      (new_word_box.bottom () >    baseline + applybox_error_band * row->x_height ()))) {      report_failed_box (boxfile_lineno, boxfile_charno, box, ch,        "FAILURE! non ambig BL char didnt have bottom near baseline");      new_word->set_text ("");      return 1;    }    if (STRING (chs_odd_bot).contains (ch[0]) &&      (new_word_box.bottom () >    baseline + applybox_error_band * row->x_height ())) {      report_failed_box (boxfile_lineno, boxfile_charno, box, ch,        "FAILURE! Odd bottom char above baseline");      new_word->set_text ("");      return 1;    }    if (STRING (chs_desc).contains (ch[0]) &&      (new_word_box.bottom () >    baseline - applybox_error_band * row->x_height ())) {      report_failed_box (boxfile_lineno, boxfile_charno, box, ch,        "FAILURE! Descender doesn't descend");      new_word->set_text ("");      return 1;    }    return 0;  }  else {    report_failed_box (boxfile_lineno, boxfile_charno, box, ch,      "FAILURE! Couldn't find any blobs");    return 1;  }}/************************************************************************* * tidy_up() *   - report >1 block *   - sort the words in each row. *   - report any rows with no labelled words. 
*   - report any remaining unlabelled words *		- report total labelled words * *************************************************************************/void tidy_up(                         //             BLOCK_LIST *block_list,  //real blocks             INT16 &ok_char_count,             INT16 &ok_row_count,             INT16 &unlabelled_words,             INT16 *tgt_char_counts,             INT16 &rebalance_count,             char &min_char,             INT16 &min_samples,             INT16 &final_labelled_blob_count) {  BLOCK_IT block_it(block_list);  ROW_IT row_it;  ROW *row;  WERD_IT word_it;  WERD *word;  WERD *duplicate_word;  INT16 block_idx = 0;  INT16 row_idx;  INT16 all_row_idx = 0;  BOOL8 row_ok;  BOOL8 rebalance_needed = FALSE;                                 //No. of unique labelled samples  INT16 labelled_char_counts[128];  INT16 i;  char ch;  char prev_ch = '\0';  BOOL8 at_dupe_of_prev_word;  ROW *prev_row = NULL;  INT16 left;  INT16 prev_left = -1;  for (i = 0; i < 128; i++)    labelled_char_counts[i] = 0;  ok_char_count = 0;  ok_row_count = 0;  unlabelled_words = 0;  if ((applybox_debug > 4) && (block_it.length () != 1))    tprintf ("APPLY_BOXES: More than one block??\n");  for (block_it.mark_cycle_pt ();  !block_it.cycled_list (); block_it.forward ()) {    block_idx++;    row_idx = 0;    row_ok = FALSE;    row_it.set_to_list (block_it.data ()->row_list ());    for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {      row_idx++;      all_row_idx++;      row = row_it.data ();      word_it.set_to_list (row->word_list ());      word_it.sort (word_comparator);      for (word_it.mark_cycle_pt ();      !word_it.cycled_list (); word_it.forward ()) {        word = word_it.data ();        if (strlen (word->text ()) == 0) {          unlabelled_words++;          if (applybox_debug > 4) {            tprintf              ("APPLY_BOXES: Unlabelled word blk:%d row:%d allrows:%d\n",              block_idx, row_idx, all_row_idx);          }  
      }        else {          if (word->gblob_list ()->length () != 1)            tprintf              ("APPLY_BOXES: FATALITY - MULTIBLOB Labelled word blk:%d row:%d allrows:%d\n",              block_idx, row_idx, all_row_idx);          ok_char_count++;          labelled_char_counts[*word->text ()]++;          row_ok = TRUE;        }      }      if ((applybox_debug > 4) && (!row_ok)) {        tprintf          ("APPLY_BOXES: Row with no labelled words blk:%d row:%d allrows:%d\n",          block_idx, row_idx, all_row_idx);      }      else        ok_row_count++;    }  }  min_samples = 9999;  for (i = 0; i < 128; i++) {    if (tgt_char_counts[i] > labelled_char_counts[i]) {      if (labelled_char_counts[i] <= 1) {        tprintf          ("APPLY_BOXES: FATALITY - %d labelled samples of \"%c\" - target is %d\n",          labelled_char_counts[i], (char) i, tgt_char_counts[i]);      }      else {        rebalance_needed = TRUE;        if (applybox_debug > 0)          tprintf            ("APPLY_BOXES: REBALANCE REQD \"%c\" - target of %d from %d labelled samples\n",            (char) i, tgt_char_counts[i], labelled_char_counts[i]);      }    }    if ((min_samples > labelled_char_counts[i]) && (tgt_char_counts[i] > 0)) {      min_samples = labelled_char_counts[i];      min_char = (char) i;    }  }  while (applybox_rebalance && rebalance_needed) {    block_it.set_to_list (block_list);    for (block_it.mark_cycle_pt ();    !block_it.cycled_list (); block_it.forward ()) {      row_it.set_to_list (block_it.data ()->row_list ());      for (row_it.mark_cycle_pt ();      !row_it.cycled_list (); row_it.forward ()) {        row = row_it.data ();        word_it.set_to_list (row->word_list ());        for (word_it.mark_cycle_pt ();        !word_it.cycled_list (); word_it.forward ()) {          word = word_it.data ();          left = word->bounding_box ().left ();          ch = *word->text ();          at_dupe_of_prev_word = ((row == prev_row) &&            (left = prev_left) &&     
       (ch == prev_ch));          if ((ch != '\0') &&            (labelled_char_counts[ch] > 1) &&            (tgt_char_counts[ch] > labelled_char_counts[ch]) &&          (!at_dupe_of_prev_word)) {            /* Duplicate the word to rebalance the labelled samples */            if (applybox_debug > 9) {              tprintf ("Duping \"%c\" from ", ch);              word->bounding_box ().print ();            }            duplicate_word = new WERD;            *duplicate_word = *word;            word_it.add_after_then_move (duplicate_word);            rebalance_count++;            labelled_char_counts[ch]++;          }          prev_row = row;          prev_left = left;          prev_ch = ch;        }      }    }    rebalance_needed = FALSE;    for (i = 0; i < 128; i++) {      if ((tgt_char_counts[i] > labelled_char_counts[i]) &&      (labelled_char_counts[i] > 1)) {        rebalance_needed = TRUE;        break;      }    }  }  /* Now final check - count labelled blobs */  final_labelled_blob_count = 0;  block_it.set_to_list (block_list);  for (block_it.mark_cycle_pt ();  !block_it.cycled_list (); block_it.forward ()) {    row_it.set_to_list (block_it.data ()->row_list ());    for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {      row = row_it.data ();      word_it.set_to_list (row->word_list ());      word_it.sort (word_comparator);      for (word_it.mark_cycle_pt ();      !word_it.cycled_list (); word_it.forward ()) {        word = word_it.data ();        if ((strlen (word->text ()) == 1) &&          (word->gblob_list ()->length () == 1))          final_labelled_blob_count++;      }    }  }}void report_failed_box(INT16 boxfile_lineno,                       INT16 boxfile_charno,                       BOX box,                       char *box_ch,                       const char *err_msg) {  if (applybox_debug > 4)    tprintf ("APPLY_BOXES: boxfile %1d/%1d/%s ((%1d,%1d),(%1d,%1d)): %s\n",      boxfile_lineno,      boxfile_charno,      box_ch,   
   box.left (), box.bottom (), box.right (), box.top (), err_msg);}void apply_box_training(BLOCK_LIST *block_list) {  BLOCK_IT block_it(block_list);  ROW_IT row_it;  ROW *row;  WERD_IT word_it;  WERD *word;  WERD *bln_word;  WERD copy_outword;             // copy to denorm  PBLOB_IT blob_it;  DENORM denorm;  INT16 count = 0;  char ch[2];  ch[1] = '\0';  tprintf ("Generating training data\n");  for (block_it.mark_cycle_pt ();  !block_it.cycled_list (); block_it.forward ()) {    row_it.set_to_list (block_it.data ()->row_list ());    for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {      row = row_it.data ();      word_it.set_to_list (row->word_list ());      for (word_it.mark_cycle_pt ();      !word_it.cycled_list (); word_it.forward ()) {        word = word_it.data ();        if ((strlen (word->text ()) == 1) &&        (word->gblob_list ()->length () == 1)) {          /* Here is a word with a single char label and a single blob so train on it */          bln_word =            make_bln_copy (word, row, row->x_height (), &denorm);          blob_it.set_to_list (bln_word->blob_list ());          ch[0] = *word->text ();          tess_training_tester (blob_it.data (),                                 //single blob            &denorm, TRUE,       //correct            ch,                  //correct ASCII char            1,                   //ASCII length            NULL);          copy_outword = *(bln_word);          copy_outword.baseline_denormalise (&denorm);          blob_it.set_to_list (copy_outword.blob_list ());          ch[0] = *word->text ();          delete bln_word;          count++;        }      }    }  }  tprintf ("Generated training data for %d blobs\n", count);}void apply_box_testing(BLOCK_LIST *block_list) {  BLOCK_IT block_it(block_list);  ROW_IT row_it;  ROW *row;  INT16 row_count = 0;  WERD_IT word_it;  WERD *word;  WERD *bln_word;  INT16 word_count = 0;  PBLOB_IT blob_it;  DENORM denorm;  INT16 count = 0;  char ch[2];  WERD 
*outword;                 //bln best choice  //segmentation  WERD_CHOICE *best_choice;      //tess output  WERD_CHOICE *raw_choice;       //top choice permuter                                 //detailed results  BLOB_CHOICE_LIST_CLIST blob_choices;  INT16 char_count = 0;  INT16 correct_count = 0;  INT16 err_count = 0;  INT16 rej_count = 0;  #ifndef SECURE_NAMES  WERDSTATS wordstats;           //As from newdiff  #endif  char tess_rej_str[3];  char tess_long_str[3];  ch[1] = '\0';  strcpy (tess_rej_str, "|A");  strcpy (tess_long_str, "|B");  for (block_it.mark_cycle_pt ();  !block_it.cycled_list (); block_it.forward ()) {    row_it.set_to_list (block_it.data ()->row_list ());    for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {      row = row_it.data ();      row_count++;      word_count = 0;      word_it.set_to_list (row->word_list ());      for (word_it.mark_cycle_pt ();      !word_it.cycled_list (); word_it.forward ()) {        word = word_it.data ();        word_count++;        if ((strlen (word->text ()) == 1) &&          !STRING (applybox_test_exclusions).contains (*word->text ())        && (word->gblob_list ()->length () == 1)) {          /* Here is a word with a single char label and a single blob so test it */          bln_word =            make_bln_copy (word, row, row->x_height (), &denorm);          blob_it.set_to_list (bln_word->blob_list ());          ch[0] = *word->text ();          char_count++;          best_choice = tess_segment_pass1 (bln_word,            &denorm,            tess_default_matcher,            raw_choice,            &blob_choices, outword);          /*            Test for TESS screw up on word. Recog_word has already ensured that the            choice list, outword blob lists and best_choice string are the same            length. A TESS screw up is indicated by a blank filled or 0 length string.          
*/          if ((best_choice->string ().length () == 0) ||            (strspn (best_choice->string ().string (), " ") ==          best_choice->string ().length ())) {            rej_count++;            tprintf ("%d:%d: \"%s\" -> TESS FAILED\n",              row_count, word_count, ch);            #ifndef SECURE_NAMES            wordstats.word (tess_rej_str, 2, ch, 1);            #endif          }          else {            if ((best_choice->string ().length () !=              outword->blob_list ()->length ()) ||              (best_choice->string ().length () !=            blob_choices.length ())) {              tprintf                ("ASSERT FAIL String:\"%s\"; Strlen=%d; #Blobs=%d; #Choices=%d\n",                best_choice->string ().string (),                best_choice->string ().length (),                outword->blob_list ()->length (),                blob_choices.length ());            }            ASSERT_HOST (best_choice->string ().length () ==              outword->blob_list ()->length ());            ASSERT_HOST (best_choice->string ().length () ==              blob_choices.length ());            fix_quotes ((char *) best_choice->string ().string (),                                 //turn to double              outword, &blob_choices);            if (strcmp (best_choice->string ().string (), ch) != 0) {              err_count++;              tprintf ("%d:%d: \"%s\" -> \"%s\"\n",                row_count, word_count, ch,                best_choice->string ().string ());            }            else              correct_count++;            #ifndef SECURE_NAMES            if (best_choice->string ().length () > 2)              wordstats.word (tess_long_str, 2, ch, 1);            else              wordstats.word ((char *) best_choice->string ().                
string (),                best_choice->string ().length (), ch,                1);            #endif          }          delete bln_word;          delete outword;          delete best_choice;          delete raw_choice;          blob_choices.deep_clear ();          count++;        }      }    }  }  #ifndef SECURE_NAMES  wordstats.print (1, 100.0);  wordstats.conf_matrix ();  tprintf ("Tested %d chars: %d correct; %d rejected by tess; %d errs\n",    char_count, correct_count, rej_count, err_count);  #endif}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -