⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 docqual.cpp

📁 一OCR的相关资料。希望对研究OCR的朋友有所帮助。
💻 CPP
📖 第 1 页 / 共 4 页
字号:
  delete_word(tessword);  //get rid of it  initial_it.set_to_list (init_word->blob_list ());  init_blobs_left = initial_it.length ();  outword_it.set_to_list (word->outword->blob_list ());  for (outword_it.mark_cycle_pt ();  !outword_it.cycled_list (); outword_it.forward ()) {    out_box = outword_it.data ()->bounding_box ();    /* Skip any initial blobs LEFT of current outword blob */    while (!initial_it.at_last () &&    (initial_it.data ()->bounding_box ().left () < out_box.left ())) {      initial_it.forward ();      init_blobs_left--;    }    /* See if current outword blob matches any initial blob with the same left      coord. (Normally only one but possibly more - in unknown order) */    i = 0;    matched = FALSE;    do {      test_blob = initial_it.data_relative (i++);      matched = crude_match_blobs (test_blob, outword_it.data ());      if (matched &&        (word->reject_map[j].accept_if_good_quality ()) &&        (docqual_excuse_outline_errs ||        (count_outline_errs (word->best_choice->string ()[j],        outword_it.data ()->out_list ()->        length ()) == 0)))        word->reject_map[j].setrej_quality_accept ();    }    while (!matched &&      (init_blobs_left - i > 0) &&      (i < 129) &&      !initial_it.at_last () &&      test_blob->bounding_box ().left () == out_box.left ());    j++;  }  delete init_word;}void print_boxes(WERD *word) {   PBLOB_IT it;  BOX box;  it.set_to_list (word->blob_list ());  for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {    box = it.data ()->bounding_box ();    box.print ();  }}INT16 count_outline_errs(char c, INT16 outline_count) {   int expected_outline_count;  if (STRING (outlines_odd).contains (c))    return 0;                    //Dont use this char  else if (STRING (outlines_2).contains (c))    expected_outline_count = 2;  else    expected_outline_count = 1;  return abs (outline_count - expected_outline_count);}void quality_based_rejection(PAGE_RES_IT &page_res_it,                             
BOOL8 good_quality_doc) {  if ((tessedit_good_quality_unrej && good_quality_doc))    unrej_good_quality_words(page_res_it);   doc_and_block_rejection(page_res_it, good_quality_doc);   page_res_it.restart_page ();  while (page_res_it.word () != NULL) {    insert_rej_cblobs (page_res_it.word ());    page_res_it.forward ();  }  if (unlv_tilde_crunching) {    tilde_crunch(page_res_it);     tilde_delete(page_res_it);   }}/************************************************************************* * unrej_good_quality_words() * Accept potential rejects in words which pass the following checks: *    - Contains a potential reject *    - Word looks like a sensible alpha word. *    - Word segmentation is the same as the original image *		- All characters have the expected number of outlines * NOTE - the rejection counts are recalculated after unrejection *      - CANT do it in a single pass without a bit of fiddling *		- keep it simple but inefficient *************************************************************************/void unrej_good_quality_words(  //unreject potential                              PAGE_RES_IT &page_res_it) {  WERD_RES *word;  ROW_RES *current_row;  BLOCK_RES *current_block;  int i;  page_res_it.restart_page ();  while (page_res_it.word () != NULL) {    check_debug_pt (page_res_it.word (), 100);    if (bland_unrej) {      word = page_res_it.word ();      for (i = 0; i < word->reject_map.length (); i++) {        if (word->reject_map[i].accept_if_good_quality ())          word->reject_map[i].setrej_quality_accept ();      }      page_res_it.forward ();    }    else if ((page_res_it.row ()->char_count > 0) &&      ((page_res_it.row ()->rej_count /      (float) page_res_it.row ()->char_count) <=    quality_rowrej_pc)) {      word = page_res_it.word ();      if (word->reject_map.quality_recoverable_rejects () &&        (tessedit_unrej_any_wd ||        acceptable_word_string (word->best_choice->string ().string ())      != AC_UNACCEPTABLE)) {        
unrej_good_chs (word, page_res_it.row ()->row);      }      page_res_it.forward ();    }    else {      /* Skip to end of dodgy row */      current_row = page_res_it.row ();      while ((page_res_it.word () != NULL) &&        (page_res_it.row () == current_row))        page_res_it.forward ();    }    check_debug_pt (page_res_it.word (), 110);  }  page_res_it.restart_page ();  page_res_it.page_res->char_count = 0;  page_res_it.page_res->rej_count = 0;  current_block = NULL;  current_row = NULL;  while (page_res_it.word () != NULL) {    if (current_block != page_res_it.block ()) {      current_block = page_res_it.block ();      current_block->char_count = 0;      current_block->rej_count = 0;    }    if (current_row != page_res_it.row ()) {      current_row = page_res_it.row ();      current_row->char_count = 0;      current_row->rej_count = 0;      current_row->whole_word_rej_count = 0;    }    page_res_it.rej_stat_word ();    page_res_it.forward ();  }}/************************************************************************* * doc_and_block_rejection() * * If the page has too many rejects - reject all of it. 
* If any block has too many rejects - reject all words in the block *************************************************************************/void doc_and_block_rejection(  //reject big chunks                             PAGE_RES_IT &page_res_it,                             BOOL8 good_quality_doc) {  INT16 block_no = 0;  INT16 row_no = 0;  BLOCK_RES *current_block;  ROW_RES *current_row;  BOOL8 rej_word;  BOOL8 prev_word_rejected;  INT16 char_quality;  INT16 accepted_char_quality;  if ((page_res_it.page_res->rej_count * 100.0 /  page_res_it.page_res->char_count) > tessedit_reject_doc_percent) {    reject_whole_page(page_res_it);     #ifndef SECURE_NAMES    if (tessedit_debug_doc_rejection) {      tprintf ("REJECT ALL #chars: %d #Rejects: %d; \n",        page_res_it.page_res->char_count,        page_res_it.page_res->rej_count);    }    #endif  }  else {    #ifndef SECURE_NAMES    if (tessedit_debug_doc_rejection)      tprintf ("NO PAGE REJECTION #chars: %d  # Rejects: %d; \n",        page_res_it.page_res->char_count,        page_res_it.page_res->rej_count);    #endif    /* Walk blocks testing for block rejection */    page_res_it.restart_page ();    while (page_res_it.word () != NULL) {      current_block = page_res_it.block ();      if (current_block->block->text_region () != NULL)        block_no = current_block->block->text_region ()->id_no ();      else        block_no = -1;      if ((page_res_it.block ()->char_count > 0) &&        ((page_res_it.block ()->rej_count * 100.0 /        page_res_it.block ()->char_count) >      tessedit_reject_block_percent)) {        #ifndef SECURE_NAMES        if (tessedit_debug_block_rejection)          tprintf ("REJECTING BLOCK %d  #chars: %d;  #Rejects: %d\n",            block_no,            page_res_it.block ()->char_count,            page_res_it.block ()->rej_count);        #endif        prev_word_rejected = FALSE;        while ((page_res_it.word () != NULL) &&        (page_res_it.block () == current_block)) {          if 
(tessedit_preserve_blk_rej_perfect_wds) {            rej_word =              (page_res_it.word ()->reject_map.reject_count () > 0)              || (page_res_it.word ()->reject_map.length () <              tessedit_preserve_min_wd_len);            if (rej_word && tessedit_dont_blkrej_good_wds              && !(page_res_it.word ()->reject_map.length () <              tessedit_preserve_min_wd_len)              &&              (acceptable_word_string              (page_res_it.word ()->best_choice->string ().            string ()) != AC_UNACCEPTABLE)) {              word_char_quality (page_res_it.word (),                page_res_it.row ()->row,                &char_quality,                &accepted_char_quality);              rej_word = char_quality !=                page_res_it.word ()->reject_map.length ();            }          }          else            rej_word = TRUE;          if (rej_word) {            /*              Reject spacing if both current and prev words are rejected.              NOTE - this is NOT restricted to FUZZY spaces. - When tried this generated              more space errors.            
*/            if (tessedit_use_reject_spaces &&              prev_word_rejected &&              (page_res_it.prev_row () == page_res_it.row ()) &&              (page_res_it.word ()->word->space () == 1))              page_res_it.word ()->reject_spaces = TRUE;            page_res_it.word ()->reject_map.rej_word_block_rej ();          }          prev_word_rejected = rej_word;          page_res_it.forward ();        }      }      else {        #ifndef SECURE_NAMES        if (tessedit_debug_block_rejection)          tprintf            ("NOT REJECTING BLOCK %d #chars: %d  # Rejects: %d; \n",            block_no, page_res_it.block ()->char_count,            page_res_it.block ()->rej_count);        #endif        /* Walk rows in block testing for row rejection */        row_no = 0;        while ((page_res_it.word () != NULL) &&        (page_res_it.block () == current_block)) {          current_row = page_res_it.row ();          row_no++;          /* Reject whole row if:            fraction of chars on row which are rejected exceed a limit AND            fraction rejects which occur in WHOLE WERD rejects is LESS THAN a limit          */          if ((page_res_it.row ()->char_count > 0) &&            ((page_res_it.row ()->rej_count * 100.0 /            page_res_it.row ()->char_count) >            tessedit_reject_row_percent) &&            ((page_res_it.row ()->whole_word_rej_count * 100.0 /            page_res_it.row ()->rej_count) <          tessedit_whole_wd_rej_row_percent)) {            #ifndef SECURE_NAMES            if (tessedit_debug_block_rejection)              tprintf                ("REJECTING ROW %d  #chars: %d;  #Rejects: %d\n",                row_no, page_res_it.row ()->char_count,                page_res_it.row ()->rej_count);            #endif            prev_word_rejected = FALSE;            while ((page_res_it.word () != NULL) &&            (page_res_it.row () == current_row)) {              /* Preserve words on good docs unless they are mostly rejected*/   
           if (!tessedit_row_rej_good_docs && good_quality_doc) {                rej_word =                  page_res_it.word ()->reject_map.                  reject_count () /                  (float) page_res_it.word ()->reject_map.                  length () > tessedit_good_doc_still_rowrej_wd;              }              /* Preserve perfect words anyway */              else if (tessedit_preserve_row_rej_perfect_wds) {                rej_word =                  (page_res_it.word ()->reject_map.                  reject_count () > 0)                  || (page_res_it.word ()->reject_map.                  length () < tessedit_preserve_min_wd_len);                if (rej_word && tessedit_dont_rowrej_good_wds                  && !(page_res_it.word ()->reject_map.                  length () <                  tessedit_preserve_min_wd_len)                  &&                  (acceptable_word_string                  (page_res_it.word ()->best_choice->                string ().string ()) != AC_UNACCEPTABLE)) {                  word_char_quality (page_res_it.word (),                    page_res_it.row ()->row,                    &char_quality,                    &accepted_char_quality);                  rej_word = char_quality !=                    page_res_it.word ()->reject_map.length ();                }              }              else                rej_word = TRUE;              if (rej_word) {                /*                  Reject spacing if both current and prev words are rejected.                  NOTE - this is NOT restricted to FUZZY spaces. - When tried this generated                  more space errors.                
*/                if (tessedit_use_reject_spaces &&                  prev_word_rejected &&                  (page_res_it.prev_row () ==                  page_res_it.row ())                  && (page_res_it.word ()->word->space () ==                  1))                  page_res_it.word ()->reject_spaces = TRUE;                page_res_it.word ()->reject_map.                  rej_word_row_rej();               }              prev_word_rejected = rej_word;              page_res_it.forward ();            }          }          else {            #ifndef SECURE_NAMES            if (tessedit_debug_block_rejection)              tprintf                ("NOT REJECTING ROW %d #chars: %d  # Rejects: %d; \n",                row_no, page_res_it.row ()->char_count,                page_res_it.row ()->rej_count);            #endif            while ((page_res_it.word () != NULL) &&              (page_res_it.row () == current_row))              page_res_it.forward ();

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -