⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 tordmain.cpp

📁 一OCR的相关资料。希望对研究OCR的朋友有所帮助。
💻 CPP
📖 第 1 页 / 共 3 页
字号:
  TO_BLOCK *block;               //created block  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();  block_it.forward ()) {    block = block_it.data ();    block->line_size = filter_noise_blobs (&block->blobs,      &block->noise_blobs,      &block->small_blobs,      &block->large_blobs);    block->line_spacing =      block->line_size * (textord_merge_desc + textord_merge_x +      textord_merge_asc +      textord_merge_asc) / textord_merge_x;    block->line_size *= textord_min_linesize;    block->max_blob_size = block->line_size * textord_excess_blobsize;#ifndef GRAPHICS_DISABLED    if (textord_show_blobs && testing_on) {      if (to_win == NO_WINDOW)        create_to_win(page_tr);      plot_blob_list (to_win, &block->noise_blobs, CORAL, BLUE);      plot_blob_list (to_win, &block->small_blobs, GOLDENROD, YELLOW);      plot_blob_list (to_win, &block->large_blobs, DARK_GREEN, YELLOW);      plot_blob_list (to_win, &block->blobs, WHITE, BROWN);    }    if (textord_show_boxes && testing_on) {      if (to_win == NO_WINDOW)        create_to_win(page_tr);      plot_box_list (to_win, &block->noise_blobs, WHITE);      plot_box_list (to_win, &block->small_blobs, WHITE);      plot_box_list (to_win, &block->large_blobs, WHITE);      plot_box_list (to_win, &block->blobs, WHITE);    }#endif  }}/********************************************************************** * filter_noise_blobs * * Move small blobs to a separate list. 
**********************************************************************/float filter_noise_blobs(                            //separate noise                         BLOBNBOX_LIST *src_list,    //origonal list                         BLOBNBOX_LIST *noise_list,  //noise list                         BLOBNBOX_LIST *small_list,  //small blobs                         BLOBNBOX_LIST *large_list   //large blobs                        ) {  INT16 height;                  //height of blob  INT16 width;                   //of blob  BLOBNBOX_IT src_it = src_list; //iterators  BLOBNBOX_IT noise_it = noise_list;  BLOBNBOX_IT small_it = small_list;  BLOBNBOX_IT large_it = large_list;  STATS size_stats (0, MAX_NEAREST_DIST);  //blob heights  if (textord_new_initial_xheight)    return filter_noise_blobs2 (src_list, noise_list, small_list, large_list);  float min_y;                   //size limits  float max_y;  float max_x;  for (src_it.mark_cycle_pt (); !src_it.cycled_list (); src_it.forward ()) {    if (src_it.data ()->bounding_box ().height () < textord_max_noise_size)      noise_it.add_after_then_move (src_it.extract ());  }  for (src_it.mark_cycle_pt (); !src_it.cycled_list (); src_it.forward ()) {    size_stats.add (src_it.data ()->bounding_box ().height (), 1);  }  min_y = floor (size_stats.ile (textord_blob_size_smallile / 100.0));  max_y = ceil (size_stats.ile (textord_blob_size_bigile / 100.0));  max_x = ceil (size_stats.ile (0.5) * textord_width_limit);  for (src_it.mark_cycle_pt (); !src_it.cycled_list (); src_it.forward ()) {    height = src_it.data ()->bounding_box ().height ();    width = src_it.data ()->bounding_box ().width ();    if (height < min_y)      small_it.add_after_then_move (src_it.extract ());    else if (height > max_y || width > max_x)      large_it.add_after_then_move (src_it.extract ());  }  return size_stats.ile (textord_initialx_ile);}/********************************************************************** * filter_noise_blobs2 * * Move small blobs to 
a separate list. **********************************************************************/float filter_noise_blobs2(                            //separate noise                          BLOBNBOX_LIST *src_list,    //origonal list                          BLOBNBOX_LIST *noise_list,  //noise list                          BLOBNBOX_LIST *small_list,  //small blobs                          BLOBNBOX_LIST *large_list   //large blobs                         ) {  INT16 height;                  //height of blob  INT16 width;                   //of blob  BLOBNBOX *blob;                //current blob  float initial_x;               //first guess  BLOBNBOX_IT src_it = src_list; //iterators  BLOBNBOX_IT noise_it = noise_list;  BLOBNBOX_IT small_it = small_list;  BLOBNBOX_IT large_it = large_list;  STATS size_stats (0, MAX_NEAREST_DIST);  //blob heights  float min_y;                   //size limits  float max_y;  float max_x;  float max_height;              //of good blobs  for (src_it.mark_cycle_pt (); !src_it.cycled_list (); src_it.forward ()) {    blob = src_it.data ();    if (blob->bounding_box ().height () < textord_max_noise_size)      noise_it.add_after_then_move (src_it.extract ());    else if (blob->enclosed_area () >= blob->bounding_box ().height ()      * blob->bounding_box ().width () * textord_noise_area_ratio)      small_it.add_after_then_move (src_it.extract ());  }  for (src_it.mark_cycle_pt (); !src_it.cycled_list (); src_it.forward ()) {    size_stats.add (src_it.data ()->bounding_box ().height (), 1);  }  initial_x = size_stats.ile (textord_initialx_ile);  max_y =    ceil (initial_x *    (textord_merge_desc + textord_merge_x +    2 * textord_merge_asc) / textord_merge_x);  min_y = floor (initial_x / 2);  max_x = ceil (initial_x * textord_width_limit);  small_it.move_to_first ();  for (small_it.mark_cycle_pt (); !small_it.cycled_list ();  small_it.forward ()) {    height = small_it.data ()->bounding_box ().height ();    if (height >= min_y)      
large_it.add_after_then_move (small_it.extract ());  }  size_stats.clear ();  for (src_it.mark_cycle_pt (); !src_it.cycled_list (); src_it.forward ()) {    height = src_it.data ()->bounding_box ().height ();    width = src_it.data ()->bounding_box ().width ();    if (height < min_y)      small_it.add_after_then_move (src_it.extract ());    else if (height > max_y || width > max_x)      large_it.add_after_then_move (src_it.extract ());    else      size_stats.add (height, 1);  }  max_height = size_stats.ile (textord_initialasc_ile);  //      printf("max_y=%g, min_y=%g, initial_x=%g, max_height=%g,",  //              max_y,min_y,initial_x,max_height);  max_height *= textord_merge_x / (textord_merge_x + textord_merge_asc);  if (max_height > initial_x)    initial_x = max_height;  //      printf(" ret=%g\n",initial_x);  return initial_x;}/********************************************************************** * textord_page * * Textord the list of blobs and return a list of proper blocks. **********************************************************************/void textord_page(                             //make rows & words                  ICOORD page_tr,              //top right                  BLOCK_LIST *blocks,          //block list                  TO_BLOCK_LIST *land_blocks,  //rotated for landscape                  TO_BLOCK_LIST *port_blocks   //output list                 ) {  float gradient;                //global skew  set_global_loc_code(LOC_TEXT_ORD_ROWS);  gradient = make_rows (page_tr, blocks, land_blocks, port_blocks);  if (global_monitor != NULL) {    global_monitor->ocr_alive = TRUE;    global_monitor->progress = 20;  }  set_global_loc_code(LOC_TEXT_ORD_WORDS);  make_words(page_tr, gradient, blocks, land_blocks, port_blocks);  if (global_monitor != NULL) {    global_monitor->ocr_alive = TRUE;    global_monitor->progress = 30;  }  cleanup_blocks(blocks);  //remove empties#ifndef GRAPHICS_DISABLED  close_to_win();#endif  if (textord_exit_after && 
!interactive_mode)    exit (0);}/********************************************************************** * cleanup_blocks * * Delete empty blocks, rows from the page. **********************************************************************/void cleanup_blocks(                    //remove empties                    BLOCK_LIST *blocks  //list                   ) {  BLOCK_IT block_it = blocks;    //iterator  ROW_IT row_it;                 //row iterator  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();  block_it.forward ()) {    row_it.set_to_list (block_it.data ()->row_list ());    for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {      if (textord_noise_rejrows        && !row_it.data ()->word_list ()->empty ()        && clean_noise_from_row (row_it.data ())        || row_it.data ()->word_list ()->empty ())        delete row_it.extract ();//lose empty row      else {        if (textord_noise_rejwords)          clean_noise_from_words (row_it.data ());        if (textord_blshift_maxshift >= 0)          tweak_row_baseline (row_it.data ());      }    }    if (block_it.data ()->row_list ()->empty ()) {      delete block_it.extract ();//lose empty block    }  }}/********************************************************************** * clean_noise_from_row * * Move blobs of words from rows of garbage into the reject blobs list. 
**********************************************************************/BOOL8 clean_noise_from_row(          //remove empties                           ROW *row  //row to clean                          ) {  BOOL8 testing_on;  BOX blob_box;                  //bounding box  C_BLOB *blob;                  //current blob  C_OUTLINE *outline;            //current outline  WERD *word;                    //current word  INT32 blob_size;               //biggest size  INT32 trans_count = 0;         //no of transitions  INT32 trans_threshold;         //noise tolerance  INT32 dot_count;               //small objects  INT32 norm_count;              //normal objects  INT32 super_norm_count;        //real char-like                                 //words of row  WERD_IT word_it = row->word_list ();  C_BLOB_IT blob_it;             //blob iterator  C_OUTLINE_IT out_it;           //outline iterator  if (textord_test_y > row->base_line (textord_test_x)    && textord_show_blobs    && textord_test_y < row->base_line (textord_test_x) + row->x_height ())    testing_on = TRUE;  else    testing_on = FALSE;  dot_count = 0;  norm_count = 0;  super_norm_count = 0;  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {    word = word_it.data ();      //current word                                 //blobs in word    blob_it.set_to_list (word->cblob_list ());    for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();    blob_it.forward ()) {      blob = blob_it.data ();      if (!word->flag (W_DONT_CHOP)) {                                 //get outlines        out_it.set_to_list (blob->out_list ());        for (out_it.mark_cycle_pt (); !out_it.cycled_list ();        out_it.forward ()) {          outline = out_it.data ();          blob_box = outline->bounding_box ();          blob_size =            blob_box.width () >            blob_box.height ()? blob_box.width () : blob_box.            
height();          if (blob_size < textord_noise_sizelimit * row->x_height ())            dot_count++;         //count smal outlines          if (!outline->child ()->empty ()            && blob_box.height () <            (1 + textord_noise_syfract) * row->x_height ()            && blob_box.height () >            (1 - textord_noise_syfract) * row->x_height ()            && blob_box.width () <            (1 + textord_noise_sxfract) * row->x_height ()            && blob_box.width () >            (1 - textord_noise_sxfract) * row->x_height ())            super_norm_count++;  //count smal outlines        }      }      else        super_norm_count++;      blob_box = blob->bounding_box ();      blob_size =        blob_box.width () >        blob_box.height ()? blob_box.width () : blob_box.height ();

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -