📄 tordmain.cpp
字号:
TO_BLOCK *block; //created block for (block_it.mark_cycle_pt (); !block_it.cycled_list (); block_it.forward ()) { block = block_it.data (); block->line_size = filter_noise_blobs (&block->blobs, &block->noise_blobs, &block->small_blobs, &block->large_blobs); block->line_spacing = block->line_size * (textord_merge_desc + textord_merge_x + textord_merge_asc + textord_merge_asc) / textord_merge_x; block->line_size *= textord_min_linesize; block->max_blob_size = block->line_size * textord_excess_blobsize;#ifndef GRAPHICS_DISABLED if (textord_show_blobs && testing_on) { if (to_win == NO_WINDOW) create_to_win(page_tr); plot_blob_list (to_win, &block->noise_blobs, CORAL, BLUE); plot_blob_list (to_win, &block->small_blobs, GOLDENROD, YELLOW); plot_blob_list (to_win, &block->large_blobs, DARK_GREEN, YELLOW); plot_blob_list (to_win, &block->blobs, WHITE, BROWN); } if (textord_show_boxes && testing_on) { if (to_win == NO_WINDOW) create_to_win(page_tr); plot_box_list (to_win, &block->noise_blobs, WHITE); plot_box_list (to_win, &block->small_blobs, WHITE); plot_box_list (to_win, &block->large_blobs, WHITE); plot_box_list (to_win, &block->blobs, WHITE); }#endif }}/********************************************************************** * filter_noise_blobs * * Move small blobs to a separate list. **********************************************************************/float filter_noise_blobs( //separate noise BLOBNBOX_LIST *src_list, //origonal list BLOBNBOX_LIST *noise_list, //noise list BLOBNBOX_LIST *small_list, //small blobs BLOBNBOX_LIST *large_list //large blobs ) { INT16 height; //height of blob INT16 width; //of blob BLOBNBOX_IT src_it = src_list; //iterators BLOBNBOX_IT noise_it = noise_list; BLOBNBOX_IT small_it = small_list; BLOBNBOX_IT large_it = large_list; STATS size_stats (0, MAX_NEAREST_DIST); //blob heights if (textord_new_initial_xheight) return filter_noise_blobs2 (src_list, noise_list, small_list, large_list); float min_y; //size limits float max_y; float max_x; for (src_it.mark_cycle_pt (); !src_it.cycled_list (); src_it.forward ()) { if (src_it.data ()->bounding_box ().height () < textord_max_noise_size) noise_it.add_after_then_move (src_it.extract ()); } for (src_it.mark_cycle_pt (); !src_it.cycled_list (); src_it.forward ()) { size_stats.add (src_it.data ()->bounding_box ().height (), 1); } min_y = floor (size_stats.ile (textord_blob_size_smallile / 100.0)); max_y = ceil (size_stats.ile (textord_blob_size_bigile / 100.0)); max_x = ceil (size_stats.ile (0.5) * textord_width_limit); for (src_it.mark_cycle_pt (); !src_it.cycled_list (); src_it.forward ()) { height = src_it.data ()->bounding_box ().height (); width = src_it.data ()->bounding_box ().width (); if (height < min_y) small_it.add_after_then_move (src_it.extract ()); else if (height > max_y || width > max_x) large_it.add_after_then_move (src_it.extract ()); } return size_stats.ile (textord_initialx_ile);}/********************************************************************** * filter_noise_blobs2 * * Move small blobs to a separate list. **********************************************************************/float filter_noise_blobs2( //separate noise BLOBNBOX_LIST *src_list, //origonal list BLOBNBOX_LIST *noise_list, //noise list BLOBNBOX_LIST *small_list, //small blobs BLOBNBOX_LIST *large_list //large blobs ) { INT16 height; //height of blob INT16 width; //of blob BLOBNBOX *blob; //current blob float initial_x; //first guess BLOBNBOX_IT src_it = src_list; //iterators BLOBNBOX_IT noise_it = noise_list; BLOBNBOX_IT small_it = small_list; BLOBNBOX_IT large_it = large_list; STATS size_stats (0, MAX_NEAREST_DIST); //blob heights float min_y; //size limits float max_y; float max_x; float max_height; //of good blobs for (src_it.mark_cycle_pt (); !src_it.cycled_list (); src_it.forward ()) { blob = src_it.data (); if (blob->bounding_box ().height () < textord_max_noise_size) noise_it.add_after_then_move (src_it.extract ()); else if (blob->enclosed_area () >= blob->bounding_box ().height () * blob->bounding_box ().width () * textord_noise_area_ratio) small_it.add_after_then_move (src_it.extract ()); } for (src_it.mark_cycle_pt (); !src_it.cycled_list (); src_it.forward ()) { size_stats.add (src_it.data ()->bounding_box ().height (), 1); } initial_x = size_stats.ile (textord_initialx_ile); max_y = ceil (initial_x * (textord_merge_desc + textord_merge_x + 2 * textord_merge_asc) / textord_merge_x); min_y = floor (initial_x / 2); max_x = ceil (initial_x * textord_width_limit); small_it.move_to_first (); for (small_it.mark_cycle_pt (); !small_it.cycled_list (); small_it.forward ()) { height = small_it.data ()->bounding_box ().height (); if (height >= min_y) large_it.add_after_then_move (small_it.extract ()); } size_stats.clear (); for (src_it.mark_cycle_pt (); !src_it.cycled_list (); src_it.forward ()) { height = src_it.data ()->bounding_box ().height (); width = src_it.data ()->bounding_box ().width (); if (height < min_y) small_it.add_after_then_move (src_it.extract ()); else if (height > max_y || width > max_x) large_it.add_after_then_move (src_it.extract ()); else size_stats.add (height, 1); } max_height = size_stats.ile (textord_initialasc_ile); // printf("max_y=%g, min_y=%g, initial_x=%g, max_height=%g,", // max_y,min_y,initial_x,max_height); max_height *= textord_merge_x / (textord_merge_x + textord_merge_asc); if (max_height > initial_x) initial_x = max_height; // printf(" ret=%g\n",initial_x); return initial_x;}/********************************************************************** * textord_page * * Textord the list of blobs and return a list of proper blocks. **********************************************************************/void textord_page( //make rows & words ICOORD page_tr, //top right BLOCK_LIST *blocks, //block list TO_BLOCK_LIST *land_blocks, //rotated for landscape TO_BLOCK_LIST *port_blocks //output list ) { float gradient; //global skew set_global_loc_code(LOC_TEXT_ORD_ROWS); gradient = make_rows (page_tr, blocks, land_blocks, port_blocks); if (global_monitor != NULL) { global_monitor->ocr_alive = TRUE; global_monitor->progress = 20; } set_global_loc_code(LOC_TEXT_ORD_WORDS); make_words(page_tr, gradient, blocks, land_blocks, port_blocks); if (global_monitor != NULL) { global_monitor->ocr_alive = TRUE; global_monitor->progress = 30; } cleanup_blocks(blocks); //remove empties#ifndef GRAPHICS_DISABLED close_to_win();#endif if (textord_exit_after && !interactive_mode) exit (0);}/********************************************************************** * cleanup_blocks * * Delete empty blocks, rows from the page. **********************************************************************/void cleanup_blocks( //remove empties BLOCK_LIST *blocks //list ) { BLOCK_IT block_it = blocks; //iterator ROW_IT row_it; //row iterator for (block_it.mark_cycle_pt (); !block_it.cycled_list (); block_it.forward ()) { row_it.set_to_list (block_it.data ()->row_list ()); for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { if (textord_noise_rejrows && !row_it.data ()->word_list ()->empty () && clean_noise_from_row (row_it.data ()) || row_it.data ()->word_list ()->empty ()) delete row_it.extract ();//lose empty row else { if (textord_noise_rejwords) clean_noise_from_words (row_it.data ()); if (textord_blshift_maxshift >= 0) tweak_row_baseline (row_it.data ()); } } if (block_it.data ()->row_list ()->empty ()) { delete block_it.extract ();//lose empty block } }}/********************************************************************** * clean_noise_from_row * * Move blobs of words from rows of garbage into the reject blobs list. **********************************************************************/BOOL8 clean_noise_from_row( //remove empties ROW *row //row to clean ) { BOOL8 testing_on; BOX blob_box; //bounding box C_BLOB *blob; //current blob C_OUTLINE *outline; //current outline WERD *word; //current word INT32 blob_size; //biggest size INT32 trans_count = 0; //no of transitions INT32 trans_threshold; //noise tolerance INT32 dot_count; //small objects INT32 norm_count; //normal objects INT32 super_norm_count; //real char-like //words of row WERD_IT word_it = row->word_list (); C_BLOB_IT blob_it; //blob iterator C_OUTLINE_IT out_it; //outline iterator if (textord_test_y > row->base_line (textord_test_x) && textord_show_blobs && textord_test_y < row->base_line (textord_test_x) + row->x_height ()) testing_on = TRUE; else testing_on = FALSE; dot_count = 0; norm_count = 0; super_norm_count = 0; for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) { word = word_it.data (); //current word //blobs in word blob_it.set_to_list (word->cblob_list ()); for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { blob = blob_it.data (); if (!word->flag (W_DONT_CHOP)) { //get outlines out_it.set_to_list (blob->out_list ()); for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { outline = out_it.data (); blob_box = outline->bounding_box (); blob_size = blob_box.width () > blob_box.height ()? blob_box.width () : blob_box. height(); if (blob_size < textord_noise_sizelimit * row->x_height ()) dot_count++; //count smal outlines if (!outline->child ()->empty () && blob_box.height () < (1 + textord_noise_syfract) * row->x_height () && blob_box.height () > (1 - textord_noise_syfract) * row->x_height () && blob_box.width () < (1 + textord_noise_sxfract) * row->x_height () && blob_box.width () > (1 - textord_noise_sxfract) * row->x_height ()) super_norm_count++; //count smal outlines } } else super_norm_count++; blob_box = blob->bounding_box (); blob_size = blob_box.width () > blob_box.height ()? blob_box.width () : blob_box.height ();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -