makerow.cpp
来自「一个google的OCR源码」· C++ 代码 · 共 1,682 行 · 第 1/5 页
CPP
1,682 行
// Return true if the dot looks like it is part of the i.// Doesn't work for any other diacritical.static bool dot_of_i(BLOBNBOX* dot, BLOBNBOX* i, TO_ROW* row) { const TBOX& ibox = i->bounding_box(); const TBOX& dotbox = dot->bounding_box(); // Must overlap horizontally by enough and be high enough. int overlap = MIN(dotbox.right(), ibox.right()) - MAX(dotbox.left(), ibox.left()); if (ibox.height() <= 2 * dotbox.height() || (overlap * 2 < ibox.width() && overlap < dotbox.width())) return false; // If the i is tall and thin then it is good. if (ibox.height() > ibox.width() * 2) return true; // The i or ! must be tall and thin. // It might still be tall and thin, but it might be joined to something. // So search the outline for a piece of large height close to the edges // of the dot. const double kHeightFraction = 0.6; double target_height = MIN(dotbox.bottom(), ibox.top()); target_height -= row->line_m()*dotbox.left() + row->line_c(); target_height *= kHeightFraction; int left_min = dotbox.left() - dotbox.width(); int middle = (dotbox.left() + dotbox.right())/2; int right_max = dotbox.right() + dotbox.width(); int left_miny = 0; int left_maxy = 0; int right_miny = 0; int right_maxy = 0; bool found_left = false; bool found_right = false; bool in_left = false; bool in_right = false; C_BLOB* blob = i->cblob(); C_OUTLINE_IT o_it = blob->out_list(); for (o_it.mark_cycle_pt(); !o_it.cycled_list(); o_it.forward()) { C_OUTLINE* outline = o_it.data(); int length = outline->pathlength(); ICOORD pos = outline->start_pos(); for (int step = 0; step < length; pos += outline->step(step++)) { int x = pos.x(); int y = pos.y(); if (x >= left_min && x < middle && !found_left) { // We are in the left part so find min and max y. if (in_left) { if (y > left_maxy) left_maxy = y; if (y < left_miny) left_miny = y; } else { left_maxy = left_miny = y; in_left = true; } } else if (in_left) { // We just left the left so look for size. if (left_maxy - left_miny > target_height) { if (found_right) return true; found_left = true; } in_left = false; } if (x <= right_max && x > middle && !found_right) { // We are in the right part so find min and max y. if (in_right) { if (y > right_maxy) right_maxy = y; if (y < right_miny) right_miny = y; } else { right_maxy = right_miny = y; in_right = true; } } else if (in_right) { // We just left the right so look for size. if (right_maxy - right_miny > target_height) { if (found_left) return true; found_right = true; } in_right = false; } } } return false;}static void vigorous_noise_removal(TO_BLOCK* block) { TO_ROW_IT row_it = block->get_rows (); for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { TO_ROW* row = row_it.data(); BLOBNBOX_IT b_it = row->blob_list(); // Estimate the xheight on the row. int max_height = 0; for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { BLOBNBOX* blob = b_it.data(); if (blob->bounding_box().height() > max_height) max_height = blob->bounding_box().height(); } STATS hstats(0, max_height + 1); for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { BLOBNBOX* blob = b_it.data(); int height = blob->bounding_box().height(); if (height >= kMinSize) hstats.add(blob->bounding_box().height(), 1); } float xheight = hstats.median(); // Delete small objects. BLOBNBOX* prev = NULL; for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { BLOBNBOX* blob = b_it.data(); const TBOX& box = blob->bounding_box(); if (box.height() < kNoiseSize * xheight) { // Small so delete unless it looks like an i dot. if (prev != NULL) { if (dot_of_i(blob, prev, row)) continue; // Looks OK. } if (!b_it.at_last()) { BLOBNBOX* next = b_it.data_relative(1); if (dot_of_i(blob, next, row)) continue; // Looks OK. } // It might be noise so get rid of it. if (blob->blob() != NULL) delete blob->blob(); if (blob->cblob() != NULL) delete blob->cblob(); delete b_it.extract(); } else { prev = blob; } } }}/********************************************************************** * cleanup_rows * * Remove overlapping rows and fit all the blobs to what's left. **********************************************************************/void cleanup_rows( //find lines ICOORD page_tr, //top right TO_BLOCK *block, //block to do float gradient, //gradient to fit FCOORD rotation, //for drawing inT32 block_edge, //edge of block BOOL8 testing_on //correct orientation ) { //iterators BLOBNBOX_IT blob_it = &block->blobs; TO_ROW_IT row_it = block->get_rows ();#ifndef GRAPHICS_DISABLED if (textord_show_parallel_rows && testing_on) { if (to_win == NULL) create_to_win(page_tr); }#endif //get row coords fit_parallel_rows(block, gradient, rotation, block_edge, textord_show_parallel_rows &&testing_on); delete_non_dropout_rows(block, gradient, rotation, block_edge, textord_show_parallel_rows &&testing_on); expand_rows(page_tr, block, gradient, rotation, block_edge, testing_on); blob_it.set_to_list (&block->blobs); row_it.set_to_list (block->get_rows ()); for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) blob_it.add_list_after (row_it.data ()->blob_list ()); //give blobs back assign_blobs_to_rows (block, &gradient, 1, FALSE, FALSE, FALSE); //now new rows must be genuine blob_it.set_to_list (&block->blobs); blob_it.add_list_after (&block->large_blobs); assign_blobs_to_rows (block, &gradient, 2, TRUE, TRUE, FALSE); //safe to use big ones now blob_it.set_to_list (&block->blobs); //throw all blobs in blob_it.add_list_after (&block->noise_blobs); blob_it.add_list_after (&block->small_blobs); assign_blobs_to_rows (block, &gradient, 3, FALSE, FALSE, FALSE); //no rows for noise row_it.set_to_list (block->get_rows ()); for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) row_it.data ()->blob_list ()->sort (blob_x_order); fit_parallel_rows(block, gradient, rotation, block_edge, FALSE); if (textord_heavy_nr) { vigorous_noise_removal(block); } separate_underlines(block, gradient, rotation, testing_on); pre_associate_blobs(page_tr, block, rotation, testing_on);#ifndef GRAPHICS_DISABLED if (textord_show_final_rows && testing_on) { if (to_win == NULL) create_to_win(page_tr); }#endif fit_parallel_rows(block, gradient, rotation, block_edge, FALSE); // textord_show_final_rows && testing_on); make_spline_rows(block, gradient, rotation, block_edge, textord_show_final_rows &&testing_on); if (!textord_old_xheight || !textord_old_baselines) compute_block_xheight(block, gradient); if (textord_restore_underlines) //fix underlines restore_underlined_blobs(block);#ifndef GRAPHICS_DISABLED if (textord_show_final_rows && testing_on) { plot_blob_list (to_win, &block->blobs, ScrollView::MAGENTA, ScrollView::WHITE); //show discarded blobs plot_blob_list (to_win, &block->underlines, ScrollView::YELLOW, ScrollView::CORAL); } if (textord_show_final_rows && testing_on && block->blobs.length () > 0) tprintf ("%d blobs discarded as noise\n", block->blobs.length ()); if (textord_show_final_rows && testing_on) { draw_meanlines(block, gradient, block_edge, ScrollView::WHITE, rotation); }#endif}/********************************************************************** * delete_non_dropout_rows * * Compute the linespacing and offset. **********************************************************************/void delete_non_dropout_rows( //find lines TO_BLOCK *block, //block to do float gradient, //global skew FCOORD rotation, //deskew vector inT32 block_edge, //left edge BOOL8 testing_on //correct orientation ) { TBOX block_box; //deskewed block inT32 *deltas; //change in occupation inT32 *occupation; //of pixel coords inT32 max_y; //in block inT32 min_y; inT32 line_index; //of scan line inT32 line_count; //no of scan lines inT32 distance; //to drop-out inT32 xleft; //of block inT32 ybottom; //of block TO_ROW *row; //current row TO_ROW_IT row_it = block->get_rows (); BLOBNBOX_IT blob_it = &block->blobs; if (row_it.length () == 0) return; //empty block block_box = deskew_block_coords (block, gradient); xleft = block->block->bounding_box ().left (); ybottom = block->block->bounding_box ().bottom (); min_y = block_box.bottom () - 1; max_y = block_box.top () + 1; for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { line_index = (inT32) floor (row_it.data ()->intercept ()); if (line_index <= min_y) min_y = line_index - 1; if (line_index >= max_y) max_y = line_index + 1; } line_count = max_y - min_y + 1; if (line_count <= 0) return; //empty block deltas = (inT32 *) alloc_mem (line_count * sizeof (inT32)); occupation = (inT32 *) alloc_mem (line_count * sizeof (inT32)); if (deltas == NULL || occupation == NULL) MEMORY_OUT.error ("compute_line_spacing", ABORT, NULL); compute_line_occupation(block, gradient, min_y, max_y, occupation, deltas); compute_occupation_threshold ((inT32) ceil (block->line_spacing * (textord_merge_desc + textord_merge_asc)), (inT32) ceil (block->line_spacing * (textord_merge_x + textord_merge_asc)), max_y - min_y + 1, occupation, deltas);#ifndef GRAPHICS_DISABLED if (testing_on) { draw_occupation(xleft, ybottom, min_y, max_y, occupation, deltas); }#endif compute_dropout_distances(occupation, deltas, line_count); for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { row = row_it.data (); line_index = (inT32) floor (row->intercept ()); distance = deltas[line_index - min_y]; if (find_best_dropout_row (row, distance, block->line_spacing / 2, line_index, &row_it, testing_on)) {#ifndef GRAPHICS_DISABLED if (testing_on) plot_parallel_row(row, gradient, block_edge, ScrollView::WHITE, rotation);#endif blob_it.add_list_after (row_it.data ()->blob_list ()); delete row_it.extract (); //too far away } } for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { blob_it.add_list_after (row_it.data ()->blob_list ()); } free_mem(deltas); free_mem(occupation);}/********************************************************************** * find_best_dropout_row * * Delete this row if it has a neighbour with better dropout characteristics. * TRUE is returned if the row should be deleted. **********************************************************************/BOOL8 find_best_dropout_row( //find neighbours TO_ROW *row, //row to test inT32 distance, //dropout dist float dist_limit, //threshold distance inT32 line_index, //index of row TO_ROW_IT *row_it, //current position BOOL8 testing_on //correct orientation ) { inT32 next_index; //of neigbouring row
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?