makerow.cpp
来自「一个google的OCR源码」· C++ 代码 · 共 1,682 行 · 第 1/5 页
CPP
1,682 行
//get real membership if (block->get_rows ()->length () == 0) return; fit_parallel_rows(block, gradient, rotation, block_edge, textord_show_expanded_rows &&testing_on); if (!textord_new_initial_xheight) compute_row_stats(block, textord_show_expanded_rows &&testing_on); row_it.move_to_last (); do { row = row_it.data (); y_max = row->max_y (); //get current limits y_min = row->min_y (); y_bottom = row->intercept () - block->line_size * textord_expansion_factor * textord_merge_desc; y_top = row->intercept () + block->line_size * textord_expansion_factor * (textord_merge_x + textord_merge_asc); if (y_min > y_bottom) { //expansion allowed if (textord_show_expanded_rows && testing_on) tprintf("Expanding bottom of row at %f from %f to %f\n", row->intercept(), y_min, y_bottom); //expandable swallowed_row = TRUE; while (swallowed_row && !row_it.at_last ()) { swallowed_row = FALSE; //get next one test_row = row_it.data_relative (1); //overlaps space if (test_row->max_y () > y_bottom) { if (test_row->min_y () > y_bottom) { if (textord_show_expanded_rows && testing_on) tprintf("Eating row below at %f\n", test_row->intercept()); row_it.forward ();#ifndef GRAPHICS_DISABLED if (textord_show_expanded_rows && testing_on) plot_parallel_row(test_row, gradient, block_edge, ScrollView::WHITE, rotation);#endif blob_it.set_to_list (row->blob_list ()); blob_it.add_list_after (test_row->blob_list ()); //swallow complete row delete row_it.extract (); row_it.backward (); swallowed_row = TRUE; } else if (test_row->max_y () < y_min) { //shorter limit y_bottom = test_row->max_y (); if (textord_show_expanded_rows && testing_on) tprintf("Truncating limit to %f due to touching row at %f\n", y_bottom, test_row->intercept()); } else { y_bottom = y_min; //can't expand it if (textord_show_expanded_rows && testing_on) tprintf("Not expanding limit beyond %f due to touching row at %f\n", y_bottom, test_row->intercept()); } } } y_min = y_bottom; //expand it } if (y_max < y_top) { //expansion allowed if (textord_show_expanded_rows && testing_on) tprintf("Expanding top of row at %f from %f to %f\n", row->intercept(), y_max, y_top); swallowed_row = TRUE; while (swallowed_row && !row_it.at_first ()) { swallowed_row = FALSE; //get one above test_row = row_it.data_relative (-1); if (test_row->min_y () < y_top) { if (test_row->max_y () < y_top) { if (textord_show_expanded_rows && testing_on) tprintf("Eating row above at %f\n", test_row->intercept()); row_it.backward (); blob_it.set_to_list (row->blob_list ());#ifndef GRAPHICS_DISABLED if (textord_show_expanded_rows && testing_on) plot_parallel_row(test_row, gradient, block_edge, ScrollView::WHITE, rotation);#endif blob_it.add_list_after (test_row->blob_list ()); //swallow complete row delete row_it.extract (); row_it.forward (); swallowed_row = TRUE; } else if (test_row->min_y () < y_max) { //shorter limit y_top = test_row->min_y (); if (textord_show_expanded_rows && testing_on) tprintf("Truncating limit to %f due to touching row at %f\n", y_top, test_row->intercept()); } else { y_top = y_max; //can't expand it if (textord_show_expanded_rows && testing_on) tprintf("Not expanding limit beyond %f due to touching row at %f\n", y_top, test_row->intercept()); } } } y_max = y_top; } //new limits row->set_limits (y_min, y_max); row_it.backward (); } while (!row_it.at_last ());}/********************************************************************** * adjust_row_limits * * Change the limits of rows to suit the default fractions. **********************************************************************/void adjust_row_limits( //tidy limits TO_BLOCK *block //block to do ) { TO_ROW *row; //current row float size; //size of row float ymax; //top of row float ymin; //bottom of row TO_ROW_IT row_it = block->get_rows (); if (textord_show_expanded_rows) tprintf("Adjusting row limits for block(%d,%d)\n", block->block->bounding_box().left(), block->block->bounding_box().top()); for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { row = row_it.data (); size = row->max_y () - row->min_y (); if (textord_show_expanded_rows) tprintf("Row at %f has min %f, max %f, size %f\n", row->intercept(), row->min_y(), row->max_y(), size); size /= textord_merge_x + textord_merge_asc + textord_merge_desc; ymax = size * (textord_merge_x + textord_merge_asc); ymin = -size * textord_merge_desc; row->set_limits (row->intercept () + ymin, row->intercept () + ymax); row->merged = FALSE; }}/********************************************************************** * compute_row_stats * * Compute the linespacing and offset. **********************************************************************/void compute_row_stats( //find lines TO_BLOCK *block, //block to do BOOL8 testing_on //correct orientation ) { inT32 row_index; //of median TO_ROW *row; //current row TO_ROW *prev_row; //previous row float iqr; //inter quartile range TO_ROW_IT row_it = block->get_rows (); //number of rows inT16 rowcount = row_it.length (); TO_ROW **rows; //for choose nth rows = (TO_ROW **) alloc_mem (rowcount * sizeof (TO_ROW *)); if (rows == NULL) MEMORY_OUT.error ("compute_row_stats", ABORT, NULL); rowcount = 0; prev_row = NULL; row_it.move_to_last (); //start at bottom do { row = row_it.data (); if (prev_row != NULL) { rows[rowcount++] = prev_row; prev_row->spacing = row->intercept () - prev_row->intercept (); if (testing_on) tprintf ("Row at %g yields spacing of %g\n", row->intercept (), prev_row->spacing); } prev_row = row; row_it.backward (); } while (!row_it.at_last ()); block->key_row = prev_row; block->baseline_offset = fmod (prev_row->parallel_c (), block->line_spacing); if (testing_on) tprintf ("Blob based spacing=(%g,%g), offset=%g", block->line_size, block->line_spacing, block->baseline_offset); if (rowcount > 0) { row_index = choose_nth_item (rowcount * 3 / 4, rows, rowcount, sizeof (TO_ROW *), row_spacing_order); iqr = rows[row_index]->spacing; row_index = choose_nth_item (rowcount / 4, rows, rowcount, sizeof (TO_ROW *), row_spacing_order); iqr -= rows[row_index]->spacing; row_index = choose_nth_item (rowcount / 2, rows, rowcount, sizeof (TO_ROW *), row_spacing_order); block->key_row = rows[row_index]; if (testing_on) tprintf (" row based=%g(%g)", rows[row_index]->spacing, iqr); if (rowcount > 2 && iqr < rows[row_index]->spacing * textord_linespace_iqrlimit) { if (!textord_new_initial_xheight) { if (rows[row_index]->spacing < block->line_spacing && rows[row_index]->spacing > block->line_size) //within range block->line_size = rows[row_index]->spacing; //spacing=size else if (rows[row_index]->spacing > block->line_spacing) block->line_size = block->line_spacing; //too big so use max } else { if (rows[row_index]->spacing < block->line_spacing) block->line_size = rows[row_index]->spacing; else block->line_size = block->line_spacing; //too big so use max } if (block->line_size < textord_min_xheight) block->line_size = (float) textord_min_xheight; block->line_spacing = rows[row_index]->spacing; block->max_blob_size = block->line_spacing * textord_excess_blobsize; } block->baseline_offset = fmod (rows[row_index]->intercept (), block->line_spacing); } if (testing_on) tprintf ("\nEstimate line size=%g, spacing=%g, offset=%g\n", block->line_size, block->line_spacing, block->baseline_offset); free_mem(rows);}/********************************************************************** * compute_block_xheight * * Compute the xheight of the individual rows, then correlate them * and interpret ascenderless lines, correcting xheights. **********************************************************************/void compute_block_xheight( //find lines TO_BLOCK *block, //block to do float gradient //global skew ) { TO_ROW *row; //current row int xh_count, desc_count; //no of samples float block_median; //median blob size int asc_count, cap_count; inT32 min_size, max_size; //limits on xheight inT32 evidence; //no of samples on row float xh_sum, desc_sum; //for averages float asc_sum, cap_sum; TO_ROW_IT row_it = block->get_rows (); STATS row_heights; //block evidence if (row_it.empty ()) return; //no rows block_median = median_block_xheight (block, gradient); block_median *= 2; if (block_median < block->line_size) block_median = block->line_size; // tprintf("Block median=%g, linesize=%g\n", // block_median,block->line_size); max_size = (inT32) ceil (block_median); min_size = (inT32) floor (block_median * textord_minxh); row_heights.set_range (min_size, max_size + 1); xh_count = desc_count = asc_count = cap_count = 0; xh_sum = desc_sum = asc_sum = cap_sum = 0.0f; for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { row = row_it.data (); evidence = compute_row_xheight (row, min_size, max_size, gradient); if (row->xheight > 0 && row->ascrise > 0) { row_heights.add ((inT32) row->xheight, evidence); xh_count += evidence; asc_sum += row->ascrise; asc_count++; } else if (row->xheight > 0) { cap_sum += row->xheight; //assume just caps cap_count++; } if (row->descdrop != 0) { desc_sum += row->descdrop; desc_count++; } } if (xh_count > 0) { //median xh_sum = row_heights.ile (0.5); asc_sum /= asc_count; } else if (cap_count > 0) { cap_sum /= cap_count; //must assume caps xh_sum = cap_sum * textord_merge_x / (textord_merge_x + textord_merge_asc); asc_sum = cap_sum * textord_merge_asc / (textord_merge_x + textord_merge_asc); } else { //default sizes xh_sum = block_median * textord_merge_x; asc_sum = block_median * textord_merge_asc; } if (desc_count > 0) { desc_sum /= desc_count; } else { desc_sum = xh_sum * textord_merge_desc / textord_merge_x; } // tprintf("Block average x height=%g, count=%d, asc=%g/%d, desc=%g/%d,cap=%g/%d\n", // xh_sum,xh_count,asc_sum,asc_count,desc_sum,desc_count, // cap_sum,cap_count); if (xh_sum < textord_min_xheight) xh_sum = (float) textord_min_xheight; block->xheight = xh_sum; for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { correct_row_xheight (row_it.data (), xh_sum, asc_sum, desc_sum); }}/********************************************************************** * median_block_xheight *
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?