makerow.cpp
来自「一个google的OCR源码」· C++ 代码 · 共 1,682 行 · 第 1/5 页
CPP
1,682 行
* Compute the linespacing and offset. **********************************************************************/float median_block_xheight( //find lines TO_BLOCK *block, //block to do float gradient //global skew ) { TO_ROW *row; //current row float result; //output size float xcentre; //centre of blob TO_ROW_IT row_it = block->get_rows (); BLOBNBOX_IT blob_it; BLOBNBOX *blob; //current blob float *heights; //for choose nth inT32 blob_count; //blobs in block inT32 blob_index; //current blob blob_count = 0; for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) blob_count += row_it.data ()->blob_list ()->length (); heights = (float *) alloc_mem (blob_count * sizeof (float)); if (heights == NULL) MEMORY_OUT.error ("compute_row_stats", ABORT, NULL); blob_index = 0; for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { row = row_it.data (); blob_it.set_to_list (row->blob_list ()); for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { blob = blob_it.data (); if (!blob->joined_to_prev ()) { xcentre = (blob->bounding_box ().left () + blob->bounding_box ().right ()) / 2.0f; heights[blob_index] = blob->bounding_box ().top () - gradient * xcentre - row->parallel_c (); if (heights[blob_index] > 0) blob_index++; } } } ASSERT_HOST (blob_index > 0); //dont expect 0 blob_count = blob_index; blob_index = choose_nth_item (blob_count / 2, heights, blob_count); result = heights[blob_index]; free_mem(heights); return result;}/********************************************************************** * compute_row_xheight * * Estimate the xheight of this row. * Compute the ascender rise and descender drop at the same time. **********************************************************************/inT32 compute_row_xheight( //find lines TO_ROW *row, //row to do inT32 min_height, //min xheight inT32 max_height, //max xheight float gradient //global skew ) { BOOL8 in_best_pile; //control of mode size inT32 prev_size; //previous size float xcentre; //centre of blob float height; //height of blob BLOBNBOX_IT blob_it = row->blob_list (); BLOBNBOX *blob; //current blob inT32 blob_count; //blobs in block inT32 x; //xheight index inT32 asc; //ascender index inT32 blob_index; //current blob inT32 mode_count; //no of modes inT32 best_count; //count of best x so far float ratio; //size ratio inT32 modes[MAX_HEIGHT_MODES]; //biggest piles STATS heights (min_height, max_height + 1); for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { blob = blob_it.data (); if (!blob->joined_to_prev ()) { xcentre = (blob->bounding_box ().left () + blob->bounding_box ().right ()) / 2.0f; height = blob->bounding_box ().top (); if (textord_fix_xheight_bug) height -= row->baseline.y (xcentre); else height -= gradient * xcentre + row->parallel_c (); if (height >= min_height && height <= max_height && (!textord_xheight_tweak || height > textord_min_xheight)) heights.add ((inT32) floor (height + 0.5), 1); } } blob_index = heights.mode (); //find mode //get count of mode blob_count = heights.pile_count (blob_index); if (textord_debug_xheights) tprintf ("min_height=%d, max_height=%d, mode=%d, count=%d, total=%d,%d\n", min_height, max_height, blob_index, blob_count, heights.get_total (), row->blob_list ()->length ()); row->ascrise = 0.0f; row->xheight = 0.0f; row->descdrop = 0.0f; //undefined; in_best_pile = FALSE; prev_size = -MAX_INT32; best_count = 0; if (blob_count > 0) { //get biggest ones mode_count = compute_height_modes (&heights, min_height, max_height, modes, MAX_HEIGHT_MODES); for (x = 0; x < mode_count - 1; x++) { if (modes[x] != prev_size + 1) in_best_pile = FALSE; //had empty height if (heights.pile_count (modes[x]) >= blob_count * textord_xheight_mode_fraction && (in_best_pile || heights.pile_count (modes[x]) > best_count)) { for (asc = x + 1; asc < mode_count; asc++) { ratio = (float) modes[asc] / modes[x]; if (textord_ascx_ratio_min < ratio && ratio < textord_ascx_ratio_max && heights.pile_count (modes[asc]) >= blob_count * textord_ascheight_mode_fraction) { if (heights.pile_count (modes[x]) > best_count) { in_best_pile = TRUE; best_count = heights.pile_count (modes[x]); } // tprintf("X=%d, asc=%d, count=%d, ratio=%g\n", // modes[x],modes[asc]-modes[x], // heights.pile_count(modes[x]), // ratio); prev_size = modes[x]; row->xheight = (float) modes[x]; row->ascrise = (float) (modes[asc] - modes[x]); } } } } if (row->xheight == 0) { //single mode row->xheight = (float) blob_index; row->ascrise = 0.0f; if (textord_debug_xheights) tprintf ("Single mode xheight set to %g\n", row->xheight); } else if (textord_debug_xheights) tprintf ("Multi-mode xheight set to %g, asc=%g\n", row->xheight, row->ascrise); row->descdrop = (float) compute_row_descdrop (row, gradient); //find descenders } return best_count;}/********************************************************************** * compute_row_descdrop * * Estimate the descdrop of this row. **********************************************************************/inT32 compute_row_descdrop( //find lines TO_ROW *row, //row to do float gradient //global skew ) { inT32 min_height = (inT32) floor (row->xheight * textord_descx_ratio_min); inT32 max_height = (inT32) floor (row->xheight * textord_descx_ratio_max); float xcentre; //centre of blob float height; //height of blob BLOBNBOX_IT blob_it = row->blob_list (); BLOBNBOX *blob; //current blob inT32 blob_count; //blobs in block inT32 blob_index; //current blob STATS heights (min_height, max_height + 1); for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { blob = blob_it.data (); if (!blob->joined_to_prev ()) { xcentre = (blob->bounding_box ().left () + blob->bounding_box ().right ()) / 2.0f; height = gradient * xcentre + row->parallel_c () - blob->bounding_box ().bottom (); if (height >= min_height && height <= max_height) heights.add ((inT32) floor (height + 0.5), 1); } } blob_index = heights.mode (); //find mode //get count of mode blob_count = heights.pile_count (blob_index); return blob_count > 0 ? -blob_index : 0;}/********************************************************************** * compute_height_modes * * Find the top maxmodes values in the input array and put their * indices in the output in the order in which they occurred. **********************************************************************/inT32 compute_height_modes( //find lines STATS *heights, //stats to search inT32 min_height, //bottom of range inT32 max_height, //top of range inT32 *modes, //output array inT32 maxmodes //size of modes ) { inT32 pile_count; //no in source pile inT32 src_count; //no of source entries inT32 src_index; //current entry inT32 least_count; //height of smalllest inT32 least_index; //index of least inT32 dest_count; //index in modes src_count = max_height + 1 - min_height; dest_count = 0; least_count = MAX_INT32; least_index = -1; for (src_index = 0; src_index < src_count; src_index++) { pile_count = heights->pile_count (min_height + src_index); if (pile_count > 0) { if (dest_count < maxmodes) { if (pile_count < least_count) { //find smallest in array least_count = pile_count; least_index = dest_count; } modes[dest_count++] = min_height + src_index; } else if (pile_count >= least_count) { while (least_index < maxmodes - 1) { modes[least_index] = modes[least_index + 1]; //shuffle up least_index++; } //new one on end modes[maxmodes - 1] = min_height + src_index; if (pile_count == least_count) { //new smallest least_index = maxmodes - 1; } else { least_count = heights->pile_count (modes[0]); least_index = 0; for (dest_count = 1; dest_count < maxmodes; dest_count++) { pile_count = heights->pile_count (modes[dest_count]); if (pile_count < least_count) { //find smallest least_count = pile_count; least_index = dest_count; } } } } } } return dest_count;}/********************************************************************** * correct_row_xheight * * Adjust the xheight etc of this row if not within reasonable limits * of the average for the block. **********************************************************************/void correct_row_xheight( //fix bad values TO_ROW *row, //row to fix float xheight, //average values float ascrise, float descdrop) { if (textord_row_xheights) { if (row->xheight <= 0) row->xheight = xheight; if (row->ascrise < row->xheight * (textord_ascx_ratio_min - 1)) { if (row->xheight >= xheight * (1 - textord_xheight_error_margin) && row->xheight <= xheight * (1 + textord_xheight_error_margin)) { row->all_caps = FALSE; row->ascrise = ascrise; } else if (row->xheight >= (xheight + ascrise) * (1 - textord_xheight_error_margin) && row->xheight <= (xheight + ascrise) * (1 + textord_xheight_error_margin)) { row->all_caps = TRUE; //it was caps row->ascrise = row->xheight - xheight; row->xheight = xheight; } else { row->all_caps = TRUE; row->ascrise = row->xheight * ascrise / (xheight + ascrise); row->xheight -= row->ascrise; } } else row->all_caps = FALSE; row->ascrise = ascrise; if (row->descdrop >= -row->xheight * (textord_ascx_ratio_min - 1)) row->descdrop = descdrop; } else { if (row->xheight < xheight * (1 - textord_xheight_error_margin) || row->xheight > xheight * (1 + textord_xheight_error_margin)) row->xheight = xheight; //set to average row->all_caps = row->ascrise <= 0; if (row->ascrise < ascrise * (1 - textord_xheight_error_margin) || row->ascrise > ascrise * (1 + textord_xheight_error_margin)) row->ascrise = ascrise; //set to average if (row->descdrop < descdrop * (1 - textord_xheight_error_margin) || row->descdrop > descdrop * (1 + textord_xheight_error_margin)) row->descdrop = descdrop; //set to average }}/********************************************************************** * separate_underlines * * Test wide objects for being potential underlines. If they are then * put them in a separate list in the block. **********************************************************************/void separate_underlines( //make rough chars TO_BLOCK *block, //block to do float gradient, //skew angle FCOORD rotation, //inverse landscape BOOL8 testing_on //correct orientation
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?