tospace.cpp
来自「一个google的OCR源码」· C++ 代码 · 共 1,802 行 · 第 1/5 页
CPP
1,802 行
fuzzy_non = TRUE; else fuzzy_sp = TRUE; } else space = FALSE;#ifndef GRAPHICS_DISABLED mark_gap (blob_box, 4, prev_gap, prev_blob_box.width (), current_gap, next_blob_box.width (), next_gap);#endif } else if ((((next_blob_box.width () > 0) && narrow_blob (row, next_blob_box)) || ((prev_blob_box.width () > 0) && narrow_blob (row, prev_blob_box)))) { fuzzy_sp = TRUE;#ifndef GRAPHICS_DISABLED mark_gap (blob_box, 6, prev_gap, prev_blob_box.width (), current_gap, next_blob_box.width (), next_gap);#endif } } else if ((current_gap > row->max_nonspace) && (current_gap <= row->space_threshold)) { /* Heuristics to turn dubious kerns to spaces */ /* TRIED THIS BUT IT MADE THINGS WORSE if ( prev_gap == MAX_INT16 ) prev_gap = 0; //start of row if ( next_gap == MAX_INT16 ) next_gap = 0; //end of row */ if ((prev_blob_box.width () > 0) && (next_blob_box.width () > 0) && (current_gap >= tosp_kern_gap_factor1 * MAX (prev_gap, next_gap)) && wide_blob (row, prev_blob_box) && wide_blob (row, next_blob_box)) { space = TRUE; /* tosp_flip_caution is an attempt to stop the default changing in cases where there is a large difference between the kern and space estimates. See problem in 'chiefs' where "have" gets split in the quotation. */ if ((tosp_flip_fuzz_kn_to_sp) && ((tosp_flip_caution <= 0) || (tosp_flip_caution * row->kern_size > row->space_size))) fuzzy_sp = TRUE; else fuzzy_non = TRUE;#ifndef GRAPHICS_DISABLED mark_gap (blob_box, 7, prev_gap, prev_blob_box.width (), current_gap, next_blob_box.width (), next_gap);#endif } else if ((prev_blob_box.width () > 0) && (next_blob_box.width () > 0) && (current_gap >= tosp_kern_gap_factor2 * MAX (prev_gap, next_gap)) && !(narrow_blob (row, prev_blob_box) || suspected_punct_blob (row, prev_blob_box)) && !(narrow_blob (row, next_blob_box) || suspected_punct_blob (row, next_blob_box))) { space = TRUE; fuzzy_non = TRUE;#ifndef GRAPHICS_DISABLED mark_gap (blob_box, 8, prev_gap, prev_blob_box.width (), current_gap, next_blob_box.width (), next_gap);#endif } else if ((tosp_kern_gap_factor3 > 0) && (prev_blob_box.width () > 0) && (next_blob_box.width () > 0) && (current_gap >= tosp_kern_gap_factor3 * MAX (prev_gap, next_gap)) && (!tosp_rule_9_test_punct || (!suspected_punct_blob (row, prev_blob_box) && !suspected_punct_blob (row, next_blob_box)))) { space = TRUE; fuzzy_non = TRUE;#ifndef GRAPHICS_DISABLED mark_gap (blob_box, 9, prev_gap, prev_blob_box.width (), current_gap, next_blob_box.width (), next_gap);#endif } } prev_gap_was_a_space = space && !(fuzzy_non); return space; }}BOOL8 narrow_blob(TO_ROW *row, TBOX blob_box) { BOOL8 result; result = ((blob_box.width () <= tosp_narrow_fraction * row->xheight) || (((float) blob_box.width () / blob_box.height ()) <= tosp_narrow_aspect_ratio)); return result;}BOOL8 wide_blob(TO_ROW *row, TBOX blob_box) { BOOL8 result; if (tosp_wide_fraction > 0) { if (tosp_wide_aspect_ratio > 0) result = ((blob_box.width () >= tosp_wide_fraction * row->xheight) && (((float) blob_box.width () / blob_box.height ()) > tosp_wide_aspect_ratio)); else result = (blob_box.width () >= tosp_wide_fraction * row->xheight); } else result = !narrow_blob (row, blob_box); return result;}BOOL8 suspected_punct_blob(TO_ROW *row, TBOX box) { BOOL8 result; float baseline; float blob_x_centre; /* Find baseline of centre of blob */ blob_x_centre = (box.right () + box.left ()) / 2.0; baseline = row->baseline.y (blob_x_centre); result = (box.height () <= 0.66 * row->xheight) || (box.top () < baseline + row->xheight / 2.0) || (box.bottom () > baseline + row->xheight / 2.0); return result;}void peek_at_next_gap( //A COPY FOR PEEKING TO_ROW *row, BLOBNBOX_IT box_it, TBOX &next_blob_box, inT16 &next_gap, inT16 &next_within_xht_gap) { TBOX next_reduced_blob_box; TBOX bit_beyond; BLOBNBOX_IT reduced_box_it = box_it; next_blob_box = box_next (&box_it); next_reduced_blob_box = reduced_box_next (row, &reduced_box_it); if (box_it.at_first ()) { next_gap = MAX_INT16; next_within_xht_gap = MAX_INT16; } else { bit_beyond = box_it.data ()->bounding_box (); next_gap = bit_beyond.left () - next_blob_box.right (); bit_beyond = reduced_box_next (row, &reduced_box_it); next_within_xht_gap = bit_beyond.left () - next_reduced_blob_box.right (); }}#ifndef GRAPHICS_DISABLEDvoid mark_gap( //Debug stuff TBOX blob, //blob following gap inT16 rule, // heuristic id inT16 prev_gap, inT16 prev_blob_width, inT16 current_gap, inT16 next_blob_width, inT16 next_gap) { ScrollView::Color col; //of ellipse marking flipped gap switch (rule) { case 1: col = ScrollView::RED; break; case 2: col = ScrollView::CYAN; break; case 3: col = ScrollView::GREEN; break; case 4: col = ScrollView::BLACK; break; case 5: col = ScrollView::MAGENTA; break; case 6: col = ScrollView::BLUE; break; case 7: col = ScrollView::WHITE; break; case 8: col = ScrollView::YELLOW; break; case 9: col = ScrollView::BLACK; break; case 20: col = ScrollView::CYAN; break; case 21: col = ScrollView::GREEN; break; case 22: col = ScrollView::MAGENTA; break; default: col = ScrollView::BLACK; } if (textord_show_initial_words) { to_win->Pen(col); /* if (rule < 20) //interior_style(to_win, INT_SOLID, FALSE); else //interior_style(to_win, INT_HOLLOW, TRUE);*/ //x radius to_win->Ellipse (current_gap / 2.0f, blob.height () / 2.0f, //y radius //x centre blob.left () - current_gap / 2.0f, //y centre blob.bottom () + blob.height () / 2.0f); } if (tosp_debug_level > 0) tprintf (" (%d,%d) Sp<->Kn Rule %d %d %d %d %d\n", blob.left () - current_gap / 2, blob.bottom (), rule, prev_gap, prev_blob_width, current_gap, next_blob_width, next_gap);}#endiffloat find_mean_blob_spacing(WERD *word) { PBLOB_IT blob_it; C_BLOB_IT cblob_it; TBOX blob_box; inT32 gap_sum = 0; inT16 gap_count = 0; inT16 prev_right; if (word->flag (W_POLYGON)) { blob_it.set_to_list (word->blob_list ()); if (!blob_it.empty ()) { blob_it.mark_cycle_pt (); prev_right = blob_it.data ()->bounding_box ().right (); //first blob blob_it.forward (); for (; !blob_it.cycled_list (); blob_it.forward ()) { blob_box = blob_it.data ()->bounding_box (); gap_sum += blob_box.left () - prev_right; gap_count++; prev_right = blob_box.right (); } } } else { cblob_it.set_to_list (word->cblob_list ()); if (!cblob_it.empty ()) { cblob_it.mark_cycle_pt (); prev_right = cblob_it.data ()->bounding_box ().right (); //first blob cblob_it.forward (); for (; !cblob_it.cycled_list (); cblob_it.forward ()) { blob_box = cblob_it.data ()->bounding_box (); gap_sum += blob_box.left () - prev_right; gap_count++; prev_right = blob_box.right (); } } } if (gap_count > 0) return (gap_sum / (float) gap_count); else return 0.0f;}BOOL8 ignore_big_gap(TO_ROW *row, inT32 row_length, GAPMAP *gapmap, inT16 left, inT16 right) { inT16 gap = right - left + 1; if (tosp_ignore_big_gaps > 999) return FALSE; //Dont ignore if (tosp_ignore_big_gaps > 0) return (gap > tosp_ignore_big_gaps * row->xheight); if (gap > tosp_ignore_very_big_gaps * row->xheight) return TRUE; if (tosp_ignore_big_gaps == 0) { if ((gap > 2.1 * row->xheight) && (row_length > 20 * row->xheight)) return TRUE; if ((gap > 1.75 * row->xheight) && ((row_length > 35 * row->xheight) || gapmap->table_gap (left, right))) return TRUE; } else { /* ONLY time gaps < 3.0 * xht are ignored is when they are part of a table */ if ((gap > gapmap_big_gaps * row->xheight) && gapmap->table_gap (left, right)) return TRUE; } return FALSE;}/********************************************************************** * reduced_box_next * * Compute the bounding box of this blob with merging of x overlaps * but no pre-chopping. * Then move the iterator on to the start of the next blob. * DONT reduce the box for small things - eg punctuation. **********************************************************************/TBOX reduced_box_next( //get bounding box TO_ROW *row, //current row BLOBNBOX_IT *it //iterator to blobds ) { BLOBNBOX *blob; //current blob BLOBNBOX *head_blob; //place to store box TBOX full_box; //full blob boundg box TBOX reduced_box; //box of significant part inT16 left_above_xht; //ABOVE xht left limit inT16 new_left_above_xht; //ABOVE xht left limit blob = it->data (); if (blob->red_box_set ()) { reduced_box = blob->reduced_box (); do { it->forward (); blob = it->data (); } //until next real blob while ((blob->blob () == NULL && blob->cblob () == NULL) || blob->joined_to_prev ()); return reduced_box; } head_blob = blob; full_box = blob->bounding_box (); reduced_box = reduced_box_for_blob (blob, row, &left_above_xht); do { it->forward (); blob = it->data (); if (blob->blob () == NUL
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?