tospace.cpp

来自「一个google的OCR源码」· C++ 代码 · 共 1,802 行 · 第 1/5 页

CPP
1,802
字号
            fuzzy_non = TRUE;          else            fuzzy_sp = TRUE;        }        else          space = FALSE;#ifndef GRAPHICS_DISABLED        mark_gap (blob_box, 4,          prev_gap, prev_blob_box.width (),          current_gap, next_blob_box.width (), next_gap);#endif      }      else if ((((next_blob_box.width () > 0) &&        narrow_blob (row, next_blob_box)) ||        ((prev_blob_box.width () > 0) &&      narrow_blob (row, prev_blob_box)))) {        fuzzy_sp = TRUE;#ifndef GRAPHICS_DISABLED        mark_gap (blob_box, 6,          prev_gap, prev_blob_box.width (),          current_gap, next_blob_box.width (), next_gap);#endif      }    }    else if ((current_gap > row->max_nonspace) &&    (current_gap <= row->space_threshold)) {      /* Heuristics to turn dubious kerns to spaces */      /* TRIED THIS BUT IT MADE THINGS WORSE          if ( prev_gap == MAX_INT16 )            prev_gap = 0;								//start of row          if ( next_gap == MAX_INT16 )            next_gap = 0;								//end of row      */      if ((prev_blob_box.width () > 0) &&        (next_blob_box.width () > 0) &&        (current_gap >=        tosp_kern_gap_factor1 * MAX (prev_gap, next_gap)) &&        wide_blob (row, prev_blob_box) &&      wide_blob (row, next_blob_box)) {        space = TRUE;        /*        tosp_flip_caution is an attempt to stop the default changing in cases        where there is a large difference between the kern and space estimates.          See problem in 'chiefs' where "have" gets split in the quotation.        */        if ((tosp_flip_fuzz_kn_to_sp) &&          ((tosp_flip_caution <= 0) ||          (tosp_flip_caution * row->kern_size > row->space_size)))          fuzzy_sp = TRUE;        else          fuzzy_non = TRUE;#ifndef GRAPHICS_DISABLED        mark_gap (blob_box, 7,          prev_gap, prev_blob_box.width (),          current_gap, next_blob_box.width (), next_gap);#endif      }      else if ((prev_blob_box.width () > 0) &&        (next_blob_box.width () > 0) &&        (current_gap >=        tosp_kern_gap_factor2 * MAX (prev_gap, next_gap)) &&        !(narrow_blob (row, prev_blob_box) ||        suspected_punct_blob (row, prev_blob_box)) &&        !(narrow_blob (row, next_blob_box) ||      suspected_punct_blob (row, next_blob_box))) {        space = TRUE;        fuzzy_non = TRUE;#ifndef GRAPHICS_DISABLED        mark_gap (blob_box, 8,          prev_gap, prev_blob_box.width (),          current_gap, next_blob_box.width (), next_gap);#endif      }      else if ((tosp_kern_gap_factor3 > 0) &&        (prev_blob_box.width () > 0) &&        (next_blob_box.width () > 0) &&        (current_gap >=        tosp_kern_gap_factor3 * MAX (prev_gap, next_gap)) &&        (!tosp_rule_9_test_punct ||        (!suspected_punct_blob (row, prev_blob_box) &&      !suspected_punct_blob (row, next_blob_box)))) {        space = TRUE;        fuzzy_non = TRUE;#ifndef GRAPHICS_DISABLED        mark_gap (blob_box, 9,          prev_gap, prev_blob_box.width (),          current_gap, next_blob_box.width (), next_gap);#endif      }    }    prev_gap_was_a_space = space && !(fuzzy_non);    return space;  }}BOOL8 narrow_blob(TO_ROW *row, TBOX blob_box) {  BOOL8 result;  result = ((blob_box.width () <= tosp_narrow_fraction * row->xheight) ||    (((float) blob_box.width () / blob_box.height ()) <=    tosp_narrow_aspect_ratio));  return result;}BOOL8 wide_blob(TO_ROW *row, TBOX blob_box) {  BOOL8 result;  if (tosp_wide_fraction > 0) {    if (tosp_wide_aspect_ratio > 0)      result = ((blob_box.width () >= tosp_wide_fraction * row->xheight) &&        (((float) blob_box.width () / blob_box.height ()) >        tosp_wide_aspect_ratio));    else      result = (blob_box.width () >= tosp_wide_fraction * row->xheight);  }  else    result = !narrow_blob (row, blob_box);  return result;}BOOL8 suspected_punct_blob(TO_ROW *row, TBOX box) {  BOOL8 result;  float baseline;  float blob_x_centre;  /* Find baseline of centre of blob */  blob_x_centre = (box.right () + box.left ()) / 2.0;  baseline = row->baseline.y (blob_x_centre);  result = (box.height () <= 0.66 * row->xheight) ||    (box.top () < baseline + row->xheight / 2.0) ||    (box.bottom () > baseline + row->xheight / 2.0);  return result;}void peek_at_next_gap(  //A COPY FOR PEEKING                      TO_ROW *row,                      BLOBNBOX_IT box_it,                      TBOX &next_blob_box,                      inT16 &next_gap,                      inT16 &next_within_xht_gap) {  TBOX next_reduced_blob_box;  TBOX bit_beyond;  BLOBNBOX_IT reduced_box_it = box_it;  next_blob_box = box_next (&box_it);  next_reduced_blob_box = reduced_box_next (row, &reduced_box_it);  if (box_it.at_first ()) {    next_gap = MAX_INT16;    next_within_xht_gap = MAX_INT16;  }  else {    bit_beyond = box_it.data ()->bounding_box ();    next_gap = bit_beyond.left () - next_blob_box.right ();    bit_beyond = reduced_box_next (row, &reduced_box_it);    next_within_xht_gap =      bit_beyond.left () - next_reduced_blob_box.right ();  }}#ifndef GRAPHICS_DISABLEDvoid mark_gap(             //Debug stuff              TBOX blob,    //blob following gap              inT16 rule,  // heuristic id              inT16 prev_gap,              inT16 prev_blob_width,              inT16 current_gap,              inT16 next_blob_width,              inT16 next_gap) {  ScrollView::Color col;                    //of ellipse marking flipped gap  switch (rule) {    case 1:      col = ScrollView::RED;      break;    case 2:      col = ScrollView::CYAN;      break;    case 3:      col = ScrollView::GREEN;      break;    case 4:      col = ScrollView::BLACK;      break;    case 5:      col = ScrollView::MAGENTA;      break;    case 6:      col = ScrollView::BLUE;      break;    case 7:      col = ScrollView::WHITE;      break;    case 8:      col = ScrollView::YELLOW;      break;    case 9:      col = ScrollView::BLACK;      break;    case 20:      col = ScrollView::CYAN;      break;    case 21:      col = ScrollView::GREEN;      break;    case 22:      col = ScrollView::MAGENTA;      break;    default:      col = ScrollView::BLACK;  }  if (textord_show_initial_words) {    to_win->Pen(col);  /*  if (rule < 20)      //interior_style(to_win, INT_SOLID, FALSE);    else      //interior_style(to_win, INT_HOLLOW, TRUE);*/                                 //x radius    to_win->Ellipse (current_gap / 2.0f,      blob.height () / 2.0f,     //y radius                                 //x centre      blob.left () - current_gap / 2.0f,                                 //y centre      blob.bottom () + blob.height () / 2.0f); }  if (tosp_debug_level > 0)    tprintf ("  (%d,%d) Sp<->Kn Rule %d %d %d %d %d\n",      blob.left () - current_gap / 2, blob.bottom (), rule,      prev_gap, prev_blob_width, current_gap,      next_blob_width, next_gap);}#endiffloat find_mean_blob_spacing(WERD *word) {  PBLOB_IT blob_it;  C_BLOB_IT cblob_it;  TBOX blob_box;  inT32 gap_sum = 0;  inT16 gap_count = 0;  inT16 prev_right;  if (word->flag (W_POLYGON)) {    blob_it.set_to_list (word->blob_list ());    if (!blob_it.empty ()) {      blob_it.mark_cycle_pt ();      prev_right = blob_it.data ()->bounding_box ().right ();      //first blob      blob_it.forward ();      for (; !blob_it.cycled_list (); blob_it.forward ()) {        blob_box = blob_it.data ()->bounding_box ();        gap_sum += blob_box.left () - prev_right;        gap_count++;        prev_right = blob_box.right ();      }    }  }  else {    cblob_it.set_to_list (word->cblob_list ());    if (!cblob_it.empty ()) {      cblob_it.mark_cycle_pt ();      prev_right = cblob_it.data ()->bounding_box ().right ();      //first blob      cblob_it.forward ();      for (; !cblob_it.cycled_list (); cblob_it.forward ()) {        blob_box = cblob_it.data ()->bounding_box ();        gap_sum += blob_box.left () - prev_right;        gap_count++;        prev_right = blob_box.right ();      }    }  }  if (gap_count > 0)    return (gap_sum / (float) gap_count);  else    return 0.0f;}BOOL8 ignore_big_gap(TO_ROW *row,                     inT32 row_length,                     GAPMAP *gapmap,                     inT16 left,                     inT16 right) {  inT16 gap = right - left + 1;  if (tosp_ignore_big_gaps > 999)    return FALSE;                //Dont ignore  if (tosp_ignore_big_gaps > 0)    return (gap > tosp_ignore_big_gaps * row->xheight);  if (gap > tosp_ignore_very_big_gaps * row->xheight)    return TRUE;  if (tosp_ignore_big_gaps == 0) {    if ((gap > 2.1 * row->xheight) && (row_length > 20 * row->xheight))      return TRUE;    if ((gap > 1.75 * row->xheight) &&      ((row_length > 35 * row->xheight) ||      gapmap->table_gap (left, right)))      return TRUE;  }  else {  /* ONLY time gaps < 3.0 * xht are ignored is when they are part of a table */    if ((gap > gapmap_big_gaps * row->xheight) &&      gapmap->table_gap (left, right))      return TRUE;  }  return FALSE;}/********************************************************************** * reduced_box_next * * Compute the bounding box of this blob with merging of x overlaps * but no pre-chopping. * Then move the iterator on to the start of the next blob. * DONT reduce the box for small things - eg punctuation. **********************************************************************/TBOX reduced_box_next(                 //get bounding box                     TO_ROW *row,     //current row                     BLOBNBOX_IT *it  //iterator to blobds                    ) {  BLOBNBOX *blob;                //current blob  BLOBNBOX *head_blob;           //place to store box  TBOX full_box;                  //full blob boundg box  TBOX reduced_box;               //box of significant part  inT16 left_above_xht;          //ABOVE xht left limit  inT16 new_left_above_xht;      //ABOVE xht left limit  blob = it->data ();  if (blob->red_box_set ()) {    reduced_box = blob->reduced_box ();    do {      it->forward ();      blob = it->data ();    }                                 //until next real blob    while ((blob->blob () == NULL && blob->cblob () == NULL) || blob->joined_to_prev ());    return reduced_box;  }  head_blob = blob;  full_box = blob->bounding_box ();  reduced_box = reduced_box_for_blob (blob, row, &left_above_xht);  do {    it->forward ();    blob = it->data ();    if (blob->blob () == NUL

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?