makerow.cpp

来自「一个google的OCR源码」· C++ 代码 · 共 1,682 行 · 第 1/5 页

CPP
1,682
字号
// Return true if the dot looks like it is part of the i.// Doesn't work for any other diacritical.static bool dot_of_i(BLOBNBOX* dot, BLOBNBOX* i, TO_ROW* row) {  const TBOX& ibox = i->bounding_box();  const TBOX& dotbox = dot->bounding_box();  // Must overlap horizontally by enough and be high enough.  int overlap = MIN(dotbox.right(), ibox.right()) -                MAX(dotbox.left(), ibox.left());  if (ibox.height() <= 2 * dotbox.height() ||      (overlap * 2 < ibox.width() && overlap < dotbox.width()))    return false;  // If the i is tall and thin then it is good.  if (ibox.height() > ibox.width() * 2)    return true;  // The i or ! must be tall and thin.  // It might still be tall and thin, but it might be joined to something.  // So search the outline for a piece of large height close to the edges  // of the dot.  const double kHeightFraction = 0.6;  double target_height = MIN(dotbox.bottom(), ibox.top());  target_height -= row->line_m()*dotbox.left() + row->line_c();  target_height *= kHeightFraction;  int left_min = dotbox.left() - dotbox.width();  int middle = (dotbox.left() + dotbox.right())/2;  int right_max = dotbox.right() + dotbox.width();  int left_miny = 0;  int left_maxy = 0;  int right_miny = 0;  int right_maxy = 0;  bool found_left = false;  bool found_right = false;  bool in_left = false;  bool in_right = false;  C_BLOB* blob = i->cblob();  C_OUTLINE_IT o_it = blob->out_list();  for (o_it.mark_cycle_pt(); !o_it.cycled_list(); o_it.forward()) {    C_OUTLINE* outline = o_it.data();    int length = outline->pathlength();    ICOORD pos = outline->start_pos();    for (int step = 0; step < length; pos += outline->step(step++)) {      int x = pos.x();      int y = pos.y();      if (x >= left_min && x < middle && !found_left) {        // We are in the left part so find min and max y.        if (in_left) {          if (y > left_maxy) left_maxy = y;          if (y < left_miny) left_miny = y;        } else {          left_maxy = left_miny = y;          in_left = true;        }      } else if (in_left) {        // We just left the left so look for size.        if (left_maxy - left_miny > target_height) {          if (found_right)            return true;          found_left = true;        }        in_left = false;      }      if (x <= right_max && x > middle && !found_right) {        // We are in the right part so find min and max y.        if (in_right) {          if (y > right_maxy) right_maxy = y;          if (y < right_miny) right_miny = y;        } else {          right_maxy = right_miny = y;          in_right = true;        }      } else if (in_right) {        // We just left the right so look for size.        if (right_maxy - right_miny > target_height) {          if (found_left)            return true;          found_right = true;        }        in_right = false;      }    }  }  return false;}static void vigorous_noise_removal(TO_BLOCK* block) {  TO_ROW_IT row_it = block->get_rows ();  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {    TO_ROW* row = row_it.data();    BLOBNBOX_IT b_it = row->blob_list();    // Estimate the xheight on the row.    int max_height = 0;    for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {      BLOBNBOX* blob = b_it.data();      if (blob->bounding_box().height() > max_height)        max_height = blob->bounding_box().height();    }    STATS hstats(0, max_height + 1);    for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {      BLOBNBOX* blob = b_it.data();      int height = blob->bounding_box().height();      if (height >= kMinSize)        hstats.add(blob->bounding_box().height(), 1);    }    float xheight = hstats.median();    // Delete small objects.    BLOBNBOX* prev = NULL;    for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {      BLOBNBOX* blob = b_it.data();      const TBOX& box = blob->bounding_box();      if (box.height() < kNoiseSize * xheight) {        // Small so delete unless it looks like an i dot.        if (prev != NULL) {          if (dot_of_i(blob, prev, row))            continue;  // Looks OK.        }        if (!b_it.at_last()) {          BLOBNBOX* next = b_it.data_relative(1);          if (dot_of_i(blob, next, row))            continue;  // Looks OK.        }        // It might be noise so get rid of it.        if (blob->blob() != NULL)          delete blob->blob();        if (blob->cblob() != NULL)          delete blob->cblob();        delete b_it.extract();      } else {        prev = blob;      }    }  }}/********************************************************************** * cleanup_rows * * Remove overlapping rows and fit all the blobs to what's left. **********************************************************************/void cleanup_rows(                   //find lines                  ICOORD page_tr,    //top right                  TO_BLOCK *block,   //block to do                  float gradient,    //gradient to fit                  FCOORD rotation,   //for drawing                  inT32 block_edge,  //edge of block                  BOOL8 testing_on   //correct orientation                 ) {                                 //iterators  BLOBNBOX_IT blob_it = &block->blobs;  TO_ROW_IT row_it = block->get_rows ();#ifndef GRAPHICS_DISABLED  if (textord_show_parallel_rows && testing_on) {    if (to_win == NULL)      create_to_win(page_tr);  }#endif                                 //get row coords  fit_parallel_rows(block,                    gradient,                    rotation,                    block_edge,                    textord_show_parallel_rows &&testing_on);  delete_non_dropout_rows(block,                          gradient,                          rotation,                          block_edge,                          textord_show_parallel_rows &&testing_on);  expand_rows(page_tr, block, gradient, rotation, block_edge, testing_on);  blob_it.set_to_list (&block->blobs);  row_it.set_to_list (block->get_rows ());  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())    blob_it.add_list_after (row_it.data ()->blob_list ());  //give blobs back  assign_blobs_to_rows (block, &gradient, 1, FALSE, FALSE, FALSE);  //now new rows must be genuine  blob_it.set_to_list (&block->blobs);  blob_it.add_list_after (&block->large_blobs);  assign_blobs_to_rows (block, &gradient, 2, TRUE, TRUE, FALSE);  //safe to use big ones now  blob_it.set_to_list (&block->blobs);                                 //throw all blobs in  blob_it.add_list_after (&block->noise_blobs);  blob_it.add_list_after (&block->small_blobs);  assign_blobs_to_rows (block, &gradient, 3, FALSE, FALSE, FALSE);  //no rows for noise  row_it.set_to_list (block->get_rows ());  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())    row_it.data ()->blob_list ()->sort (blob_x_order);  fit_parallel_rows(block, gradient, rotation, block_edge, FALSE);  if (textord_heavy_nr) {    vigorous_noise_removal(block);  }  separate_underlines(block, gradient, rotation, testing_on);  pre_associate_blobs(page_tr, block, rotation, testing_on);#ifndef GRAPHICS_DISABLED  if (textord_show_final_rows && testing_on) {    if (to_win == NULL)      create_to_win(page_tr);  }#endif  fit_parallel_rows(block, gradient, rotation, block_edge, FALSE);  //              textord_show_final_rows && testing_on);  make_spline_rows(block,                   gradient,                   rotation,                   block_edge,                   textord_show_final_rows &&testing_on);  if (!textord_old_xheight || !textord_old_baselines)    compute_block_xheight(block, gradient);  if (textord_restore_underlines)                                 //fix underlines    restore_underlined_blobs(block);#ifndef GRAPHICS_DISABLED  if (textord_show_final_rows && testing_on) {    plot_blob_list (to_win, &block->blobs,                    ScrollView::MAGENTA, ScrollView::WHITE);    //show discarded blobs    plot_blob_list (to_win, &block->underlines,                    ScrollView::YELLOW, ScrollView::CORAL);  }  if (textord_show_final_rows && testing_on && block->blobs.length () > 0)    tprintf ("%d blobs discarded as noise\n", block->blobs.length ());  if (textord_show_final_rows && testing_on) {    draw_meanlines(block, gradient, block_edge, ScrollView::WHITE, rotation);  }#endif}/********************************************************************** * delete_non_dropout_rows * * Compute the linespacing and offset. **********************************************************************/void delete_non_dropout_rows(                   //find lines                             TO_BLOCK *block,   //block to do                             float gradient,    //global skew                             FCOORD rotation,   //deskew vector                             inT32 block_edge,  //left edge                             BOOL8 testing_on   //correct orientation                            ) {  TBOX block_box;                 //deskewed block  inT32 *deltas;                 //change in occupation  inT32 *occupation;             //of pixel coords  inT32 max_y;                   //in block  inT32 min_y;  inT32 line_index;              //of scan line  inT32 line_count;              //no of scan lines  inT32 distance;                //to drop-out  inT32 xleft;                   //of block  inT32 ybottom;                 //of block  TO_ROW *row;                   //current row  TO_ROW_IT row_it = block->get_rows ();  BLOBNBOX_IT blob_it = &block->blobs;  if (row_it.length () == 0)    return;                      //empty block  block_box = deskew_block_coords (block, gradient);  xleft = block->block->bounding_box ().left ();  ybottom = block->block->bounding_box ().bottom ();  min_y = block_box.bottom () - 1;  max_y = block_box.top () + 1;  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {    line_index = (inT32) floor (row_it.data ()->intercept ());    if (line_index <= min_y)      min_y = line_index - 1;    if (line_index >= max_y)      max_y = line_index + 1;  }  line_count = max_y - min_y + 1;  if (line_count <= 0)    return;                      //empty block  deltas = (inT32 *) alloc_mem (line_count * sizeof (inT32));  occupation = (inT32 *) alloc_mem (line_count * sizeof (inT32));  if (deltas == NULL || occupation == NULL)    MEMORY_OUT.error ("compute_line_spacing", ABORT, NULL);  compute_line_occupation(block, gradient, min_y, max_y, occupation, deltas);  compute_occupation_threshold ((inT32)    ceil (block->line_spacing *    (textord_merge_desc +    textord_merge_asc)),    (inT32) ceil (block->line_spacing *    (textord_merge_x +    textord_merge_asc)),    max_y - min_y + 1, occupation, deltas);#ifndef GRAPHICS_DISABLED  if (testing_on) {    draw_occupation(xleft, ybottom, min_y, max_y, occupation, deltas);  }#endif  compute_dropout_distances(occupation, deltas, line_count);  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {    row = row_it.data ();    line_index = (inT32) floor (row->intercept ());    distance = deltas[line_index - min_y];    if (find_best_dropout_row (row, distance, block->line_spacing / 2,    line_index, &row_it, testing_on)) {#ifndef GRAPHICS_DISABLED      if (testing_on)        plot_parallel_row(row, gradient, block_edge,                          ScrollView::WHITE, rotation);#endif      blob_it.add_list_after (row_it.data ()->blob_list ());      delete row_it.extract ();  //too far away    }  }  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {    blob_it.add_list_after (row_it.data ()->blob_list ());  }  free_mem(deltas);  free_mem(occupation);}/********************************************************************** * find_best_dropout_row * * Delete this row if it has a neighbour with better dropout characteristics. * TRUE is returned if the row should be deleted. **********************************************************************/BOOL8 find_best_dropout_row(                    //find neighbours                            TO_ROW *row,        //row to test                            inT32 distance,     //dropout dist                            float dist_limit,   //threshold distance                            inT32 line_index,   //index of row                            TO_ROW_IT *row_it,  //current position                            BOOL8 testing_on    //correct orientation                           ) {  inT32 next_index;              //of neigbouring row

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?