⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gap_map.cpp

📁 一些OCR的相关资料，希望对研究OCR的朋友有所帮助。
💻 CPP
字号:
#include "mfcpch.h"
#include          "statistc.h"
#include          "gap_map.h"

#define EXTERN

EXTERN BOOL_VAR (gapmap_debug, FALSE, "Say which blocks have tables");
EXTERN BOOL_VAR (gapmap_use_ends, FALSE,
"Use large space at start and end of rows");
EXTERN BOOL_VAR (gapmap_no_isolated_quanta, FALSE,
"Ensure gaps not less than 2quanta wide");
EXTERN double_VAR (gapmap_big_gaps, 1.75, "xht multiplier");

/*************************************************************************
 * A block gap map is a quantised histogram of whitespace regions in the
 * block. It is a vertical projection of wide gaps WITHIN lines
 *
 * The map is held as an array of counts of rows which have a wide gap
 * covering that region of the row. Each bucket in the map represents a width
 * of about half an xheight - (The median of the xhts in the rows is used.)
 *
 * The block is considered RECTANGULAR - delimited by the left and right
 * extremes of the rows in the block. However, ONLY wide gaps WITHIN a row are
 * counted.
 *
 *************************************************************************/

/*************************************************************************
 * count_gap()
 * Add one to every bucket in the inclusive range [first, last], after
 * clipping that range to the valid indices [0, map_max] of the map.
 * A range which is empty after clipping changes nothing.
 *************************************************************************/
static void count_gap(                  //record one wide gap
                      INT16 *map,       //bucket counts
                      INT16 map_max,    //last valid bucket index
                      int first,        //first bucket of gap
                      int last          //last bucket of gap
                     ) {
  int bucket;

  if (first < 0)
    first = 0;
  if (last > map_max)
    last = map_max;
  for (bucket = first; bucket <= last; bucket++)
    map[bucket]++;
}


/*************************************************************************
 * GAPMAP::GAPMAP()
 * Build the gap map for a block.
 *
 * Pass 1 finds the horizontal extent of the non-empty rows and collects
 * their rounded xheights. Pass 2 counts, per bucket, how many rows have a
 * wide gap (more than gapmap_big_gaps * xheight and more than 2 units)
 * covering that bucket. Finally any_tabs is set if some bucket is covered
 * in more than half of the rows.
 *
 * If there are fewer than 3 non-empty rows, or the extent is empty, no map
 * is allocated and any_tabs stays FALSE.
 *************************************************************************/
GAPMAP::GAPMAP(                 //Constructor
               TO_BLOCK *block  //block
              ) {
  TO_ROW_IT row_it;              //row iterator
  TO_ROW *row;                   //current row
  BLOBNBOX_IT blob_it;           //iterator
  BOX blob_box;
  BOX prev_blob_box;
  INT16 gap_width;
  INT16 start_of_row;
  INT16 end_of_row;
  STATS xht_stats (0, 128);
  int half_rows;                 //threshold: more than this many rows
  int i;

  row_it.set_to_list (block->get_rows ());
  /*
    Find left and right extremes and bucket size
  */
  map = NULL;
  min_left = MAX_INT16;
  max_right = -MAX_INT16;
  total_rows = 0;
  any_tabs = FALSE;
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    row = row_it.data ();
    if (!row->blob_list ()->empty ()) {
      total_rows++;
      xht_stats.add ((INT16) floor (row->xheight + 0.5), 1);
      blob_it.set_to_list (row->blob_list ());
      start_of_row = blob_it.data ()->bounding_box ().left ();
      end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();
      if (min_left > start_of_row)
        min_left = start_of_row;
      if (max_right < end_of_row)
        max_right = end_of_row;
    }
  }
  if ((total_rows < 3) || (min_left >= max_right)) {
    // Too little data for a map. Give every field a defined value so that
    // nothing is left unset; table_gap() returns early because any_tabs is
    // FALSE and therefore never divides by this zero bucket_size.
    bucket_size = 0;
    map_max = 0;
    total_rows = 0;
    min_left = max_right = 0;
    return;
  }
  bucket_size = (INT16) floor (xht_stats.median () + 0.5) / 2;
  // A median xheight that rounds to less than 2 would give a zero bucket
  // size and a division by zero below; fall back to one-unit buckets.
  if (bucket_size < 1)
    bucket_size = 1;
  map_max = (max_right - min_left) / bucket_size;
  map = (INT16 *) alloc_mem ((map_max + 1) * sizeof (INT16));
  for (i = 0; i <= map_max; i++)
    map[i] = 0;

  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    row = row_it.data ();
    if (!row->blob_list ()->empty ()) {
      blob_it.set_to_list (row->blob_list ());
      blob_it.mark_cycle_pt ();
      blob_box = box_next (&blob_it);
      prev_blob_box = blob_box;
      if (gapmap_use_ends) {
        /* Leading space */
        gap_width = blob_box.left () - min_left;
        if ((gap_width > gapmap_big_gaps * row->xheight)
            && gap_width > 2) {
          count_gap (map, map_max, 0,
                     (blob_box.left () - min_left) / bucket_size);
        }
      }
      while (!blob_it.cycled_list ()) {
        blob_box = box_next (&blob_it);
        gap_width = blob_box.left () - prev_blob_box.right ();
        if ((gap_width > gapmap_big_gaps * row->xheight)
            && gap_width > 2) {
          // count_gap clips the range, so a box edge lying outside
          // [min_left, max_right] cannot index outside the map.
          count_gap (map, map_max,
                     (prev_blob_box.right () - min_left) / bucket_size,
                     (blob_box.left () - min_left) / bucket_size);
        }
        prev_blob_box = blob_box;
      }
      if (gapmap_use_ends) {
        /* Trailing space */
        gap_width = max_right - prev_blob_box.right ();
        if ((gap_width > gapmap_big_gaps * row->xheight)
            && gap_width > 2) {
          count_gap (map, map_max,
                     (prev_blob_box.right () - min_left) / bucket_size,
                     map_max);
        }
      }
    }
  }

  half_rows = total_rows / 2;
  for (i = 0; i <= map_max; i++) {
    if (map[i] > half_rows) {
      // A neighbour which does not exist (beyond either end of the map)
      // counts as "not a gap". This matches the original end-bucket rules
      // and avoids reading map[-1] / map[1] when the map has one bucket.
      BOOL8 left_is_low = (i == 0) || (map[i - 1] <= half_rows);
      BOOL8 right_is_low = (i == map_max) || (map[i + 1] <= half_rows);

      if (gapmap_no_isolated_quanta && left_is_low && right_is_low) {
        map[i] = 0;              //prevent isolated quantum
      }
      else
        any_tabs = TRUE;
    }
  }
  if (gapmap_debug && any_tabs)
    tprintf ("Table found\n");
}


/*************************************************************************
 * GAPMAP::table_gap()
 * Is there a bucket in the specified range where more than half the rows in the
 * block have a wide gap?
 *
 * The range is clipped to the extent of the map, so positions outside the
 * block are treated as having no gap rather than indexing outside the map.
 *************************************************************************/
BOOL8 GAPMAP::table_gap(             //Is gap a table?
                        INT16 left,  //From here
                        INT16 right  //To here
                       ) {
  int min_quantum;
  int max_quantum;
  int i;

  // any_tabs is only ever set after the map has been allocated and
  // bucket_size has been made non-zero, so the divisions below are safe.
  if (!any_tabs)
    return FALSE;

  min_quantum = (left - min_left) / bucket_size;
  max_quantum = (right - min_left) / bucket_size;
  if (min_quantum < 0)
    min_quantum = 0;
  if (max_quantum > map_max)
    max_quantum = map_max;

  for (i = min_quantum; i <= max_quantum; i++) {
    if (map[i] > total_rows / 2)
      return TRUE;
  }
  return FALSE;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -