⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 tospace.cpp

📁 一OCR的相关资料。希望对研究OCR的朋友有所帮助。
💻 CPP
📖 第 1 页 / 共 5 页
字号:
#include "mfcpch.h"
#include "tovars.h"
#include "drawtord.h"
#include "tospace.h"
#include "ndminx.h"
#include "statistc.h"

/* EXTERN is defined empty here, so it adds nothing to the lines below. */
#define EXTERN

/*
 * Tunable word-spacing parameters, declared through the project's
 * BOOL_VAR / INT_VAR / double_VAR macros.  Judging by the argument pattern
 * each entry is (name, default value, description string) - confirm against
 * the macro definitions.
 *
 * NOTE(review): several description strings are duplicated between
 * variables (e.g. "Space stats use prechopping?", "Only stat OBVIOUS
 * spaces").  They are runtime text and are deliberately left untouched.
 */

/* ---- boolean switches ---- */
EXTERN BOOL_VAR (tosp_old_to_method, FALSE, "Space stats use prechopping?");
EXTERN BOOL_VAR (tosp_only_use_prop_rows, TRUE,
"Block stats to use fixed pitch rows?");
EXTERN BOOL_VAR (tosp_use_pre_chopping, FALSE,
"Space stats use prechopping?");
EXTERN BOOL_VAR (tosp_old_to_bug_fix, FALSE, "Fix suspected bug in old code");
EXTERN BOOL_VAR (tosp_block_use_cert_spaces, TRUE,
"Only stat OBVIOUS spaces");
EXTERN BOOL_VAR (tosp_row_use_cert_spaces, TRUE, "Only stat OBVIOUS spaces");
EXTERN BOOL_VAR (tosp_narrow_blobs_not_cert, TRUE,
"Only stat OBVIOUS spaces");
EXTERN BOOL_VAR (tosp_row_use_cert_spaces1, TRUE, "Only stat OBVIOUS spaces");
EXTERN BOOL_VAR (tosp_recovery_isolated_row_stats, TRUE,
"Use row alone when inadequate cert spaces");
EXTERN BOOL_VAR (tosp_only_small_gaps_for_kern, FALSE, "Better guess");
EXTERN BOOL_VAR (tosp_all_flips_fuzzy, FALSE, "Pass ANY flip to context?");
EXTERN BOOL_VAR (tosp_fuzzy_limit_all, TRUE,
"Dont restrict kn->sp fuzzy limit to tables");
EXTERN BOOL_VAR (tosp_stats_use_xht_gaps, TRUE,
"Use within xht gap for wd breaks");
EXTERN BOOL_VAR (tosp_use_xht_gaps, TRUE, "Use within xht gap for wd breaks");
EXTERN BOOL_VAR (tosp_only_use_xht_gaps, FALSE,
"Only use within xht gap for wd breaks");
EXTERN BOOL_VAR (tosp_rule_9_test_punct, FALSE,
"Dont chng kn to space next to punct");
EXTERN BOOL_VAR (tosp_flip_fuzz_kn_to_sp, TRUE, "Default flip");
EXTERN BOOL_VAR (tosp_flip_fuzz_sp_to_kn, TRUE, "Default flip");
EXTERN BOOL_VAR (tosp_improve_thresh, FALSE, "Enable improvement heuristic");

/* ---- integer settings ---- */
EXTERN INT_VAR (tosp_debug_level, 0, "Debug data");
EXTERN INT_VAR (tosp_enough_space_samples_for_median, 3,
"or should we use mean");
EXTERN INT_VAR (tosp_redo_kern_limit, 10,
"No.samples reqd to reestimate for row");
EXTERN INT_VAR (tosp_few_samples, 40,
"No.gaps reqd with 1 large gap to treat as a table");
EXTERN INT_VAR (tosp_short_row, 20,
"No.gaps reqd with few cert spaces to use certs");
EXTERN INT_VAR (tosp_sanity_method, 1, "How to avoid being silly");

/* ---- floating point settings ---- */
EXTERN double_VAR (tosp_threshold_bias1, 0,
"how far between kern and space?");
EXTERN double_VAR (tosp_threshold_bias2, 0,
"how far between kern and space?");
EXTERN double_VAR (tosp_narrow_fraction, 0.3, "Fract of xheight for narrow");
EXTERN double_VAR (tosp_narrow_aspect_ratio, 0.48,
"narrow if w/h less than this");
EXTERN double_VAR (tosp_wide_fraction, 0.52, "Fract of xheight for wide");
EXTERN double_VAR (tosp_wide_aspect_ratio, 0.0, "wide if w/h less than this");
EXTERN double_VAR (tosp_fuzzy_space_factor, 0.6,
"Fract of xheight for fuzz sp");
EXTERN double_VAR (tosp_fuzzy_space_factor1, 0.5,
"Fract of xheight for fuzz sp");
EXTERN double_VAR (tosp_fuzzy_space_factor2, 0.72,
"Fract of xheight for fuzz sp");
EXTERN double_VAR (tosp_gap_factor, 0.83, "gap ratio to flip sp->kern");
EXTERN double_VAR (tosp_kern_gap_factor1, 2.0, "gap ratio to flip kern->sp");
EXTERN double_VAR (tosp_kern_gap_factor2, 1.3, "gap ratio to flip kern->sp");
EXTERN double_VAR (tosp_kern_gap_factor3, 2.5, "gap ratio to flip kern->sp");
EXTERN double_VAR (tosp_ignore_big_gaps, -1, "xht multiplier");
EXTERN double_VAR (tosp_ignore_very_big_gaps, 3.5, "xht multiplier");
EXTERN double_VAR (tosp_rep_space, 1.6, "rep gap multiplier for space");
EXTERN double_VAR (tosp_enough_small_gaps, 0.65,
"Fract of kerns reqd for isolated row stats");
EXTERN double_VAR (tosp_table_kn_sp_ratio, 2.25,
"Min difference of kn & sp in table");
EXTERN double_VAR (tosp_table_xht_sp_ratio, 0.33,
"Expect spaces bigger than this");
EXTERN double_VAR (tosp_table_fuzzy_kn_sp_ratio, 3.0,
"Fuzzy if less than this");
EXTERN double_VAR (tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg");
EXTERN double_VAR (tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg");
EXTERN double_VAR (tosp_min_sane_kn_sp, 1.5,
"Dont trust spaces less than this time kn");
EXTERN double_VAR (tosp_init_guess_kn_mult, 2.2,
"Thresh guess - mult kn by this");
EXTERN double_VAR (tosp_init_guess_xht_mult, 0.28,
"Thresh guess - mult xht by this");
EXTERN double_VAR (tosp_max_sane_kn_thresh, 5.0,
"Multiplier on kn to limit thresh");
EXTERN double_VAR (tosp_flip_caution, 0.0,
"Dont autoflip kn to sp when large separation");
EXTERN double_VAR (tosp_large_kerning, 0.19,
"Limit use of xht gap with large kns");
EXTERN double_VAR (tosp_dont_fool_with_small_kerns, -1,
"Limit use of xht gap with odd small kns");
EXTERN double_VAR (tosp_near_lh_edge, 0,
"Dont reduce box if the top left is non blank");
EXTERN double_VAR (tosp_silly_kn_sp_gap, 0.2,
"Dont let sp minus kn get too small");
EXTERN double_VAR (tosp_pass_wide_fuzz_sp_to_context, 0.75,
"How wide fuzzies need context");

#define MAXSPACING      128      /*max expected spacing in pix */

/**********************************************************************
 * to_spacing
 *
 * Compute fuzzy word spacing thresholds for each row.
 * I.e. set :   max_nonspace
 *              space_threshold
 *              min_space
 *              kern_size
 *              space_size     for each row.
* ONLY FOR PROPORTIONAL BLOCKS - FIXED PITCH IS ASSUMED ALREADY DONE **********************************************************************/void to_spacing(                       //set spacing                ICOORD page_tr,        //topright of page                TO_BLOCK_LIST *blocks  //blocks on page               ) {  TO_BLOCK_IT block_it;          //iterator  TO_BLOCK *block;               //current block;  TO_ROW_IT row_it;              //row iterator  TO_ROW *row;                   //current row  int block_index;               //block number  int row_index;                 //row number  INT16 block_space_gap_width;   //Estimated width of    real spaces for whole block                                 //Estimate width ofnon space gaps for whole block  INT16 block_non_space_gap_width;                                 //Old fixed/prop result  BOOL8 old_text_ord_proportional;  GAPMAP *gapmap = NULL;         //map of big vert gaps in blk  block_it.set_to_list (blocks);  block_index = 1;  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();  block_it.forward ()) {    block = block_it.data ();    gapmap = new GAPMAP (block);    block_spacing_stats(block,                        gapmap,                        old_text_ord_proportional,                        block_space_gap_width,                        block_non_space_gap_width);    row_it.set_to_list (block->get_rows ());    row_index = 1;    for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {      row = row_it.data ();      if ((row->pitch_decision == PITCH_DEF_PROP) ||      (row->pitch_decision == PITCH_CORR_PROP)) {        if ((tosp_debug_level > 0) && !old_text_ord_proportional)          tprintf ("Block %d Row %d: Now Proportional\n",            block_index, row_index);        row_spacing_stats(row,                          gapmap,                          block_index,                          row_index,                          block_space_gap_width,                          
block_non_space_gap_width);      }      else {        if ((tosp_debug_level > 0) && old_text_ord_proportional)          tprintf            ("Block %d Row %d: Now Fixed Pitch Decision:%d fp flag:%f\n",            block_index, row_index, row->pitch_decision,            row->fixed_pitch);      }#ifndef GRAPHICS_DISABLED      if (textord_show_initial_words)        plot_word_decisions (to_win, (INT16) row->fixed_pitch, row);#endif      row_index++;    }    delete gapmap;    block_index++;  }}/************************************************************************* * block_spacing_stats() *************************************************************************/void block_spacing_stats(                                  //DEBUG USE ONLY                         TO_BLOCK *block,                         GAPMAP *gapmap,                         BOOL8 &old_text_ord_proportional,                         INT16 &block_space_gap_width,     //resulting estimate                         INT16 &block_non_space_gap_width  //resulting estimate                        ) {  TO_ROW_IT row_it;              //row iterator  TO_ROW *row;                   //current row  BLOBNBOX_IT blob_it;           //iterator  STATS centre_to_centre_stats (0, MAXSPACING);  //DEBUG USE ONLY  STATS all_gap_stats (0, MAXSPACING);  STATS space_gap_stats (0, MAXSPACING);  INT16 minwidth = MAX_INT16;    //narrowest blob  BOX blob_box;  BOX prev_blob_box;  INT16 centre_to_centre;  INT16 gap_width;  float real_space_threshold;  float iqr_centre_to_centre;    //DEBUG USE ONLY  float iqr_all_gap_stats;       //DEBUG USE ONLY  INT32 end_of_row;  INT32 row_length;  row_it.set_to_list (block->get_rows ());  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {    row = row_it.data ();    if (!row->blob_list ()->empty () &&      (!tosp_only_use_prop_rows ||      (row->pitch_decision == PITCH_DEF_PROP) ||    (row->pitch_decision == PITCH_CORR_PROP))) {      blob_it.set_to_list (row->blob_list ());      
blob_it.mark_cycle_pt ();      end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();      if (tosp_use_pre_chopping)        blob_box = box_next_pre_chopped (&blob_it);      else if (tosp_stats_use_xht_gaps)        blob_box = reduced_box_next (row, &blob_it);      else        blob_box = box_next (&blob_it);      row_length = end_of_row - blob_box.left ();      if (blob_box.width () < minwidth)        minwidth = blob_box.width ();      prev_blob_box = blob_box;      while (!blob_it.cycled_list ()) {        if (tosp_use_pre_chopping)          blob_box = box_next_pre_chopped (&blob_it);        else if (tosp_stats_use_xht_gaps)          blob_box = reduced_box_next (row, &blob_it);        else          blob_box = box_next (&blob_it);        if (blob_box.width () < minwidth)          minwidth = blob_box.width ();        gap_width = blob_box.left () - prev_blob_box.right ();        if (!ignore_big_gap (row, row_length, gapmap,        prev_blob_box.right (), blob_box.left ())) {          all_gap_stats.add (gap_width, 1);          centre_to_centre = (blob_box.left () + blob_box.right () -            (prev_blob_box.left () +            prev_blob_box.right ())) / 2;          //DEBUG          centre_to_centre_stats.add (centre_to_centre, 1);          // DEBUG        }        prev_blob_box = blob_box;      }    }  }                                 //Inadequate samples  if (all_gap_stats.get_total () <= 1) {    block_non_space_gap_width = minwidth;    block_space_gap_width = -1;  //No est. space width                                 //DEBUG    old_text_ord_proportional = TRUE;  }  else {    /* For debug only ..... 
*/    iqr_centre_to_centre = centre_to_centre_stats.ile (0.75) -      centre_to_centre_stats.ile (0.25);    iqr_all_gap_stats = all_gap_stats.ile (0.75) - all_gap_stats.ile (0.25);    old_text_ord_proportional =      iqr_centre_to_centre * 2 > iqr_all_gap_stats;    /* .......For debug only */    /*    The median of the gaps is used as an estimate of the NON-SPACE gap width.    This RELIES on the assumption that there are more gaps WITHIN words than    BETWEEN words in a block    Now try to estimate the width of a real space for all real spaces in the    block. Do this by using a crude threshold to ignore "narrow" gaps, then    find the median of the "wide" gaps and use this.    */    block_non_space_gap_width = (INT16) floor (all_gap_stats.median ());    // median gap    row_it.set_to_list (block->get_rows ());    for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {      row = row_it.data ();      if (!row->blob_list ()->empty () &&        (!tosp_only_use_prop_rows ||        (row->pitch_decision == PITCH_DEF_PROP) ||      (row->pitch_decision == PITCH_CORR_PROP))) {        real_space_threshold =          MAX (tosp_init_guess_kn_mult * block_non_space_gap_width,          tosp_init_guess_xht_mult * row->xheight);        blob_it.set_to_list (row->blob_list ());        blob_it.mark_cycle_pt ();        end_of_row =          blob_it.data_relative (-1)->bounding_box ().right ();        if (tosp_use_pre_chopping)          blob_box = box_next_pre_chopped (&blob_it);        else if (tosp_stats_use_xht_gaps)          blob_box = reduced_box_next (row, &blob_it);        else          blob_box = box_next (&blob_it);        row_length = blob_box.left () - end_of_row;        prev_blob_box = blob_box;        while (!blob_it.cycled_list ()) {          if (tosp_use_pre_chopping)            blob_box = box_next_pre_chopped (&blob_it);          else if (tosp_stats_use_xht_gaps)            blob_box = reduced_box_next (row, &blob_it);          else            
blob_box = box_next (&blob_it);          gap_width = blob_box.left () - prev_blob_box.right ();          if ((gap_width > real_space_threshold) &&            !ignore_big_gap (row, row_length, gapmap,            prev_blob_box.right (),          blob_box.left ())) {            /*            If tosp_use_cert_spaces is enabled, the estimate of the space gap is            restricted to obvious spaces - those wider than half the xht or those            with wide blobs on both sides - i.e not things that are suspect 1's or            punctiation that is sometimes widely spaced.            */            if (!tosp_block_use_cert_spaces ||              (gap_width >              tosp_fuzzy_space_factor2 * row->xheight)              ||              ((gap_width >              tosp_fuzzy_space_factor1 * row->xheight)              && (!tosp_narrow_blobs_not_cert              || (!narrow_blob (row, prev_blob_box)              && !narrow_blob (row, blob_box))))              || (wide_blob (row, prev_blob_box)              && wide_blob (row, blob_box)))              space_gap_stats.add (gap_width, 1);          }          prev_blob_box = blob_box;        }      }    }                                 //Inadequate samples    if (space_gap_stats.get_total () <= 2)      block_space_gap_width = -1;//No est. space width    else      block_space_gap_width =        MAX ((INT16) floor (space_gap_stats.median ()),        3 * block_non_space_gap_width);  }}/************************************************************************* * row_spacing_stats() * Set values for min_space, max_non_space based on row stats only * If failure - return 0 values. 
*************************************************************************/void row_spacing_stats(                                 //estimate for block                       TO_ROW *row,                       GAPMAP *gapmap,                       INT16 block_idx,                       INT16 row_idx,                       INT16 block_space_gap_width,                       INT16 block_non_space_gap_width  //estimate for block                      ) {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -