📄 tospace.cpp
字号:
#include "mfcpch.h"
#include "tovars.h"
#include "drawtord.h"
#include "tospace.h"
#include "ndminx.h"
#include "statistc.h"

#define EXTERN

EXTERN BOOL_VAR (tosp_old_to_method, FALSE, "Space stats use prechopping?");
EXTERN BOOL_VAR (tosp_only_use_prop_rows, TRUE,
"Block stats to use fixed pitch rows?");
EXTERN BOOL_VAR (tosp_use_pre_chopping, FALSE,
"Space stats use prechopping?");
EXTERN BOOL_VAR (tosp_old_to_bug_fix, FALSE, "Fix suspected bug in old code");
EXTERN BOOL_VAR (tosp_block_use_cert_spaces, TRUE,
"Only stat OBVIOUS spaces");
EXTERN BOOL_VAR (tosp_row_use_cert_spaces, TRUE, "Only stat OBVIOUS spaces");
EXTERN BOOL_VAR (tosp_narrow_blobs_not_cert, TRUE,
"Only stat OBVIOUS spaces");
EXTERN BOOL_VAR (tosp_row_use_cert_spaces1, TRUE, "Only stat OBVIOUS spaces");
EXTERN BOOL_VAR (tosp_recovery_isolated_row_stats, TRUE,
"Use row alone when inadequate cert spaces");
EXTERN BOOL_VAR (tosp_only_small_gaps_for_kern, FALSE, "Better guess");
EXTERN BOOL_VAR (tosp_all_flips_fuzzy, FALSE, "Pass ANY flip to context?");
EXTERN BOOL_VAR (tosp_fuzzy_limit_all, TRUE,
"Dont restrict kn->sp fuzzy limit to tables");
EXTERN BOOL_VAR (tosp_stats_use_xht_gaps, TRUE,
"Use within xht gap for wd breaks");
EXTERN BOOL_VAR (tosp_use_xht_gaps, TRUE, "Use within xht gap for wd breaks");
EXTERN BOOL_VAR (tosp_only_use_xht_gaps, FALSE,
"Only use within xht gap for wd breaks");
EXTERN BOOL_VAR (tosp_rule_9_test_punct, FALSE,
"Dont chng kn to space next to punct");
EXTERN BOOL_VAR (tosp_flip_fuzz_kn_to_sp, TRUE, "Default flip");
EXTERN BOOL_VAR (tosp_flip_fuzz_sp_to_kn, TRUE, "Default flip");
EXTERN BOOL_VAR (tosp_improve_thresh, FALSE, "Enable improvement heuristic");
EXTERN INT_VAR (tosp_debug_level, 0, "Debug data");
EXTERN INT_VAR (tosp_enough_space_samples_for_median, 3,
"or should we use mean");
EXTERN INT_VAR (tosp_redo_kern_limit, 10,
"No.samples reqd to reestimate for row");
EXTERN INT_VAR (tosp_few_samples, 40,
"No.gaps reqd with 1 large gap to treat as a table");
EXTERN INT_VAR (tosp_short_row, 20,
"No.gaps reqd with few cert spaces to use certs");
EXTERN INT_VAR (tosp_sanity_method, 1, "How to avoid being silly");
EXTERN double_VAR (tosp_threshold_bias1, 0,
"how far between kern and space?");
EXTERN double_VAR (tosp_threshold_bias2, 0,
"how far between kern and space?");
EXTERN double_VAR (tosp_narrow_fraction, 0.3, "Fract of xheight for narrow");
EXTERN double_VAR (tosp_narrow_aspect_ratio, 0.48,
"narrow if w/h less than this");
EXTERN double_VAR (tosp_wide_fraction, 0.52, "Fract of xheight for wide");
EXTERN double_VAR (tosp_wide_aspect_ratio, 0.0, "wide if w/h less than this");
EXTERN double_VAR (tosp_fuzzy_space_factor, 0.6,
"Fract of xheight for fuzz sp");
EXTERN double_VAR (tosp_fuzzy_space_factor1, 0.5,
"Fract of xheight for fuzz sp");
EXTERN double_VAR (tosp_fuzzy_space_factor2, 0.72,
"Fract of xheight for fuzz sp");
EXTERN double_VAR (tosp_gap_factor, 0.83, "gap ratio to flip sp->kern");
EXTERN double_VAR (tosp_kern_gap_factor1, 2.0, "gap ratio to flip kern->sp");
EXTERN double_VAR (tosp_kern_gap_factor2, 1.3, "gap ratio to flip kern->sp");
EXTERN double_VAR (tosp_kern_gap_factor3, 2.5, "gap ratio to flip kern->sp");
EXTERN double_VAR (tosp_ignore_big_gaps, -1, "xht multiplier");
EXTERN double_VAR (tosp_ignore_very_big_gaps, 3.5, "xht multiplier");
EXTERN double_VAR (tosp_rep_space, 1.6, "rep gap multiplier for space");
EXTERN double_VAR (tosp_enough_small_gaps, 0.65,
"Fract of kerns reqd for isolated row stats");
EXTERN double_VAR (tosp_table_kn_sp_ratio, 2.25,
"Min difference of kn & sp in table");
EXTERN double_VAR (tosp_table_xht_sp_ratio, 0.33,
"Expect spaces bigger than this");
EXTERN double_VAR (tosp_table_fuzzy_kn_sp_ratio, 3.0,
"Fuzzy if less than this");
EXTERN double_VAR (tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg");
EXTERN double_VAR (tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg");
EXTERN double_VAR (tosp_min_sane_kn_sp, 1.5,
"Dont trust spaces less than this time kn");
EXTERN double_VAR (tosp_init_guess_kn_mult, 2.2,
"Thresh guess - mult kn by this");
EXTERN double_VAR (tosp_init_guess_xht_mult, 0.28,
"Thresh guess - mult xht by this");
EXTERN double_VAR (tosp_max_sane_kn_thresh, 5.0,
"Multiplier on kn to limit thresh");
EXTERN double_VAR (tosp_flip_caution, 0.0,
"Dont autoflip kn to sp when large separation");
EXTERN double_VAR (tosp_large_kerning, 0.19,
"Limit use of xht gap with large kns");
EXTERN double_VAR (tosp_dont_fool_with_small_kerns, -1,
"Limit use of xht gap with odd small kns");
EXTERN double_VAR (tosp_near_lh_edge, 0,
"Dont reduce box if the top left is non blank");
EXTERN double_VAR (tosp_silly_kn_sp_gap, 0.2,
"Dont let sp minus kn get too small");
EXTERN double_VAR (tosp_pass_wide_fuzz_sp_to_context, 0.75,
"How wide fuzzies need context");

#define MAXSPACING      128      /*max expected spacing in pix */

/**********************************************************************
 * stats_box_next
 *
 * Fetch the next box from blob_it using the method selected for the
 * block statistics: pre-chopped boxes if tosp_use_pre_chopping, else
 * reduced boxes if tosp_stats_use_xht_gaps, else plain box_next.
 * The iterator is advanced by whichever helper is called.
 **********************************************************************/
static BOX stats_box_next(TO_ROW *row,          //row being scanned
                          BLOBNBOX_IT *blob_it  //iterator over row blobs
                         ) {
  if (tosp_use_pre_chopping)
    return box_next_pre_chopped (blob_it);
  if (tosp_stats_use_xht_gaps)
    return reduced_box_next (row, blob_it);
  return box_next (blob_it);
}

/**********************************************************************
 * row_used_for_block_stats
 *
 * TRUE if the row contributes to the block level gap statistics:
 * it must contain blobs and, when tosp_only_use_prop_rows is set,
 * its pitch decision must be PITCH_DEF_PROP or PITCH_CORR_PROP.
 **********************************************************************/
static BOOL8 row_used_for_block_stats(TO_ROW *row) {
  if (row->blob_list ()->empty ())
    return FALSE;
  return !tosp_only_use_prop_rows
    || (row->pitch_decision == PITCH_DEF_PROP)
    || (row->pitch_decision == PITCH_CORR_PROP);
}

/**********************************************************************
 * to_spacing
 *
 * Compute fuzzy word spacing thresholds for each row.
 * I.e. set :   max_nonspace
 *              space_threshold
 *              min_space
 *              kern_size
 *              space_size for each row.
 * ONLY FOR PROPORTIONAL BLOCKS - FIXED PITCH IS ASSUMED ALREADY DONE
 *
 * For every block a GAPMAP is built and block_spacing_stats() is run;
 * row_spacing_stats() is then called for each row whose pitch decision
 * is PITCH_DEF_PROP or PITCH_CORR_PROP. Other rows are only reported
 * (when tosp_debug_level > 0). page_tr is not used by this function.
 **********************************************************************/
void to_spacing(                       //set spacing
                ICOORD page_tr,        //topright of page
                TO_BLOCK_LIST *blocks  //blocks on page
               ) {
  TO_BLOCK_IT block_it;          //iterator
  TO_BLOCK *block;               //current block;
  TO_ROW_IT row_it;              //row iterator
  TO_ROW *row;                   //current row
  int block_index;               //block number
  int row_index;                 //row number
  INT16 block_space_gap_width;   //Estimated width of real spaces for whole block
                                 //Estimate width of non space gaps for whole block
  INT16 block_non_space_gap_width;
                                 //Old fixed/prop result
  BOOL8 old_text_ord_proportional;
  GAPMAP *gapmap = NULL;         //map of big vert gaps in blk

  block_it.set_to_list (blocks);
  block_index = 1;
  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
       block_it.forward ()) {
    block = block_it.data ();
    // The gap map lives only for the duration of this block.
    gapmap = new GAPMAP (block);
    block_spacing_stats(block,
                        gapmap,
                        old_text_ord_proportional,
                        block_space_gap_width,
                        block_non_space_gap_width);
    row_it.set_to_list (block->get_rows ());
    row_index = 1;
    for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
         row_it.forward ()) {
      row = row_it.data ();
      if ((row->pitch_decision == PITCH_DEF_PROP) ||
          (row->pitch_decision == PITCH_CORR_PROP)) {
        if ((tosp_debug_level > 0) && !old_text_ord_proportional)
          tprintf ("Block %d Row %d: Now Proportional\n",
                   block_index, row_index);
        row_spacing_stats(row,
                          gapmap,
                          block_index,
                          row_index,
                          block_space_gap_width,
                          block_non_space_gap_width);
      }
      else {
        if ((tosp_debug_level > 0) && old_text_ord_proportional)
          tprintf
            ("Block %d Row %d: Now Fixed Pitch Decision:%d fp flag:%f\n",
             block_index, row_index, row->pitch_decision,
             row->fixed_pitch);
      }
#ifndef GRAPHICS_DISABLED
      if (textord_show_initial_words)
        plot_word_decisions (to_win, (INT16) row->fixed_pitch, row);
#endif
      row_index++;
    }
    delete gapmap;
    block_index++;
  }
}

/*************************************************************************
 * block_spacing_stats()
 *
 * Estimate, for a whole block, the width of non-space gaps and the width
 * of real spaces, from the gaps between successive boxes in its rows.
 *
 * block                      block whose rows are scanned
 * gapmap                     passed through to ignore_big_gap()
 * old_text_ord_proportional  (out, debug) TRUE if too few gap samples,
 *                            else 2 * IQR(centre to centre) > IQR(gaps)
 * block_space_gap_width      (out) space width estimate, -1 if none
 * block_non_space_gap_width  (out) median gap, or the narrowest box
 *                            width when there are too few gap samples
 *************************************************************************/
void block_spacing_stats(                //DEBUG USE ONLY
                         TO_BLOCK *block,
                         GAPMAP *gapmap,
                         BOOL8 &old_text_ord_proportional,
                         INT16 &block_space_gap_width,     //resulting estimate
                         INT16 &block_non_space_gap_width  //resulting estimate
                        ) {
  TO_ROW_IT row_it;              //row iterator
  TO_ROW *row;                   //current row
  BLOBNBOX_IT blob_it;           //iterator

  STATS centre_to_centre_stats (0, MAXSPACING);
                                 //DEBUG USE ONLY
  STATS all_gap_stats (0, MAXSPACING);
  STATS space_gap_stats (0, MAXSPACING);
  INT16 minwidth = MAX_INT16;    //narrowest blob
  BOX blob_box;
  BOX prev_blob_box;
  INT16 centre_to_centre;
  INT16 gap_width;
  float real_space_threshold;
  float iqr_centre_to_centre;    //DEBUG USE ONLY
  float iqr_all_gap_stats;       //DEBUG USE ONLY
  INT32 end_of_row;
  INT32 row_length;

  /* Pass 1: collect every (non-ignored) gap and the narrowest box width. */
  row_it.set_to_list (block->get_rows ());
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    row = row_it.data ();
    if (!row_used_for_block_stats (row))
      continue;

    blob_it.set_to_list (row->blob_list ());
    blob_it.mark_cycle_pt ();
    // Right edge of the blob one step back from the iterator start.
    end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();
    blob_box = stats_box_next (row, &blob_it);
    row_length = end_of_row - blob_box.left ();
    if (blob_box.width () < minwidth)
      minwidth = blob_box.width ();
    prev_blob_box = blob_box;
    while (!blob_it.cycled_list ()) {
      blob_box = stats_box_next (row, &blob_it);
      if (blob_box.width () < minwidth)
        minwidth = blob_box.width ();
      gap_width = blob_box.left () - prev_blob_box.right ();
      if (!ignore_big_gap (row, row_length, gapmap,
                           prev_blob_box.right (), blob_box.left ())) {
        all_gap_stats.add (gap_width, 1);

        centre_to_centre = (blob_box.left () + blob_box.right () -
          (prev_blob_box.left () + prev_blob_box.right ())) / 2;
        //DEBUG
        centre_to_centre_stats.add (centre_to_centre, 1);
        // DEBUG
      }
      prev_blob_box = blob_box;
    }
  }

                                 //Inadequate samples
  if (all_gap_stats.get_total () <= 1) {
    block_non_space_gap_width = minwidth;
    block_space_gap_width = -1;  //No est. space width
                                 //DEBUG
    old_text_ord_proportional = TRUE;
    return;
  }

  /* For debug only ..... */
  iqr_centre_to_centre = centre_to_centre_stats.ile (0.75) -
    centre_to_centre_stats.ile (0.25);
  iqr_all_gap_stats = all_gap_stats.ile (0.75) - all_gap_stats.ile (0.25);
  old_text_ord_proportional =
    iqr_centre_to_centre * 2 > iqr_all_gap_stats;
  /* .......For debug only */

  /*
  The median of the gaps is used as an estimate of the NON-SPACE gap width.
  This RELIES on the assumption that there are more gaps WITHIN words than
  BETWEEN words in a block

  Now try to estimate the width of a real space for all real spaces in the
  block. Do this by using a crude threshold to ignore "narrow" gaps, then
  find the median of the "wide" gaps and use this.
  */
  block_non_space_gap_width = (INT16) floor (all_gap_stats.median ());
  // median gap

  /* Pass 2: collect only the gaps wide enough to be candidate spaces. */
  row_it.set_to_list (block->get_rows ());
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    row = row_it.data ();
    if (!row_used_for_block_stats (row))
      continue;

    real_space_threshold =
      MAX (tosp_init_guess_kn_mult * block_non_space_gap_width,
           tosp_init_guess_xht_mult * row->xheight);
    blob_it.set_to_list (row->blob_list ());
    blob_it.mark_cycle_pt ();
    end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();
    blob_box = stats_box_next (row, &blob_it);
    // Measured exactly as in pass 1. The previous code computed
    // blob_box.left () - end_of_row here, i.e. the negated value, so
    // ignore_big_gap() saw a different row length in the two passes.
    row_length = end_of_row - blob_box.left ();
    prev_blob_box = blob_box;
    while (!blob_it.cycled_list ()) {
      blob_box = stats_box_next (row, &blob_it);
      gap_width = blob_box.left () - prev_blob_box.right ();
      if ((gap_width > real_space_threshold) &&
          !ignore_big_gap (row, row_length, gapmap,
                           prev_blob_box.right (), blob_box.left ())) {
        /*
        If tosp_use_cert_spaces is enabled, the estimate of the space gap is
        restricted to obvious spaces - those wider than half the xht or those
        with wide blobs on both sides - i.e not things that are suspect 1's or
        punctuation that is sometimes widely spaced.
        */
        if (!tosp_block_use_cert_spaces ||
            (gap_width >
             tosp_fuzzy_space_factor2 * row->xheight)
            ||
            ((gap_width >
              tosp_fuzzy_space_factor1 * row->xheight)
             && (!tosp_narrow_blobs_not_cert
                 || (!narrow_blob (row, prev_blob_box)
                     && !narrow_blob (row, blob_box))))
            || (wide_blob (row, prev_blob_box)
                && wide_blob (row, blob_box)))
          space_gap_stats.add (gap_width, 1);
      }
      prev_blob_box = blob_box;
    }
  }

                                 //Inadequate samples
  if (space_gap_stats.get_total () <= 2)
    block_space_gap_width = -1;  //No est. space width
  else
    // Never report a space narrower than 3 times the non-space gap.
    block_space_gap_width =
      MAX ((INT16) floor (space_gap_stats.median ()),
           3 * block_non_space_gap_width);
}

/*************************************************************************
 * row_spacing_stats()
 * Set values for min_space, max_non_space based on row stats only
 * If failure - return 0 values.
 *************************************************************************/
void row_spacing_stats(                                 //estimate for block
                       TO_ROW *row,
                       GAPMAP *gapmap,
                       INT16 block_idx,
                       INT16 row_idx,
                       INT16 block_space_gap_width,
                       INT16 block_non_space_gap_width  //estimate for block
                      ) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -