⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 topitch.cpp

📁 一OCR的相关资料。希望对研究OCR的朋友有所帮助。
💻 CPP
📖 第 1 页 / 共 5 页
字号:
/**********************************************************************
 * File:        topitch.cpp  (Formerly to_pitch.c)
 * Description: Code to determine fixed pitchness and the pitch if fixed.
 * Author:		Ray Smith
 * Created:		Tue Aug 24 16:57:29 BST 1993
 *
 * (C) Copyright 1993, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

#include "mfcpch.h"
#ifdef __UNIX__
#include          <assert.h>
#endif
#include          "stderr.h"
#include          "blobbox.h"
#include          "lmedsq.h"
#include          "statistc.h"
#include          "drawtord.h"
#include          "makerow.h"
#include          "pitsync1.h"
#include          "pithsync.h"
#include          "blobcmpl.h"
#include          "tovars.h"
#include          "wordseg.h"
#include          "topitch.h"
#include          "secname.h"

#define EXTERN

EXTERN BOOL_VAR (textord_all_prop, FALSE, "All doc is proportial text");
EXTERN BOOL_VAR (textord_debug_pitch_test, FALSE,
"Debug on fixed pitch test");
EXTERN BOOL_VAR (textord_disable_pitch_test, FALSE,
"Turn off dp fixed pitch algorithm");
EXTERN BOOL_VAR (textord_fast_pitch_test, FALSE,
"Do even faster pitch algorithm");
EXTERN BOOL_VAR (textord_debug_pitch_metric, FALSE,
"Write full metric stuff");
EXTERN BOOL_VAR (textord_show_row_cuts, FALSE, "Draw row-level cuts");
EXTERN BOOL_VAR (textord_show_page_cuts, FALSE, "Draw page-level cuts");
EXTERN BOOL_VAR (textord_pitch_cheat, FALSE,
"Use correct answer for fixed/prop");
EXTERN BOOL_VAR (textord_blockndoc_fixed, FALSE,
"Attempt whole doc/block fixed pitch");
EXTERN double_VAR (textord_projection_scale, 0.200, "Ding rate for mid-cuts");
EXTERN double_VAR (textord_balance_factor, 1.0,
"Ding rate for unbalanced char cells");
EXTERN double_VAR (textord_repch_width_variance, 0.2,
"Max width change of gap/blob");

#define FIXED_WIDTH_MULTIPLE  5
#define BLOCK_STATS_CLUSTERS  10
#define MAX_ALLOWED_PITCH 100    //max pixel pitch.

/**********************************************************************
 * compute_fixed_pitch
 *
 * Decide whether each row is fixed pitch individually.
 * Correlate definite and uncertain results to obtain an individual
 * result for each row in the TO_ROW class.
 *
 * The work is done in three passes over port_blocks:
 *   1. compute_block_pitch is called on every block.
 *   2. try_doc_fixed is tried on the whole list; only if it returns
 *      false is each block given to try_block_fixed and, when that
 *      also returns false, to try_rows_fixed.
 *   3. fix_row_pitch is called on every row of every block, passing
 *      1-based row and block numbers.
 *
 * When testing_on is set and one of the debug/override flags tested
 * below is on, "Corr:" and the block counts are printed per block.
 * Unless GRAPHICS_DISABLED is defined, the to_win window is created
 * on entry (if it is still NO_WINDOW) and overlap_picture_ops(TRUE)
 * is called on exit, both only when textord_show_initial_words and
 * testing_on are set.
 **********************************************************************/

void compute_fixed_pitch(                             //determine pitch
                         ICOORD page_tr,              //top right
                         TO_BLOCK_LIST *port_blocks,  //input list
                         float gradient,              //page skew
                         FCOORD rotation,             //for drawing
                         BOOL8 testing_on             //correct orientation
                        ) {
  TO_BLOCK_IT block_it;          //iterator
  TO_BLOCK *block;               //current block;
  TO_ROW_IT row_it;              //row iterator
  TO_ROW *row;                   //current row
  int block_index;               //block number
  int row_index;                 //row number

#ifndef GRAPHICS_DISABLED
  if (textord_show_initial_words && testing_on) {
    if (to_win == NO_WINDOW)
      create_to_win(page_tr);
  }
#endif

  //pass 1: per-block pitch computation
  block_it.set_to_list (port_blocks);
  block_index = 1;
  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
  block_it.forward ()) {
    block = block_it.data ();
    compute_block_pitch(block, rotation, block_index, testing_on);
    block_index++;
  }

  //pass 2: widest scope first - document, then block, then rows
  if (!try_doc_fixed (page_tr, port_blocks, gradient)) {
    block_index = 1;
    for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
    block_it.forward ()) {
      block = block_it.data ();
      if (!try_block_fixed (block, block_index))
        try_rows_fixed(block, block_index, testing_on);
      block_index++;
    }
  }

  //pass 3: settle a decision for every row
  block_index = 1;
  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
  block_it.forward ()) {
    block = block_it.data ();
    row_it.set_to_list (block->get_rows ());
    row_index = 1;
    for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
      row = row_it.data ();
      fix_row_pitch(row, block, port_blocks, row_index, block_index);
      row_index++;
    }
    //&& binds tighter than ||; the grouping is spelled out explicitly
    if (testing_on
      && ((textord_debug_pitch_test
      && block->block->text_region () != NULL)
      || textord_blocksall_fixed || textord_blocksall_prop)) {
      tprintf ("Corr:");
      print_block_counts(block, block_index);
    }
    block_index++;
  }

#ifndef GRAPHICS_DISABLED
  if (textord_show_initial_words && testing_on) {
    overlap_picture_ops(TRUE);
  }
#endif
}


/**********************************************************************
 * fix_row_pitch
 *
 * Get a pitch_decision for this row by voting among similar rows in the
 * block, then similar rows over all the page, or any other rows at all.
**********************************************************************/void fix_row_pitch(                        //get some value                   TO_ROW *bad_row,        //row to fix                   TO_BLOCK *bad_block,    //block of bad_row                   TO_BLOCK_LIST *blocks,  //blocks to scan                   INT32 row_target,       //number of row                   INT32 block_target      //number of block                  ) {  const char *res_string;        //decision on line  INT16 mid_cuts;  int block_votes;               //votes in block  int like_votes;                //votes over page  int other_votes;               //votes of unlike blocks  int block_index;               //number of block  int row_index;                 //number of row  int maxwidth;                  //max pitch  TO_BLOCK_IT block_it = blocks; //block iterator  TO_ROW_IT row_it;  TO_BLOCK *block;               //current block  TO_ROW *row;                   //current row  float sp_sd;                   //space deviation  STATS block_stats;             //pitches in block  STATS like_stats;              //pitches in page  block_votes = like_votes = other_votes = 0;  maxwidth = (INT32) ceil (bad_row->xheight * textord_words_maxspace);  if (bad_row->pitch_decision != PITCH_DEF_FIXED  && bad_row->pitch_decision != PITCH_DEF_PROP) {    block_stats.set_range (0, maxwidth);    like_stats.set_range (0, maxwidth);    block_index = 1;    for (block_it.mark_cycle_pt (); !block_it.cycled_list ();    block_it.forward ()) {      block = block_it.data ();      row_index = 1;      row_it.set_to_list (block->get_rows ());      for (row_it.mark_cycle_pt (); !row_it.cycled_list ();      row_it.forward ()) {        row = row_it.data ();        if (bad_row->all_caps          && row->xheight + row->ascrise          <          (bad_row->xheight + bad_row->ascrise) * (1 +          textord_pitch_rowsimilarity)          && row->xheight + row->ascrise >          (bad_row->xheight + bad_row->ascrise) * (1 
-          textord_pitch_rowsimilarity)          || !bad_row->all_caps          && row->xheight <          bad_row->xheight * (1 + textord_pitch_rowsimilarity)          && row->xheight >        bad_row->xheight * (1 - textord_pitch_rowsimilarity)) {          if (block_index == block_target) {            if (row->pitch_decision == PITCH_DEF_FIXED) {              block_votes += textord_words_veto_power;              block_stats.add ((INT32) row->fixed_pitch,                textord_words_veto_power);            }            else if (row->pitch_decision == PITCH_MAYBE_FIXED            || row->pitch_decision == PITCH_CORR_FIXED) {              block_votes++;              block_stats.add ((INT32) row->fixed_pitch, 1);            }            else if (row->pitch_decision == PITCH_DEF_PROP)              block_votes -= textord_words_veto_power;            else if (row->pitch_decision == PITCH_MAYBE_PROP              || row->pitch_decision == PITCH_CORR_PROP)              block_votes--;          }          else {            if (row->pitch_decision == PITCH_DEF_FIXED) {              like_votes += textord_words_veto_power;              like_stats.add ((INT32) row->fixed_pitch,                textord_words_veto_power);            }            else if (row->pitch_decision == PITCH_MAYBE_FIXED            || row->pitch_decision == PITCH_CORR_FIXED) {              like_votes++;              like_stats.add ((INT32) row->fixed_pitch, 1);            }            else if (row->pitch_decision == PITCH_DEF_PROP)              like_votes -= textord_words_veto_power;            else if (row->pitch_decision == PITCH_MAYBE_PROP              || row->pitch_decision == PITCH_CORR_PROP)              like_votes--;          }        }        else {          if (row->pitch_decision == PITCH_DEF_FIXED)            other_votes += textord_words_veto_power;          else if (row->pitch_decision == PITCH_MAYBE_FIXED            || row->pitch_decision == PITCH_CORR_FIXED)            other_votes++;          
else if (row->pitch_decision == PITCH_DEF_PROP)            other_votes -= textord_words_veto_power;          else if (row->pitch_decision == PITCH_MAYBE_PROP            || row->pitch_decision == PITCH_CORR_PROP)            other_votes--;        }        row_index++;      }      block_index++;    }    if (block_votes > textord_words_veto_power) {      bad_row->fixed_pitch = block_stats.ile (0.5);      bad_row->pitch_decision = PITCH_CORR_FIXED;    }    else if (block_votes <= textord_words_veto_power && like_votes > 0) {      bad_row->fixed_pitch = like_stats.ile (0.5);      bad_row->pitch_decision = PITCH_CORR_FIXED;    }    else {      bad_row->pitch_decision = PITCH_CORR_PROP;      #ifndef SECURE_NAMES      if (block_votes == 0 && like_votes == 0 && other_votes > 0        && (textord_debug_pitch_test || textord_debug_pitch_metric))        tprintf          ("Warning:row %d of block %d set prop with no like rows against trend\n",          row_target, block_target);      #endif    }  }  if (textord_debug_pitch_metric) {    tprintf (":b_votes=%d:l_votes=%d:o_votes=%d",      block_votes, like_votes, other_votes);    if (bad_row->pitch_decision == PITCH_CORR_PROP    || bad_row->pitch_decision == PITCH_DEF_PROP) {      res_string = bad_block->block->text_region () != NULL ?        (bad_block->block->text_region ()->        is_prop ()? "CP" : "WP") : "XP";    }    else {      res_string = bad_block->block->text_region () != NULL ?        (bad_block->block->text_region ()->        is_prop ()? "WF" : "CF") : "XF";    }    tprintf (":Blk=%d:Row=%d:%c:",      block_target, row_target,      bad_block->block->text_region () != NULL ?      (bad_block->block->text_region ()->      is_prop ()? 'P' : 'F') : 'X');    tprintf ("x=%g:asc=%g:corr_res=%s\n", bad_row->xheight,      bad_row->ascrise, res_string);  }  if (textord_pitch_cheat && bad_block->block->text_region () != NULL)    bad_row->pitch_decision =      bad_block->block->text_region ()->      is_prop ()? 
PITCH_CORR_PROP : PITCH_CORR_FIXED;  if (bad_row->pitch_decision == PITCH_CORR_FIXED) {    if (bad_row->fixed_pitch < textord_min_xheight) {      if (block_votes > 0)        bad_row->fixed_pitch = block_stats.ile (0.5);      else if (block_votes == 0 && like_votes > 0)        bad_row->fixed_pitch = like_stats.ile (0.5);      else {        tprintf          ("Warning:guessing pitch as xheight on row %d, block %d\n",          row_target, block_target);        bad_row->fixed_pitch = bad_row->xheight;      }    }    if (bad_row->fixed_pitch < textord_min_xheight)      bad_row->fixed_pitch = (float) textord_min_xheight;    bad_row->kern_size = bad_row->fixed_pitch / 4;    bad_row->min_space = (INT32) (bad_row->fixed_pitch * 0.6);    bad_row->max_nonspace = (INT32) (bad_row->fixed_pitch * 0.4);    bad_row->space_threshold =      (bad_row->min_space + bad_row->max_nonspace) / 2;    bad_row->space_size = bad_row->fixed_pitch;    if (bad_row->char_cells.empty ())      tune_row_pitch (bad_row, &bad_row->projection,        bad_row->projection_left, bad_row->projection_right,        (bad_row->fixed_pitch +        bad_row->max_nonspace * 3) / 4, bad_row->fixed_pitch,        sp_sd, mid_cuts, &bad_row->char_cells, FALSE);  }  else if (bad_row->pitch_decision == PITCH_CORR_PROP  || bad_row->pitch_decision == PITCH_DEF_PROP) {    bad_row->fixed_pitch = 0.0f;    bad_row->char_cells.clear ();  }}/********************************************************************** * compute_block_pitch * * Decide whether each block is fixed pitch individually. 
**********************************************************************/void compute_block_pitch(                    //process each block                         TO_BLOCK *block,    //input list                         FCOORD rotation,    //for drawing                         INT32 block_index,  //block number                         BOOL8 testing_on    //correct orientation                        ) {  BOX block_box;                 //bounding box  block_box = block->block->bounding_box ();  if (testing_on && textord_debug_pitch_test) {    tprintf ("Block %d at (%d,%d)->(%d,%d)\n",      block_index,      block_box.left (), block_box.bottom (),      block_box.right (), block_box.top ());  }  block->min_space = (INT32) floor (block->xheight    * textord_words_default_minspace);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -