⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 reject.cpp

📁 一OCR的相关资料。希望对研究OCR的朋友有所帮助。
💻 CPP
📖 第 1 页 / 共 4 页
字号:
/**********************************************************************
 * File:        reject.cpp  (Formerly reject.c)
 * Description: Rejection functions used in tessedit
 * Author:		Phil Cheatle
 * Created:		Wed Sep 23 16:50:21 BST 1992
 *
 * (C) Copyright 1992, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

#include "mfcpch.h"
#include          "tessvars.h"
#ifdef __UNIX__
#include          <assert.h>
#include          <errno.h>
#endif
#include          "scanutils.h"
#include          <ctype.h>
#include          <string.h>
//#include                                      "tessbox.h"
#include          "memry.h"
#include          "reject.h"
#include          "tfacep.h"
#include          "mainblk.h"
#include          "charcut.h"
#include          "imgs.h"
#include          "scaleimg.h"
#include          "control.h"
#include          "docqual.h"
#include          "secname.h"
/* #define SECURE_NAMES done in secnames.h when necessary */
//extern "C" {
#include          "callnet.h"
//}
#include          "notdll.h"

CLISTIZEH (STRING) CLISTIZE (STRING)

/*
  EXTERN is defined as empty for this file. Every EXTERN-prefixed macro
  invocation below introduces one tunable used by the rejection code.
  NOTE(review): the arguments read as (name, default value, help text), but
  the *_VAR macros are defined in project headers not visible here - confirm.
*/
#define EXTERN

/* Algorithm selection and general rejection switches */
EXTERN INT_VAR (tessedit_reject_mode, 0, "Rejection algorithm");
EXTERN INT_VAR (tessedit_ok_mode, 5, "Acceptance decision algorithm");
EXTERN BOOL_VAR (tessedit_use_nn, FALSE, "");
EXTERN BOOL_VAR (tessedit_rejection_debug, FALSE, "Adaption debug");
EXTERN BOOL_VAR (tessedit_rejection_stats, FALSE, "Show NN stats");
EXTERN BOOL_VAR (tessedit_flip_0O, TRUE, "Contextual 0O O0 flips");
EXTERN double_VAR (tessedit_lower_flip_hyphen, 1.5,
"Aspect ratio dot/hyphen test");
EXTERN double_VAR (tessedit_upper_flip_hyphen, 1.8,
"Aspect ratio dot/hyphen test");

/* 1/I/l conflict handling */
EXTERN BOOL_VAR (rej_trust_doc_dawg, FALSE,
"Use DOC dawg in 11l conf. detector");
EXTERN BOOL_VAR (rej_1Il_use_dict_word, FALSE, "Use dictword test");
EXTERN BOOL_VAR (rej_1Il_trust_permuter_type, TRUE, "Dont double check");
EXTERN BOOL_VAR (one_ell_conflict_default, TRUE, "one_ell_conflict default");

/* Neural net (NN) controls */
EXTERN BOOL_VAR (show_char_clipping, FALSE, "Show clip image window?");
EXTERN BOOL_VAR (nn_debug, FALSE, "NN DEBUGGING?");
EXTERN BOOL_VAR (nn_reject_debug, FALSE, "NN DEBUG each char?");
EXTERN BOOL_VAR (nn_lax, FALSE, "Use 2nd rate matches");
EXTERN BOOL_VAR (nn_double_check_dict, FALSE, "Double check");
EXTERN BOOL_VAR (nn_conf_double_check_dict, TRUE,
"Double check for confusions");
EXTERN BOOL_VAR (nn_conf_1Il, TRUE, "NN use 1Il conflicts");
EXTERN BOOL_VAR (nn_conf_Ss, TRUE, "NN use Ss conflicts");
EXTERN BOOL_VAR (nn_conf_hyphen, TRUE, "NN hyphen conflicts");
EXTERN BOOL_VAR (nn_conf_test_good_qual, FALSE, "NN dodgy 1Il cross check");
EXTERN BOOL_VAR (nn_conf_test_dict, TRUE, "NN dodgy 1Il cross check");
EXTERN BOOL_VAR (nn_conf_test_sensible, TRUE, "NN dodgy 1Il cross check");
EXTERN BOOL_VAR (nn_conf_strict_on_dodgy_chs, TRUE,
"Require stronger NN match");
EXTERN double_VAR (nn_dodgy_char_threshold, 0.99, "min accept score");
EXTERN INT_VAR (nn_conf_accept_level, 4, "NN accept dodgy 1Il matches? ");
EXTERN INT_VAR (nn_conf_initial_i_level, 3,
"NN accept initial Ii match level ");

/* Un-rejection limits and individual rejection controls */
EXTERN BOOL_VAR (no_unrej_dubious_chars, TRUE, "Dubious chars next to reject?");
EXTERN BOOL_VAR (no_unrej_no_alphanum_wds, TRUE, "Stop unrej of non A/N wds?");
EXTERN BOOL_VAR (no_unrej_1Il, FALSE, "Stop unrej of 1Ilchars?");
EXTERN BOOL_VAR (rej_use_tess_accepted, TRUE, "Individual rejection control");
EXTERN BOOL_VAR (rej_use_tess_blanks, TRUE, "Individual rejection control");
EXTERN BOOL_VAR (rej_use_good_perm, TRUE, "Individual rejection control");
EXTERN BOOL_VAR (rej_use_sensible_wd, FALSE, "Extend permuter check");
EXTERN BOOL_VAR (rej_alphas_in_number_perm, FALSE, "Extend permuter check");
EXTERN double_VAR (rej_whole_of_mostly_reject_word_fract, 0.85,
"if >this fract");
EXTERN INT_VAR (rej_mostly_reject_mode, 1,
"0-never, 1-afterNN, 2-after new xht");
EXTERN double_VAR (tessed_fullstop_aspect_ratio, 1.2,
"if >this fract then reject");

/* NN input image geometry */
EXTERN INT_VAR (net_image_width, 40, "NN input image width");
EXTERN INT_VAR (net_image_height, 36, "NN input image height");
EXTERN INT_VAR (net_image_x_height, 22, "NN input image x_height");
EXTERN INT_VAR (tessedit_image_border, 2, "Rej blbs near image edge limit");

/*
  Net input is assumed to have (net_image_width * net_image_height) input
  units of image pixels, followed by 0, 1, or N units representing the
  baseline position. 0 implies no baseline information. 1 implies a floating
  point value. N implies a "gauge" of N units. For any char an initial set
  of these are ON, the remainder OFF to indicate the "level" of the
  baseline.

  HOWEVER!!!  NOTE THAT EACH NEW INPUT LAYER FORMAT EXPECTS TO BE RUN WITH A
  DIFFERENT tessed/netmatch/nmatch.c MODULE.
- These are classic C modules  generated by aspirin with HARD CODED CONSTANTS*/EXTERNINT_VAR (net_bl_nodes, 20, "Number of baseline nodes");EXTERNdouble_VAR (nn_reject_threshold, 0.5, "NN min accept score");EXTERNdouble_VAR (nn_reject_head_and_shoulders, 0.6, "top scores sep factor");/* NOTE - ctoh doesn't handle "=" properly, hence \075 */EXTERNSTRING_VAR (ok_single_ch_non_alphanum_wds, "-?\075","Allow NN to unrej");EXTERNSTRING_VAR (ok_repeated_ch_non_alphanum_wds, "-?*\075","Allow NN to unrej");EXTERNSTRING_VAR (conflict_set_I_l_1, "Il1[]", "Il1 conflict set");EXTERNSTRING_VAR (conflict_set_S_s, "Ss$", "Ss conflict set");EXTERNSTRING_VAR (conflict_set_hyphen, "-_~", "hyphen conflict set");EXTERNSTRING_VAR (dubious_chars_left_of_reject, "!'+`()-./\\<>;:^_,~\"","Unreliable chars");EXTERNSTRING_VAR (dubious_chars_right_of_reject, "!'+`()-./\\<>;:^_,~\"","Unreliable chars");EXTERNINT_VAR (min_sane_x_ht_pixels, 8, "Reject any x-ht lt or eq than this");/************************************************************************* * set_done() * * Set the done flag based on the word acceptability criteria *************************************************************************/void set_done(  //set done flag              WERD_RES *word,              INT16 pass) {  /*  0: Original heuristic used in Tesseract and Ray's prototype Resaljet  */  if (tessedit_ok_mode == 0) {    /* NOTE - done even if word contains some or all spaces !!! 
*/    word->done = word->tess_accepted;  }  /*  1: Reject words containing blanks and on pass 1 reject I/l/1 conflicts  */  else if (tessedit_ok_mode == 1) {    word->done = word->tess_accepted &&      (strchr (word->best_choice->string ().string (), ' ') == NULL);    if (word->done && (pass == 1) && one_ell_conflict (word, FALSE))      word->done = FALSE;  }  /*  2: as 1 + only accept dict words or numerics in pass 1  */  else if (tessedit_ok_mode == 2) {    word->done = word->tess_accepted &&      (strchr (word->best_choice->string ().string (), ' ') == NULL);    if (word->done && (pass == 1) && one_ell_conflict (word, FALSE))      word->done = FALSE;    if (word->done &&      (pass == 1) &&      (word->best_choice->permuter () != SYSTEM_DAWG_PERM) &&      (word->best_choice->permuter () != FREQ_DAWG_PERM) &&      (word->best_choice->permuter () != USER_DAWG_PERM) &&    (word->best_choice->permuter () != NUMBER_PERM)) {      #ifndef SECURE_NAMES      if (tessedit_rejection_debug)        tprintf ("\nVETO Tess accepting poor word \"%s\"\n",          word->best_choice->string ().string ());      #endif      word->done = FALSE;    }  }  /*  3: as 2 + only accept dict words or numerics in pass 2 as well  */  else if (tessedit_ok_mode == 3) {    word->done = word->tess_accepted &&      (strchr (word->best_choice->string ().string (), ' ') == NULL);    if (word->done && (pass == 1) && one_ell_conflict (word, FALSE))      word->done = FALSE;    if (word->done &&      (word->best_choice->permuter () != SYSTEM_DAWG_PERM) &&      (word->best_choice->permuter () != FREQ_DAWG_PERM) &&      (word->best_choice->permuter () != USER_DAWG_PERM) &&    (word->best_choice->permuter () != NUMBER_PERM)) {      #ifndef SECURE_NAMES      if (tessedit_rejection_debug)        tprintf ("\nVETO Tess accepting poor word \"%s\"\n",          word->best_choice->string ().string ());      #endif      word->done = FALSE;    }  }  /*  4: as 2 + reject dict ambigs in pass 1  */  else if 
(tessedit_ok_mode == 4) {    word->done = word->tess_accepted &&      (strchr (word->best_choice->string ().string (), ' ') == NULL);    if (word->done && (pass == 1) && one_ell_conflict (word, FALSE))      word->done = FALSE;    if (word->done &&      (pass == 1) &&      ((word->best_choice->permuter () != SYSTEM_DAWG_PERM) &&      (word->best_choice->permuter () != FREQ_DAWG_PERM) &&      (word->best_choice->permuter () != USER_DAWG_PERM) &&      (word->best_choice->permuter () != NUMBER_PERM)) ||    (test_ambig_word (word))) {      #ifndef SECURE_NAMES      if (tessedit_rejection_debug)        tprintf ("\nVETO Tess accepting poor word \"%s\"\n",          word->best_choice->string ().string ());      #endif      word->done = FALSE;    }  }  /*  5: as 3 + reject dict ambigs in both passes  */  else if (tessedit_ok_mode == 5) {    word->done = word->tess_accepted &&      (strchr (word->best_choice->string ().string (), ' ') == NULL);    if (word->done && (pass == 1) && one_ell_conflict (word, FALSE))      word->done = FALSE;    if (word->done &&      ((word->best_choice->permuter () != SYSTEM_DAWG_PERM) &&      (word->best_choice->permuter () != FREQ_DAWG_PERM) &&      (word->best_choice->permuter () != USER_DAWG_PERM) &&      (word->best_choice->permuter () != NUMBER_PERM)) ||    (test_ambig_word (word))) {      #ifndef SECURE_NAMES      if (tessedit_rejection_debug)        tprintf ("\nVETO Tess accepting poor word \"%s\"\n",          word->best_choice->string ().string ());      #endif      word->done = FALSE;    }  }  else {    tprintf ("BAD tessedit_ok_mode\n");    err_exit();  }}/************************************************************************* * make_reject_map() * * Sets the done flag to indicate whether the resylt is acceptable. * * Sets a reject map for the word. 
*************************************************************************/void make_reject_map(            //make rej map for wd //detailed results                     WERD_RES *word,                     BLOB_CHOICE_LIST_CLIST *blob_choices,                     ROW *row,                     INT16 pass  //1st or 2nd?                    ) {  INT16 i;  flip_0O(word);  check_debug_pt (word, -1);     //For trap only  set_done(word, pass);  //Set acceptance  word->reject_map.initialise (word->best_choice->string ().length ());  reject_blanks(word);  /*  0: Rays original heuristic - the baseline  */  if (tessedit_reject_mode == 0) {    if (!word->done)      reject_poor_matches(word, blob_choices);  }  /*  5: Reject I/1/l from words where there is no strong contextual confirmation;    the whole of any unacceptable words (incl PERM rej of dubious 1/I/ls);    and the whole of any words which are very small  */  else if (tessedit_reject_mode == 5) {    if (bln_x_height / word->denorm.scale () <= min_sane_x_ht_pixels)      word->reject_map.rej_word_small_xht ();    else {      one_ell_conflict(word, TRUE);      /*        Originally the code here just used the done flag. Now I have duplicated        and unpacked the conditions for setting the done flag so that each        mechanism can be turned on or off independently. This works WITHOUT        affecting the done flag setting.      
*/      if (rej_use_tess_accepted && !word->tess_accepted)        word->reject_map.rej_word_not_tess_accepted ();      if (rej_use_tess_blanks &&        (strchr (word->best_choice->string ().string (), ' ') != NULL))        word->reject_map.rej_word_contains_blanks ();      if (rej_use_good_perm) {        if (((word->best_choice->permuter () == SYSTEM_DAWG_PERM) ||          (word->best_choice->permuter () == FREQ_DAWG_PERM) ||          (word->best_choice->permuter () == USER_DAWG_PERM)) &&          (!rej_use_sensible_wd ||          (acceptable_word_string          (word->best_choice->string ().string ()) !=        AC_UNACCEPTABLE))) {          //PASSED TEST        }        else if (word->best_choice->permuter () == NUMBER_PERM) {          if (rej_alphas_in_number_perm) {            for (i = 0; word->best_choice->string ()[i] != '\0';            i++) {              if (word->reject_map[i].accepted () &&                isalpha (word->best_choice->string ()[i]))                word->reject_map[i].setrej_bad_permuter ();              //rej alpha            }          }        }        else {          word->reject_map.rej_word_bad_permuter ();        }      }      /* Ambig word rejection was here once !!*/    }  }  else {    tprintf ("BAD tessedit_reject_mode\n");    err_exit();  }  if (tessedit_image_border > -1)    reject_edge_blobs(word);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -