⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 control.cpp

📁 一份OCR的相关资料，希望对研究OCR的朋友有所帮助。
💻 CPP
📖 第 1 页 / 共 4 页
字号:
/******************************************************************
 * File:        control.cpp  (Formerly control.c)
 * Description: Module-independent matcher controller.
 * Author:      Ray Smith
 * Created:     Thu Apr 23 11:09:58 BST 1992
 * ReHacked:    Tue Sep 22 08:42:49 BST 1992 Phil Cheatle
 *
 * (C) Copyright 1992, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

#include          "mfcpch.h"
#include          "mainblk.h"
#include          <string.h>
#include          <math.h>
#ifdef __UNIX__
#include          <assert.h>
#include          <unistd.h>
#include          <errno.h>
#endif
#include          <ctype.h>
#include          "ocrclass.h"
#include          "werdit.h"
#include          "drawfx.h"
#include          "tfacep.h"
#include          "tessbox.h"
#include          "tessvars.h"
//#include                                      "fxtop.h"
#include          "pgedit.h"
#include          "reject.h"
#include          "adaptions.h"
#include          "charcut.h"
#include          "fixxht.h"
#include          "fixspace.h"
#include          "genblob.h"
#include          "docqual.h"
#include          "control.h"
#include          "secname.h"
#include          "output.h"
#include          "callcpp.h"
#include          "notdll.h"
#include "tordvars.h"
#include "adaptmatch.h"

#define MIN_FONT_ROW_COUNT  8
#define MAX_XHEIGHT_DIFF  3

/* EXTERN expands to nothing here, so every EXTERN xxx_VAR line below is
 * written exactly like the un-prefixed tessedit_small_match line. */
#define EXTERN
//extern "C" {
//EXTERN BOOL_VAR(tessedit_small_match,FALSE,"Use small matrix matcher");
//extern FILE*                          matcher_fp;
//extern FILE*                          correct_fp;
//};
BOOL_VAR (tessedit_small_match, FALSE, "Use small matrix matcher");
EXTERN BOOL_VAR (tessedit_print_text, FALSE, "Write text to stdout");
EXTERN BOOL_VAR (tessedit_draw_words, FALSE, "Draw source words");
EXTERN BOOL_VAR (tessedit_draw_outwords, FALSE, "Draw output words");
EXTERN BOOL_VAR (tessedit_training_wiseowl, FALSE, "Call WO to learn blobs");
EXTERN BOOL_VAR (tessedit_training_tess, FALSE, "Call Tess to learn blobs");
EXTERN BOOL_VAR (tessedit_matcher_is_wiseowl, FALSE, "Call WO to classify");
EXTERN BOOL_VAR (tessedit_dump_choices, FALSE, "Dump char choices");
EXTERN BOOL_VAR (tessedit_fix_fuzzy_spaces, TRUE,
"Try to improve fuzzy spaces");
EXTERN BOOL_VAR (tessedit_unrej_any_wd, FALSE,
"Dont bother with word plausibility");
EXTERN BOOL_VAR (tessedit_fix_hyphens, TRUE, "Crunch double hyphens?");
EXTERN BOOL_VAR (tessedit_reject_fullstops, FALSE, "Reject all fullstops");
EXTERN BOOL_VAR (tessedit_reject_suspect_fullstops, FALSE,
"Reject suspect fullstops");
EXTERN BOOL_VAR (tessedit_redo_xheight, TRUE, "Check/Correct x-height");
EXTERN BOOL_VAR (tessedit_cluster_adaption_on, TRUE,
"Do our own adaption - ems only");
EXTERN BOOL_VAR (tessedit_enable_doc_dict, TRUE,
"Add words to the document dictionary");
EXTERN BOOL_VAR (word_occ_first, FALSE, "Do word occ before re-est xht");
EXTERN BOOL_VAR (tessedit_debug_fonts, FALSE, "Output font info per char");
EXTERN BOOL_VAR (tessedit_xht_fiddles_on_done_wds, TRUE,
"Apply xht fix up even if done");
EXTERN BOOL_VAR (tessedit_xht_fiddles_on_no_rej_wds, TRUE,
"Apply xht fix up even in no rejects");
EXTERN INT_VAR (x_ht_check_word_occ, 2, "Check Char Block occupancy");
EXTERN INT_VAR (x_ht_stringency, 1, "How many confirmed a/n to accept?");
EXTERN BOOL_VAR (x_ht_quality_check, TRUE, "Dont allow worse quality");
EXTERN BOOL_VAR (tessedit_debug_block_rejection, FALSE,
"Block and Row stats");
EXTERN INT_VAR (debug_x_ht_level, 0, "Reestimate debug");
EXTERN BOOL_VAR (rej_use_xht, TRUE, "Individual rejection control");
EXTERN BOOL_VAR (debug_acceptable_wds, FALSE, "Dump word pass/fail chk");

EXTERN STRING_VAR (chs_leading_punct, "('`\"", "Leading punctuation");
EXTERN
STRING_VAR (chs_trailing_punct1, ").,;:?!", "1st Trailing punctuation");
EXTERN STRING_VAR (chs_trailing_punct2, ")'`\"",
"2nd Trailing punctuation");

EXTERN double_VAR (quality_rej_pc, 0.08,
"good_quality_doc lte rejection limit");
EXTERN double_VAR (quality_blob_pc, 0.0,
"good_quality_doc gte good blobs limit");
EXTERN double_VAR (quality_outline_pc, 1.0,
"good_quality_doc lte outline error limit");
EXTERN double_VAR (quality_char_pc, 0.95,
"good_quality_doc gte good char limit");
EXTERN INT_VAR (quality_min_initial_alphas_reqd, 2,
"alphas in a good word");

EXTERN BOOL_VAR (tessedit_tess_adapt_to_rejmap, FALSE,
"Use reject map to control Tesseract adaption");
EXTERN INT_VAR (tessedit_tess_adaption_mode, 0x27,
"Adaptation decision algorithm for tess");
EXTERN INT_VAR (tessedit_em_adaption_mode, 0,
"Adaptation decision algorithm for ems matrix matcher");
EXTERN BOOL_VAR (tessedit_cluster_adapt_after_pass1, FALSE,
"Adapt using clusterer after pass 1");
/* The two descriptions below used to repeat "after pass 1"; they now name
 * the pass that each variable's identifier refers to. */
EXTERN BOOL_VAR (tessedit_cluster_adapt_after_pass2, FALSE,
"Adapt using clusterer after pass 2");
EXTERN BOOL_VAR (tessedit_cluster_adapt_after_pass3, FALSE,
"Adapt using clusterer after pass 3");
EXTERN BOOL_VAR (tessedit_cluster_adapt_before_pass1, FALSE,
"Adapt using clusterer before Tess adaping during pass 1");
EXTERN INT_VAR (tessedit_cluster_adaption_mode, 0,
"Adaptation decision algorithm for matrix matcher");
EXTERN BOOL_VAR (tessedit_adaption_debug, FALSE,
"Generate and print debug information for adaption");
EXTERN BOOL_VAR (tessedit_minimal_rej_pass1, FALSE,
"Do minimal rejection on pass 1 output");
EXTERN BOOL_VAR (tessedit_test_adaption, FALSE,
"Test adaption criteria");
EXTERN BOOL_VAR (tessedit_global_adaption, FALSE,
"Adapt to all docs over time");
EXTERN BOOL_VAR (tessedit_matcher_log, FALSE, "Log matcher activity");
EXTERN INT_VAR (tessedit_test_adaption_mode, 3,
"Adaptation decision algorithm for tess");
EXTERN BOOL_VAR (test_pt, FALSE, "Test for point");
EXTERN double_VAR (test_pt_x, 99999.99, "xcoord");
EXTERN double_VAR (test_pt_y, 99999.99, "ycoord");

extern int MatcherDebugLevel;
extern int display_ratings;
extern int number_debug;
extern int adjust_debug;

/*extern "C" {
  extern int 	MatcherDebugLevel;
  extern int 	display_ratings;
  extern int	number_debug;
  extern int	adjust_debug;
//	extern int 	LearningDebugLevel;
 };*/

FILE *choice_file = NULL;        //Choice file ptr

CLISTIZEH (PBLOB) CLISTIZE (PBLOB)

/* DEBUGGING */
/**********************************************************************
 * blob_count
 *
 * Return the length of the blob list of word w, converted to INT16.
 * w is dereferenced unconditionally, so it must not be NULL.
 **********************************************************************/

INT16 blob_count(WERD *w) {
  return w->blob_list ()->length ();
}


/**********************************************************************
 * recog_pseudo_word
 *
 * Make a word from the selected blobs and run Tess on them.
 *
 * block_list and selection_box are forwarded to make_pseudo_word().  If it
 * returns a non-NULL word, that word is handed to recog_interactive()
 * together with the block and row pointers, and is then deleted here.
 * Nothing happens when make_pseudo_word() returns NULL.
 **********************************************************************/

void recog_pseudo_word(                         //recognize blobs
                       BLOCK_LIST *block_list,  //blocks to check
                       BOX &selection_box) {
  WERD *word;
  // Presumably filled in by make_pseudo_word() (out-parameters - TODO
  // confirm against its declaration).  Initialised so that an
  // indeterminate pointer can never be passed on.
  ROW *pseudo_row = NULL;        //row of word
  BLOCK *pseudo_block = NULL;    //block of word

  word = make_pseudo_word (block_list, selection_box,
    pseudo_block, pseudo_row);
  if (word != NULL) {
    recog_interactive(pseudo_block, pseudo_row, word);
    delete word;
  }
}


/**********************************************************************
 * recog_interactive
 *
 * Recognize a single word in interactive mode.
**********************************************************************/BOOL8 recog_interactive(            //recognize blobs                        BLOCK *,    //block                        ROW *row,   //row of word                        WERD *word  //word to recognize                       ) {  WERD_RES word_res(word);  INT16 char_qual;  INT16 good_char_qual;  classify_word_pass2(&word_res, row);  #ifndef SECURE_NAMES  if (tessedit_debug_quality_metrics) {    word_char_quality(&word_res, row, &char_qual, &good_char_qual);    tprintf      ("\n%d chars;  word_blob_quality: %d;  outline_errs: %d; char_quality: %d; good_char_quality: %d\n",      word_res.reject_map.length (), word_blob_quality (&word_res, row),      word_outline_errs (&word_res), char_qual, good_char_qual);  }  #endif  return TRUE;}/********************************************************************** * recog_all_words() * * Walk the current block list applying the specified word processor function * to all words **********************************************************************/void recog_all_words(                              //process words                     PAGE_RES *page_res,           //page structure                     volatile ETEXT_DESC *monitor  //progress monitor                    ) {                                 //reset page iterator  PAGE_RES_IT page_res_it(page_res);  INT16 chars_in_word;  INT16 rejects_in_word;  CHAR_SAMPLES_LIST em_clusters;  CHAR_SAMPLE_LIST ems_waiting;  CHAR_SAMPLES_LIST char_clusters;  CHAR_SAMPLE_LIST chars_waiting;  INT16 blob_quality = 0;  INT16 outline_errs = 0;  INT16 doc_blob_quality = 0;  INT16 doc_outline_errs = 0;  INT16 doc_char_quality = 0;  INT16 all_char_quality;  INT16 accepted_all_char_quality;  INT16 good_char_count = 0;  INT16 doc_good_char_quality = 0;  const STRING *wordstr;  const char *text;  int i;  BOOL8 good_quality_doc;  UINT8 permuter_type;  INT32 tess_adapt_mode = 0;  INT32 word_count;              //count of words in doc  
INT32 word_index;              //current word  if (tessedit_minimal_rej_pass1) {    tessedit_test_adaption.set_value (TRUE);    tessedit_minimal_rejection.set_value (TRUE);  }  if (tessedit_cluster_adapt_before_pass1) {    tess_adapt_mode = tessedit_tess_adaption_mode;    tessedit_tess_adaption_mode.set_value (0);    tessedit_tess_adapt_to_rejmap.set_value (TRUE);  }  /* Pass 1 */  word_count = 0;  if (monitor != NULL) {    monitor->ocr_alive = TRUE;    while (page_res_it.word () != NULL) {      word_count++;      page_res_it.forward ();    }    page_res_it.restart_page ();  }  else    word_count = 1;  word_index = 0;  int dict_words = 0;  while (page_res_it.word () != NULL) {    set_global_loc_code(LOC_PASS1);    word_index++;    if (monitor != NULL) {      monitor->ocr_alive = TRUE;      monitor->progress = 30 + 50 * word_index / word_count;      if ((monitor->end_time != 0 && clock() > monitor->end_time) ||          (monitor->cancel != NULL && (*monitor->cancel)(monitor->cancel_this,                                                         dict_words)))        return;    }    classify_word_pass1 (page_res_it.word (),      page_res_it.row ()->row, FALSE, NULL, NULL);    if (tessedit_test_adaption && !tessedit_minimal_rejection) {      if (!word_adaptable (page_res_it.word (),        tessedit_test_adaption_mode))        page_res_it.word ()->reject_map.rej_word_tess_failure ();      //FAKE PERM REJ      else {        wordstr = &(page_res_it.word ()->best_choice->string ());        /* Override rejection mechanisms for this word */        text = wordstr->string ();        for (i = 0; text[i] != '\0'; i++) {          if ((text[i] != ' ')            && page_res_it.word ()->reject_map[i].rejected ())            page_res_it.word ()->reject_map[i].              
setrej_minimal_rej_accept();        }      }    }    if ((tessedit_cluster_adapt_after_pass1      || tessedit_cluster_adapt_after_pass3      || tessedit_cluster_adapt_before_pass1)    && tessedit_cluster_adaption_mode != 0) {      collect_characters_for_adaption (page_res_it.word (),        &char_clusters, &chars_waiting);    }    // Count dict words.    if (page_res_it.word()->best_choice->permuter() == USER_DAWG_PERM)      ++dict_words;    page_res_it.forward ();  }  if (tessedit_cluster_adapt_before_pass1)    tessedit_tess_adaption_mode.set_value (tess_adapt_mode);  page_res_it.restart_page ();  while ((tessedit_cluster_adapt_after_pass1    || tessedit_cluster_adapt_before_pass1)  && page_res_it.word () != NULL) {    if (monitor != NULL)      monitor->ocr_alive = TRUE;    if (tessedit_cluster_adapt_after_pass1)      adapt_to_good_samples (page_res_it.word (),        &char_clusters, &chars_waiting);    else      classify_word_pass1 (page_res_it.word (),        page_res_it.row ()->row,        TRUE, &char_clusters, &chars_waiting);    page_res_it.forward ();  }  /* Pass 2 */  page_res_it.restart_page ();  word_index = 0;  while (!tessedit_test_adaption && page_res_it.word () != NULL) {    set_global_loc_code(LOC_PASS2);    word_index++;    if (monitor != NULL) {      monitor->ocr_alive = TRUE;      monitor->progress = 80 + 10 * word_index / word_count;      if ((monitor->end_time != 0 && clock() > monitor->end_time) ||          (monitor->cancel != NULL && (*monitor->cancel)(monitor->cancel_this,                                                         dict_words)))        return;    }    classify_word_pass2 (page_res_it.word (), page_res_it.row ()->row);    if (tessedit_em_adaption_mode > 0)      collect_ems_for_adaption (page_res_it.word (),        &em_clusters, &ems_waiting);    if (tessedit_cluster_adapt_after_pass2      && tessedit_cluster_adaption_mode != 0)      collect_characters_for_adaption (page_res_it.word (),        &char_clusters, &chars_waiting);    
page_res_it.forward ();  }  /* Another pass */  set_global_loc_code(LOC_FUZZY_SPACE);  if (!tessedit_test_adaption && tessedit_fix_fuzzy_spaces    && !tessedit_word_for_word)    fix_fuzzy_spaces(monitor, word_count, page_res);  if (!tessedit_test_adaption && tessedit_em_adaption_mode != 0)                                 // Initially ems only    print_em_stats(&em_clusters, &ems_waiting);  /* Pass 3 - used for checking confusion sets */  page_res_it.restart_page ();  word_index = 0;  while (!tessedit_test_adaption && page_res_it.word () != NULL) {    set_global_loc_code(LOC_MM_ADAPT);    word_index++;    if (monitor != NULL) {      monitor->ocr_alive = TRUE;      monitor->progress = 95 + 5 * word_index / word_count;    }    check_debug_pt (page_res_it.word (), 70);    /* Use good matches to sort out confusions */    if (tessedit_em_adaption_mode != 0)      adapt_to_good_ems (page_res_it.word (), &em_clusters, &ems_waiting);    if (tessedit_cluster_adapt_after_pass2      && tessedit_cluster_adaption_mode != 0)      adapt_to_good_samples (page_res_it.word (),        &char_clusters, &chars_waiting);    if (tessedit_reject_fullstops      && strchr (page_res_it.word ()->best_choice->string ().string (),      '.') != NULL)      reject_all_fullstops (page_res_it.word ());    else if (tessedit_reject_suspect_fullstops      && strchr (page_res_it.word ()->best_choice->string ().      string (), '.') != NULL)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -