⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 tordmain.cpp

📁 一些OCR的相关资料，希望对研究OCR的朋友有所帮助。
💻 CPP
📖 第 1 页 / 共 3 页
字号:
/**********************************************************************
 * File:        tordmain.cpp  (Formerly textordp.c)
 * Description: C++ top level textord code.
 * Author:      Ray Smith
 * Created:     Tue Jul 28 17:12:33 BST 1992
 *
 * (C) Copyright 1992, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

#include "mfcpch.h"
#ifdef __UNIX__
#include <assert.h>
#endif
#include "stderr.h"
#include "globaloc.h"
#include "tessout.h"
#include "blread.h"
#include "blobbox.h"
//#include "lmedsq.h"
#include "edgblob.h"
//#include "adthsh.h"
#include "drawtord.h"
#include "makerow.h"
#include "wordseg.h"
#include "ocrclass.h"
#include "genblob.h"
#include "imgs.h"
//#include "bairdskw.h"
#include "tordmain.h"
#include "secname.h"

//Error code object initialised from its warning text.
const ERRCODE BLOCKLESS_BLOBS = "Warning:some blobs assigned to no block";

//This file supplies the global_monitor definition itself only when
//GRAPHICS_DISABLED is defined; an extern declaration follows further down.
#ifdef GRAPHICS_DISABLED
ETEXT_DESC *global_monitor = NULL;
#endif

//EXTERN expands to nothing here, so each EXTERN-prefixed line below is
//written without any storage-class keyword in this file.
#define EXTERN

//---- Display / flow control switches ----
EXTERN BOOL_VAR (textord_show_blobs, FALSE,
                 "Display unsorted blobs");
EXTERN BOOL_VAR (textord_show_boxes, FALSE,
                 "Display unsorted blobs");
EXTERN BOOL_VAR (textord_new_initial_xheight, TRUE,
                 "Use test xheight mechanism");
EXTERN BOOL_VAR (textord_exit_after, FALSE,
                 "Exit after completing textord");

//---- Blob size and noise parameters ----
EXTERN INT_VAR (textord_max_noise_size, 7,
                "Pixel size of noise");
EXTERN double_VAR (textord_blob_size_bigile, 95,
                   "Percentile for large blobs");
EXTERN double_VAR (textord_noise_area_ratio, 0.7,
                   "Fraction of bounding box for noise");
EXTERN double_VAR (textord_blob_size_smallile, 20,
                   "Percentile for small blobs");
EXTERN double_VAR (textord_initialx_ile, 0.75,
                   "Ile of sizes for xheight guess");
EXTERN double_VAR (textord_initialasc_ile, 0.90,
                   "Ile of sizes for xheight guess");
EXTERN INT_VAR (textord_noise_sizefraction, 10,
                "Fraction of size for maxima");
EXTERN double_VAR (textord_noise_sizelimit, 0.5,
                   "Fraction of x for big t count");
EXTERN INT_VAR (textord_noise_translimit, 16,
                "Transitions for normal blob");
EXTERN double_VAR (textord_noise_normratio, 2.0,
                   "Dot to norm ratio for deletion");
EXTERN BOOL_VAR (textord_noise_rejwords, TRUE,
                 "Reject noise-like words");
EXTERN BOOL_VAR (textord_noise_rejrows, TRUE,
                 "Reject noise-like rows");
EXTERN double_VAR (textord_noise_syfract, 0.2,
                   "xh fract error for norm blobs");
EXTERN double_VAR (textord_noise_sxfract, 0.4,
                   "xh fract width error for norm blobs");
EXTERN INT_VAR (textord_noise_sncount, 1,
                "super norm blobs to save row");
EXTERN double_VAR (textord_noise_rowratio, 6.0,
                   "Dot to norm ratio for deletion");
EXTERN BOOL_VAR (textord_noise_debug, FALSE,
                 "Debug row garbage detector");

//---- Baseline shift parameters ----
EXTERN double_VAR (textord_blshift_maxshift, 0.00,
                   "Max baseline shift");
EXTERN double_VAR (textord_blshift_xfraction, 9.99,
                   "Min size of baseline shift");

//---- Suffix appended to the base name when the page image is loaded ----
EXTERN STRING_EVAR (tessedit_image_ext, ".tif",
                    "Externsion for image file");

//Timestamp refreshed with clock() at several points in this file.
#ifndef EMBEDDED
EXTERN clock_t previous_cpu;
#endif

extern BOOL_VAR_H (polygon_tess_approximation, TRUE,
                   "Do tess poly instead of grey scale");

#define MAX_NEAREST_DIST  600    //for block skew stats
//NOTE(review): there is no space before "100", so the next line defines an
//empty macro named MAX_BLOB_TRANSITIONS100. Confirm whether
//"MAX_BLOB_TRANSITIONS 100" was the intent before relying on it.
#define MAX_BLOB_TRANSITIONS100  //for noise stats

extern IMAGE page_image;         //must be defined somewhere
extern BOOL_VAR_H (interactive_mode, TRUE, "Run interactively?");
extern /*"C" */ ETEXT_DESC *global_monitor;     //progress
monitor/********************************************************************** * read_and_textord * * Read a file of blocks n blobs and textord them. **********************************************************************/void read_and_textord(                       //read .pb file                      const char *filename,  //.pb file                      BLOCK_LIST *blocks) {  int c;                         //input character  FILE *infp;                    //input file  BLOCK *block;                  //current block  BOX page_box;                  //bounding_box  BLOCK_IT block_it = blocks;    //iterator                                 //different orientations  TO_BLOCK_LIST land_blocks, port_blocks;  infp = fopen (filename, "r");  if (infp == NULL)    CANTOPENFILE.error ("read_and_textord", EXIT, filename);  while (((c = fgetc (infp)) != EOF) && (ungetc (c, infp) != EOF)) {                                 //get one    block = BLOCK::de_serialise (infp);                                 //add to list    block_it.add_after_then_move (block);                                 //find page size    page_box += block->bounding_box ();  }  fclose(infp);  assign_blobs_to_blocks2(blocks, &land_blocks, &port_blocks);  filter_blobs (page_box.topright (), &port_blocks, !textord_test_landscape);  filter_blobs (page_box.topright (), &land_blocks, textord_test_landscape);  textord_page (page_box.topright (), blocks, &land_blocks, &port_blocks);}/********************************************************************** * edges_and_textord * * Read a file of blocks n blobs and textord them. 
**********************************************************************/void edges_and_textord(                       //read .pb file                       const char *filename,  //.pb file                       BLOCK_LIST *blocks) {  BLOCK *block;                  //current block  char *lastdot;                 //of name  STRING name = filename;        //truncated name  ICOORD page_tr;  BOX page_box;                  //bounding_box  PDBLK_CLIST pd_blocks;         //copy of list  BLOCK_IT block_it = blocks;    //iterator  PDBLK_C_IT pd_it = &pd_blocks; //iterator                                 //different orientations  TO_BLOCK_LIST land_blocks, port_blocks;  IMAGE thresh_image;            //thresholded  lastdot = strrchr (name.string (), '.');  if (lastdot != NULL)    *lastdot = '\0';  if (page_image.get_bpp () == 0) {    name += tessedit_image_ext;    if (page_image.read_header (name.string ()))      CANTOPENFILE.error ("edges_and_textord", EXIT, name.string ());    if (page_image.read (0))      READFAILED.error ("edges_and_textord", EXIT, name.string ());    name = filename;    lastdot = strrchr (name.string (), '.');    if (lastdot != NULL)      *lastdot = '\0';  }  page_tr = ICOORD (page_image.get_xsize (), page_image.get_ysize ());  read_pd_file (name, page_image.get_xsize (), page_image.get_ysize (),    blocks);  block_it.set_to_list (blocks);  if (global_monitor != NULL)    global_monitor->ocr_alive = TRUE;  if (page_image.get_bpp () > 1) {    set_global_loc_code(LOC_ADAPTIVE);    for (block_it.mark_cycle_pt (); !block_it.cycled_list ();    block_it.forward ()) {      block = block_it.data ();      pd_it.add_after_then_move (block);    }    //              adaptive_threshold(&page_image,&pd_blocks,&thresh_image);    set_global_loc_code(LOC_EDGE_PROG);#ifndef EMBEDDED    previous_cpu = clock ();#endif    for (block_it.mark_cycle_pt (); !block_it.cycled_list ();    block_it.forward ()) {      block = block_it.data ();      if (!polygon_tess_approximation)     
   invert_image(&page_image);#ifndef GRAPHICS_DISABLED      extract_edges(NO_WINDOW, &page_image, &thresh_image, page_tr, block);#else      extract_edges(&page_image, &thresh_image, page_tr, block);#endif      page_box += block->bounding_box ();    }    page_image = thresh_image;   //everyone else gets it  }  else {    set_global_loc_code(LOC_EDGE_PROG);    if (!page_image.white_high ())      invert_image(&page_image);#ifndef EMBEDDED    previous_cpu = clock ();#endif    for (block_it.mark_cycle_pt (); !block_it.cycled_list ();    block_it.forward ()) {      block = block_it.data ();#ifndef GRAPHICS_DISABLED      extract_edges(NO_WINDOW, &page_image, &page_image, page_tr, block);#else      extract_edges(&page_image, &page_image, page_tr, block);#endif      page_box += block->bounding_box ();    }  }  if (global_monitor != NULL) {    global_monitor->ocr_alive = TRUE;    global_monitor->progress = 10;  }  assign_blobs_to_blocks2(blocks, &land_blocks, &port_blocks);  if (global_monitor != NULL)    global_monitor->ocr_alive = TRUE;  filter_blobs (page_box.topright (), &land_blocks, textord_test_landscape);#ifndef EMBEDDED  previous_cpu = clock ();#endif  filter_blobs (page_box.topright (), &port_blocks, !textord_test_landscape);  if (global_monitor != NULL)    global_monitor->ocr_alive = TRUE;  textord_page (page_box.topright (), blocks, &land_blocks, &port_blocks);}/********************************************************************** * assign_blobs_to_blocks2 * * Make a list of TO_BLOCKs for portrait and landscape orientation. 
**********************************************************************/void assign_blobs_to_blocks2(                             //split into groups                             BLOCK_LIST *blocks,          //blocks to process                             TO_BLOCK_LIST *land_blocks,  //rotated for landscape                             TO_BLOCK_LIST *port_blocks   //output list                            ) {  BLOCK *block;                  //current block  BLOBNBOX *newblob;             //created blob  C_BLOB *blob;                  //current blob  BLOCK_IT block_it = blocks;  C_BLOB_IT blob_it;             //iterator  BLOBNBOX_IT port_box_it;       //iterator                                 //destination iterator  TO_BLOCK_IT port_block_it = port_blocks;  TO_BLOCK *port_block;          //created block  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();  block_it.forward ()) {    block = block_it.data ();    blob_it.set_to_list (block->blob_list ());                                 //make one    port_block = new TO_BLOCK (block);                                 //make one    port_box_it.set_to_list (&port_block->blobs);    for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();    blob_it.forward ()) {      blob = blob_it.extract ();                                 //convert blob      newblob = new BLOBNBOX (blob);                                 //add to list      port_box_it.add_after_then_move (newblob);                                 //convert blob    }    port_block_it.add_after_then_move (port_block);  }}/********************************************************************** * filter_blobs * * Sort the blobs into sizes in all the blocks for later work. 
**********************************************************************/void filter_blobs(                        //split into groups                  ICOORD page_tr,         //top right                  TO_BLOCK_LIST *blocks,  //output list                  BOOL8 testing_on        //for plotting                 ) {  TO_BLOCK_IT block_it = blocks; //destination iterator

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -