📄 tordmain.cpp
字号:
/**********************************************************************
 * File:        tordmain.cpp  (Formerly textordp.c)
 * Description: C++ top level textord code.
 * Author:      Ray Smith
 * Created:     Tue Jul 28 17:12:33 BST 1992
 *
 * (C) Copyright 1992, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

#include "mfcpch.h"
#ifdef __UNIX__
#include <assert.h>
#endif
#include "stderr.h"
#include "globaloc.h"
#include "tessout.h"
#include "blread.h"
#include "blobbox.h"
//#include "lmedsq.h"
#include "edgblob.h"
//#include "adthsh.h"
#include "drawtord.h"
#include "makerow.h"
#include "wordseg.h"
#include "ocrclass.h"
#include "genblob.h"
#include "imgs.h"
//#include "bairdskw.h"
#include "tordmain.h"
#include "secname.h"

// Error code whose message warns that some blobs belong to no block.
const ERRCODE BLOCKLESS_BLOBS = "Warning:some blobs assigned to no block";

// The progress monitor pointer is defined in this file only when
// GRAPHICS_DISABLED is set; otherwise only the extern declaration
// further down is seen here.
#ifdef GRAPHICS_DISABLED
ETEXT_DESC *global_monitor = NULL;
#endif

// EXTERN expands to nothing, so each EXTERN *_VAR line below is a plain
// use of the corresponding *_VAR macro.
#define EXTERN

// ---- Display / control switches ------------------------------------
EXTERN BOOL_VAR (textord_show_blobs, FALSE, "Display unsorted blobs");
EXTERN BOOL_VAR (textord_show_boxes, FALSE, "Display unsorted blobs");
EXTERN BOOL_VAR (textord_new_initial_xheight, TRUE,
                 "Use test xheight mechanism");
EXTERN BOOL_VAR (textord_exit_after, FALSE, "Exit after completing textord");

// ---- Blob size and noise parameters --------------------------------
EXTERN INT_VAR (textord_max_noise_size, 7, "Pixel size of noise");
EXTERN double_VAR (textord_blob_size_bigile, 95,
                   "Percentile for large blobs");
EXTERN double_VAR (textord_noise_area_ratio, 0.7,
                   "Fraction of bounding box for noise");
EXTERN double_VAR (textord_blob_size_smallile, 20,
                   "Percentile for small blobs");
EXTERN double_VAR (textord_initialx_ile, 0.75,
                   "Ile of sizes for xheight guess");
EXTERN double_VAR (textord_initialasc_ile, 0.90,
                   "Ile of sizes for xheight guess");
EXTERN INT_VAR (textord_noise_sizefraction, 10,
                "Fraction of size for maxima");
EXTERN double_VAR (textord_noise_sizelimit, 0.5,
                   "Fraction of x for big t count");
EXTERN INT_VAR (textord_noise_translimit, 16, "Transitions for normal blob");
EXTERN double_VAR (textord_noise_normratio, 2.0,
                   "Dot to norm ratio for deletion");
EXTERN BOOL_VAR (textord_noise_rejwords, TRUE, "Reject noise-like words");
EXTERN BOOL_VAR (textord_noise_rejrows, TRUE, "Reject noise-like rows");
EXTERN double_VAR (textord_noise_syfract, 0.2,
                   "xh fract error for norm blobs");
EXTERN double_VAR (textord_noise_sxfract, 0.4,
                   "xh fract width error for norm blobs");
EXTERN INT_VAR (textord_noise_sncount, 1, "super norm blobs to save row");
EXTERN double_VAR (textord_noise_rowratio, 6.0,
                   "Dot to norm ratio for deletion");
EXTERN BOOL_VAR (textord_noise_debug, FALSE, "Debug row garbage detector");

// ---- Baseline shift parameters -------------------------------------
EXTERN double_VAR (textord_blshift_maxshift, 0.00, "Max baseline shift");
EXTERN double_VAR (textord_blshift_xfraction, 9.99,
                   "Min size of baseline shift");

// ---- Image file naming ---------------------------------------------
EXTERN STRING_EVAR (tessedit_image_ext, ".tif", "Externsion for image file");

// CPU time stamp, refreshed with clock() by edges_and_textord; absent in
// EMBEDDED builds.
#ifndef EMBEDDED
EXTERN clock_t previous_cpu;
#endif

extern BOOL_VAR_H (polygon_tess_approximation, TRUE,
                   "Do tess poly instead of grey scale");

#define MAX_NEAREST_DIST  600    //for block skew stats
// NOTE(review): as written this defines an empty macro named
// MAX_BLOB_TRANSITIONS100; presumably "MAX_BLOB_TRANSITIONS 100" was
// intended -- confirm against users of the name before changing it.
#define MAX_BLOB_TRANSITIONS100  //for noise stats

extern IMAGE page_image;         //must be defined somewhere
extern BOOL_VAR_H (interactive_mode, TRUE, "Run interactively?");
extern /*"C" */ ETEXT_DESC *global_monitor;  //progress monitor

/**********************************************************************
 * read_and_textord
 *
 * Read a file of blocks n blobs and textord them.
**********************************************************************/void read_and_textord( //read .pb file const char *filename, //.pb file BLOCK_LIST *blocks) { int c; //input character FILE *infp; //input file BLOCK *block; //current block BOX page_box; //bounding_box BLOCK_IT block_it = blocks; //iterator //different orientations TO_BLOCK_LIST land_blocks, port_blocks; infp = fopen (filename, "r"); if (infp == NULL) CANTOPENFILE.error ("read_and_textord", EXIT, filename); while (((c = fgetc (infp)) != EOF) && (ungetc (c, infp) != EOF)) { //get one block = BLOCK::de_serialise (infp); //add to list block_it.add_after_then_move (block); //find page size page_box += block->bounding_box (); } fclose(infp); assign_blobs_to_blocks2(blocks, &land_blocks, &port_blocks); filter_blobs (page_box.topright (), &port_blocks, !textord_test_landscape); filter_blobs (page_box.topright (), &land_blocks, textord_test_landscape); textord_page (page_box.topright (), blocks, &land_blocks, &port_blocks);}/********************************************************************** * edges_and_textord * * Read a file of blocks n blobs and textord them. 
**********************************************************************/void edges_and_textord( //read .pb file const char *filename, //.pb file BLOCK_LIST *blocks) { BLOCK *block; //current block char *lastdot; //of name STRING name = filename; //truncated name ICOORD page_tr; BOX page_box; //bounding_box PDBLK_CLIST pd_blocks; //copy of list BLOCK_IT block_it = blocks; //iterator PDBLK_C_IT pd_it = &pd_blocks; //iterator //different orientations TO_BLOCK_LIST land_blocks, port_blocks; IMAGE thresh_image; //thresholded lastdot = strrchr (name.string (), '.'); if (lastdot != NULL) *lastdot = '\0'; if (page_image.get_bpp () == 0) { name += tessedit_image_ext; if (page_image.read_header (name.string ())) CANTOPENFILE.error ("edges_and_textord", EXIT, name.string ()); if (page_image.read (0)) READFAILED.error ("edges_and_textord", EXIT, name.string ()); name = filename; lastdot = strrchr (name.string (), '.'); if (lastdot != NULL) *lastdot = '\0'; } page_tr = ICOORD (page_image.get_xsize (), page_image.get_ysize ()); read_pd_file (name, page_image.get_xsize (), page_image.get_ysize (), blocks); block_it.set_to_list (blocks); if (global_monitor != NULL) global_monitor->ocr_alive = TRUE; if (page_image.get_bpp () > 1) { set_global_loc_code(LOC_ADAPTIVE); for (block_it.mark_cycle_pt (); !block_it.cycled_list (); block_it.forward ()) { block = block_it.data (); pd_it.add_after_then_move (block); } // adaptive_threshold(&page_image,&pd_blocks,&thresh_image); set_global_loc_code(LOC_EDGE_PROG);#ifndef EMBEDDED previous_cpu = clock ();#endif for (block_it.mark_cycle_pt (); !block_it.cycled_list (); block_it.forward ()) { block = block_it.data (); if (!polygon_tess_approximation) invert_image(&page_image);#ifndef GRAPHICS_DISABLED extract_edges(NO_WINDOW, &page_image, &thresh_image, page_tr, block);#else extract_edges(&page_image, &thresh_image, page_tr, block);#endif page_box += block->bounding_box (); } page_image = thresh_image; //everyone else gets it } else { 
set_global_loc_code(LOC_EDGE_PROG); if (!page_image.white_high ()) invert_image(&page_image);#ifndef EMBEDDED previous_cpu = clock ();#endif for (block_it.mark_cycle_pt (); !block_it.cycled_list (); block_it.forward ()) { block = block_it.data ();#ifndef GRAPHICS_DISABLED extract_edges(NO_WINDOW, &page_image, &page_image, page_tr, block);#else extract_edges(&page_image, &page_image, page_tr, block);#endif page_box += block->bounding_box (); } } if (global_monitor != NULL) { global_monitor->ocr_alive = TRUE; global_monitor->progress = 10; } assign_blobs_to_blocks2(blocks, &land_blocks, &port_blocks); if (global_monitor != NULL) global_monitor->ocr_alive = TRUE; filter_blobs (page_box.topright (), &land_blocks, textord_test_landscape);#ifndef EMBEDDED previous_cpu = clock ();#endif filter_blobs (page_box.topright (), &port_blocks, !textord_test_landscape); if (global_monitor != NULL) global_monitor->ocr_alive = TRUE; textord_page (page_box.topright (), blocks, &land_blocks, &port_blocks);}/********************************************************************** * assign_blobs_to_blocks2 * * Make a list of TO_BLOCKs for portrait and landscape orientation. 
**********************************************************************/void assign_blobs_to_blocks2( //split into groups BLOCK_LIST *blocks, //blocks to process TO_BLOCK_LIST *land_blocks, //rotated for landscape TO_BLOCK_LIST *port_blocks //output list ) { BLOCK *block; //current block BLOBNBOX *newblob; //created blob C_BLOB *blob; //current blob BLOCK_IT block_it = blocks; C_BLOB_IT blob_it; //iterator BLOBNBOX_IT port_box_it; //iterator //destination iterator TO_BLOCK_IT port_block_it = port_blocks; TO_BLOCK *port_block; //created block for (block_it.mark_cycle_pt (); !block_it.cycled_list (); block_it.forward ()) { block = block_it.data (); blob_it.set_to_list (block->blob_list ()); //make one port_block = new TO_BLOCK (block); //make one port_box_it.set_to_list (&port_block->blobs); for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { blob = blob_it.extract (); //convert blob newblob = new BLOBNBOX (blob); //add to list port_box_it.add_after_then_move (newblob); //convert blob } port_block_it.add_after_then_move (port_block); }}/********************************************************************** * filter_blobs * * Sort the blobs into sizes in all the blocks for later work. **********************************************************************/void filter_blobs( //split into groups ICOORD page_tr, //top right TO_BLOCK_LIST *blocks, //output list BOOL8 testing_on //for plotting ) { TO_BLOCK_IT block_it = blocks; //destination iterator
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -