⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cntraining.cpp

📁 一OCR的相关资料。希望对研究OCR的朋友有所帮助。
💻 CPP
📖 第 1 页 / 共 2 页
字号:
/******************************************************************************
**	Filename:	cnTraining.cpp
**	Purpose:	Generates a normproto and pffmtable.
**	Author:		Dan Johnson
**	Revision:	Christy Russon
**	History:     Fri Aug 18 08:53:50 1989, DSJ, Created.
**		     5/25/90, DSJ, Adapted to multiple feature types.
**				Tuesday, May 17, 1998 Changes made to make feature specific and
**				simplify structures. First step in simplifying training process.
**
**	(c) Copyright Hewlett-Packard Company, 1988.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
******************************************************************************/

/**----------------------------------------------------------------------------
					Include Files and Type Defines
----------------------------------------------------------------------------**/
#include "oldlist.h"
#include "efio.h"
#include "emalloc.h"
#include "featdefs.h"
#include "getopt.h"
#include "ocrfeatures.h"
#include "general.h"
#include "clusttool.h"
#include "cluster.h"
#include "name2char.h"

#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#define MAXNAMESIZE	80
#define MAX_NUM_SAMPLES	10000
#define PROGRAM_FEATURE_TYPE "cn"
#define MINSD (1.0f / 64.0f)

int	row_number;						/* cjn: fixes link problem */

/* A list of items that all share one text label. */
typedef struct
{
  char		*Label;
  LIST		List;
}
LABELEDLISTNODE, *LABELEDLIST;

/* Rounds x to the nearest multiple of frag.  Both arguments are
   parenthesized so that compound expressions expand correctly; each
   argument may still be evaluated more than once. */
#define round(x,frag) (floor((x)/(frag)+.5)*(frag))

/**----------------------------------------------------------------------------
					Public Function Prototypes
----------------------------------------------------------------------------**/
int main (
     int	argc,
     char	**argv);

/**----------------------------------------------------------------------------
					Private Function Prototypes
----------------------------------------------------------------------------**/
void ParseArguments(
     int	argc,
     char	**argv);

char *GetNextFilename ();

void ReadTrainingSamples (
     FILE	*File,
	 LIST* TrainingSamples);

LABELEDLIST FindList (
     LIST	List,
     char	*Label);

LABELEDLIST NewLabeledList (
     char	*Label);

void WriteTrainingSamples (
     char	*Directory,
     LIST	CharList);

void WriteNormProtos (
     char	*Directory,
     LIST	LabeledProtoList,
	 CLUSTERER *Clusterer);

void FreeTrainingSamples (
     LIST	CharList);

void FreeNormProtoList (
     LIST	CharList);

void FreeLabeledList (
     LABELEDLIST	LabeledList);

CLUSTERER *SetUpForClustering(
     LABELEDLIST	CharSample);
/*
PARAMDESC *ConvertToPARAMDESC(
	PARAM_DESC* Param_Desc,
	int N);
*/
void AddToNormProtosList(
	LIST* NormProtoList,
	LIST ProtoList,
	char* CharName);

void WriteProtos(
     FILE	*File,
     UINT16	N,
     LIST	ProtoList,
     BOOL8	WriteSigProtos,
     BOOL8	WriteInsigProtos);

int NumberOfProtos(
	LIST ProtoList,
    BOOL8	CountSigProtos,
    BOOL8	CountInsigProtos);

/**----------------------------------------------------------------------------
		  		Global Data Definitions and Declarations
----------------------------------------------------------------------------**/
static char FontName[MAXNAMESIZE];

/* globals used for parsing command line arguments */
static char	*Directory = NULL;
static int	MaxNumSamples = MAX_NUM_SAMPLES;
static int	Argc;
static char	**Argv;

/* globals used to control what information is saved in the output file */
static BOOL8		ShowAllSamples = FALSE;
static BOOL8		ShowSignificantProtos = TRUE;
static BOOL8		ShowInsignificantProtos = FALSE;

/* global variable to hold configuration parameters to control clustering */
//-M 0.025   -B 0.05   -I 0.8   -C 1e-3
static CLUSTERCONFIG	Config =
{
  elliptical, 0.025, 0.05, 0.8, 1e-3
};

static FLOAT32 RoundingAccuracy = 0.0;

/**----------------------------------------------------------------------------
							Public Code
----------------------------------------------------------------------------**/
/*---------------------------------------------------------------------------*/
int main (
     int	argc,
     char	**argv)
/*
**	Parameters:
**		argc	number of command line arguments
**		argv	array of command line arguments
**	Globals:
**		Directory	passed to WriteNormProtos
**		Config		clustering parameters passed to ClusterSamples
**	Operation:
**		Parses the command line, then reads every training page
**		named on it into one list of labeled character samples.
**		According to the original author the input text format is:
**
**			FontName CharName NumberOfFeatureTypes(N)
**			   FeatureTypeName1 NumberOfFeatures(M)
**			      Feature1
**			      ...
**			      FeatureM
**			   FeatureTypeName2 NumberOfFeatures(M)
**			      Feature1
**			      ...
**			      FeatureM
**			   ...
**			   FeatureTypeNameN NumberOfFeatures(M)
**			      Feature1
**			      ...
**			      FeatureM
**			FontName CharName ...
**
**		(the parsing itself is done by ReadTrainingSamples).
**		Each labeled character is then clustered with the current
**		Config, the resulting prototypes are collected per label,
**		and the collection is handed to WriteNormProtos.
**		The per-character sample files described by earlier
**		versions of this comment are no longer written: the call
**		to WriteTrainingSamples is commented out.
**	Return: 0
**	Exceptions: none
**	History: Fri Aug 18 08:56:17 1989, DSJ, Created.
*/
{
	char	*PageName;
	FILE	*TrainingPage;
	LIST	CharList = NIL;
	CLUSTERER	*Clusterer = NULL;
	LIST		ProtoList = NIL;
	LIST		NormProtoList = NIL;
	LIST pCharList;
	LABELEDLIST CharSample;

	ParseArguments (argc, argv);

	/* Accumulate the samples of every training page into CharList. */
	while ((PageName = GetNextFilename()) != NULL)
	{
		printf ("\nReading %s ...", PageName);
		/* NOTE(review): the result of Efopen is used unchecked;
		   presumably Efopen does not return on failure -- confirm. */
		TrainingPage = Efopen (PageName, "r");
		ReadTrainingSamples (TrainingPage, &CharList);
		fclose (TrainingPage);
		//WriteTrainingSamples (Directory, CharList);
	}

	/* Walk a copy of the list head so CharList still addresses the whole
	   list when it is freed below. */
	pCharList = CharList;
	iterate(pCharList)
	{
		//Cluster
		CharSample = (LABELEDLIST) first (pCharList);
		printf ("\nClustering %s ...", CharSample->Label);
		/* NOTE(review): Clusterer and ProtoList are overwritten on every
		   pass and only the last ones are released after the loop.
		   Whether the earlier ones can be freed here depends on what
		   AddToNormProtosList and FreeClusterer do with the protos --
		   confirm before changing. */
		Clusterer = SetUpForClustering(CharSample);
		ProtoList = ClusterSamples(Clusterer, &Config);
		AddToNormProtosList(&NormProtoList, ProtoList, CharSample->Label);
	}
	FreeTrainingSamples (CharList);

	/* NOTE(review): Clusterer is still NULL here when no character was
	   read; verify that WriteNormProtos and FreeClusterer accept NULL. */
	WriteNormProtos (Directory, NormProtoList, Clusterer);
	FreeClusterer(Clusterer);
	FreeProtoList(&ProtoList);
	FreeNormProtoList(NormProtoList);
	printf ("\n");
	return 0;
}	// main

/**----------------------------------------------------------------------------
							Private Code
----------------------------------------------------------------------------**/
/*---------------------------------------------------------------------------*/
void ParseArguments(
     int	argc,
     char	**argv)
/*
**	Parameters:
**		argc	number of command line arguments to parse
**		argv	command line arguments
**	Globals:
**		ShowAllSamples		flag controlling samples display
**		ShowSignificantProtos	flag controlling proto display
**		ShowInsignificantProtos	flag controlling proto display
**		Config			current clustering parameters
**		RoundingAccuracy	set by -R
**		Directory		set by -D
**		MaxNumSamples		set by -N
**		optarg, optind		defined by getopt sys call
**		Argc, Argv		global copies of argc and argv
**	Operation:
**		This routine parses the command line arguments that were
**		passed to the program.  The legal arguments are:
**			-d Unused	"turn off display of samples"; the
**					option consumes one argument, which
**					is ignored
**			-p Flag		integer; 0 turns significant protos
**					off, non-zero turns them on
**			-n Flag		integer; 0 turns insignificant protos
**					off, non-zero turns them on
**			-S [ spherical | elliptical | mixed | automatic ]
**					(only the first letter is examined)
**			-M MinSamples	"min samples per prototype (%)",
**					clamped to 0..1
**			-B MaxIllegal	"max illegal chars per cluster (%)",
**					clamped to 0..1
**			-I Independence	clamped to 0..1
**			-C Confidence	clamped to 0..1
**			-D Directory
**			-N MaxNumSamples	must be > 0
**			-R RoundingAccuracy	clamped to 0..0.01
**	Return: none
**	Exceptions: Illegal options, and option values that cannot be
**		parsed, print a usage message and terminate the program
**		with exit status 2.
**	History: 7/24/89, DSJ, Created.
*/
{
	int		Option;
	int		ParametersRead;
	int		Flag;
	BOOL8		Error;
	extern char	*optarg;

	Error = FALSE;
	Argc = argc;
	Argv = argv;

	/* 'p' is declared with an argument ("p:") because its handler reads
	   optarg, exactly like 'n'.
	   NOTE(review): 'd' is also declared with an argument although the
	   handler ignores it; kept as is so existing command lines parse the
	   same way. */
	while (( Option = getopt( argc, argv, "R:N:D:C:I:M:B:S:d:n:p:" )) != EOF )
	{
		switch ( Option )
		{
		case 'n':
			/* Reject unparsable values instead of storing an
			   uninitialized int. */
			if ( sscanf( optarg, "%d", &Flag ) != 1 ) Error = TRUE;
			else ShowInsignificantProtos = ( Flag != 0 ) ? TRUE : FALSE;
			break;
		case 'p':
			if ( sscanf( optarg, "%d", &Flag ) != 1 ) Error = TRUE;
			else ShowSignificantProtos = ( Flag != 0 ) ? TRUE : FALSE;
			break;
		case 'd':
			ShowAllSamples = FALSE;
			break;
		case 'C':
			/* NOTE(review): "%lf" requires Config.Confidence to be a
			   double, unlike the other fields which are read with
			   "%f" -- confirm against the CLUSTERCONFIG definition. */
			ParametersRead = sscanf( optarg, "%lf", &(Config.Confidence) );
			if ( ParametersRead != 1 ) Error = TRUE;
			else if ( Config.Confidence > 1 ) Config.Confidence = 1;
			else if ( Config.Confidence < 0 ) Config.Confidence = 0;
			break;
		case 'I':
			ParametersRead = sscanf( optarg, "%f", &(Config.Independence) );
			if ( ParametersRead != 1 ) Error = TRUE;
			else if ( Config.Independence > 1 ) Config.Independence = 1;
			else if ( Config.Independence < 0 ) Config.Independence = 0;
			break;
		case 'M':
			ParametersRead = sscanf( optarg, "%f", &(Config.MinSamples) );
			if ( ParametersRead != 1 ) Error = TRUE;
			else if ( Config.MinSamples > 1 ) Config.MinSamples = 1;
			else if ( Config.MinSamples < 0 ) Config.MinSamples = 0;
			break;
		case 'B':
			ParametersRead = sscanf( optarg, "%f", &(Config.MaxIllegal) );
			if ( ParametersRead != 1 ) Error = TRUE;
			else if ( Config.MaxIllegal > 1 ) Config.MaxIllegal = 1;
			else if ( Config.MaxIllegal < 0 ) Config.MaxIllegal = 0;
			break;
		case 'R':
			ParametersRead = sscanf( optarg, "%f", &RoundingAccuracy );
			if ( ParametersRead != 1 ) Error = TRUE;
			else if ( RoundingAccuracy > 0.01 ) RoundingAccuracy = 0.01;
			else if ( RoundingAccuracy < 0.0 ) RoundingAccuracy = 0.0;
			break;
		case 'S':
			switch ( optarg[0] )
			{
			case 's': Config.ProtoStyle = spherical; break;
			case 'e': Config.ProtoStyle = elliptical; break;
			case 'm': Config.ProtoStyle = mixed; break;
			case 'a': Config.ProtoStyle = automatic; break;
			default: Error = TRUE;
			}
			break;
		case 'D':
			Directory = optarg;
			break;
		case 'N':
			if (sscanf (optarg, "%d", &MaxNumSamples) != 1 ||
				MaxNumSamples <= 0)
				Error = TRUE;
			break;
		case '?':
			Error = TRUE;
			break;
		}

		/* Checked after every option so the first bad one stops the
		   program. */
		if ( Error )
		{
			fprintf (stderr, "usage: %s [-d Unused] [-p ShowSigProtos] [-n ShowInsigProtos]\n", argv[0] );
			fprintf (stderr, "\t[-S ProtoStyle]\n");
			fprintf (stderr, "\t[-M MinSamples] [-B MaxBad] [-I Independence] [-C Confidence]\n" );
			fprintf (stderr, "\t[-D Directory] [-N MaxNumSamples] [-R RoundingAccuracy] [ TrainingPage ... ]\n");
			exit (2);
		}
	}
}	/* ParseArguments */

/*---------------------------------------------------------------------------*/
char *GetNextFilename ()
/*
**	Parameters: none
**	Globals:
**		optind			defined by getopt sys call
**		Argc, Argv		global copies of argc and argv
**	Operation:
**		This routine returns the next command line argument and
**		advances optind past it.  If there are no remaining command
**		line arguments, it returns NULL.  This routine should only
**		be called after all option arguments have been parsed and
**		removed with ParseArguments.
**	Return: Next command line argument or NULL.
**	Exceptions: none
**	History: Fri Aug 18 09:34:12 1989, DSJ, Created.
*/
{
	if (optind < Argc)
		return (Argv [optind++]);
	else
		return (NULL);
}	/* GetNextFilename */

/*---------------------------------------------------------------------------*/
void ReadTrainingSamples (
     FILE	*File,
	 LIST* TrainingSamples)
/*
**	Parameters:
**		File		open text file to read samples from
**	Globals: none
**	Operation:
**		This routine reads training samples from a file and
**		places them into a data structure which organizes the
**		samples by FontName and CharName.  It then returns this
**		data structure.
**	Return: none
**	Exceptions: none
**	History: Fri Aug 18 13:11:39 1989, DSJ, Created.
**			 Tue May 17 1998 simplifications to structure, eliminated
**				font, and feature specification levels of structure.
*/

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -