⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mftraining.cpp

📁 一OCR的相关资料。希望对研究OCR的朋友有所帮助。
💻 CPP
📖 第 1 页 / 共 3 页
字号:
/******************************************************************************
**	Filename:	mfTraining.c
**	Purpose:	Separates training pages into files for each character.
**				Strips from files only the features and their parameters of
**				the feature type mf.
**	Author:		Dan Johnson
**	Revisment:	Christy Russon
**	Environment: HPUX 6.5
**	Library:     HPUX 6.5
**	History:     Fri Aug 18 08:53:50 1989, DSJ, Created.
**		     5/25/90, DSJ, Adapted to multiple feature types.
**
**				Tuesday, May 17, 1998 Changes made to make feature specific and
**				simplify structures. First step in simplifying training process.
**
 **	(c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
******************************************************************************/

/**----------------------------------------------------------------------------
					Include Files and Type Defines
----------------------------------------------------------------------------**/
#include "oldlist.h"
#include "efio.h"
#include "emalloc.h"
#include "featdefs.h"
#include "getopt.h"
#include "ocrfeatures.h"
#include "general.h"
#include "clusttool.h"
#include "cluster.h"
#include "protos.h"
#include "minmax.h"
#include "debug.h"
#include "const.h"
#include "mergenf.h"
#include "name2char.h"
#include "intproto.h"
#include "variables.h"
#include "freelist.h"

#include <string.h>
#include <stdio.h>
#include <math.h>

#define MAXNAMESIZE	80
#define MAX_NUM_SAMPLES	10000
#define PROGRAM_FEATURE_TYPE "mf"
#define MINSD (1.0f / 128.0f)

int	row_number;						/* cjn: fixes link problem */

/* A list of items tagged with a text label. */
typedef struct
{
  char		*Label;
  LIST		List;
}
LABELEDLISTNODE, *LABELEDLIST;

/* A labeled class plus, per proto id, the count stored in NumMerged
   (set to 1 when a proto is added and incremented on each merge in main). */
typedef struct
{
	char* Label;
	int	NumMerged[MAX_NUM_PROTOS];
	CLASS_TYPE Class;
}MERGE_CLASS_NODE;
typedef MERGE_CLASS_NODE* MERGE_CLASS;

/* Rounds x to the nearest multiple of frag.  Arguments are parenthesized so
   that compound expressions (e.g. round(a + b, c)) expand correctly.  Note
   that frag is evaluated twice; do not pass expressions with side effects. */
#define round(x,frag) (floor((x)/(frag)+.5)*(frag))

/**----------------------------------------------------------------------------
					Public Function Prototypes
----------------------------------------------------------------------------**/
int main (
     int	argc,
     char	**argv);

/**----------------------------------------------------------------------------
					Private Function Prototypes
----------------------------------------------------------------------------**/
void ParseArguments(
int	argc,
char	**argv);

char *GetNextFilename ();

LIST ReadTrainingSamples (
     FILE	*File);

LABELEDLIST FindList (
     LIST	List,
     char	*Label);

MERGE_CLASS FindClass (
     LIST	List,
     char	*Label);

LABELEDLIST NewLabeledList (
     char	*Label);

MERGE_CLASS NewLabeledClass (
     char	*Label);

void WriteTrainingSamples (
     char	*Directory,
     LIST	CharList);

void WriteClusteredTrainingSamples (
     char	*Directory,
     LIST	ProtoList,
	 CLUSTERER *Clusterer,
	 LABELEDLIST CharSample);

void WriteMergedTrainingSamples(
    char	*Directory,
	LIST ClassList);

void WriteMicrofeat(
    char	*Directory,
	LIST	ClassList);

void WriteProtos(
	FILE* File,
	MERGE_CLASS MergeClass);

void WriteConfigs(
	FILE* File,
	CLASS_TYPE Class);

void FreeTrainingSamples (
     LIST	CharList);

void FreeLabeledClassList (
     LIST	ClassList);

void FreeLabeledList (
     LABELEDLIST	LabeledList);

CLUSTERER *SetUpForClustering(
     LABELEDLIST	CharSample);

/*
PARAMDESC *ConvertToPARAMDESC(
	PARAM_DESC* Param_Desc,
	int N);
*/

LIST RemoveInsignificantProtos(
	LIST ProtoList,
	BOOL8 KeepSigProtos,
	BOOL8 KeepInsigProtos,
	int N);

void CleanUpUnusedData(
	LIST ProtoList);

void Normalize (
   float  *Values);

void SetUpForFloat2Int(
	LIST LabeledClassList);

/*---------------------------------------------------------------------------*/
/*
**	Parameters:
**		Templates	integer templates whose classes are summarized
**		filename	path of the table file to create
**	Operation:
**		For each class in Templates, finds the largest
**		LengthForConfigId value over all of the class's configs and
**		writes one text line "<class id char> <max length>" to filename.
**	Return: none
**	NOTE(review): the stream returned by Efopen is used unchecked, as
**		everywhere else in this file; this assumes Efopen does not
**		return NULL on failure -- confirm against efio.
*/
void WritePFFMTable(INT_TEMPLATES Templates, const char* filename) {
  FILE* fp = Efopen(filename, "wb");
  /* then write out each class */
  for (int i = 0; i < NumClassesIn (Templates); i++) {
    int MaxLength = 0;
    INT_CLASS Class = ClassForIndex (Templates, i);
    for (int ConfigId = 0; ConfigId < NumIntConfigsIn (Class); ConfigId++) {
      if (LengthForConfigId (Class, ConfigId) > MaxLength)
        MaxLength = LengthForConfigId (Class, ConfigId);
    }
    fprintf(fp, "%c %d\n", ClassIdForIndex(Templates, i), MaxLength);
  }
  fclose(fp);
}

//--------------Global Data Definitions and Declarations--------------
static char FontName[MAXNAMESIZE];

// globals used for parsing command line arguments
static char	*Directory = NULL;
static int	MaxNumSamples = MAX_NUM_SAMPLES;
static int	Argc;
static char	**Argv;

// globals used to control what information is saved in the output file
static BOOL8		ShowAllSamples = FALSE;
static BOOL8		ShowSignificantProtos = TRUE;
static BOOL8		ShowInsignificantProtos = FALSE;

// global variable to hold configuration parameters to control clustering
// -M 0.40   -B 0.05   -I 1.0   -C 1e-6.
static CLUSTERCONFIG Config =
{ elliptical, 0.40, 0.05, 1.0, 1e-6 };

static FLOAT32 RoundingAccuracy = 0.0;

/*----------------------------------------------------------------------------
						Public Code
-----------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
int main (
     int	argc,
     char	**argv)
/*
**	Parameters:
**		argc	number of command line arguments
**		argv	array of command line arguments
**	Globals:
**		Directory		output directory for inttemp (may be NULL)
**		Config			clustering parameters passed to ClusterSamples
**		ShowSignificantProtos	passed to RemoveInsignificantProtos
**		ShowInsignificantProtos	passed to RemoveInsignificantProtos
**		TrainingData		passed to CreateIntTemplates
**	Operation:
**		This program reads in a text file consisting of feature
**		samples from a training page in the following format:
**
**			FontName CharName NumberOfFeatureTypes(N)
**			   FeatureTypeName1 NumberOfFeatures(M)
**			      Feature1
**			      ...
**			      FeatureM
**			   FeatureTypeName2 NumberOfFeatures(M)
**			      Feature1
**			      ...
**			      FeatureM
**			   ...
**			   FeatureTypeNameN NumberOfFeatures(M)
**			      Feature1
**			      ...
**			      FeatureM
**			FontName CharName ...
**
**		Each character's samples are clustered into prototypes, and
**		the prototypes are merged into one class per character label.
**		The result of this program is a binary inttemp file used by
**		the OCR engine, written to "<Directory>/inttemp" (or "inttemp"
**		when no directory was given).  WriteMicrofeat is also called on
**		the merged classes and a "pffmtable" file is written to the
**		current directory.
**	Return: 0 on success, 1 if the output file name would not fit in
**		MAXNAMESIZE characters.
**	Exceptions: none
**	History:	Fri Aug 18 08:56:17 1989, DSJ, Created.
**				Mon May 18 1998, Christy Russon, Revision started.
*/
{
	char	*PageName;
	FILE	*TrainingPage;
	FILE	*OutFile;
	LIST	CharList;
	CLUSTERER	*Clusterer = NULL;
	LIST		ProtoList = NIL;
	LABELEDLIST CharSample;
	PROTOTYPE	*Prototype;
	LIST   	ClassList = NIL;
	int		Cid, Pid;
	PROTO		Proto;
	PROTO_STRUCT	DummyProto;
	BIT_VECTOR	Config2;
	MERGE_CLASS	MergeClass;
	INT_TEMPLATES	IntTemplates;
	LIST pCharList, pProtoList;
	char Filename[MAXNAMESIZE];

	ParseArguments (argc, argv);
	InitFastTrainerVars ();
	InitSubfeatureVars ();

	while ((PageName = GetNextFilename()) != NULL)
	{
		printf ("\nReading %s ...", PageName);
		TrainingPage = Efopen (PageName, "r");
		CharList = ReadTrainingSamples (TrainingPage);
		fclose (TrainingPage);
		//WriteTrainingSamples (Directory, CharList);
		pCharList = CharList;
		iterate(pCharList)
		{
			//Cluster
			CharSample = (LABELEDLIST) first (pCharList);
			printf ("\nClustering %s ...", CharSample->Label);
			Clusterer = SetUpForClustering(CharSample);
			ProtoList = ClusterSamples(Clusterer, &Config);
			//WriteClusteredTrainingSamples (Directory, ProtoList, Clusterer, CharSample);
			CleanUpUnusedData(ProtoList);

			//Merge
			// SampleSize must be read before the clusterer is freed.
			ProtoList = RemoveInsignificantProtos(ProtoList, ShowSignificantProtos,
				ShowInsignificantProtos, Clusterer->SampleSize);
			FreeClusterer(Clusterer);
			MergeClass = FindClass (ClassList, CharSample->Label);
			if (MergeClass == NULL)
			{
				// first time this label is seen: start a new class for it
				MergeClass = NewLabeledClass (CharSample->Label);
				ClassList = push (ClassList, MergeClass);
			}
			// each (page, character) pair contributes one new config
			Cid = AddConfigToClass(MergeClass->Class);
			pProtoList = ProtoList;
			iterate (pProtoList)
			{
				Prototype = (PROTOTYPE *) first (pProtoList);

				// see if proto can be approximated by existing proto
				Pid = FindClosestExistingProto (MergeClass->Class, MergeClass->NumMerged, Prototype);
				if (Pid == NO_PROTO)
				{
					Pid = AddProtoToClass (MergeClass->Class);
					Proto = ProtoIn (MergeClass->Class, Pid);
					MakeNewFromOld (Proto, Prototype);
					MergeClass->NumMerged[Pid] = 1;
				}
				else
				{
					// fold the new proto into the existing one, weighting the
					// existing proto by the number of protos already merged
					MakeNewFromOld (&DummyProto, Prototype);
					ComputeMergedProto (ProtoIn (MergeClass->Class, Pid), &DummyProto,
						(FLOAT32) MergeClass->NumMerged[Pid], 1.0,
						ProtoIn (MergeClass->Class, Pid));
					MergeClass->NumMerged[Pid] ++;
				}
				Config2 = ConfigIn (MergeClass->Class, Cid);
				AddProtoToConfig (Pid, Config2);
			}
			FreeProtoList (&ProtoList);
		}
		FreeTrainingSamples (CharList);
		printf ("\n");
	}
	//WriteMergedTrainingSamples(Directory,ClassList);
	WriteMicrofeat(Directory, ClassList);
	InitIntProtoVars ();
	InitPrototypes ();
	SetUpForFloat2Int(ClassList);
	IntTemplates = CreateIntTemplates(TrainingData);

	// Directory comes from the command line; refuse names that would
	// overflow Filename instead of writing past the end of the buffer.
	if (Directory != NULL &&
		strlen (Directory) + strlen ("/inttemp") >= sizeof (Filename))
	{
		fprintf (stderr, "\nOutput directory name is too long: %s\n", Directory);
		FreeLabeledClassList (ClassList);
		return 1;
	}
	strcpy (Filename, "");
	if (Directory != NULL)
	{
		strcat (Filename, Directory);
		strcat (Filename, "/");
	}
	strcat (Filename, "inttemp");
#ifdef __UNIX__
	OutFile = Efopen (Filename, "w");
#else
	OutFile = Efopen (Filename, "wb");
#endif
	WriteIntTemplates(OutFile, IntTemplates);
	fclose (OutFile);

	// Now create pffmtable.
	WritePFFMTable(IntTemplates, "pffmtable");
	printf ("\nDone!\n");
	FreeLabeledClassList (ClassList);
	return 0;
}	/* main */

/**----------------------------------------------------------------------------
							Private Code
----------------------------------------------------------------------------**/
/*---------------------------------------------------------------------------*/
void ParseArguments(
int	argc,
char	**argv)
/*
**	Parameters:
**		argc	number of command line arguments to parse
**		argv	command line arguments
**	Globals:
**		ShowAllSamples		flag controlling samples display
**		ShowSignificantProtos	flag controlling proto display
**		ShowInsignificantProtos	flag controlling proto display
**		Config			current clustering parameters
**		optarg, optind		defined by getopt sys call
**		Argc, Argv		global copies of argc and argv
**	Operation:
**		This routine parses the command line arguments that were
**		passed to the program.  The legal arguments are:
**			-d		"turn off display of samples"
**			-p		"turn off significant protos"
**			-n		"turn off insignificant proto"
**			-S [ spherical | elliptical | mixed | automatic ]
**			-M MinSamples	"min samples per prototype (%)"
**			-B MaxIllegal	"max illegal chars per cluster (%)"
**			-I Independence	"0 to 1"
**			-C Confidence	"1e-200 to 1.0"
**			-D Directory
**			-N MaxNumSamples
**			-R RoundingAccuracy
**	Return: none
**	Exceptions: Illegal options terminate the program.
**	History: 7/24/89, DSJ, Created.
*/
{
	int		Option;
	int		ParametersRead;
	BOOL8		Error;
	extern char	*optarg;

	Error = FALSE;
	Argc = argc;
	Argv = argv;
	while (( Option = getopt( argc, argv, "R:N:D:C:I:M:B:S:d:n:p" )) != EOF )
	{
		switch ( Option )
		{

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -