📄 mftraining.cpp
字号:
/******************************************************************************
**  Filename:    mfTraining.c
**  Purpose:     Separates training pages into files for each character.
**               Strips from files only the features and their parameters of
**               the feature type mf.
**  Author:      Dan Johnson
**  Revisment:   Christy Russon
**  Environment: HPUX 6.5
**  Library:     HPUX 6.5
**  History:     Fri Aug 18 08:53:50 1989, DSJ, Created.
**               5/25/90, DSJ, Adapted to multiple feature types.
**               Tuesday, May 17, 1998 Changes made to make feature specific and
**               simplify structures. First step in simplifying training process.
**
**  (c) Copyright Hewlett-Packard Company, 1988.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
******************************************************************************/

/**----------------------------------------------------------------------------
          Include Files and Type Defines
----------------------------------------------------------------------------**/
#include "oldlist.h"
#include "efio.h"
#include "emalloc.h"
#include "featdefs.h"
#include "getopt.h"
#include "ocrfeatures.h"
#include "general.h"
#include "clusttool.h"
#include "cluster.h"
#include "protos.h"
#include "minmax.h"
#include "debug.h"
#include "const.h"
#include "mergenf.h"
#include "name2char.h"
#include "intproto.h"
#include "variables.h"
#include "freelist.h"

#include <string.h>
#include <stdio.h>
#include <math.h>

#define MAXNAMESIZE 80
#define MAX_NUM_SAMPLES 10000
#define PROGRAM_FEATURE_TYPE "mf"
#define MINSD (1.0f / 128.0f)

int row_number;  /* cjn: fixes link problem */

/* A list of samples together with the character label they belong to. */
typedef struct
{
  char *Label;
  LIST List;
}
LABELEDLISTNODE, *LABELEDLIST;

/* A class being assembled by merging prototypes; NumMerged[Pid] counts how
   many prototypes have been merged into proto Pid of Class (see main). */
typedef struct
{
  char* Label;
  int NumMerged[MAX_NUM_PROTOS];
  CLASS_TYPE Class;
}
MERGE_CLASS_NODE;
typedef MERGE_CLASS_NODE* MERGE_CLASS;

/* Rounds x to the nearest multiple of frag.  Arguments and the expansion are
   parenthesized so that compound expressions (e.g. round(a+b, f)) evaluate as
   intended.  Both arguments are still evaluated more than once.
   NOTE(review): this two-argument macro hides the one-argument round() that
   C99/C++11 <math.h> may declare -- confirm no code below needs the latter. */
#define round(x,frag) (floor((x)/(frag)+.5)*(frag))

/**----------------------------------------------------------------------------
          Public Function Prototypes
----------------------------------------------------------------------------**/
int main (
     int  argc,
     char **argv);

/**----------------------------------------------------------------------------
          Private Function Prototypes
----------------------------------------------------------------------------**/
void ParseArguments(
     int  argc,
     char **argv);

char *GetNextFilename ();

LIST ReadTrainingSamples (
     FILE *File);

LABELEDLIST FindList (
     LIST List,
     char *Label);

MERGE_CLASS FindClass (
     LIST List,
     char *Label);

LABELEDLIST NewLabeledList (
     char *Label);

MERGE_CLASS NewLabeledClass (
     char *Label);

void WriteTrainingSamples (
     char *Directory,
     LIST CharList);

void WriteClusteredTrainingSamples (
     char *Directory,
     LIST ProtoList,
     CLUSTERER *Clusterer,
     LABELEDLIST CharSample);
/**/
void WriteMergedTrainingSamples(
     char *Directory,
     LIST ClassList);

void WriteMicrofeat(
     char *Directory,
     LIST ClassList);

void WriteProtos(
     FILE* File,
     MERGE_CLASS MergeClass);

void WriteConfigs(
     FILE* File,
     CLASS_TYPE Class);

void FreeTrainingSamples (
     LIST CharList);

void FreeLabeledClassList (
     LIST ClassList);

void FreeLabeledList (
     LABELEDLIST LabeledList);

CLUSTERER *SetUpForClustering(
     LABELEDLIST CharSample);
/*
PARAMDESC *ConvertToPARAMDESC(
     PARAM_DESC* Param_Desc,
     int N);
*/
LIST RemoveInsignificantProtos(
     LIST ProtoList,
     BOOL8 KeepSigProtos,
     BOOL8 KeepInsigProtos,
     int N);

void CleanUpUnusedData(
     LIST ProtoList);

void Normalize (
     float *Values);

void SetUpForFloat2Int(
     LIST LabeledClassList);

/*---------------------------------------------------------------------------*/
/*
**  Parameters:
**    Templates  integer templates whose classes are enumerated
**    filename   path of the file to create
**  Operation:
**    Writes one text line per class in Templates, formatted "%c %d\n":
**    the class id for that index followed by the largest value that
**    LengthForConfigId yields over all of the class's configs (0 if the
**    class has no configs).
**  Return: none
**  NOTE(review): the result of Efopen is used unchecked; presumably Efopen
**    reports open failures itself -- confirm in efio.h.
*/
void WritePFFMTable(INT_TEMPLATES Templates, const char* filename) {
  FILE* fp = Efopen(filename, "wb");

  /* then write out each class */
  for (int i = 0; i < NumClassesIn (Templates); i++) {
    int MaxLength = 0;
    INT_CLASS Class = ClassForIndex (Templates, i);
    for (int ConfigId = 0; ConfigId < NumIntConfigsIn (Class); ConfigId++) {
      /* read the length once instead of once to compare and once to store */
      int Length = LengthForConfigId (Class, ConfigId);
      if (Length > MaxLength)
        MaxLength = Length;
    }
    fprintf(fp, "%c %d\n", ClassIdForIndex(Templates, i), MaxLength);
  }
  fclose(fp);
}

//--------------Global Data Definitions and Declarations--------------
static char FontName[MAXNAMESIZE];
// globals used for parsing command line arguments
static char *Directory = NULL;
static int MaxNumSamples = MAX_NUM_SAMPLES;
static int Argc;
static char **Argv;

// globals used to control what information is saved in the output file
static BOOL8 ShowAllSamples = FALSE;
static BOOL8 ShowSignificantProtos = TRUE;
static BOOL8 ShowInsignificantProtos = FALSE;

// global variable to hold configuration parameters to control clustering
// -M 0.40   -B 0.05   -I 1.0   -C 1e-6.
static CLUSTERCONFIG Config =
{ elliptical, 0.40, 0.05, 1.0, 1e-6 };

static FLOAT32 RoundingAccuracy = 0.0;

/*---------------------------------------------------------------------------*/
/*
**  Parameters:
**    Path      buffer that receives the resulting path
**    PathSize  size of Path in bytes
**    Name      file name to place in the output directory
**  Globals:
**    Directory output directory, or NULL for none
**  Operation:
**    Builds "<Directory>/<Name>" (or just "<Name>" when Directory is NULL)
**    in Path.  The required length is checked first so that a long
**    directory can never overrun Path.
**  Return: TRUE on success, FALSE if the path does not fit (Path is then
**    left untouched).
*/
static BOOL8 BuildOutputPath(char *Path, size_t PathSize, const char *Name) {
  size_t Needed = strlen (Name) + 1;          /* name + terminating NUL */

  if (Directory != NULL)
    Needed += strlen (Directory) + 1;         /* directory + '/' */
  if (Needed > PathSize)
    return FALSE;

  Path[0] = '\0';
  if (Directory != NULL) {
    strcat (Path, Directory);
    strcat (Path, "/");
  }
  strcat (Path, Name);
  return TRUE;
}

/*----------------------------------------------------------------------------
            Public Code
-----------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
int main (
     int  argc,
     char **argv)

/*
**  Parameters:
**    argc  number of command line arguments
**    argv  array of command line arguments
**  Globals:
**    Directory               output directory (NULL means none given)
**    Config                  clustering parameters passed to ClusterSamples
**    ShowSignificantProtos   passed to RemoveInsignificantProtos
**    ShowInsignificantProtos passed to RemoveInsignificantProtos
**  Operation:
**    This program reads in a text file consisting of feature
**    samples from a training page in the following format:
**
**      FontName CharName NumberOfFeatureTypes(N)
**         FeatureTypeName1 NumberOfFeatures(M)
**            Feature1
**            ...
**            FeatureM
**         FeatureTypeName2 NumberOfFeatures(M)
**            Feature1
**            ...
**            FeatureM
**         ...
**         FeatureTypeNameN NumberOfFeatures(M)
**            Feature1
**            ...
**            FeatureM
**      FontName CharName ...
**
**    The result of this program is a binary inttemp file used by
**    the OCR engine, plus a pffmtable file; both are written to
**    Directory when one was given.
**  Return: 0 on success, 1 if an output path does not fit in
**    MAXNAMESIZE bytes.
**  Exceptions: none
**  History:  Fri Aug 18 08:56:17 1989, DSJ, Created.
**            Mon May 18 1998, Christy Russon, Revision started.
*/
{
  char *PageName;
  FILE *TrainingPage;
  FILE *OutFile;
  LIST CharList;
  CLUSTERER *Clusterer = NULL;
  LIST ProtoList = NIL;
  LABELEDLIST CharSample;
  PROTOTYPE *Prototype;
  LIST ClassList = NIL;
  int Cid, Pid;
  PROTO Proto;
  PROTO_STRUCT DummyProto;
  BIT_VECTOR Config2;
  MERGE_CLASS MergeClass;
  INT_TEMPLATES IntTemplates;
  LIST pCharList, pProtoList;
  char IntTempName[MAXNAMESIZE];
  char PFFMTableName[MAXNAMESIZE];

  ParseArguments (argc, argv);

  // Build both output paths up front: an over-long -D directory is then
  // rejected before any clustering work is done (and before anything that
  // would need freeing has been allocated).
  if (!BuildOutputPath (IntTempName, sizeof (IntTempName), "inttemp") ||
      !BuildOutputPath (PFFMTableName, sizeof (PFFMTableName), "pffmtable")) {
    fprintf (stderr, "Output directory name is too long (limit is %d bytes "
             "including the file name)\n", MAXNAMESIZE);
    return 1;
  }

  InitFastTrainerVars ();
  InitSubfeatureVars ();
  while ((PageName = GetNextFilename()) != NULL) {
    printf ("\nReading %s ...", PageName);
    TrainingPage = Efopen (PageName, "r");
    CharList = ReadTrainingSamples (TrainingPage);
    fclose (TrainingPage);
    //WriteTrainingSamples (Directory, CharList);
    pCharList = CharList;
    iterate(pCharList) {
      //Cluster
      CharSample = (LABELEDLIST) first (pCharList);
      printf ("\nClustering %s ...", CharSample->Label);
      Clusterer = SetUpForClustering(CharSample);
      ProtoList = ClusterSamples(Clusterer, &Config);
      //WriteClusteredTrainingSamples (Directory, ProtoList, Clusterer, CharSample);
      CleanUpUnusedData(ProtoList);

      //Merge
      // Clusterer->SampleSize must be read before the clusterer is freed.
      ProtoList = RemoveInsignificantProtos(ProtoList, ShowSignificantProtos,
                                            ShowInsignificantProtos,
                                            Clusterer->SampleSize);
      FreeClusterer(Clusterer);

      // Find the class for this label, creating it on first sight.
      MergeClass = FindClass (ClassList, CharSample->Label);
      if (MergeClass == NULL) {
        MergeClass = NewLabeledClass (CharSample->Label);
        ClassList = push (ClassList, MergeClass);
      }

      // Each clustered character sample contributes one new config.
      Cid = AddConfigToClass(MergeClass->Class);
      pProtoList = ProtoList;
      iterate (pProtoList) {
        Prototype = (PROTOTYPE *) first (pProtoList);

        // see if proto can be approximated by existing proto
        Pid = FindClosestExistingProto (MergeClass->Class,
                                        MergeClass->NumMerged, Prototype);
        if (Pid == NO_PROTO) {
          Pid = AddProtoToClass (MergeClass->Class);
          Proto = ProtoIn (MergeClass->Class, Pid);
          MakeNewFromOld (Proto, Prototype);
          MergeClass->NumMerged[Pid] = 1;
        }
        else {
          // Merge into the existing proto in place; the existing proto is
          // passed with its merge count and the new one with 1.0.
          MakeNewFromOld (&DummyProto, Prototype);
          ComputeMergedProto (ProtoIn (MergeClass->Class, Pid), &DummyProto,
                              (FLOAT32) MergeClass->NumMerged[Pid], 1.0,
                              ProtoIn (MergeClass->Class, Pid));
          MergeClass->NumMerged[Pid] ++;
        }
        Config2 = ConfigIn (MergeClass->Class, Cid);
        AddProtoToConfig (Pid, Config2);
      }
      FreeProtoList (&ProtoList);
    }
    FreeTrainingSamples (CharList);
    printf ("\n");
  }
  //WriteMergedTrainingSamples(Directory,ClassList);
  WriteMicrofeat(Directory, ClassList);
  InitIntProtoVars ();
  InitPrototypes ();
  SetUpForFloat2Int(ClassList);
  IntTemplates = CreateIntTemplates(TrainingData);

#ifdef __UNIX__
  OutFile = Efopen (IntTempName, "w");
#else
  OutFile = Efopen (IntTempName, "wb");
#endif
  WriteIntTemplates(OutFile, IntTemplates);
  fclose (OutFile);

  // Now create pffmtable, in the same directory as inttemp.
  WritePFFMTable(IntTemplates, PFFMTableName);
  printf ("\nDone!\n");
  FreeLabeledClassList (ClassList);
  return 0;
}  /* main */

/**----------------------------------------------------------------------------
              Private Code
----------------------------------------------------------------------------**/
/*---------------------------------------------------------------------------*/
void ParseArguments(
     int  argc,
     char **argv)

/*
**  Parameters:
**    argc  number of command line arguments to parse
**    argv  command line arguments
**  Globals:
**    ShowAllSamples           flag controlling samples display
**    ShowSignificantProtos    flag controlling proto display
**    ShowInsignificantProtos  flag controlling proto display
**    Config                   current clustering parameters
**    optarg, optind           defined by getopt sys call
**    Argc, Argv               global copies of argc and argv
**  Operation:
**    This routine parses the command line arguments that were
**    passed to the program.  The legal arguments are:
**      -d    "turn off display of samples"
**      -p    "turn off significant protos"
**      -n    "turn off insignificant proto"
**      -S [ spherical | elliptical | mixed | automatic ]
**      -M MinSamples   "min samples per prototype (%)"
**      -B MaxIllegal   "max illegal chars per cluster (%)"
**      -I Independence "0 to 1"
**      -C Confidence   "1e-200 to 1.0"
**      -D Directory
**      -N MaxNumSamples
**      -R RoundingAccuracy
**  Return: none
**  Exceptions: Illegal options terminate the program.
**  History: 7/24/89, DSJ, Created.
*/

{
  int Option;
  int ParametersRead;
  BOOL8 Error;
  extern char *optarg;

  Error = FALSE;
  Argc = argc;
  Argv = argv;
  while (( Option = getopt( argc, argv, "R:N:D:C:I:M:B:S:d:n:p" )) != EOF )
  {
    switch ( Option )
    {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -