📄 cntraining.cpp
字号:
/******************************************************************************
**  Filename:   cnTraining.cpp
**  Purpose:    Generates a normproto and pffmtable.
**  Author:     Dan Johnson
**  Revisment:  Christy Russon
**  History:    Fri Aug 18 08:53:50 1989, DSJ, Created.
**              5/25/90, DSJ, Adapted to multiple feature types.
**              Tuesday, May 17, 1998 Changes made to make feature specific and
**              simplify structures. First step in simplifying training process.
**
**  (c) Copyright Hewlett-Packard Company, 1988.
**  Licensed under the Apache License, Version 2.0 (the "License");
**  you may not use this file except in compliance with the License.
**  You may obtain a copy of the License at
**  http://www.apache.org/licenses/LICENSE-2.0
**  Unless required by applicable law or agreed to in writing, software
**  distributed under the License is distributed on an "AS IS" BASIS,
**  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
**  See the License for the specific language governing permissions and
**  limitations under the License.
******************************************************************************/

/**----------------------------------------------------------------------------
          Include Files and Type Defines
----------------------------------------------------------------------------**/
#include "oldlist.h"
#include "efio.h"
#include "emalloc.h"
#include "featdefs.h"
#include "getopt.h"
#include "ocrfeatures.h"
#include "general.h"
#include "clusttool.h"
#include "cluster.h"
#include "name2char.h"
#include <string.h>
#include <stdio.h>
#include <stdlib.h>   /* exit() */
#include <math.h>

#define MAXNAMESIZE           80
#define MAX_NUM_SAMPLES       10000
#define PROGRAM_FEATURE_TYPE  "cn"
#define MINSD                 (1.0f / 64.0f)

int row_number;               /* cjn: fixes link problem */

typedef struct
{
  char  *Label;
  LIST  List;
}
LABELEDLISTNODE, *LABELEDLIST;

/* Rounds x to the nearest multiple of frag.  Arguments are parenthesised so
   that expression arguments expand with the intended precedence; both are
   evaluated more than once, so do not pass expressions with side effects. */
#define round(x,frag) (floor((x)/(frag)+.5)*(frag))

/**----------------------------------------------------------------------------
          Public Function Prototypes
----------------------------------------------------------------------------**/
int main (
     int   argc,
     char  **argv);

/**----------------------------------------------------------------------------
          Private Function Prototypes
----------------------------------------------------------------------------**/
void ParseArguments(
     int   argc,
     char  **argv);

char *GetNextFilename ();

void ReadTrainingSamples (
     FILE  *File,
     LIST* TrainingSamples);

LABELEDLIST FindList (
     LIST  List,
     char  *Label);

LABELEDLIST NewLabeledList (
     char  *Label);

void WriteTrainingSamples (
     char  *Directory,
     LIST  CharList);

void WriteNormProtos (
     char  *Directory,
     LIST  LabeledProtoList,
     CLUSTERER *Clusterer);

void FreeTrainingSamples (
     LIST  CharList);

void FreeNormProtoList (
     LIST  CharList);

void FreeLabeledList (
     LABELEDLIST LabeledList);

CLUSTERER *SetUpForClustering(
     LABELEDLIST CharSample);

/*
PARAMDESC *ConvertToPARAMDESC(
     PARAM_DESC* Param_Desc,
     int N);
*/

void AddToNormProtosList(
     LIST* NormProtoList,
     LIST ProtoList,
     char* CharName);

void WriteProtos(
     FILE  *File,
     UINT16  N,
     LIST  ProtoList,
     BOOL8 WriteSigProtos,
     BOOL8 WriteInsigProtos);

int NumberOfProtos(
     LIST  ProtoList,
     BOOL8 CountSigProtos,
     BOOL8 CountInsigProtos);

/**----------------------------------------------------------------------------
          Global Data Definitions and Declarations
----------------------------------------------------------------------------**/
static char FontName[MAXNAMESIZE];

/* globals used for parsing command line arguments */
static char *Directory = NULL;
static int  MaxNumSamples = MAX_NUM_SAMPLES;
static int  Argc;
static char **Argv;

/* globals used to control what information is saved in the output file */
static BOOL8    ShowAllSamples = FALSE;
static BOOL8    ShowSignificantProtos = TRUE;
static BOOL8    ShowInsignificantProtos = FALSE;

/* global variable to hold configuration parameters to control clustering */
/* defaults correspond to: -M 0.025 -B 0.05 -I 0.8 -C 1e-3 */
static CLUSTERCONFIG  Config =
{
  elliptical, 0.025, 0.05, 0.8, 1e-3
};

static FLOAT32 RoundingAccuracy = 0.0;

/**----------------------------------------------------------------------------
              Public Code
----------------------------------------------------------------------------**/
/*---------------------------------------------------------------------------*/
int main (
     int   argc,
     char  **argv)

/*
**  Parameters:
**    argc  number of command line arguments
**    argv  array of command line arguments
**  Globals:
**    Directory   passed to WriteNormProtos
**    Config      clustering parameters passed to ClusterSamples
**  Operation:
**    Parses the command line, then treats every remaining argument
**    as the name of a training page.  Each page is a text file of
**    feature samples in the following format:
**
**      FontName CharName NumberOfFeatureTypes(N)
**         FeatureTypeName1 NumberOfFeatures(M)
**            Feature1
**            ...
**            FeatureM
**         FeatureTypeName2 NumberOfFeatures(M)
**            Feature1
**            ...
**            FeatureM
**         ...
**         FeatureTypeNameN NumberOfFeatures(M)
**            Feature1
**            ...
**            FeatureM
**      FontName CharName ...
**
**    All pages are read into one list of labeled sample lists.
**    Each entry of that list is then clustered with the current
**    Config and the resulting prototypes are added, under the
**    entry's label, to a list of norm protos.  Finally the norm
**    protos are written via WriteNormProtos and the working data
**    structures are released.
**  Return: 0
**  Exceptions: none
**  History:  Fri Aug 18 08:56:17 1989, DSJ, Created.
*/

{
  char        *PageName;
  FILE        *TrainingPage;
  LIST        CharList = NIL;
  CLUSTERER   *Clusterer = NULL;
  LIST        ProtoList = NIL;
  LIST        NormProtoList = NIL;
  LIST        pCharList;
  LABELEDLIST CharSample;

  ParseArguments (argc, argv);

  /* Accumulate the samples of every training page into CharList. */
  while ((PageName = GetNextFilename()) != NULL)
  {
    printf ("\nReading %s ...", PageName);
    /* NOTE(review): the result of Efopen is used unchecked; presumably
       Efopen reports the error and terminates on failure -- confirm. */
    TrainingPage = Efopen (PageName, "r");
    ReadTrainingSamples (TrainingPage, &CharList);
    fclose (TrainingPage);
    //WriteTrainingSamples (Directory, CharList);
  }

  /* NOTE(review): Clusterer and ProtoList are overwritten on every pass of
     the loop below and only the values from the final pass reach
     FreeClusterer / FreeProtoList -- confirm whether the earlier ones are
     owned elsewhere or are leaked.  If no training page was given,
     Clusterer is still NULL when handed to WriteNormProtos and
     FreeClusterer -- confirm that both tolerate NULL. */
  pCharList = CharList;
  iterate(pCharList)
  {
    //Cluster
    CharSample = (LABELEDLIST) first (pCharList);
    printf ("\nClustering %s ...", CharSample->Label);
    Clusterer = SetUpForClustering(CharSample);
    ProtoList = ClusterSamples(Clusterer, &Config);
    AddToNormProtosList(&NormProtoList, ProtoList, CharSample->Label);
  }
  FreeTrainingSamples (CharList);
  WriteNormProtos (Directory, NormProtoList, Clusterer);
  FreeClusterer(Clusterer);
  FreeProtoList(&ProtoList);
  FreeNormProtoList(NormProtoList);
  printf ("\n");
  return 0;
} // main

/**----------------------------------------------------------------------------
              Private Code
----------------------------------------------------------------------------**/
/*---------------------------------------------------------------------------*/
void ParseArguments(
     int   argc,
     char  **argv)

/*
**  Parameters:
**    argc  number of command line arguments to parse
**    argv  command line arguments
**  Globals:
**    ShowAllSamples          flag controlling samples display
**    ShowSignificantProtos   flag controlling proto display
**    ShowInsignificantProtos flag controlling proto display
**    Config                  current clustering parameters
**    RoundingAccuracy        set by -R
**    Directory               set by -D
**    MaxNumSamples           set by -N
**    optarg, optind          defined by getopt sys call
**    Argc, Argv              global copies of argc and argv
**  Operation:
**    This routine parses the command line arguments that were
**    passed to the program.  The legal arguments are:
**      -d arg    "turn off display of samples" (the option string
**                requires an argument; its value is ignored)
**      -n 0|1    "turn insignificant protos off (0) or on (non-zero)"
**      -p        "turn off significant protos"
**      -S [ spherical | elliptical | mixed | automatic ]
**                (only the first letter is examined)
**      -M MinSamples   "min samples per prototype (%)", clamped to [0,1]
**      -B MaxIllegal   "max illegal chars per cluster (%)", clamped to [0,1]
**      -I Independence "0 to 1", clamped to [0,1]
**      -C Confidence   "1e-200 to 1.0", clamped to [0,1]
**      -D Directory
**      -N MaxNumSamples  (must be a positive integer)
**      -R RoundingAccuracy  clamped to [0,0.01]
**  Return: none
**  Exceptions: Illegal options terminate the program with exit status 2
**    after printing a usage message to stderr.
**  History: 7/24/89, DSJ, Created.
*/

{
  int         Option;
  int         ParametersRead;
  BOOL8       Error;
  extern char *optarg;

  Error = FALSE;
  Argc = argc;
  Argv = argv;

  while (( Option = getopt( argc, argv, "R:N:D:C:I:M:B:S:d:n:p" )) != EOF )
  {
    switch ( Option )
    {
      case 'n':
        /* Reject a non-numeric argument instead of using an
           uninitialised value. */
        if ( sscanf( optarg, "%d", &ParametersRead ) != 1 )
          Error = TRUE;
        else
          ShowInsignificantProtos = ( ParametersRead != 0 ) ? TRUE : FALSE;
        break;

      case 'p':
        /* 'p' has no ':' in the option string, so getopt supplies no
           argument for it; it must not read optarg.  It is a plain
           flag that turns significant protos off. */
        ShowSignificantProtos = FALSE;
        break;

      case 'd':
        ShowAllSamples = FALSE;
        break;

      /* NOTE(review): "%lf" assumes Config.Confidence is double-width and
         "%f" assumes the other fields are single-width floats -- confirm
         against the CLUSTERCONFIG declaration. */
      case 'C':
        ParametersRead = sscanf( optarg, "%lf", &(Config.Confidence) );
        if ( ParametersRead != 1 )
          Error = TRUE;
        else if ( Config.Confidence > 1 )
          Config.Confidence = 1;
        else if ( Config.Confidence < 0 )
          Config.Confidence = 0;
        break;

      case 'I':
        ParametersRead = sscanf( optarg, "%f", &(Config.Independence) );
        if ( ParametersRead != 1 )
          Error = TRUE;
        else if ( Config.Independence > 1 )
          Config.Independence = 1;
        else if ( Config.Independence < 0 )
          Config.Independence = 0;
        break;

      case 'M':
        ParametersRead = sscanf( optarg, "%f", &(Config.MinSamples) );
        if ( ParametersRead != 1 )
          Error = TRUE;
        else if ( Config.MinSamples > 1 )
          Config.MinSamples = 1;
        else if ( Config.MinSamples < 0 )
          Config.MinSamples = 0;
        break;

      case 'B':
        ParametersRead = sscanf( optarg, "%f", &(Config.MaxIllegal) );
        if ( ParametersRead != 1 )
          Error = TRUE;
        else if ( Config.MaxIllegal > 1 )
          Config.MaxIllegal = 1;
        else if ( Config.MaxIllegal < 0 )
          Config.MaxIllegal = 0;
        break;

      case 'R':
        ParametersRead = sscanf( optarg, "%f", &RoundingAccuracy );
        if ( ParametersRead != 1 )
          Error = TRUE;
        else if ( RoundingAccuracy > 0.01 )
          RoundingAccuracy = 0.01;
        else if ( RoundingAccuracy < 0.0 )
          RoundingAccuracy = 0.0;
        break;

      case 'S':
        switch ( optarg[0] )
        {
          case 's': Config.ProtoStyle = spherical; break;
          case 'e': Config.ProtoStyle = elliptical; break;
          case 'm': Config.ProtoStyle = mixed; break;
          case 'a': Config.ProtoStyle = automatic; break;
          default: Error = TRUE;
        }
        break;

      case 'D':
        Directory = optarg;
        break;

      case 'N':
        if (sscanf (optarg, "%d", &MaxNumSamples) != 1 ||
            MaxNumSamples <= 0)
          Error = TRUE;
        break;

      case '?':
      default:
        Error = TRUE;
        break;
    }

    if ( Error )
    {
      fprintf (stderr,
               "usage: %s [-D Directory] [-N MaxNumSamples] [-R RoundingAccuracy]\n",
               argv[0] );
      fprintf (stderr, "\t[-S ProtoStyle]\n");
      fprintf (stderr,
               "\t[-M MinSamples] [-B MaxBad] [-I Independence] [-C Confidence]\n" );
      fprintf (stderr,
               "\t[-d arg] [-n 0|1] [-p] [ TrainingPage ... ]\n");
      exit (2);
    }
  }
} /* ParseArguments */

/*---------------------------------------------------------------------------*/
char *GetNextFilename ()
/*
**  Parameters: none
**  Globals:
**    optind      defined by getopt sys call
**    Argc, Argv  global copies of argc and argv
**  Operation:
**    This routine returns the next command line argument.  If
**    there are no remaining command line arguments, it returns
**    NULL.  This routine should only be called after all option
**    arguments have been parsed and removed with ParseArguments.
**  Return: Next command line argument or NULL.
**  Exceptions: none
**  History: Fri Aug 18 09:34:12 1989, DSJ, Created.
*/

{
  if (optind < Argc)
    return (Argv [optind++]);
  else
    return (NULL);
} /* GetNextFilename */

/*---------------------------------------------------------------------------*/
void ReadTrainingSamples (
     FILE  *File,
     LIST* TrainingSamples)

/*
**  Parameters:
**    File    open text file to read samples from
**  Globals: none
**  Operation:
**    This routine reads training samples from a file and
**    places them into a data structure which organizes the
**    samples by FontName and CharName.  It then returns this
**    data structure.
**  Return: none
**  Exceptions: none
**  History: Fri Aug 18 13:11:39 1989, DSJ, Created.
**       Tue May 17 1998 simplifications to structure, eliminated
**        font, and feature specification levels of structure.
*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -