mftraining.cpp
来自「一个google的OCR源码」· C++ 代码 · 共 1,342 行 · 第 1/3 页
CPP
1,342 行
/******************************************************************************
**	Filename:    mfTraining.c
**	Purpose:     Separates training pages into files for each character.
**	             Strips from files only the features and their parameters of
**	             the feature type mf.
**	Author:      Dan Johnson
**	Revisment:   Christy Russon
**	Environment: HPUX 6.5
**	Library:     HPUX 6.5
**	History:     Fri Aug 18 08:53:50 1989, DSJ, Created.
**	             5/25/90, DSJ, Adapted to multiple feature types.
**
**	Tuesday, May 17, 1998 Changes made to make feature specific and
**	simplify structures. First step in simplifying training process.
**
**	(c) Copyright Hewlett-Packard Company, 1988.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and ** limitations under the License.******************************************************************************//**---------------------------------------------------------------------------- Include Files and Type Defines----------------------------------------------------------------------------**/#include "oldlist.h"#include "efio.h"#include "emalloc.h"#include "featdefs.h"#include "tessopt.h"#include "ocrfeatures.h"#include "mf.h"#include "general.h"#include "clusttool.h"#include "cluster.h"#include "protos.h"#include "minmax.h"#include "debug.h"#include "tprintf.h"#include "const.h"#include "mergenf.h"#include "name2char.h"#include "intproto.h"#include "variables.h"#include "freelist.h"#include "efio.h"#include "danerror.h"#include "globals.h"#include <string.h>#include <stdio.h>#define _USE_MATH_DEFINES#include <math.h>#ifdef WIN32#ifndef M_PI#define M_PI 3.14159265358979323846#endif#endif#define MAXNAMESIZE 80#define MAX_NUM_SAMPLES 10000#define PROGRAM_FEATURE_TYPE "mf"#define MINSD (1.0f / 128.0f)#define MINSD_ANGLE (1.0f / 64.0f)int row_number; /* cjn: fixes link problem */typedef struct{ char *Label; int SampleCount; LIST List;}LABELEDLISTNODE, *LABELEDLIST;typedef struct{ char* Label; int NumMerged[MAX_NUM_PROTOS]; CLASS_TYPE Class;}MERGE_CLASS_NODE;typedef MERGE_CLASS_NODE* MERGE_CLASS;#define round(x,frag)(floor(x/frag+.5)*frag)/**---------------------------------------------------------------------------- Public Function Prototypes----------------------------------------------------------------------------**/int main ( int argc, char **argv);/**---------------------------------------------------------------------------- Private Function Prototypes----------------------------------------------------------------------------**/void ParseArguments(int argc,char **argv);char *GetNextFilename ();LIST ReadTrainingSamples ( FILE *File);LABELEDLIST FindList ( LIST List, char 
*Label);MERGE_CLASS FindClass ( LIST List, char *Label);LABELEDLIST NewLabeledList ( char *Label);MERGE_CLASS NewLabeledClass ( char *Label);void WriteTrainingSamples ( char *Directory, LIST CharList);void WriteClusteredTrainingSamples ( char *Directory, LIST ProtoList, CLUSTERER *Clusterer, LABELEDLIST CharSample);/**/void WriteMergedTrainingSamples( char *Directory, LIST ClassList);void WriteMicrofeat( char *Directory, LIST ClassList);void WriteProtos( FILE* File, MERGE_CLASS MergeClass);void WriteConfigs( FILE* File, CLASS_TYPE Class);void FreeTrainingSamples ( LIST CharList);void FreeLabeledClassList ( LIST ClassList);void FreeLabeledList ( LABELEDLIST LabeledList);CLUSTERER *SetUpForClustering( LABELEDLIST CharSample);/*PARAMDESC *ConvertToPARAMDESC( PARAM_DESC* Param_Desc, int N);*/void MergeInsignificantProtos(LIST ProtoList, const char* label, CLUSTERER *Clusterer, CLUSTERCONFIG *Config);LIST RemoveInsignificantProtos( LIST ProtoList, BOOL8 KeepSigProtos, BOOL8 KeepInsigProtos, int N);void CleanUpUnusedData( LIST ProtoList);void Normalize ( float *Values);void SetUpForFloat2Int( LIST LabeledClassList);void WritePFFMTable(INT_TEMPLATES Templates, const char* filename);//--------------Global Data Definitions and Declarations--------------static char FontName[MAXNAMESIZE];// globals used for parsing command line argumentsstatic char *Directory = NULL;static int MaxNumSamples = MAX_NUM_SAMPLES;static int Argc;static char **Argv;// globals used to control what information is saved in the output filestatic BOOL8 ShowAllSamples = FALSE;static BOOL8 ShowSignificantProtos = TRUE;static BOOL8 ShowInsignificantProtos = FALSE;// global variable to hold configuration parameters to control clustering// -M 0.40 -B 0.05 -I 1.0 -C 1e-6.static CLUSTERCONFIG Config ={ elliptical, 0.625, 0.05, 1.0, 1e-6, 0 };static FLOAT32 RoundingAccuracy = 0.0f;// The unicharset used during mftrainingstatic UNICHARSET unicharset_mftraining;const char* test_ch = 
"";/*---------------------------------------------------------------------------- Public Code-----------------------------------------------------------------------------*/void DisplayProtoList(const char* ch, LIST protolist) { void* window = c_create_window("Char samples", 50, 200, 520, 520, -130.0, 130.0, -130.0, 130.0); LIST proto = protolist; iterate(proto) { PROTOTYPE* prototype = reinterpret_cast<PROTOTYPE *>(first_node(proto)); if (prototype->Significant) c_line_color_index(window, Green); else if (prototype->NumSamples == 0) c_line_color_index(window, Blue); else if (prototype->Merged) c_line_color_index(window, Magenta); else c_line_color_index(window, Red); float x = CenterX(prototype->Mean); float y = CenterY(prototype->Mean); double angle = OrientationOf(prototype->Mean) * 2 * M_PI; float dx = static_cast<float>(LengthOf(prototype->Mean) * cos(angle) / 2); float dy = static_cast<float>(LengthOf(prototype->Mean) * sin(angle) / 2); c_move(window, (x - dx) * 256, (y - dy) * 256); c_draw(window, (x + dx) * 256, (y + dy) * 256); if (prototype->Significant) tprintf("Green proto at (%g,%g)+(%g,%g) %d samples\n", x, y, dx, dy, prototype->NumSamples); else if (prototype->NumSamples > 0 && !prototype->Merged) tprintf("Red proto at (%g,%g)+(%g,%g) %d samples\n", x, y, dx, dy, prototype->NumSamples); } c_make_current(window);}/*---------------------------------------------------------------------------*/int main (int argc, char **argv) {/*** Parameters:** argc number of command line arguments** argv array of command line arguments** Globals: none** Operation:** This program reads in a text file consisting of feature** samples from a training page in the following format:**** FontName CharName NumberOfFeatureTypes(N)** FeatureTypeName1 NumberOfFeatures(M)** Feature1** ...** FeatureM** FeatureTypeName2 NumberOfFeatures(M)** Feature1** ...** FeatureM** ...** FeatureTypeNameN NumberOfFeatures(M)** Feature1** ...** FeatureM** FontName CharName ...**** The result of this 
program is a binary inttemp file used by** the OCR engine.** Return: none** Exceptions: none** History: Fri Aug 18 08:56:17 1989, DSJ, Created.** Mon May 18 1998, Christy Russson, Revistion started.*/ char *PageName; FILE *TrainingPage; FILE *OutFile; LIST CharList; CLUSTERER *Clusterer = NULL; LIST ProtoList = NIL; LABELEDLIST CharSample; PROTOTYPE *Prototype; LIST ClassList = NIL; int Cid, Pid; PROTO Proto; PROTO_STRUCT DummyProto; BIT_VECTOR Config2; MERGE_CLASS MergeClass; INT_TEMPLATES IntTemplates; LIST pCharList, pProtoList; char Filename[MAXNAMESIZE]; // Clean the unichar set unicharset_mftraining.clear(); // Space character needed to represent NIL classification unicharset_mftraining.unichar_insert(" "); ParseArguments (argc, argv); InitFastTrainerVars (); InitSubfeatureVars (); while ((PageName = GetNextFilename()) != NULL) { printf ("Reading %s ...\n", PageName); TrainingPage = Efopen (PageName, "r"); CharList = ReadTrainingSamples (TrainingPage); fclose (TrainingPage); //WriteTrainingSamples (Directory, CharList); pCharList = CharList; iterate(pCharList) { //Cluster CharSample = (LABELEDLIST) first_node (pCharList);// printf ("\nClustering %s ...", CharSample->Label); Clusterer = SetUpForClustering(CharSample); Config.MagicSamples = CharSample->SampleCount; ProtoList = ClusterSamples(Clusterer, &Config); CleanUpUnusedData(ProtoList); //Merge MergeInsignificantProtos(ProtoList, CharSample->Label, Clusterer, &Config); if (strcmp(test_ch, CharSample->Label) == 0) DisplayProtoList(test_ch, ProtoList); ProtoList = RemoveInsignificantProtos(ProtoList, ShowSignificantProtos, ShowInsignificantProtos, Clusterer->SampleSize); FreeClusterer(Clusterer); MergeClass = FindClass (ClassList, CharSample->Label); if (MergeClass == NULL) { MergeClass = NewLabeledClass (CharSample->Label); ClassList = push (ClassList, MergeClass); } Cid = AddConfigToClass(MergeClass->Class); pProtoList = ProtoList; iterate (pProtoList) { Prototype = (PROTOTYPE *) first_node (pProtoList); 
// see if proto can be approximated by existing proto Pid = FindClosestExistingProto(MergeClass->Class, MergeClass->NumMerged, Prototype); if (Pid == NO_PROTO) { Pid = AddProtoToClass (MergeClass->Class); Proto = ProtoIn (MergeClass->Class, Pid); MakeNewFromOld (Proto, Prototype); MergeClass->NumMerged[Pid] = 1; } else { MakeNewFromOld (&DummyProto, Prototype); ComputeMergedProto (ProtoIn (MergeClass->Class, Pid), &DummyProto, (FLOAT32) MergeClass->NumMerged[Pid], 1.0, ProtoIn (MergeClass->Class, Pid)); MergeClass->NumMerged[Pid] ++; } Config2 = ConfigIn (MergeClass->Class, Cid); AddProtoToConfig (Pid, Config2); } FreeProtoList (&ProtoList); } FreeTrainingSamples (CharList); } //WriteMergedTrainingSamples(Directory,ClassList); WriteMicrofeat(Directory, ClassList); InitIntProtoVars (); InitPrototypes (); SetUpForFloat2Int(ClassList); IntTemplates = CreateIntTemplates(TrainingData, unicharset_mftraining); strcpy (Filename, ""); if (Directory != NULL) { strcat (Filename, Directory); strcat (Filename, "/"); } strcat (Filename, "inttemp");#ifdef __UNIX__ OutFile = Efopen (Filename, "w");#else OutFile = Efopen (Filename, "wb");#endif WriteIntTemplates(OutFile, IntTemplates, unicharset_mftraining); fclose (OutFile); strcpy (Filename, ""); if (Directory != NULL) { strcat (Filename, Directory); strcat (Filename, "/"); } strcat (Filename, "pffmtable"); // Now create pffmtable. 
WritePFFMTable(IntTemplates, Filename); printf ("Done!\n"); /**/ FreeLabeledClassList (ClassList); return 0;} /* main *//**---------------------------------------------------------------------------- Private Code----------------------------------------------------------------------------**//*---------------------------------------------------------------------------*/void ParseArguments(int argc,char **argv)/*** Parameters:** argc number of command line arguments to parse** argv command line arguments** Globals:** ShowAllSamples flag controlling samples display** ShowSignificantProtos flag controlling proto display** ShowInsignificantProtos flag controlling proto display** Config current clustering parameters** tessoptarg, tessoptind defined by tessopt sys call** Argc, Argv global copies of argc and argv** Operation:** This routine parses the command line arguments that were** passed to the program. The legal arguments are:** -d "turn off display of samples"** -p "turn off significant protos"** -n "turn off insignificant proto"** -S [ spherical | elliptical | mixed | automatic ]** -M MinSamples "min samples per prototype (%)"** -B MaxIllegal "max illegal chars per cluster (%)"** -I Independence "0 to 1"** -C Confidence "1e-200 to 1.0"** -D Directory** -N MaxNumSamples** -R RoundingAccuracy** Return: none** Exceptions: Illegal options terminate the program.** History: 7/24/89, DSJ, Created.*/{ int Option; int ParametersRead; BOOL8 Error; Error = FALSE; Argc = argc; Argv = argv; while (( Option = tessopt( argc, argv, "R:N:D:C:I:M:B:S:d:n:p" )) != EOF ) { switch ( Option ) { case 'n': ShowInsignificantProtos = FALSE; break; case 'p': ShowSignificantProtos = FALSE;
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?