mftraining.cpp
来自「一个google的OCR源码」· C++ 代码 · 共 1,342 行 · 第 1/3 页
CPP
1,342 行
fprintf(File, "%s\n", MergeClass->Label); fprintf(File, "%d\n", NumProtosIn(MergeClass->Class)); for(i=0; i < NumProtosIn(MergeClass->Class); i++) { Proto = ProtoIn(MergeClass->Class,i); fprintf(File, "\t%8.4f %8.4f %8.4f %8.4f ", ProtoX(Proto), ProtoY(Proto), ProtoLength(Proto), ProtoAngle(Proto)); Values[0] = ProtoX(Proto); Values[1] = ProtoY(Proto); Values[2] = ProtoAngle(Proto); Normalize(Values); fprintf(File, "%8.4f %8.4f %8.4f\n", Values[0], Values[1], Values[2]); }} // WriteProtos/*----------------------------------------------------------------------------*/void WriteConfigs( FILE* File, CLASS_TYPE Class){ BIT_VECTOR Config; int i, j, WordsPerConfig; WordsPerConfig = WordsInVectorOfSize(NumProtosIn(Class)); fprintf(File, "%d %d\n", NumConfigsIn(Class),WordsPerConfig); for(i=0; i < NumConfigsIn(Class); i++) { Config = ConfigIn(Class,i); for(j=0; j < WordsPerConfig; j++) fprintf(File, "%08x ", Config[j]); fprintf(File, "\n"); } fprintf(File, "\n");} // WriteConfigs/*---------------------------------------------------------------------------*/void FreeTrainingSamples ( LIST CharList)/*** Parameters:** FontList list of all fonts in document** Globals: none** Operation:** This routine deallocates all of the space allocated to** the specified list of training samples.** Return: none** Exceptions: none** History: Fri Aug 18 17:44:27 1989, DSJ, Created.*/{ LABELEDLIST CharSample; FEATURE_SET FeatureSet; LIST FeatureList;// printf ("FreeTrainingSamples...\n"); iterate (CharList) /* iterate thru all of the fonts */ { CharSample = (LABELEDLIST) first_node (CharList); FeatureList = CharSample->List; iterate (FeatureList) /* iterate thru all of the classes */ { FeatureSet = (FEATURE_SET) first_node (FeatureList); FreeFeatureSet (FeatureSet); } FreeLabeledList (CharSample); } destroy (CharList);} /* FreeTrainingSamples *//*-----------------------------------------------------------------------------*/void FreeLabeledClassList ( LIST ClassList)/*** Parameters:** FontList list of all fonts in document** Globals: none** Operation:** This routine deallocates all of the space allocated to** the specified list of training samples.** Return: none** Exceptions: none** History: Fri Aug 18 17:44:27 1989, DSJ, Created.*/{ MERGE_CLASS MergeClass; iterate (ClassList) /* iterate thru all of the fonts */ { MergeClass = (MERGE_CLASS) first_node (ClassList); free (MergeClass->Label); FreeClass(MergeClass->Class); free (MergeClass); } destroy (ClassList);} /* FreeLabeledClassList *//*---------------------------------------------------------------------------*/void FreeLabeledList ( LABELEDLIST LabeledList)/*** Parameters:** LabeledList labeled list to be freed** Globals: none** Operation:** This routine deallocates all of the memory consumed by** a labeled list. It does not free any memory which may be** consumed by the items in the list.** Return: none** Exceptions: none** History: Fri Aug 18 17:52:45 1989, DSJ, Created.*/{ destroy (LabeledList->List); free (LabeledList->Label); free (LabeledList);} /* FreeLabeledList *//*---------------------------------------------------------------------------*/CLUSTERER *SetUpForClustering( LABELEDLIST CharSample)/*** Parameters:** CharSample: LABELEDLIST that holds all the feature information for a** given character.** Globals:** None** Operation:** This routine reads samples from a LABELEDLIST and enters** those samples into a clusterer data structure. This** data structure is then returned to the caller.** Return:** Pointer to new clusterer data structure.** Exceptions:** None** History:** 8/16/89, DSJ, Created.*/{ uinT16 N; int i, j; FLOAT32 *Sample = NULL; CLUSTERER *Clusterer; inT32 CharID; LIST FeatureList = NULL; FEATURE_SET FeatureSet = NULL; FEATURE_DESC FeatureDesc = NULL;// PARAM_DESC* ParamDesc; FeatureDesc = DefinitionOf(ShortNameToFeatureType(PROGRAM_FEATURE_TYPE)); N = FeatureDesc->NumParams;// ParamDesc = ConvertToPARAMDESC(FeatureDesc->ParamDesc, N); Clusterer = MakeClusterer(N,FeatureDesc->ParamDesc);// free(ParamDesc); FeatureList = CharSample->List; CharID = 0; iterate(FeatureList) { if (CharID >= MaxNumSamples) break; FeatureSet = (FEATURE_SET) first_node (FeatureList); for (i=0; i < FeatureSet->MaxNumFeatures; i++) { if (Sample == NULL) Sample = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32)); for (j=0; j < N; j++) if (RoundingAccuracy != 0.0f) Sample[j] = round(FeatureSet->Features[i]->Params[j], RoundingAccuracy); else Sample[j] = FeatureSet->Features[i]->Params[j]; MakeSample (Clusterer, Sample, CharID); } CharID++; } if ( Sample != NULL ) free( Sample ); return( Clusterer );} /* SetUpForClustering *//*------------------------------------------------------------------------*/void MergeInsignificantProtos(LIST ProtoList, const char* label, CLUSTERER *Clusterer, CLUSTERCONFIG *Config) { PROTOTYPE *Prototype; bool debug = strcmp(test_ch, label) == 0; LIST pProtoList = ProtoList; iterate(pProtoList) { Prototype = (PROTOTYPE *) first_node (pProtoList); if (Prototype->Significant || Prototype->Merged) continue; FLOAT32 best_dist = 0.125; PROTOTYPE* best_match = NULL; // Find the nearest alive prototype. LIST list_it = ProtoList; iterate(list_it) { PROTOTYPE* test_p = (PROTOTYPE *) first_node (list_it); if (test_p != Prototype && !test_p->Merged) { FLOAT32 dist = ComputeDistance(Clusterer->SampleSize, Clusterer->ParamDesc, Prototype->Mean, test_p->Mean); if (dist < best_dist) { best_match = test_p; best_dist = dist; } } } if (best_match != NULL && !best_match->Significant) { if (debug) tprintf("Merging red clusters (%d+%d) at %g,%g and %g,%g\n", best_match->NumSamples, Prototype->NumSamples, best_match->Mean[0], best_match->Mean[1], Prototype->Mean[0], Prototype->Mean[1]); best_match->NumSamples = MergeClusters(Clusterer->SampleSize, Clusterer->ParamDesc, best_match->NumSamples, Prototype->NumSamples, best_match->Mean, best_match->Mean, Prototype->Mean); Prototype->NumSamples = 0; Prototype->Merged = 1; } else if (best_match != NULL) { if (debug) tprintf("Red proto at %g,%g matched a green one at %g,%g\n", Prototype->Mean[0], Prototype->Mean[1], best_match->Mean[0], best_match->Mean[1]); Prototype->Merged = 1; } } // Mark significant those that now have enough samples. int min_samples = (inT32) (Config->MinSamples * Clusterer->NumChar); pProtoList = ProtoList; iterate(pProtoList) { Prototype = (PROTOTYPE *) first_node (pProtoList); // Process insignificant protos that do not match a green one if (!Prototype->Significant && Prototype->NumSamples >= min_samples && !Prototype->Merged) { if (debug) tprintf("Red proto at %g,%g becoming green\n", Prototype->Mean[0], Prototype->Mean[1]); Prototype->Significant = true; } }} /* MergeInsignificantProtos *//*------------------------------------------------------------------------*/LIST RemoveInsignificantProtos( LIST ProtoList, BOOL8 KeepSigProtos, BOOL8 KeepInsigProtos, int N){ LIST NewProtoList = NIL; LIST pProtoList; PROTOTYPE* Proto; PROTOTYPE* NewProto; int i; pProtoList = ProtoList; iterate(pProtoList) { Proto = (PROTOTYPE *) first_node (pProtoList); if ((Proto->Significant && KeepSigProtos) || (!Proto->Significant && KeepInsigProtos)) { NewProto = (PROTOTYPE *)Emalloc(sizeof(PROTOTYPE)); NewProto->Mean = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32)); NewProto->Significant = Proto->Significant; NewProto->Style = Proto->Style; NewProto->NumSamples = Proto->NumSamples; NewProto->Cluster = NULL; NewProto->Distrib = NULL; for (i=0; i < N; i++) NewProto->Mean[i] = Proto->Mean[i]; if (Proto->Variance.Elliptical != NULL) { NewProto->Variance.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32)); for (i=0; i < N; i++) NewProto->Variance.Elliptical[i] = Proto->Variance.Elliptical[i]; } else NewProto->Variance.Elliptical = NULL; //--------------------------------------------- if (Proto->Magnitude.Elliptical != NULL) { NewProto->Magnitude.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32)); for (i=0; i < N; i++) NewProto->Magnitude.Elliptical[i] = Proto->Magnitude.Elliptical[i]; } else NewProto->Magnitude.Elliptical = NULL; //------------------------------------------------ if (Proto->Weight.Elliptical != NULL) { NewProto->Weight.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32)); for (i=0; i < N; i++) NewProto->Weight.Elliptical[i] = Proto->Weight.Elliptical[i]; } else NewProto->Weight.Elliptical = NULL; NewProto->TotalMagnitude = Proto->TotalMagnitude; NewProto->LogMagnitude = Proto->LogMagnitude; NewProtoList = push_last(NewProtoList, NewProto); } } //FreeProtoList (ProtoList); return (NewProtoList);} /* RemoveInsignificantProtos *//*-----------------------------------------------------------------------------*/void CleanUpUnusedData( LIST ProtoList){ PROTOTYPE* Prototype; iterate(ProtoList) { Prototype = (PROTOTYPE *) first_node (ProtoList); if(Prototype->Variance.Elliptical != NULL) { memfree(Prototype->Variance.Elliptical); Prototype->Variance.Elliptical = NULL; } if(Prototype->Magnitude.Elliptical != NULL) { memfree(Prototype->Magnitude.Elliptical); Prototype->Magnitude.Elliptical = NULL; } if(Prototype->Weight.Elliptical != NULL) { memfree(Prototype->Weight.Elliptical); Prototype->Weight.Elliptical = NULL; } }}/*--------------------------------------------------------------------------*/void Normalize ( float *Values){ register float Slope; register float Intercept; register float Normalizer; Slope = tan (Values [2] * 2 * PI); Intercept = Values [1] - Slope * Values [0]; Normalizer = 1 / sqrt (Slope * Slope + 1.0); Values [0] = Slope * Normalizer; Values [1] = - Normalizer; Values [2] = Intercept * Normalizer;} // Normalize/** SetUpForFloat2Int **************************************************/void SetUpForFloat2Int( LIST LabeledClassList){ MERGE_CLASS MergeClass; CLASS_TYPE Class; int NumProtos; int NumConfigs; int NumWords; int i, j; float Values[3]; PROTO NewProto; PROTO OldProto; BIT_VECTOR NewConfig; BIT_VECTOR OldConfig;// printf("Float2Int ...\n"); iterate(LabeledClassList) { MergeClass = (MERGE_CLASS) first_node (LabeledClassList); Class = &TrainingData[unicharset_mftraining.unichar_to_id( MergeClass->Label)]; NumProtos = NumProtosIn(MergeClass->Class); NumConfigs = NumConfigsIn(MergeClass->Class); NumProtosIn(Class) = NumProtos; Class->MaxNumProtos = NumProtos; Class->Prototypes = (PROTO) Emalloc (sizeof(PROTO_STRUCT) * NumProtos); for(i=0; i < NumProtos; i++) { NewProto = ProtoIn(Class, i); OldProto = ProtoIn(MergeClass->Class, i); Values[0] = ProtoX(OldProto); Values[1] = ProtoY(OldProto); Values[2] = ProtoAngle(OldProto); Normalize(Values); ProtoX(NewProto) = ProtoX(OldProto); ProtoY(NewProto) = ProtoY(OldProto); ProtoLength(NewProto) = ProtoLength(OldProto); ProtoAngle(NewProto) = ProtoAngle(OldProto); CoefficientA(NewProto) = Values[0]; CoefficientB(NewProto) = Values[1]; CoefficientC(NewProto) = Values[2]; } NumConfigsIn(Class) = NumConfigs; Class->MaxNumConfigs = NumConfigs; Class->Configurations = (BIT_VECTOR*) Emalloc (sizeof(BIT_VECTOR) * NumConfigs); NumWords = WordsInVectorOfSize(NumProtos); for(i=0; i < NumConfigs; i++) { NewConfig = NewBitVector(NumProtos); OldConfig = ConfigIn(MergeClass->Class, i); for(j=0; j < NumWords; j++) NewConfig[j] = OldConfig[j]; ConfigIn(Class, i) = NewConfig; } }} // SetUpForFloat2Int/*--------------------------------------------------------------------------*/void WritePFFMTable(INT_TEMPLATES Templates, const char* filename) { FILE* fp = Efopen(filename, "wb"); /* then write out each class */ for (int i = 0; i < NumClassesIn (Templates); i++) { int MaxLength = 0; INT_CLASS Class = ClassForIndex (Templates, i); for (int ConfigId = 0; ConfigId < NumIntConfigsIn (Class); ConfigId++) { if (LengthForConfigId (Class, ConfigId) > MaxLength) MaxLength = LengthForConfigId (Class, ConfigId); } fprintf(fp, "%s %d\n", unicharset_mftraining.id_to_unichar( ClassIdForIndex(Templates, i)), MaxLength); } fclose(fp);} // WritePFFMTable
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?