mftraining.cpp

来自「一个google的OCR源码」· C++ 代码 · 共 1,342 行 · 第 1/3 页

CPP
1,342
字号
	fprintf(File, "%s\n", MergeClass->Label);	fprintf(File, "%d\n", NumProtosIn(MergeClass->Class));	for(i=0; i < NumProtosIn(MergeClass->Class); i++)	{		Proto = ProtoIn(MergeClass->Class,i);		fprintf(File, "\t%8.4f %8.4f %8.4f %8.4f ", ProtoX(Proto), ProtoY(Proto),			ProtoLength(Proto), ProtoAngle(Proto));		Values[0] = ProtoX(Proto);		Values[1] = ProtoY(Proto);		Values[2] = ProtoAngle(Proto);		Normalize(Values);		fprintf(File, "%8.4f %8.4f %8.4f\n", Values[0], Values[1], Values[2]);	}} // WriteProtos/*----------------------------------------------------------------------------*/void WriteConfigs(	FILE* File,	CLASS_TYPE Class){	BIT_VECTOR Config;	int i, j, WordsPerConfig;	WordsPerConfig = WordsInVectorOfSize(NumProtosIn(Class));	fprintf(File, "%d %d\n", NumConfigsIn(Class),WordsPerConfig);	for(i=0; i < NumConfigsIn(Class); i++)	{		Config = ConfigIn(Class,i);		for(j=0; j < WordsPerConfig; j++)			fprintf(File, "%08x ", Config[j]);		fprintf(File, "\n");	}	fprintf(File, "\n");} // WriteConfigs/*---------------------------------------------------------------------------*/void FreeTrainingSamples (     LIST	CharList)/***	Parameters:**		FontList	list of all fonts in document**	Globals: none**	Operation:**		This routine deallocates all of the space allocated to**		the specified list of training samples.**	Return: none**	Exceptions: none**	History: Fri Aug 18 17:44:27 1989, DSJ, Created.*/{	LABELEDLIST	CharSample;	FEATURE_SET	FeatureSet;	LIST		FeatureList;// 	printf ("FreeTrainingSamples...\n");	iterate (CharList) 		/* iterate thru all of the fonts */	{		CharSample = (LABELEDLIST) first_node (CharList);		FeatureList = CharSample->List;		iterate (FeatureList)	/* iterate thru all of the classes */		{			FeatureSet = (FEATURE_SET) first_node (FeatureList);			FreeFeatureSet (FeatureSet);		}		FreeLabeledList (CharSample);	}	destroy (CharList);}	/* FreeTrainingSamples *//*-----------------------------------------------------------------------------*/void FreeLabeledClassList (     LIST	ClassList)/***	Parameters:**		FontList	list of all fonts in document**	Globals: none**	Operation:**		This routine deallocates all of the space allocated to**		the specified list of training samples.**	Return: none**	Exceptions: none**	History: Fri Aug 18 17:44:27 1989, DSJ, Created.*/{	MERGE_CLASS	MergeClass;	iterate (ClassList) 		/* iterate thru all of the fonts */	{		MergeClass = (MERGE_CLASS) first_node (ClassList);		free (MergeClass->Label);		FreeClass(MergeClass->Class);		free (MergeClass);	}	destroy (ClassList);}	/* FreeLabeledClassList *//*---------------------------------------------------------------------------*/void FreeLabeledList (     LABELEDLIST	LabeledList)/***	Parameters:**		LabeledList	labeled list to be freed**	Globals: none**	Operation:**		This routine deallocates all of the memory consumed by**		a labeled list.  It does not free any memory which may be**		consumed by the items in the list.**	Return: none**	Exceptions: none**	History: Fri Aug 18 17:52:45 1989, DSJ, Created.*/{	destroy (LabeledList->List);	free (LabeledList->Label);	free (LabeledList);}	/* FreeLabeledList *//*---------------------------------------------------------------------------*/CLUSTERER *SetUpForClustering(     LABELEDLIST	CharSample)/***	Parameters:**		CharSample: LABELEDLIST that holds all the feature information for a**		given character.**	Globals:**		None**	Operation:**		This routine reads samples from a LABELEDLIST and enters**		those samples into a clusterer data structure.  This**		data structure is then returned to the caller.**	Return:**		Pointer to new clusterer data structure.**	Exceptions:**		None**	History:**		8/16/89, DSJ, Created.*/{	uinT16	N;	int		i, j;	FLOAT32	*Sample = NULL;	CLUSTERER	*Clusterer;	inT32		CharID;	LIST FeatureList = NULL;	FEATURE_SET FeatureSet = NULL;	FEATURE_DESC FeatureDesc = NULL;//	PARAM_DESC* ParamDesc;	FeatureDesc = DefinitionOf(ShortNameToFeatureType(PROGRAM_FEATURE_TYPE));	N = FeatureDesc->NumParams;//	ParamDesc = ConvertToPARAMDESC(FeatureDesc->ParamDesc, N);	Clusterer = MakeClusterer(N,FeatureDesc->ParamDesc);//	free(ParamDesc);	FeatureList = CharSample->List;	CharID = 0;	iterate(FeatureList)	{		if (CharID >= MaxNumSamples) break;		FeatureSet = (FEATURE_SET) first_node (FeatureList);		for (i=0; i < FeatureSet->MaxNumFeatures; i++)		{			if (Sample == NULL)				Sample = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));			for (j=0; j < N; j++)				if (RoundingAccuracy != 0.0f)					Sample[j] = round(FeatureSet->Features[i]->Params[j], RoundingAccuracy);				else					Sample[j] = FeatureSet->Features[i]->Params[j];				MakeSample (Clusterer, Sample, CharID);		}		CharID++;	}	if ( Sample != NULL ) free( Sample );	return( Clusterer );}	/* SetUpForClustering *//*------------------------------------------------------------------------*/void MergeInsignificantProtos(LIST ProtoList, const char* label,                              CLUSTERER	*Clusterer, CLUSTERCONFIG *Config) {  PROTOTYPE	*Prototype;  bool debug = strcmp(test_ch, label) == 0;  LIST pProtoList = ProtoList;  iterate(pProtoList) {    Prototype = (PROTOTYPE *) first_node (pProtoList);    if (Prototype->Significant || Prototype->Merged)      continue;    FLOAT32 best_dist = 0.125;    PROTOTYPE* best_match = NULL;    // Find the nearest alive prototype.    LIST list_it = ProtoList;    iterate(list_it) {      PROTOTYPE* test_p = (PROTOTYPE *) first_node (list_it);      if (test_p != Prototype && !test_p->Merged) {        FLOAT32 dist = ComputeDistance(Clusterer->SampleSize,                                       Clusterer->ParamDesc,                                       Prototype->Mean, test_p->Mean);        if (dist < best_dist) {          best_match = test_p;          best_dist = dist;        }      }    }    if (best_match != NULL && !best_match->Significant) {      if (debug)         tprintf("Merging red clusters (%d+%d) at %g,%g and %g,%g\n",                 best_match->NumSamples, Prototype->NumSamples,                 best_match->Mean[0], best_match->Mean[1],                 Prototype->Mean[0], Prototype->Mean[1]);      best_match->NumSamples = MergeClusters(Clusterer->SampleSize,                                             Clusterer->ParamDesc,                                             best_match->NumSamples,                                             Prototype->NumSamples,                                             best_match->Mean,                                             best_match->Mean, Prototype->Mean);      Prototype->NumSamples = 0;      Prototype->Merged = 1;    } else if (best_match != NULL) {      if (debug)        tprintf("Red proto at %g,%g matched a green one at %g,%g\n",                Prototype->Mean[0], Prototype->Mean[1],                best_match->Mean[0], best_match->Mean[1]);      Prototype->Merged = 1;    }  }  // Mark significant those that now have enough samples.  int min_samples = (inT32) (Config->MinSamples * Clusterer->NumChar);  pProtoList = ProtoList;  iterate(pProtoList) {    Prototype = (PROTOTYPE *) first_node (pProtoList);    // Process insignificant protos that do not match a green one    if (!Prototype->Significant && Prototype->NumSamples >= min_samples &&        !Prototype->Merged) {      if (debug)        tprintf("Red proto at %g,%g becoming green\n",                Prototype->Mean[0], Prototype->Mean[1]);      Prototype->Significant = true;    }  }}	/* MergeInsignificantProtos *//*------------------------------------------------------------------------*/LIST RemoveInsignificantProtos(	LIST ProtoList,	BOOL8 KeepSigProtos,	BOOL8 KeepInsigProtos,	int N){	LIST NewProtoList = NIL;	LIST pProtoList;	PROTOTYPE* Proto;	PROTOTYPE* NewProto;	int i;	pProtoList = ProtoList;	iterate(pProtoList)	{		Proto = (PROTOTYPE *) first_node (pProtoList);		if ((Proto->Significant && KeepSigProtos) ||			(!Proto->Significant && KeepInsigProtos))		{			NewProto = (PROTOTYPE *)Emalloc(sizeof(PROTOTYPE));			NewProto->Mean = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));			NewProto->Significant = Proto->Significant;			NewProto->Style = Proto->Style;			NewProto->NumSamples = Proto->NumSamples;			NewProto->Cluster = NULL;			NewProto->Distrib = NULL;			for (i=0; i < N; i++)				NewProto->Mean[i] = Proto->Mean[i];			if (Proto->Variance.Elliptical != NULL)			{				NewProto->Variance.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));				for (i=0; i < N; i++)					NewProto->Variance.Elliptical[i] = Proto->Variance.Elliptical[i];			}			else				NewProto->Variance.Elliptical = NULL;			//---------------------------------------------			if (Proto->Magnitude.Elliptical != NULL)			{				NewProto->Magnitude.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));				for (i=0; i < N; i++)					NewProto->Magnitude.Elliptical[i] = Proto->Magnitude.Elliptical[i];			}			else				NewProto->Magnitude.Elliptical = NULL;			//------------------------------------------------			if (Proto->Weight.Elliptical != NULL)			{				NewProto->Weight.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));				for (i=0; i < N; i++)					NewProto->Weight.Elliptical[i] = Proto->Weight.Elliptical[i];			}			else				NewProto->Weight.Elliptical = NULL;			NewProto->TotalMagnitude = Proto->TotalMagnitude;			NewProto->LogMagnitude = Proto->LogMagnitude;			NewProtoList = push_last(NewProtoList, NewProto);		}	}	//FreeProtoList (ProtoList);	return (NewProtoList);}	/* RemoveInsignificantProtos *//*-----------------------------------------------------------------------------*/void CleanUpUnusedData(	LIST ProtoList){	PROTOTYPE* Prototype;	iterate(ProtoList)	{		Prototype = (PROTOTYPE *) first_node (ProtoList);		if(Prototype->Variance.Elliptical != NULL)		{			memfree(Prototype->Variance.Elliptical);			Prototype->Variance.Elliptical = NULL;		}		if(Prototype->Magnitude.Elliptical != NULL)		{			memfree(Prototype->Magnitude.Elliptical);			Prototype->Magnitude.Elliptical = NULL;		}		if(Prototype->Weight.Elliptical != NULL)		{			memfree(Prototype->Weight.Elliptical);			Prototype->Weight.Elliptical = NULL;		}	}}/*--------------------------------------------------------------------------*/void Normalize (   float  *Values){	register float Slope;	register float Intercept;	register float Normalizer;	Slope      = tan (Values [2] * 2 * PI);	Intercept  = Values [1] - Slope * Values [0];	Normalizer = 1 / sqrt (Slope * Slope + 1.0);	Values [0] = Slope * Normalizer;	Values [1] = - Normalizer;	Values [2] = Intercept * Normalizer;} // Normalize/** SetUpForFloat2Int **************************************************/void SetUpForFloat2Int(	LIST LabeledClassList){	MERGE_CLASS	MergeClass;	CLASS_TYPE		Class;	int				NumProtos;	int				NumConfigs;	int				NumWords;	int				i, j;	float			Values[3];	PROTO			NewProto;	PROTO			OldProto;	BIT_VECTOR		NewConfig;	BIT_VECTOR		OldConfig;// 	printf("Float2Int ...\n");	iterate(LabeledClassList)	{		MergeClass = (MERGE_CLASS) first_node (LabeledClassList);		Class = &TrainingData[unicharset_mftraining.unichar_to_id(                                          MergeClass->Label)];		NumProtos = NumProtosIn(MergeClass->Class);		NumConfigs = NumConfigsIn(MergeClass->Class);		NumProtosIn(Class) = NumProtos;		Class->MaxNumProtos = NumProtos;		Class->Prototypes = (PROTO) Emalloc (sizeof(PROTO_STRUCT) * NumProtos);		for(i=0; i < NumProtos; i++)		{			NewProto = ProtoIn(Class, i);			OldProto = ProtoIn(MergeClass->Class, i);			Values[0] = ProtoX(OldProto);			Values[1] = ProtoY(OldProto);			Values[2] = ProtoAngle(OldProto);			Normalize(Values);			ProtoX(NewProto) = ProtoX(OldProto);			ProtoY(NewProto) = ProtoY(OldProto);			ProtoLength(NewProto) = ProtoLength(OldProto);			ProtoAngle(NewProto) = ProtoAngle(OldProto);			CoefficientA(NewProto) = Values[0];			CoefficientB(NewProto) = Values[1];			CoefficientC(NewProto) = Values[2];		}		NumConfigsIn(Class) = NumConfigs;		Class->MaxNumConfigs = NumConfigs;		Class->Configurations = (BIT_VECTOR*) Emalloc (sizeof(BIT_VECTOR) * NumConfigs);		NumWords = WordsInVectorOfSize(NumProtos);		for(i=0; i < NumConfigs; i++)		{			NewConfig = NewBitVector(NumProtos);			OldConfig = ConfigIn(MergeClass->Class, i);			for(j=0; j < NumWords; j++)				NewConfig[j] = OldConfig[j];			ConfigIn(Class, i) = NewConfig;		}	}} // SetUpForFloat2Int/*--------------------------------------------------------------------------*/void WritePFFMTable(INT_TEMPLATES Templates, const char* filename) {  FILE* fp = Efopen(filename, "wb");  /* then write out each class */  for (int i = 0; i < NumClassesIn (Templates); i++) {    int MaxLength = 0;    INT_CLASS Class = ClassForIndex (Templates, i);    for (int ConfigId = 0; ConfigId < NumIntConfigsIn (Class); ConfigId++) {      if (LengthForConfigId (Class, ConfigId) > MaxLength)        MaxLength = LengthForConfigId (Class, ConfigId);    }    fprintf(fp, "%s %d\n", unicharset_mftraining.id_to_unichar(                ClassIdForIndex(Templates, i)), MaxLength);  }  fclose(fp);} // WritePFFMTable

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?