⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cluster.cpp

📁 一OCR的相关资料。.希望对研究OCR的朋友有所帮助.
💻 CPP
📖 第 1 页 / 共 5 页
字号:
      can be, then a new prototype is formed and returned to the      caller.  If it can't be, then NULL is returned to the caller.Return:		Pointer to new elliptical prototype or NULL.Exceptions:	NoneHistory:	6/12/89, DSJ, Created.****************************************************************************/PROTOTYPE *MakeEllipticalProto(CLUSTERER *Clusterer,                               CLUSTER *Cluster,                               STATISTICS *Statistics,                               BUCKETS *Buckets) {  PROTOTYPE *Proto = NULL;  int i;  // check that each dimension is a normal distribution  for (i = 0; i < Clusterer->SampleSize; i++) {    if (Clusterer->ParamDesc[i].NonEssential)      continue;    FillBuckets (Buckets, Cluster, i, &(Clusterer->ParamDesc[i]),      Cluster->Mean[i],      sqrt ((FLOAT64) Statistics->      CoVariance[i * (Clusterer->SampleSize + 1)]));    if (!DistributionOK (Buckets))      break;  }  // if all dimensions matched a normal distribution, make a proto  if (i >= Clusterer->SampleSize)    Proto = NewEllipticalProto (Clusterer->SampleSize, Cluster, Statistics);  return (Proto);}                                // MakeEllipticalProto/** MakeMixedProto ***********************************************************Parameters:	Clusterer	data struct containing samples being clustered      Cluster		cluster to be made into a prototype      Statistics	statistical info about cluster      NormalBuckets	histogram struct used to analyze distribution      Confidence	confidence level for alternate distributionsGlobals:	NoneOperation:	This routine tests each dimension of the specified cluster to      see what distribution would best approximate that dimension.      Each dimension is compared to the following distributions      in order: normal, random, uniform.  If each dimension can      be represented by one of these distributions,      then a new prototype is formed and returned to the      caller.  If it can't be, then NULL is returned to the caller.Return:		Pointer to new mixed prototype or NULL.Exceptions:	NoneHistory:	6/12/89, DSJ, Created.********************************************************************************/PROTOTYPE *MakeMixedProto(CLUSTERER *Clusterer,                          CLUSTER *Cluster,                          STATISTICS *Statistics,                          BUCKETS *NormalBuckets,                          FLOAT64 Confidence) {  PROTOTYPE *Proto;  int i;  BUCKETS *UniformBuckets = NULL;  BUCKETS *RandomBuckets = NULL;  // create a mixed proto to work on - initially assume all dimensions normal*/  Proto = NewMixedProto (Clusterer->SampleSize, Cluster, Statistics);  // find the proper distribution for each dimension  for (i = 0; i < Clusterer->SampleSize; i++) {    if (Clusterer->ParamDesc[i].NonEssential)      continue;    FillBuckets (NormalBuckets, Cluster, i, &(Clusterer->ParamDesc[i]),      Proto->Mean[i],      sqrt ((FLOAT64) Proto->Variance.Elliptical[i]));    if (DistributionOK (NormalBuckets))      continue;    if (RandomBuckets == NULL)      RandomBuckets =        GetBuckets (D_random, Cluster->SampleCount, Confidence);    MakeDimRandom (i, Proto, &(Clusterer->ParamDesc[i]));    FillBuckets (RandomBuckets, Cluster, i, &(Clusterer->ParamDesc[i]),      Proto->Mean[i], Proto->Variance.Elliptical[i]);    if (DistributionOK (RandomBuckets))      continue;    if (UniformBuckets == NULL)      UniformBuckets =        GetBuckets (uniform, Cluster->SampleCount, Confidence);    MakeDimUniform(i, Proto, Statistics);    FillBuckets (UniformBuckets, Cluster, i, &(Clusterer->ParamDesc[i]),      Proto->Mean[i], Proto->Variance.Elliptical[i]);    if (DistributionOK (UniformBuckets))      continue;    break;  }  // if any dimension failed to match a distribution, discard the proto  if (i < Clusterer->SampleSize) {    FreePrototype(Proto);    Proto = NULL;  }  if (UniformBuckets != NULL)    FreeBuckets(UniformBuckets);  if (RandomBuckets != NULL)    FreeBuckets(RandomBuckets);  return (Proto);}                                // MakeMixedProto/* MakeDimRandom *************************************************************Parameters:	i		index of dimension to be changed      Proto		prototype whose dimension is to be altered      ParamDesc	description of specified dimensionGlobals:	NoneOperation:	This routine alters the ith dimension of the specified      mixed prototype to be D_random.Return:		NoneExceptions:	NoneHistory:	6/20/89, DSJ, Created.******************************************************************************/void MakeDimRandom(UINT16 i, PROTOTYPE *Proto, PARAM_DESC *ParamDesc) {  Proto->Distrib[i] = D_random;  Proto->Mean[i] = ParamDesc->MidRange;  Proto->Variance.Elliptical[i] = ParamDesc->HalfRange;  // subtract out the previous magnitude of this dimension from the total  Proto->TotalMagnitude /= Proto->Magnitude.Elliptical[i];  Proto->Magnitude.Elliptical[i] = 1.0 / ParamDesc->Range;  Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];  Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);  // note that the proto Weight is irrelevant for D_random protos}                                // MakeDimRandom/** MakeDimUniform ***********************************************************Parameters:	i		index of dimension to be changed      Proto		prototype whose dimension is to be altered      Statistics	statistical info about prototypeGlobals:	NoneOperation:	This routine alters the ith dimension of the specified      mixed prototype to be uniform.Return:		NoneExceptions:	NoneHistory:	6/20/89, DSJ, Created.******************************************************************************/void MakeDimUniform(UINT16 i, PROTOTYPE *Proto, STATISTICS *Statistics) {  Proto->Distrib[i] = uniform;  Proto->Mean[i] = Proto->Cluster->Mean[i] +    (Statistics->Min[i] + Statistics->Max[i]) / 2;  Proto->Variance.Elliptical[i] =    (Statistics->Max[i] - Statistics->Min[i]) / 2;  if (Proto->Variance.Elliptical[i] < MINVARIANCE)    Proto->Variance.Elliptical[i] = MINVARIANCE;  // subtract out the previous magnitude of this dimension from the total  Proto->TotalMagnitude /= Proto->Magnitude.Elliptical[i];  Proto->Magnitude.Elliptical[i] =    1.0 / (2.0 * Proto->Variance.Elliptical[i]);  Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];  Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);  // note that the proto Weight is irrelevant for uniform protos}                                // MakeDimUniform/** ComputeStatistics *********************************************************Parameters:	N		number of dimensions      ParamDesc	array of dimension descriptions      Cluster		cluster whose stats are to be computedGlobals:	NoneOperation:	This routine searches the cluster tree for all leaf nodes      which are samples in the specified cluster.  It computes      a full covariance matrix for these samples as well as      keeping track of the ranges (min and max) for each      dimension.  A special data structure is allocated to      return this information to the caller.  An incremental      algorithm for computing statistics is not used because      it will not work with circular dimensions.Return:		Pointer to new data structure containing statisticsExceptions:	NoneHistory:	6/2/89, DSJ, Created.*********************************************************************************/STATISTICS *ComputeStatistics (INT16 N, PARAM_DESC ParamDesc[], CLUSTER * Cluster) {  STATISTICS *Statistics;  int i, j;  FLOAT32 *CoVariance;  FLOAT32 *Distance;  LIST SearchState;  SAMPLE *Sample;  UINT32 SampleCountAdjustedForBias;  // allocate memory to hold the statistics results  Statistics = (STATISTICS *) Emalloc (sizeof (STATISTICS));  Statistics->CoVariance = (FLOAT32 *) Emalloc (N * N * sizeof (FLOAT32));  Statistics->Min = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));  Statistics->Max = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));  // allocate temporary memory to hold the sample to mean distances  Distance = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));  // initialize the statistics  Statistics->AvgVariance = 1.0;  CoVariance = Statistics->CoVariance;  for (i = 0; i < N; i++) {    Statistics->Min[i] = 0.0;    Statistics->Max[i] = 0.0;    for (j = 0; j < N; j++, CoVariance++)      *CoVariance = 0;  }  // find each sample in the cluster and merge it into the statistics  InitSampleSearch(SearchState, Cluster);  while ((Sample = NextSample (&SearchState)) != NULL) {    for (i = 0; i < N; i++) {      Distance[i] = Sample->Mean[i] - Cluster->Mean[i];      if (ParamDesc[i].Circular) {        if (Distance[i] > ParamDesc[i].HalfRange)          Distance[i] -= ParamDesc[i].Range;        if (Distance[i] < -ParamDesc[i].HalfRange)          Distance[i] += ParamDesc[i].Range;      }      if (Distance[i] < Statistics->Min[i])        Statistics->Min[i] = Distance[i];      if (Distance[i] > Statistics->Max[i])        Statistics->Max[i] = Distance[i];    }    CoVariance = Statistics->CoVariance;    for (i = 0; i < N; i++)      for (j = 0; j < N; j++, CoVariance++)        *CoVariance += Distance[i] * Distance[j];  }  // normalize the variances by the total number of samples  // use SampleCount-1 instead of SampleCount to get an unbiased estimate  // also compute the geometic mean of the diagonal variances  // ensure that clusters with only 1 sample are handled correctly  if (Cluster->SampleCount > 1)    SampleCountAdjustedForBias = Cluster->SampleCount - 1;  else    SampleCountAdjustedForBias = 1;  CoVariance = Statistics->CoVariance;  for (i = 0; i < N; i++)  for (j = 0; j < N; j++, CoVariance++) {    *CoVariance /= SampleCountAdjustedForBias;    if (j == i) {      if (*CoVariance < MINVARIANCE)        *CoVariance = MINVARIANCE;      Statistics->AvgVariance *= *CoVariance;    }  }  Statistics->AvgVariance = (float)pow((double)Statistics->AvgVariance,                                       1.0 / N);  // release temporary memory and return  memfree(Distance);  return (Statistics);}                                // ComputeStatistics/** NewSpericalProto *********************************************************Parameters:	N		number of dimensions      Cluster		cluster to be made into a spherical prototype      Statistics	statistical info about samples in clusterGlobals:	NoneOperation:	This routine creates a spherical prototype data structure to      approximate the samples in the specified cluster.      Spherical prototypes have a single variance which is      common across all dimensions.  All dimensions are normally      distributed and independent.Return:		Pointer to a new spherical prototype data structureExceptions:	NoneHistory:	6/19/89, DSJ, Created.******************************************************************************/PROTOTYPE *NewSphericalProto(UINT16 N,                             CLUSTER *Cluster,                             STATISTICS *Statistics) {  PROTOTYPE *Proto;  Proto = NewSimpleProto (N, Cluster);  Proto->Variance.Spherical = Statistics->AvgVariance;  if (Proto->Variance.Spherical < MINVARIANCE)    Proto->Variance.Spherical = MINVARIANCE;  Proto->Magnitude.Spherical =    1.0 / sqrt ((double) (2.0 * PI * Proto->Variance.Spherical));  Proto->TotalMagnitude = (float)pow((double)Proto->Magnitude.Spherical,                                     (double) N);  Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical;  Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);  return (Proto);}                                // NewSphericalProto/** NewEllipticalProto *******************************************************Parameters:	N		number of dimensions      Cluster		cluster to be made into an elliptical prototype      Statistics	statistical info about samples in clusterGlobals:	NoneOperation:	This routine creates an elliptical prototype data structure to      approximate the samples in the specified cluster.      Elliptical prototypes have a variance for each dimension.      All dimensions are normally distributed and independent.Return:		Pointer to a new elliptical prototype data structureExceptions:	NoneHistory:	6/19/89, DSJ, Created.*******************************************************************************/PROTOTYPE *NewEllipticalProto(INT16 N,                              CLUSTER *Clust

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -