⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cluster.cpp

📁 一OCR的相关资料。.希望对研究OCR的朋友有所帮助.
💻 CPP
📖 第 1 页 / 共 5 页
字号:
        *m = (n1 * *m1 + n2 * (*m2 - ParamDesc->Range)) / n;        if (*m < ParamDesc->Min)          *m += ParamDesc->Range;      }      else if ((*m1 - *m2) > ParamDesc->HalfRange) {        *m = (n1 * (*m1 - ParamDesc->Range) + n2 * *m2) / n;        if (*m < ParamDesc->Min)          *m += ParamDesc->Range;      }      else        *m = (n1 * *m1 + n2 * *m2) / n;    }    else      *m = (n1 * *m1 + n2 * *m2) / n;  }  return (n);}                                // MergeClusters/** ComputePrototypes *******************************************************Parameters:	Clusterer	data structure holding cluster tree      Config		parameters used to control prototype generationGlobals:	NoneOperation:	This routine decides which clusters in the cluster tree      should be represented by prototypes, forms a list of these      prototypes, and places the list in the Clusterer data      structure.Return:		NoneExceptions:	NoneHistory:	5/30/89, DSJ, Created.*******************************************************************************/void ComputePrototypes(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) {  LIST ClusterStack = NIL;  CLUSTER *Cluster;  PROTOTYPE *Prototype;  // use a stack to keep track of clusters waiting to be processed  // initially the only cluster on the stack is the root cluster  if (Clusterer->Root != NULL)    ClusterStack = push (NIL, Clusterer->Root);  // loop until we have analyzed all clusters which are potential prototypes  while (ClusterStack != NIL) {    // remove the next cluster to be analyzed from the stack    // try to make a prototype from the cluster    // if successful, put it on the proto list, else split the cluster    Cluster = (CLUSTER *) first (ClusterStack);    ClusterStack = pop (ClusterStack);    Prototype = MakePrototype (Clusterer, Config, Cluster);    if (Prototype != NULL) {      Clusterer->ProtoList = push (Clusterer->ProtoList, Prototype);    }    else {      ClusterStack = push (ClusterStack, Cluster->Right);      ClusterStack = push (ClusterStack, Cluster->Left);    }  }}                                // ComputePrototypes/** MakePrototype ***********************************************************Parameters:	Clusterer	data structure holding cluster tree      Config		parameters used to control prototype generation      Cluster		cluster to be made into a prototypeGlobals:	NoneOperation:	This routine attempts to create a prototype from the      specified cluster that conforms to the distribution      specified in Config.  If there are too few samples in the      cluster to perform a statistical analysis, then a prototype      is generated but labelled as insignificant.  If the      dimensions of the cluster are not independent, no prototype      is generated and NULL is returned.  If a prototype can be      found that matches the desired distribution then a pointer      to it is returned, otherwise NULL is returned.Return:		Pointer to new prototype or NULLExceptions:	NoneHistory:	6/19/89, DSJ, Created.*******************************************************************************/PROTOTYPE *MakePrototype(CLUSTERER *Clusterer,                         CLUSTERCONFIG *Config,                         CLUSTER *Cluster) {  STATISTICS *Statistics;  PROTOTYPE *Proto;  BUCKETS *Buckets;  // filter out clusters which contain samples from the same character  if (MultipleCharSamples (Clusterer, Cluster, Config->MaxIllegal))    return (NULL);  // compute the covariance matrix and ranges for the cluster  Statistics =    ComputeStatistics (Clusterer->SampleSize, Clusterer->ParamDesc, Cluster);  // check for degenerate clusters which need not be analyzed further  // note that the MinSamples test assumes that all clusters with multiple  // character samples have been removed (as above)  Proto = MakeDegenerateProto (Clusterer->SampleSize, Cluster, Statistics,    Config->ProtoStyle,    (INT32) (Config->MinSamples *    Clusterer->NumChar));  if (Proto != NULL) {    FreeStatistics(Statistics);    return (Proto);  }  // check to ensure that all dimensions are independent  if (!Independent (Clusterer->ParamDesc, Clusterer->SampleSize,  Statistics->CoVariance, Config->Independence)) {    FreeStatistics(Statistics);    return (NULL);  }  if (HOTELLING && Config->ProtoStyle == elliptical) {    Proto = TestEllipticalProto(Clusterer, Cluster, Statistics);    if (Proto != NULL) {      FreeStatistics(Statistics);      return Proto;    }  }  // create a histogram data structure used to evaluate distributions  Buckets = GetBuckets (normal, Cluster->SampleCount, Config->Confidence);  // create a prototype based on the statistics and test it  switch (Config->ProtoStyle) {    case spherical:      Proto = MakeSphericalProto (Clusterer, Cluster, Statistics, Buckets);      break;    case elliptical:      Proto = MakeEllipticalProto (Clusterer, Cluster, Statistics, Buckets);      break;    case mixed:      Proto = MakeMixedProto (Clusterer, Cluster, Statistics, Buckets,        Config->Confidence);      break;    case automatic:      Proto = MakeSphericalProto (Clusterer, Cluster, Statistics, Buckets);      if (Proto != NULL)        break;      Proto = MakeEllipticalProto (Clusterer, Cluster, Statistics, Buckets);      if (Proto != NULL)        break;      Proto = MakeMixedProto (Clusterer, Cluster, Statistics, Buckets,        Config->Confidence);      break;  }  FreeBuckets(Buckets);  FreeStatistics(Statistics);  return (Proto);}                                // MakePrototype/** MakeDegenerateProto ******************************************************Parameters:	N		number of dimensions      Cluster		cluster being analyzed      Statistics	statistical info about cluster      Style		type of prototype to be generated      MinSamples	minimum number of samples in a clusterGlobals:	NoneOperation:	This routine checks for clusters which are degenerate and      therefore cannot be analyzed in a statistically valid way.      A cluster is defined as degenerate if it does not have at      least MINSAMPLESNEEDED samples in it.  If the cluster is      found to be degenerate, a prototype of the specified style      is generated and marked as insignificant.  A cluster is      also degenerate if it does not have at least MinSamples      samples in it.      If the cluster is not degenerate, NULL is returned.Return:		Pointer to degenerate prototype or NULL.Exceptions:	NoneHistory:	6/20/89, DSJ, Created.      7/12/89, DSJ, Changed name and added check for 0 stddev.      8/8/89, DSJ, Removed check for 0 stddev (handled elsewhere).********************************************************************************/PROTOTYPE *MakeDegenerateProto(  //this was MinSample                               UINT16 N,                               CLUSTER *Cluster,                               STATISTICS *Statistics,                               PROTOSTYLE Style,                               INT32 MinSamples) {  PROTOTYPE *Proto = NULL;  if (MinSamples < MINSAMPLESNEEDED)    MinSamples = MINSAMPLESNEEDED;  if (Cluster->SampleCount < MinSamples) {    switch (Style) {      case spherical:        Proto = NewSphericalProto (N, Cluster, Statistics);        break;      case elliptical:      case automatic:        Proto = NewEllipticalProto (N, Cluster, Statistics);        break;      case mixed:        Proto = NewMixedProto (N, Cluster, Statistics);        break;    }    Proto->Significant = FALSE;  }  return (Proto);}                                // MakeDegenerateProto/** TestEllipticalProto ****************************************************Parameters:	Clusterer	data struct containing samples being clustered      Cluster		cluster to be made into an elliptical prototype      Statistics	statistical info about clusterGlobals:	NoneOperation:	This routine tests the specified cluster to see if ***     there is a statistically significant difference between*     the sub-clusters that would be made if the cluster were to*     be split. If not, then a new prototype is formed and*     returned to the caller. If there is, then NULL is returned*     to the caller.Return:		Pointer to new elliptical prototype or NULL.****************************************************************************/PROTOTYPE *TestEllipticalProto(CLUSTERER *Clusterer,                               CLUSTER *Cluster,                               STATISTICS *Statistics) {  int N = Clusterer->SampleSize;  CLUSTER* Left = Cluster->Left;  CLUSTER* Right = Cluster->Right;  if (Left == NULL || Right == NULL)    return NULL;  int TotalDims = Left->SampleCount + Right->SampleCount;  if (TotalDims < N + 1)    return NULL;  FLOAT32* Inverse = (FLOAT32 *) Emalloc(N * N * sizeof(FLOAT32));  FLOAT32* Delta = (FLOAT32*) Emalloc(N * sizeof(FLOAT32));  double err = InvertMatrix(Statistics->CoVariance, N, Inverse);  if (err > 1) {    cprintf("Clustering error: Matrix inverse failed with error %g\n", err);  }  for (int dim = 0; dim < N; ++dim) {    Delta[dim] = Left->Mean[dim] - Right->Mean[dim];  }  // Compute Hotelling's T-squared.  double Tsq = 0.0;  for (int x = 0; x < N; ++x) {    double temp = 0.0;    for (int y = 0; y < N; ++y) {      temp += Inverse[y + N*x] * Delta[y];    }    Tsq += Delta[x] * temp;  }  memfree(Inverse);  memfree(Delta);  Tsq *= Left->SampleCount * Right->SampleCount / TotalDims;  double F = Tsq * (TotalDims - N - 1) / ((TotalDims - N) * 2);  int Fx = N;  if (Fx > FTABLE_X)    Fx = FTABLE_X;  --Fx;  int Fy = TotalDims - N - 1;  if (Fy > FTABLE_Y)    Fy = FTABLE_Y;  --Fy;  if (F < FTable[Fy][Fx]) {    return NewEllipticalProto (Clusterer->SampleSize, Cluster, Statistics);  }  return NULL;}/* MakeSphericalProto *******************************************************Parameters:	Clusterer	data struct containing samples being clustered      Cluster		cluster to be made into a spherical prototype      Statistics	statistical info about cluster      Buckets		histogram struct used to analyze distributionGlobals:	NoneOperation:	This routine tests the specified cluster to see if it can      be approximated by a spherical normal distribution.  If it      can be, then a new prototype is formed and returned to the      caller.  If it can't be, then NULL is returned to the caller.Return:		Pointer to new spherical prototype or NULL.Exceptions:	NoneHistory:	6/1/89, DSJ, Created.******************************************************************************/PROTOTYPE *MakeSphericalProto(CLUSTERER *Clusterer,                              CLUSTER *Cluster,                              STATISTICS *Statistics,                              BUCKETS *Buckets) {  PROTOTYPE *Proto = NULL;  int i;  // check that each dimension is a normal distribution  for (i = 0; i < Clusterer->SampleSize; i++) {    if (Clusterer->ParamDesc[i].NonEssential)      continue;    FillBuckets (Buckets, Cluster, i, &(Clusterer->ParamDesc[i]),      Cluster->Mean[i],      sqrt ((FLOAT64) (Statistics->AvgVariance)));    if (!DistributionOK (Buckets))      break;  }  // if all dimensions matched a normal distribution, make a proto  if (i >= Clusterer->SampleSize)    Proto = NewSphericalProto (Clusterer->SampleSize, Cluster, Statistics);  return (Proto);}                                // MakeSphericalProto/** MakeEllipticalProto ****************************************************Parameters:	Clusterer	data struct containing samples being clustered      Cluster		cluster to be made into an elliptical prototype      Statistics	statistical info about cluster      Buckets		histogram struct used to analyze distributionGlobals:	NoneOperation:	This routine tests the specified cluster to see if it can      be approximated by an elliptical normal distribution.  If it

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -