⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cluster.cpp

📁 一个google的OCR源码
💻 CPP
📖 第 1 页 / 共 5 页
字号:
Parameters:	Clusterer	data struct containing samples being clustered      Cluster		cluster to be made into a spherical prototype      Statistics	statistical info about cluster      Buckets		histogram struct used to analyze distributionGlobals:	NoneOperation:	This routine tests the specified cluster to see if it can      be approximated by a spherical normal distribution.  If it      can be, then a new prototype is formed and returned to the      caller.  If it can't be, then NULL is returned to the caller.Return:		Pointer to new spherical prototype or NULL.Exceptions:	NoneHistory:	6/1/89, DSJ, Created.******************************************************************************/PROTOTYPE *MakeSphericalProto(CLUSTERER *Clusterer,                              CLUSTER *Cluster,                              STATISTICS *Statistics,                              BUCKETS *Buckets) {  PROTOTYPE *Proto = NULL;  int i;  // check that each dimension is a normal distribution  for (i = 0; i < Clusterer->SampleSize; i++) {    if (Clusterer->ParamDesc[i].NonEssential)      continue;    FillBuckets (Buckets, Cluster, i, &(Clusterer->ParamDesc[i]),      Cluster->Mean[i],      sqrt ((FLOAT64) (Statistics->AvgVariance)));    if (!DistributionOK (Buckets))      break;  }  // if all dimensions matched a normal distribution, make a proto  if (i >= Clusterer->SampleSize)    Proto = NewSphericalProto (Clusterer->SampleSize, Cluster, Statistics);  return (Proto);}                                // MakeSphericalProto/** MakeEllipticalProto ****************************************************Parameters:	Clusterer	data struct containing samples being clustered      Cluster		cluster to be made into an elliptical prototype      Statistics	statistical info about cluster      Buckets		histogram struct used to analyze distributionGlobals:	NoneOperation:	This routine tests the specified cluster to see if it can      be approximated by an elliptical normal distribution.  If it      can be, then a new prototype is formed and returned to the      caller.  If it can't be, then NULL is returned to the caller.Return:		Pointer to new elliptical prototype or NULL.Exceptions:	NoneHistory:	6/12/89, DSJ, Created.****************************************************************************/PROTOTYPE *MakeEllipticalProto(CLUSTERER *Clusterer,                               CLUSTER *Cluster,                               STATISTICS *Statistics,                               BUCKETS *Buckets) {  PROTOTYPE *Proto = NULL;  int i;  // check that each dimension is a normal distribution  for (i = 0; i < Clusterer->SampleSize; i++) {    if (Clusterer->ParamDesc[i].NonEssential)      continue;    FillBuckets (Buckets, Cluster, i, &(Clusterer->ParamDesc[i]),      Cluster->Mean[i],      sqrt ((FLOAT64) Statistics->      CoVariance[i * (Clusterer->SampleSize + 1)]));    if (!DistributionOK (Buckets))      break;  }  // if all dimensions matched a normal distribution, make a proto  if (i >= Clusterer->SampleSize)    Proto = NewEllipticalProto (Clusterer->SampleSize, Cluster, Statistics);  return (Proto);}                                // MakeEllipticalProto/** MakeMixedProto ***********************************************************Parameters:	Clusterer	data struct containing samples being clustered      Cluster		cluster to be made into a prototype      Statistics	statistical info about cluster      NormalBuckets	histogram struct used to analyze distribution      Confidence	confidence level for alternate distributionsGlobals:	NoneOperation:	This routine tests each dimension of the specified cluster to      see what distribution would best approximate that dimension.      Each dimension is compared to the following distributions      in order: normal, random, uniform.  If each dimension can      be represented by one of these distributions,      then a new prototype is formed and returned to the      caller.  If it can't be, then NULL is returned to the caller.Return:		Pointer to new mixed prototype or NULL.Exceptions:	NoneHistory:	6/12/89, DSJ, Created.********************************************************************************/PROTOTYPE *MakeMixedProto(CLUSTERER *Clusterer,                          CLUSTER *Cluster,                          STATISTICS *Statistics,                          BUCKETS *NormalBuckets,                          FLOAT64 Confidence) {  PROTOTYPE *Proto;  int i;  BUCKETS *UniformBuckets = NULL;  BUCKETS *RandomBuckets = NULL;  // create a mixed proto to work on - initially assume all dimensions normal*/  Proto = NewMixedProto (Clusterer->SampleSize, Cluster, Statistics);  // find the proper distribution for each dimension  for (i = 0; i < Clusterer->SampleSize; i++) {    if (Clusterer->ParamDesc[i].NonEssential)      continue;    FillBuckets (NormalBuckets, Cluster, i, &(Clusterer->ParamDesc[i]),      Proto->Mean[i],      sqrt ((FLOAT64) Proto->Variance.Elliptical[i]));    if (DistributionOK (NormalBuckets))      continue;    if (RandomBuckets == NULL)      RandomBuckets =        GetBuckets (D_random, Cluster->SampleCount, Confidence);    MakeDimRandom (i, Proto, &(Clusterer->ParamDesc[i]));    FillBuckets (RandomBuckets, Cluster, i, &(Clusterer->ParamDesc[i]),      Proto->Mean[i], Proto->Variance.Elliptical[i]);    if (DistributionOK (RandomBuckets))      continue;    if (UniformBuckets == NULL)      UniformBuckets =        GetBuckets (uniform, Cluster->SampleCount, Confidence);    MakeDimUniform(i, Proto, Statistics);    FillBuckets (UniformBuckets, Cluster, i, &(Clusterer->ParamDesc[i]),      Proto->Mean[i], Proto->Variance.Elliptical[i]);    if (DistributionOK (UniformBuckets))      continue;    break;  }  // if any dimension failed to match a distribution, discard the proto  if (i < Clusterer->SampleSize) {    FreePrototype(Proto);    Proto = NULL;  }  if (UniformBuckets != NULL)    FreeBuckets(UniformBuckets);  if (RandomBuckets != NULL)    FreeBuckets(RandomBuckets);  return (Proto);}                                // MakeMixedProto/* MakeDimRandom *************************************************************Parameters:	i		index of dimension to be changed      Proto		prototype whose dimension is to be altered      ParamDesc	description of specified dimensionGlobals:	NoneOperation:	This routine alters the ith dimension of the specified      mixed prototype to be D_random.Return:		NoneExceptions:	NoneHistory:	6/20/89, DSJ, Created.******************************************************************************/void MakeDimRandom(uinT16 i, PROTOTYPE *Proto, PARAM_DESC *ParamDesc) {  Proto->Distrib[i] = D_random;  Proto->Mean[i] = ParamDesc->MidRange;  Proto->Variance.Elliptical[i] = ParamDesc->HalfRange;  // subtract out the previous magnitude of this dimension from the total  Proto->TotalMagnitude /= Proto->Magnitude.Elliptical[i];  Proto->Magnitude.Elliptical[i] = 1.0 / ParamDesc->Range;  Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];  Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);  // note that the proto Weight is irrelevant for D_random protos}                                // MakeDimRandom/** MakeDimUniform ***********************************************************Parameters:	i		index of dimension to be changed      Proto		prototype whose dimension is to be altered      Statistics	statistical info about prototypeGlobals:	NoneOperation:	This routine alters the ith dimension of the specified      mixed prototype to be uniform.Return:		NoneExceptions:	NoneHistory:	6/20/89, DSJ, Created.******************************************************************************/void MakeDimUniform(uinT16 i, PROTOTYPE *Proto, STATISTICS *Statistics) {  Proto->Distrib[i] = uniform;  Proto->Mean[i] = Proto->Cluster->Mean[i] +    (Statistics->Min[i] + Statistics->Max[i]) / 2;  Proto->Variance.Elliptical[i] =    (Statistics->Max[i] - Statistics->Min[i]) / 2;  if (Proto->Variance.Elliptical[i] < MINVARIANCE)    Proto->Variance.Elliptical[i] = MINVARIANCE;  // subtract out the previous magnitude of this dimension from the total  Proto->TotalMagnitude /= Proto->Magnitude.Elliptical[i];  Proto->Magnitude.Elliptical[i] =    1.0 / (2.0 * Proto->Variance.Elliptical[i]);  Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];  Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);  // note that the proto Weight is irrelevant for uniform protos}                                // MakeDimUniform/** ComputeStatistics *********************************************************Parameters:	N		number of dimensions      ParamDesc	array of dimension descriptions      Cluster		cluster whose stats are to be computedGlobals:	NoneOperation:	This routine searches the cluster tree for all leaf nodes      which are samples in the specified cluster.  It computes      a full covariance matrix for these samples as well as      keeping track of the ranges (min and max) for each      dimension.  A special data structure is allocated to      return this information to the caller.  An incremental      algorithm for computing statistics is not used because      it will not work with circular dimensions.Return:		Pointer to new data structure containing statisticsExceptions:	NoneHistory:	6/2/89, DSJ, Created.*********************************************************************************/STATISTICS *ComputeStatistics (inT16 N, PARAM_DESC ParamDesc[], CLUSTER * Cluster) {  STATISTICS *Statistics;  int i, j;  FLOAT32 *CoVariance;  FLOAT32 *Distance;  LIST SearchState;  SAMPLE *Sample;  uinT32 SampleCountAdjustedForBias;  // allocate memory to hold the statistics results  Statistics = (STATISTICS *) Emalloc (sizeof (STATISTICS));  Statistics->CoVariance = (FLOAT32 *) Emalloc (N * N * sizeof (FLOAT32));  Statistics->Min = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));  Statistics->Max = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));  // allocate temporary memory to hold the sample to mean distances  Distance = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));  // initialize the statistics  Statistics->AvgVariance = 1.0;  CoVariance = Statistics->CoVariance;  for (i = 0; i < N; i++) {    Statistics->Min[i] = 0.0;    Statistics->Max[i] = 0.0;    for (j = 0; j < N; j++, CoVariance++)      *CoVariance = 0;  }  // find each sample in the cluster and merge it into the statistics  InitSampleSearch(SearchState, Cluster);  while ((Sample = NextSample (&SearchState)) != NULL) {    for (i = 0; i < N; i++) {      Distance[i] = Sample->Mean[i] - Cluster->Mean[i];      if (ParamDesc[i].Circular) {        if (Distance[i] > ParamDesc[i].HalfRange)          Distance[i] -= ParamDesc[i].Range;        if (Distance[i] < -ParamDesc[i].HalfRange)          Distance[i] += ParamDesc[i].Range;      }      if (Distance[i] < Statistics->Min[i])        Statistics->Min[i] = Distance[i];      if (Distance[i] > Statistics->Max[i])        Statistics->Max[i] = Distance[i];    }    CoVariance = Statistics->CoVariance;    for (i = 0; i < N; i++)      for (j = 0; j < N; j++, CoVariance++)        *CoVariance += Distance[i] * Distance[j];  }  // normalize the variances by the total number of samples  // use SampleCount-1 instead of SampleCount to get an unbiased estimate  // also compute the geometic mean of the diagonal variances  // ensure that clusters with only 1 sample are handled correctly  if (Cluster->SampleCount > 1)    SampleCountAdjustedForBias = Cluster->SampleCount - 1;  else    SampleCountAdjustedForBias = 1;  CoVariance = Statistics->CoVariance;  for (i = 0; i < N; i++)  for (j = 0; j < N; j++, CoVariance++) {    *CoVariance /= SampleCountAdjustedForBias;    if (j == i) {      if (*CoVariance < MINVARIANCE)        *CoVariance = MINVARIANCE;      Statistics->AvgVariance *= *CoVariance;    }  }  Statistics->AvgVariance = (float)pow((double)Statistics->AvgVariance,                                       1.0 / N);  // release temporary memory and return  memfree(Distance);  return (Statistics);}                                // ComputeStatistics/** NewSpericalProto *********************************************************Parameters:	N		number of dimensions      Cluster		cluster to be made into a spherical prototype      Statistics	statistical info about samples in clusterGlobals:

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -