⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hclust.cpp

📁 orange源码 数据挖掘技术
💻 CPP
📖 第 1 页 / 共 2 页
字号:
      const float *minIndex1 = cluster1->distances + cluster1->rawIndexMinDistance;
      for(int ndi = cluster1->nDistances; ndi--; disti1++, disti2++)
        if (*disti2 < *disti1) // if one is -1, they both are
          if ((*disti1 = *disti2) < *minIndex1)
            minIndex1 = disti1;
      cluster1->minDistance = *minIndex1;
      cluster1->rawIndexMinDistance = minIndex1 - cluster1->distances;
    }

    while(*disti2 < 0)
      disti2++;        // should have at least one more >=0  - the one corresponding to distance to cluster1

    for(cluster = cluster1->next; cluster != cluster2; cluster = cluster->next) {
      while(*++disti2 < 0); // should have more - the one corresponding to cluster
      if (*disti2 < cluster->distances[rawIndex1])
        if ((cluster->distances[rawIndex1] = *disti2) < cluster->minDistance) {
          cluster->minDistance = *disti2;
          cluster->rawIndexMinDistance = rawIndex1;
        }
    }

    for(cluster = cluster->next; cluster; cluster = cluster->next) {
      if (cluster->distances[rawIndex2] < cluster->distances[rawIndex1])
        cluster->distances[rawIndex1] = cluster->distances[rawIndex2];

      // don't nest this in the above if -- both distances can be equal, yet index HAS TO move
      if (rawIndex2 == cluster->rawIndexMinDistance)
        cluster->rawIndexMinDistance = rawIndex1; // the smallest element got moved

      cluster->distances[rawIndex2] = -1;
    }

    cluster1->elevate(pcluster2, minDistance);
  }

  return *clusters;
}


TClusterW *THierarchicalClustering::merge_AverageLinkage(TClusterW **clusters, float *milestones)
{
  float *milestone = milestones;

  int step = 0;
  while((*clusters)->next) {
    if (milestone && (step++ ==*milestone))
      progressCallback->call(*((++milestone)++));

    TClusterW *cluster;
    TClusterW **pcluster2;

    float minDistance = numeric_limits<float>::max();
    for(TClusterW **tcluster = &((*clusters)->next); *tcluster; tcluster = &(*tcluster)->next)
      if ((*tcluster)->minDistance < minDistance) {
        minDistance = (*tcluster)->minDistance;
        pcluster2 = tcluster;
      }

    TClusterW *const cluster2 = *pcluster2;

    const int rawIndex1 = cluster2->rawIndexMinDistance;
    const int rawIndex2 = cluster2->nDistances;

    TClusterW *const cluster1 = clusters[rawIndex1];

    float *disti1 = cluster1->distances;
    float *disti2 = cluster2->distances;

    const int size1 = cluster1->size;
    const int size2 = cluster2->size;
    const int sumsize = size1 + size2;

    if (rawIndex1) { // not root - has no distances...
      *disti1 = (*disti1 * size1 + *disti2 * size2) / sumsize;
      const float *minIndex1 = disti1;
      int ndi = cluster1->nDistances-1;
      for(disti1++, disti2++; ndi--; disti1++, disti2++)
        if (*disti1 >= 0) {
          *disti1 = (*disti1 * size1 + *disti2 * size2) / sumsize;
          if (*disti1 < *minIndex1)
            minIndex1 = disti1;
        }
      cluster1->minDistance = *minIndex1;
      cluster1->rawIndexMinDistance = minIndex1 - cluster1->distances;
    }

    while(*disti2 < 0)
      disti2++;        // should have at least one more >=0  - the one corresponding to distance to cluster1

    for(cluster = cluster1->next; cluster != cluster2; cluster = cluster->next) {
      while(*++disti2 < 0); // should have more - the one corresponding to cluster
      float &distc = cluster->distances[rawIndex1];
      distc = (distc * size1 + *disti2 * size2) / sumsize;
      if (distc < cluster->minDistance) {
        cluster->minDistance = distc;
        cluster->rawIndexMinDistance = rawIndex1;
      }
      else if ((distc > cluster->minDistance) && (cluster->rawIndexMinDistance == rawIndex1))
        cluster->computeMinimalDistance();
    }

    for(cluster = cluster->next; cluster; cluster = cluster->next) {
      float &distc = cluster->distances[rawIndex1];
      distc = (distc * size1 + cluster->distances[rawIndex2] * size2) / sumsize;
      if (distc < cluster->minDistance) {
        cluster->minDistance = distc;
        cluster->rawIndexMinDistance = rawIndex1;
      }
      else if (   (distc > cluster->minDistance) && (cluster->rawIndexMinDistance == rawIndex1)
               || (cluster->rawIndexMinDistance == rawIndex2)) {
        cluster->distances[rawIndex2] = -1;
        cluster->computeMinimalDistance();
      }
      else
        cluster->distances[rawIndex2] = -1;
    }

    cluster1->elevate(pcluster2, minDistance);
  }

  return *clusters;
}



TClusterW *THierarchicalClustering::merge_CompleteLinkage(TClusterW **clusters, float *milestones)
{
  float *milestone = milestones;

  int step = 0;
  while((*clusters)->next) {
    if (milestone && (step++ ==*milestone))
      progressCallback->call(*((++milestone)++));

    TClusterW *cluster;
    TClusterW **pcluster2;

    float minDistance = numeric_limits<float>::max();
    for(TClusterW **tcluster = &((*clusters)->next); *tcluster; tcluster = &(*tcluster)->next)
      if ((*tcluster)->minDistance < minDistance) {
        minDistance = (*tcluster)->minDistance;
        pcluster2 = tcluster;
      }

    TClusterW *const cluster2 = *pcluster2;

    const int rawIndex1 = cluster2->rawIndexMinDistance;
    const int rawIndex2 = cluster2->nDistances;

    TClusterW *const cluster1 = clusters[rawIndex1];

    float *disti1 = cluster1->distances;
    float *disti2 = cluster2->distances;

    if (rawIndex1) { // not root - has no distances...
      if (*disti2 > *disti1)
        *disti1 = *disti2;
      const float *minIndex1 = disti1;
      int ndi = cluster1->nDistances-1;
      for(disti1++, disti2++; ndi--; disti1++, disti2++) {
        if (*disti1 >= 0) {
          if (*disti2 > *disti1) // if one is -1, they both are
            *disti1 = *disti2;
          if (*disti1 < *minIndex1)
            minIndex1 = disti1;
        }
      }
      cluster1->minDistance = *minIndex1;
      cluster1->rawIndexMinDistance = minIndex1 - cluster1->distances;
    }

    while(*disti2 < 0)
      disti2++;        // should have at least one more >=0  - the one corresponding to distance to cluster1

    for(cluster = cluster1->next; cluster != cluster2; cluster = cluster->next) {
      while(*++disti2 < 0); // should have more - the one corresponding to cluster
      float &distc = cluster->distances[rawIndex1];
      if (*disti2 > distc) {
        distc = *disti2;
        if (cluster->rawIndexMinDistance == rawIndex1)
          if (distc <= cluster->minDistance)
            cluster->minDistance = distc;
          else
            cluster->computeMinimalDistance();
      }
    }

    for(cluster = cluster->next; cluster; cluster = cluster->next) {
      float &distc = cluster->distances[rawIndex1];
      if (cluster->distances[rawIndex2] > distc)
        distc = cluster->distances[rawIndex2];

      cluster->distances[rawIndex2] = -1;
      if ((cluster->rawIndexMinDistance == rawIndex1) || (cluster->rawIndexMinDistance == rawIndex2))
        cluster->computeMinimalDistance();
    }

    cluster1->elevate(pcluster2, minDistance);
  }

  return *clusters;
}



TClusterW *THierarchicalClustering::merge(TClusterW **clusters, float *milestones)
{
  switch(linkage) {
    case Complete: return merge_CompleteLinkage(clusters, milestones);
    case Single: return merge_SingleLinkage(clusters, milestones);
    case Average: 
    default: return merge_AverageLinkage(clusters, milestones);
  }
}

PHierarchicalCluster THierarchicalClustering::restructure(TClusterW *root)
{
  PIntList elementIndices = new TIntList(root->size);
  TIntList::iterator currentElement(elementIndices->begin());
  int currentIndex = 0;

  return restructure(root, elementIndices, currentElement, currentIndex);
}


PHierarchicalCluster THierarchicalClustering::restructure(TClusterW *node, PIntList elementIndices, TIntList::iterator &currentElement, int &currentIndex)
{
  PHierarchicalCluster cluster;

  if (!node->left) {
    *currentElement++ = node->elementIndex;
    cluster = mlnew THierarchicalCluster(elementIndices, currentIndex++);
  }
  else {
    PHierarchicalCluster left = restructure(node->left, elementIndices, currentElement, currentIndex);
    PHierarchicalCluster right = restructure(node->right, elementIndices, currentElement, currentIndex);
    cluster = mlnew THierarchicalCluster(elementIndices, left, right, node->height, left->first, right->last);
  }

  // no need to care about 'distances' - they've been removed during clustering (in 'elevate') :)
  mldelete node;
  return cluster;
}


PHierarchicalCluster THierarchicalClustering::operator()(PSymMatrix distanceMatrix)
{
  float *distanceMatrixElements = NULL;
  TClusterW **clusters, *root;
  float *callbackMilestones = NULL;
  
  try {
    const int dim = distanceMatrix->dim;
    const int size = ((dim+1)*(dim+2))/2;
    float *distanceMatrixElements = overwriteMatrix ? distanceMatrix->elements : (float *)memcpy(new float[size], distanceMatrix->elements, size*sizeof(float));

    clusters = init(dim, distanceMatrixElements);
    callbackMilestones = (progressCallback && (distanceMatrix->dim >= 1000)) ? progressCallback->milestones(distanceMatrix->dim) : NULL;
    root = merge(clusters, callbackMilestones);
  }
  catch (...) {
    mldelete clusters;
    mldelete callbackMilestones;
    mldelete distanceMatrixElements;
    throw;
  }

  mldelete clusters;
  mldelete callbackMilestones;
  mldelete distanceMatrixElements;

  return restructure(root);
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -