⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hclust.cpp

📁 orange源码 数据挖掘技术
💻 CPP
📖 第 1 页 / 共 2 页
字号:
/*
    This file is part of Orange.

    Orange is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    Orange is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with Orange; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

    Authors: Janez Demsar, Blaz Zupan, 1996--2002
    Contact: janez.demsar@fri.uni-lj.si
*/

#include "progress.hpp"

#include "hclust.ppp"

DEFINE_TOrangeVector_classDescription(PHierarchicalCluster, "THierarchicalClusterList", true, ORANGE_API)

class TClusterW {
public:
    TClusterW *next; // for cluster, this is next cluster, for elements next element
    TClusterW *left, *right; // subclusters, if left==NULL, this is an element
    int size;
    int elementIndex;
    float height;

    float *distances; // distances to the clusters before this one (lower left matrix)
    float minDistance; // minimal distance
    int rawIndexMinDistance; // index of minimal distance
    int nDistances;

    TClusterW(const int &elIndex, float *adistances, const int &anDistances)
    : next(NULL),
      left(NULL),
      right(NULL),
      size(1),
      elementIndex(elIndex),
      height(0.0),
      distances(adistances),
      minDistance(numeric_limits<float>::max()),
      rawIndexMinDistance(-1),
      nDistances(anDistances)
    {
      if (distances)
        computeMinimalDistance();
    }

    void elevate(TClusterW **aright, const float &aheight)
    {
      left = mlnew TClusterW(*this);
      right = *aright;

      left->distances = NULL;
      size = left->size + right->size;
      elementIndex = -1;
      height = aheight;

      if (next == right)
        next = right->next;
      else
        *aright = right->next;
    }


    void computeMinimalDistance()
    {
      float *dp = distances, *minp = dp++;
      for(int i = nDistances; --i; dp++)
        if ((*dp >= 0) && (*dp < *minp))
          minp = dp;
      minDistance = *minp;
      rawIndexMinDistance = minp - distances;
    }

    TClusterW **clusterAt(int rawIndex, TClusterW **cluster)
    {
      for(float *dp = distances; rawIndex; dp++)
        if (*dp>=0) {
          cluster = &(*cluster)->next;
          rawIndex--;
        }
      return cluster;
    }

    void clearDistances()
    {
      raiseErrorWho("TClusterW", "rewrite clean to adjust indexMinDistance");

      float *dst = distances;
      int i = nDistances;
      for(; nDistances && (*dst >= 0); nDistances--, dst++);
      if (!nDistances)
        return;
      
      float *src = dst;
      for(src++, nDistances--; nDistances && (*src >= 0); src++)
        if (*src >= 0)
          *dst++ = *src;

      nDistances = dst - distances;
    }
};


THierarchicalCluster::THierarchicalCluster()
: branches(),
  height(0.0),
  mapping(),
  first(0),
  last(0)
{}


THierarchicalCluster::THierarchicalCluster(PIntList els, const int &elementIndex)
: branches(),
  height(0.0),
  mapping(els),
  first(elementIndex),
  last(elementIndex+1)
{}


THierarchicalCluster::THierarchicalCluster(PIntList els, PHierarchicalCluster left, PHierarchicalCluster right, const float &h, const int &f, const int &l)
: branches(new THierarchicalClusterList(2)),
  height(h),
  mapping(els),
  first(f),
  last(l)
{ 
  branches->at(0) = left;
  branches->at(1) = right;
}


void THierarchicalCluster::swap()
{
  if (!branches || (branches->size()<2))
    return;
  if (branches->size() > 2)
    raiseError("cannot swap multiple branches (use method 'permutation' instead)");

  const TIntList::iterator beg0 = mapping->begin() + branches->at(0)->first;
  const TIntList::iterator beg1 = mapping->begin() + branches->at(1)->first;
  const TIntList::iterator end1 = mapping->begin() + branches->at(1)->last;

  if ((branches->at(0)->first > branches->at(1)->first) || (branches->at(1)->first > branches->at(1)->last))
    raiseError("internal inconsistency in clustering structure: invalid ordering of left's and right's 'first' and 'last'");

  TIntList::iterator li0, li1;

  int *temp = new int [beg1 - beg0], *t;
  for(li0 = beg0, t = temp; li0 != beg1; *t++ = *li0++);
  for(li0 = beg0, li1 = beg1; li1 != end1; *li0++ = *li1++);
  for(t = temp; li0 != end1; *li0++ = *t++);
  delete temp;

  branches->at(0)->recursiveMove(end1 - beg1);
  branches->at(1)->recursiveMove(beg0 - beg1);

  PHierarchicalCluster tbr = branches->at(0);
  branches->at(0) = branches->at(1);
  branches->at(1) = tbr;
}


void THierarchicalCluster::permute(const TIntList &neworder)
{
  if ((!branches && neworder.size()) || (branches->size() != neworder.size()))
    raiseError("the number of clusters does not match the lenght of the permutation vector");

  int *temp = new int [last - first], *t = temp;
  TIntList::const_iterator pi = neworder.begin();
  THierarchicalClusterList::iterator bi(branches->begin()), be(branches->end());
  THierarchicalClusterList newBranches;

  for(; bi != be; bi++, pi++) {
    PHierarchicalCluster branch = branches->at(*pi);
    newBranches.push_back(branch);
    TIntList::const_iterator bei(mapping->begin() + branch->first), bee(mapping->begin() + branch->last);
    const int offset = (t - temp) - (branch->first - first);
    for(; bei != bee; *t++ = *bei++);
    if (offset)
      branch->recursiveMove(offset);
  }

  TIntList::iterator bei(mapping->begin() + first), bee(mapping->begin() + last);
  for(t = temp; bei!=bee; *bei++ = *t++);

  bi = branches->begin();
  THierarchicalClusterList::const_iterator nbi(newBranches.begin());
  for(; bi != be; *bi++ = *nbi++);
}


void THierarchicalCluster::recursiveMove(const int &offset)
{
  first += offset;
  last += offset;
  if (branches)
    PITERATE(THierarchicalClusterList, bi, branches)
      (*bi)->recursiveMove(offset);
}


THierarchicalClustering::THierarchicalClustering()
: linkage(Single),
  overwriteMatrix(false)
{}


TClusterW **THierarchicalClustering::init(const int &dim, float *distanceMatrix)
{
  for(float *ddi = distanceMatrix, *dde = ddi + ((dim+1)*(dim+2))/2; ddi!=dde; ddi++)
    if (*ddi < 0) {
      int x, y;
      TSymMatrix::index2coordinates(ddi-distanceMatrix, x, y);
      raiseError("distance matrix contains negative element at (%i, %i)", x, y);
    }

  TClusterW **clusters = mlnew TClusterW *[dim];
  TClusterW **clusteri = clusters;

  *clusters = mlnew TClusterW(0, NULL, 0);
  distanceMatrix++;
  
  for(int elementIndex = 1, e = dim; elementIndex < e; distanceMatrix += ++elementIndex) {
    TClusterW *newcluster = mlnew TClusterW(elementIndex, distanceMatrix, elementIndex);
    (*clusteri++)->next = newcluster;
    *clusteri = newcluster; 
  }

  return clusters;
}



TClusterW *THierarchicalClustering::merge_SingleLinkage(TClusterW **clusters, float *milestones)
{
  float *milestone = milestones;

  int step = 0;
  while((*clusters)->next) {
    if (milestone && (step++ ==*milestone))
      progressCallback->call(*((++milestone)++));

    TClusterW *cluster;
    TClusterW **pcluster2;

    float minDistance = numeric_limits<float>::max();
    for(TClusterW **tcluster = &((*clusters)->next); *tcluster; tcluster = &(*tcluster)->next)
      if ((*tcluster)->minDistance < minDistance) {
        minDistance = (*tcluster)->minDistance;
        pcluster2 = tcluster;
      }

    TClusterW *const cluster2 = *pcluster2;

    const int rawIndex1 = cluster2->rawIndexMinDistance;
    const int rawIndex2 = cluster2->nDistances;

    TClusterW *const cluster1 = clusters[rawIndex1];

    float *disti1 = cluster1->distances;
    float *disti2 = cluster2->distances;

    if (rawIndex1) { // not root - has no distances...

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -