📄 hclust.cpp
字号:
/*
This file is part of Orange.
Orange is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
Orange is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Orange; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Authors: Janez Demsar, Blaz Zupan, 1996--2002
Contact: janez.demsar@fri.uni-lj.si
*/
#include "progress.hpp"
#include "hclust.ppp"
DEFINE_TOrangeVector_classDescription(PHierarchicalCluster, "THierarchicalClusterList", true, ORANGE_API)
class TClusterW {
public:
TClusterW *next; // for cluster, this is next cluster, for elements next element
TClusterW *left, *right; // subclusters, if left==NULL, this is an element
int size;
int elementIndex;
float height;
float *distances; // distances to the clusters before this one (lower left matrix)
float minDistance; // minimal distance
int rawIndexMinDistance; // index of minimal distance
int nDistances;
TClusterW(const int &elIndex, float *adistances, const int &anDistances)
: next(NULL),
left(NULL),
right(NULL),
size(1),
elementIndex(elIndex),
height(0.0),
distances(adistances),
minDistance(numeric_limits<float>::max()),
rawIndexMinDistance(-1),
nDistances(anDistances)
{
if (distances)
computeMinimalDistance();
}
void elevate(TClusterW **aright, const float &aheight)
{
left = mlnew TClusterW(*this);
right = *aright;
left->distances = NULL;
size = left->size + right->size;
elementIndex = -1;
height = aheight;
if (next == right)
next = right->next;
else
*aright = right->next;
}
void computeMinimalDistance()
{
float *dp = distances, *minp = dp++;
for(int i = nDistances; --i; dp++)
if ((*dp >= 0) && (*dp < *minp))
minp = dp;
minDistance = *minp;
rawIndexMinDistance = minp - distances;
}
TClusterW **clusterAt(int rawIndex, TClusterW **cluster)
{
for(float *dp = distances; rawIndex; dp++)
if (*dp>=0) {
cluster = &(*cluster)->next;
rawIndex--;
}
return cluster;
}
void clearDistances()
{
raiseErrorWho("TClusterW", "rewrite clean to adjust indexMinDistance");
float *dst = distances;
int i = nDistances;
for(; nDistances && (*dst >= 0); nDistances--, dst++);
if (!nDistances)
return;
float *src = dst;
for(src++, nDistances--; nDistances && (*src >= 0); src++)
if (*src >= 0)
*dst++ = *src;
nDistances = dst - distances;
}
};
THierarchicalCluster::THierarchicalCluster()
: branches(),
height(0.0),
mapping(),
first(0),
last(0)
{}
THierarchicalCluster::THierarchicalCluster(PIntList els, const int &elementIndex)
: branches(),
height(0.0),
mapping(els),
first(elementIndex),
last(elementIndex+1)
{}
THierarchicalCluster::THierarchicalCluster(PIntList els, PHierarchicalCluster left, PHierarchicalCluster right, const float &h, const int &f, const int &l)
: branches(new THierarchicalClusterList(2)),
height(h),
mapping(els),
first(f),
last(l)
{
branches->at(0) = left;
branches->at(1) = right;
}
void THierarchicalCluster::swap()
{
if (!branches || (branches->size()<2))
return;
if (branches->size() > 2)
raiseError("cannot swap multiple branches (use method 'permutation' instead)");
const TIntList::iterator beg0 = mapping->begin() + branches->at(0)->first;
const TIntList::iterator beg1 = mapping->begin() + branches->at(1)->first;
const TIntList::iterator end1 = mapping->begin() + branches->at(1)->last;
if ((branches->at(0)->first > branches->at(1)->first) || (branches->at(1)->first > branches->at(1)->last))
raiseError("internal inconsistency in clustering structure: invalid ordering of left's and right's 'first' and 'last'");
TIntList::iterator li0, li1;
int *temp = new int [beg1 - beg0], *t;
for(li0 = beg0, t = temp; li0 != beg1; *t++ = *li0++);
for(li0 = beg0, li1 = beg1; li1 != end1; *li0++ = *li1++);
for(t = temp; li0 != end1; *li0++ = *t++);
delete temp;
branches->at(0)->recursiveMove(end1 - beg1);
branches->at(1)->recursiveMove(beg0 - beg1);
PHierarchicalCluster tbr = branches->at(0);
branches->at(0) = branches->at(1);
branches->at(1) = tbr;
}
void THierarchicalCluster::permute(const TIntList &neworder)
{
if ((!branches && neworder.size()) || (branches->size() != neworder.size()))
raiseError("the number of clusters does not match the lenght of the permutation vector");
int *temp = new int [last - first], *t = temp;
TIntList::const_iterator pi = neworder.begin();
THierarchicalClusterList::iterator bi(branches->begin()), be(branches->end());
THierarchicalClusterList newBranches;
for(; bi != be; bi++, pi++) {
PHierarchicalCluster branch = branches->at(*pi);
newBranches.push_back(branch);
TIntList::const_iterator bei(mapping->begin() + branch->first), bee(mapping->begin() + branch->last);
const int offset = (t - temp) - (branch->first - first);
for(; bei != bee; *t++ = *bei++);
if (offset)
branch->recursiveMove(offset);
}
TIntList::iterator bei(mapping->begin() + first), bee(mapping->begin() + last);
for(t = temp; bei!=bee; *bei++ = *t++);
bi = branches->begin();
THierarchicalClusterList::const_iterator nbi(newBranches.begin());
for(; bi != be; *bi++ = *nbi++);
}
void THierarchicalCluster::recursiveMove(const int &offset)
{
first += offset;
last += offset;
if (branches)
PITERATE(THierarchicalClusterList, bi, branches)
(*bi)->recursiveMove(offset);
}
THierarchicalClustering::THierarchicalClustering()
: linkage(Single),
overwriteMatrix(false)
{}
TClusterW **THierarchicalClustering::init(const int &dim, float *distanceMatrix)
{
for(float *ddi = distanceMatrix, *dde = ddi + ((dim+1)*(dim+2))/2; ddi!=dde; ddi++)
if (*ddi < 0) {
int x, y;
TSymMatrix::index2coordinates(ddi-distanceMatrix, x, y);
raiseError("distance matrix contains negative element at (%i, %i)", x, y);
}
TClusterW **clusters = mlnew TClusterW *[dim];
TClusterW **clusteri = clusters;
*clusters = mlnew TClusterW(0, NULL, 0);
distanceMatrix++;
for(int elementIndex = 1, e = dim; elementIndex < e; distanceMatrix += ++elementIndex) {
TClusterW *newcluster = mlnew TClusterW(elementIndex, distanceMatrix, elementIndex);
(*clusteri++)->next = newcluster;
*clusteri = newcluster;
}
return clusters;
}
TClusterW *THierarchicalClustering::merge_SingleLinkage(TClusterW **clusters, float *milestones)
{
float *milestone = milestones;
int step = 0;
while((*clusters)->next) {
if (milestone && (step++ ==*milestone))
progressCallback->call(*((++milestone)++));
TClusterW *cluster;
TClusterW **pcluster2;
float minDistance = numeric_limits<float>::max();
for(TClusterW **tcluster = &((*clusters)->next); *tcluster; tcluster = &(*tcluster)->next)
if ((*tcluster)->minDistance < minDistance) {
minDistance = (*tcluster)->minDistance;
pcluster2 = tcluster;
}
TClusterW *const cluster2 = *pcluster2;
const int rawIndex1 = cluster2->rawIndexMinDistance;
const int rawIndex2 = cluster2->nDistances;
TClusterW *const cluster1 = clusters[rawIndex1];
float *disti1 = cluster1->distances;
float *disti2 = cluster2->distances;
if (rawIndex1) { // not root - has no distances...
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -