📄 hclust.cpp
字号:
const float *minIndex1 = cluster1->distances + cluster1->rawIndexMinDistance;
for(int ndi = cluster1->nDistances; ndi--; disti1++, disti2++)
if (*disti2 < *disti1) // if one is -1, they both are
if ((*disti1 = *disti2) < *minIndex1)
minIndex1 = disti1;
cluster1->minDistance = *minIndex1;
cluster1->rawIndexMinDistance = minIndex1 - cluster1->distances;
}
while(*disti2 < 0)
disti2++; // should have at least one more >=0 - the one corresponding to distance to cluster1
for(cluster = cluster1->next; cluster != cluster2; cluster = cluster->next) {
while(*++disti2 < 0); // should have more - the one corresponding to cluster
if (*disti2 < cluster->distances[rawIndex1])
if ((cluster->distances[rawIndex1] = *disti2) < cluster->minDistance) {
cluster->minDistance = *disti2;
cluster->rawIndexMinDistance = rawIndex1;
}
}
for(cluster = cluster->next; cluster; cluster = cluster->next) {
if (cluster->distances[rawIndex2] < cluster->distances[rawIndex1])
cluster->distances[rawIndex1] = cluster->distances[rawIndex2];
// don't nest this in the above if -- both distances can be equal, yet index HAS TO move
if (rawIndex2 == cluster->rawIndexMinDistance)
cluster->rawIndexMinDistance = rawIndex1; // the smallest element got moved
cluster->distances[rawIndex2] = -1;
}
cluster1->elevate(pcluster2, minDistance);
}
return *clusters;
}
TClusterW *THierarchicalClustering::merge_AverageLinkage(TClusterW **clusters, float *milestones)
{
float *milestone = milestones;
int step = 0;
while((*clusters)->next) {
if (milestone && (step++ ==*milestone))
progressCallback->call(*((++milestone)++));
TClusterW *cluster;
TClusterW **pcluster2;
float minDistance = numeric_limits<float>::max();
for(TClusterW **tcluster = &((*clusters)->next); *tcluster; tcluster = &(*tcluster)->next)
if ((*tcluster)->minDistance < minDistance) {
minDistance = (*tcluster)->minDistance;
pcluster2 = tcluster;
}
TClusterW *const cluster2 = *pcluster2;
const int rawIndex1 = cluster2->rawIndexMinDistance;
const int rawIndex2 = cluster2->nDistances;
TClusterW *const cluster1 = clusters[rawIndex1];
float *disti1 = cluster1->distances;
float *disti2 = cluster2->distances;
const int size1 = cluster1->size;
const int size2 = cluster2->size;
const int sumsize = size1 + size2;
if (rawIndex1) { // not root - has no distances...
*disti1 = (*disti1 * size1 + *disti2 * size2) / sumsize;
const float *minIndex1 = disti1;
int ndi = cluster1->nDistances-1;
for(disti1++, disti2++; ndi--; disti1++, disti2++)
if (*disti1 >= 0) {
*disti1 = (*disti1 * size1 + *disti2 * size2) / sumsize;
if (*disti1 < *minIndex1)
minIndex1 = disti1;
}
cluster1->minDistance = *minIndex1;
cluster1->rawIndexMinDistance = minIndex1 - cluster1->distances;
}
while(*disti2 < 0)
disti2++; // should have at least one more >=0 - the one corresponding to distance to cluster1
for(cluster = cluster1->next; cluster != cluster2; cluster = cluster->next) {
while(*++disti2 < 0); // should have more - the one corresponding to cluster
float &distc = cluster->distances[rawIndex1];
distc = (distc * size1 + *disti2 * size2) / sumsize;
if (distc < cluster->minDistance) {
cluster->minDistance = distc;
cluster->rawIndexMinDistance = rawIndex1;
}
else if ((distc > cluster->minDistance) && (cluster->rawIndexMinDistance == rawIndex1))
cluster->computeMinimalDistance();
}
for(cluster = cluster->next; cluster; cluster = cluster->next) {
float &distc = cluster->distances[rawIndex1];
distc = (distc * size1 + cluster->distances[rawIndex2] * size2) / sumsize;
if (distc < cluster->minDistance) {
cluster->minDistance = distc;
cluster->rawIndexMinDistance = rawIndex1;
}
else if ( (distc > cluster->minDistance) && (cluster->rawIndexMinDistance == rawIndex1)
|| (cluster->rawIndexMinDistance == rawIndex2)) {
cluster->distances[rawIndex2] = -1;
cluster->computeMinimalDistance();
}
else
cluster->distances[rawIndex2] = -1;
}
cluster1->elevate(pcluster2, minDistance);
}
return *clusters;
}
TClusterW *THierarchicalClustering::merge_CompleteLinkage(TClusterW **clusters, float *milestones)
{
float *milestone = milestones;
int step = 0;
while((*clusters)->next) {
if (milestone && (step++ ==*milestone))
progressCallback->call(*((++milestone)++));
TClusterW *cluster;
TClusterW **pcluster2;
float minDistance = numeric_limits<float>::max();
for(TClusterW **tcluster = &((*clusters)->next); *tcluster; tcluster = &(*tcluster)->next)
if ((*tcluster)->minDistance < minDistance) {
minDistance = (*tcluster)->minDistance;
pcluster2 = tcluster;
}
TClusterW *const cluster2 = *pcluster2;
const int rawIndex1 = cluster2->rawIndexMinDistance;
const int rawIndex2 = cluster2->nDistances;
TClusterW *const cluster1 = clusters[rawIndex1];
float *disti1 = cluster1->distances;
float *disti2 = cluster2->distances;
if (rawIndex1) { // not root - has no distances...
if (*disti2 > *disti1)
*disti1 = *disti2;
const float *minIndex1 = disti1;
int ndi = cluster1->nDistances-1;
for(disti1++, disti2++; ndi--; disti1++, disti2++) {
if (*disti1 >= 0) {
if (*disti2 > *disti1) // if one is -1, they both are
*disti1 = *disti2;
if (*disti1 < *minIndex1)
minIndex1 = disti1;
}
}
cluster1->minDistance = *minIndex1;
cluster1->rawIndexMinDistance = minIndex1 - cluster1->distances;
}
while(*disti2 < 0)
disti2++; // should have at least one more >=0 - the one corresponding to distance to cluster1
for(cluster = cluster1->next; cluster != cluster2; cluster = cluster->next) {
while(*++disti2 < 0); // should have more - the one corresponding to cluster
float &distc = cluster->distances[rawIndex1];
if (*disti2 > distc) {
distc = *disti2;
if (cluster->rawIndexMinDistance == rawIndex1)
if (distc <= cluster->minDistance)
cluster->minDistance = distc;
else
cluster->computeMinimalDistance();
}
}
for(cluster = cluster->next; cluster; cluster = cluster->next) {
float &distc = cluster->distances[rawIndex1];
if (cluster->distances[rawIndex2] > distc)
distc = cluster->distances[rawIndex2];
cluster->distances[rawIndex2] = -1;
if ((cluster->rawIndexMinDistance == rawIndex1) || (cluster->rawIndexMinDistance == rawIndex2))
cluster->computeMinimalDistance();
}
cluster1->elevate(pcluster2, minDistance);
}
return *clusters;
}
TClusterW *THierarchicalClustering::merge(TClusterW **clusters, float *milestones)
{
switch(linkage) {
case Complete: return merge_CompleteLinkage(clusters, milestones);
case Single: return merge_SingleLinkage(clusters, milestones);
case Average:
default: return merge_AverageLinkage(clusters, milestones);
}
}
PHierarchicalCluster THierarchicalClustering::restructure(TClusterW *root)
{
PIntList elementIndices = new TIntList(root->size);
TIntList::iterator currentElement(elementIndices->begin());
int currentIndex = 0;
return restructure(root, elementIndices, currentElement, currentIndex);
}
PHierarchicalCluster THierarchicalClustering::restructure(TClusterW *node, PIntList elementIndices, TIntList::iterator ¤tElement, int ¤tIndex)
{
PHierarchicalCluster cluster;
if (!node->left) {
*currentElement++ = node->elementIndex;
cluster = mlnew THierarchicalCluster(elementIndices, currentIndex++);
}
else {
PHierarchicalCluster left = restructure(node->left, elementIndices, currentElement, currentIndex);
PHierarchicalCluster right = restructure(node->right, elementIndices, currentElement, currentIndex);
cluster = mlnew THierarchicalCluster(elementIndices, left, right, node->height, left->first, right->last);
}
// no need to care about 'distances' - they've been removed during clustering (in 'elevate') :)
mldelete node;
return cluster;
}
PHierarchicalCluster THierarchicalClustering::operator()(PSymMatrix distanceMatrix)
{
float *distanceMatrixElements = NULL;
TClusterW **clusters, *root;
float *callbackMilestones = NULL;
try {
const int dim = distanceMatrix->dim;
const int size = ((dim+1)*(dim+2))/2;
float *distanceMatrixElements = overwriteMatrix ? distanceMatrix->elements : (float *)memcpy(new float[size], distanceMatrix->elements, size*sizeof(float));
clusters = init(dim, distanceMatrixElements);
callbackMilestones = (progressCallback && (distanceMatrix->dim >= 1000)) ? progressCallback->milestones(distanceMatrix->dim) : NULL;
root = merge(clusters, callbackMilestones);
}
catch (...) {
mldelete clusters;
mldelete callbackMilestones;
mldelete distanceMatrixElements;
throw;
}
mldelete clusters;
mldelete callbackMilestones;
mldelete distanceMatrixElements;
return restructure(root);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -