📄 clust.cpp
字号:
#include "muscle.h"
#include "clust.h"
#include "clustset.h"
#include <stdio.h>
#define TRACE 0
Clust::Clust()
{
m_Nodes = 0;
m_uNodeCount = 0;
m_uLeafCount = 0;
m_uClusterCount = 0;
m_JoinStyle = JOIN_Undefined;
m_dDist = 0;
m_uLeafCount = 0;
m_ptrSet = 0;
}
Clust::~Clust()
{
delete[] m_Nodes;
delete[] m_dDist;
delete[] m_ClusterIndexToNodeIndex;
}
void Clust::Create(ClustSet &Set, CLUSTER Method)
{
m_ptrSet = &Set;
SetLeafCount(Set.GetLeafCount());
switch (Method)
{
case CLUSTER_UPGMA:
m_JoinStyle = JOIN_NearestNeighbor;
m_CentroidStyle = LINKAGE_Avg;
break;
case CLUSTER_UPGMAMax:
m_JoinStyle = JOIN_NearestNeighbor;
m_CentroidStyle = LINKAGE_Max;
break;
case CLUSTER_UPGMAMin:
m_JoinStyle = JOIN_NearestNeighbor;
m_CentroidStyle = LINKAGE_Min;
break;
case CLUSTER_UPGMB:
m_JoinStyle = JOIN_NearestNeighbor;
m_CentroidStyle = LINKAGE_Biased;
break;
case CLUSTER_NeighborJoining:
m_JoinStyle = JOIN_NeighborJoining;
m_CentroidStyle = LINKAGE_NeighborJoining;
break;
default:
Quit("Clust::Create, invalid method %d", Method);
}
if (m_uLeafCount <= 1)
Quit("Clust::Create: no leaves");
m_uNodeCount = 2*m_uLeafCount - 1;
m_Nodes = new ClustNode[m_uNodeCount];
m_ClusterIndexToNodeIndex = new unsigned[m_uLeafCount];
m_ptrClusterList = 0;
for (unsigned uNodeIndex = 0; uNodeIndex < m_uNodeCount; ++uNodeIndex)
{
ClustNode &Node = m_Nodes[uNodeIndex];
Node.m_uIndex = uNodeIndex;
if (uNodeIndex < m_uLeafCount)
{
Node.m_uSize = 1;
Node.m_uLeafIndexes = new unsigned[1];
Node.m_uLeafIndexes[0] = uNodeIndex;
AddToClusterList(uNodeIndex);
}
else
Node.m_uSize = 0;
}
// Compute initial distance matrix between leaves
SetProgressDesc("Build dist matrix");
unsigned uPairIndex = 0;
const unsigned uPairCount = (m_uLeafCount*(m_uLeafCount - 1))/2;
for (unsigned i = 0; i < m_uLeafCount; ++i)
for (unsigned j = 0; j < i; ++j)
{
const float dDist = (float) m_ptrSet->ComputeDist(*this, i, j);
SetDist(i, j, dDist);
if (0 == uPairIndex%10000)
Progress(uPairIndex, uPairCount);
++uPairIndex;
}
ProgressStepsDone();
// Call CreateCluster once for each internal node in the tree
SetProgressDesc("Build guide tree");
m_uClusterCount = m_uLeafCount;
const unsigned uInternalNodeCount = m_uNodeCount - m_uLeafCount;
for (unsigned uNodeIndex = m_uLeafCount; uNodeIndex < m_uNodeCount; ++uNodeIndex)
{
unsigned i = uNodeIndex + 1 - m_uLeafCount;
Progress(i, uInternalNodeCount);
CreateCluster();
}
ProgressStepsDone();
}
void Clust::CreateCluster()
{
unsigned uLeftNodeIndex;
unsigned uRightNodeIndex;
float dLeftLength;
float dRightLength;
ChooseJoin(&uLeftNodeIndex, &uRightNodeIndex, &dLeftLength, &dRightLength);
const unsigned uNewNodeIndex = m_uNodeCount - m_uClusterCount + 1;
JoinNodes(uLeftNodeIndex, uRightNodeIndex, dLeftLength, dRightLength,
uNewNodeIndex);
#if TRACE
Log("Merge New=%u L=%u R=%u Ld=%7.2g Rd=%7.2g\n",
uNewNodeIndex, uLeftNodeIndex, uRightNodeIndex, dLeftLength, dRightLength);
#endif
// Compute distances to other clusters
--m_uClusterCount;
for (unsigned uNodeIndex = GetFirstCluster(); uNodeIndex != uInsane;
uNodeIndex = GetNextCluster(uNodeIndex))
{
if (uNodeIndex == uLeftNodeIndex || uNodeIndex == uRightNodeIndex)
continue;
if (uNewNodeIndex == uNodeIndex)
continue;
const float dDist = ComputeDist(uNewNodeIndex, uNodeIndex);
SetDist(uNewNodeIndex, uNodeIndex, dDist);
}
for (unsigned uNodeIndex = GetFirstCluster(); uNodeIndex != uInsane;
uNodeIndex = GetNextCluster(uNodeIndex))
{
if (uNodeIndex == uLeftNodeIndex || uNodeIndex == uRightNodeIndex)
continue;
if (uNewNodeIndex == uNodeIndex)
continue;
#if REDLACK
const float dMetric = ComputeMetric(uNewNodeIndex, uNodeIndex);
InsertMetric(uNewNodeIndex, uNodeIndex, dMetric);
#endif
}
}
void Clust::ChooseJoin(unsigned *ptruLeftIndex, unsigned *ptruRightIndex,
float *ptrdLeftLength, float *ptrdRightLength)
{
switch (m_JoinStyle)
{
case JOIN_NearestNeighbor:
ChooseJoinNearestNeighbor(ptruLeftIndex, ptruRightIndex, ptrdLeftLength,
ptrdRightLength);
return;
case JOIN_NeighborJoining:
ChooseJoinNeighborJoining(ptruLeftIndex, ptruRightIndex, ptrdLeftLength,
ptrdRightLength);
return;
}
Quit("Clust::ChooseJoin, Invalid join style %u", m_JoinStyle);
}
void Clust::ChooseJoinNearestNeighbor(unsigned *ptruLeftIndex,
unsigned *ptruRightIndex, float *ptrdLeftLength, float *ptrdRightLength)
{
const unsigned uClusterCount = GetClusterCount();
unsigned uMinLeftNodeIndex;
unsigned uMinRightNodeIndex;
GetMinMetric(&uMinLeftNodeIndex, &uMinRightNodeIndex);
float dMinDist = GetDist(uMinLeftNodeIndex, uMinRightNodeIndex);
const float dLeftHeight = GetHeight(uMinLeftNodeIndex);
const float dRightHeight = GetHeight(uMinRightNodeIndex);
*ptruLeftIndex = uMinLeftNodeIndex;
*ptruRightIndex = uMinRightNodeIndex;
*ptrdLeftLength = dMinDist/2 - dLeftHeight;
*ptrdRightLength = dMinDist/2 - dRightHeight;
}
void Clust::ChooseJoinNeighborJoining(unsigned *ptruLeftIndex,
unsigned *ptruRightIndex, float *ptrdLeftLength, float *ptrdRightLength)
{
const unsigned uClusterCount = GetClusterCount();
//unsigned uMinLeftNodeIndex = uInsane;
//unsigned uMinRightNodeIndex = uInsane;
//float dMinD = PLUS_INFINITY;
//for (unsigned i = GetFirstCluster(); i != uInsane; i = GetNextCluster(i))
// {
// const float ri = Calc_r(i);
// for (unsigned j = GetNextCluster(i); j != uInsane; j = GetNextCluster(j))
// {
// const float rj = Calc_r(j);
// const float dij = GetDist(i, j);
// const float Dij = dij - (ri + rj);
// if (Dij < dMinD)
// {
// dMinD = Dij;
// uMinLeftNodeIndex = i;
// uMinRightNodeIndex = j;
// }
// }
// }
unsigned uMinLeftNodeIndex;
unsigned uMinRightNodeIndex;
GetMinMetric(&uMinLeftNodeIndex, &uMinRightNodeIndex);
const float dDistLR = GetDist(uMinLeftNodeIndex, uMinRightNodeIndex);
const float rL = Calc_r(uMinLeftNodeIndex);
const float rR = Calc_r(uMinRightNodeIndex);
const float dLeftLength = (dDistLR + rL - rR)/2;
const float dRightLength = (dDistLR - rL + rR)/2;
*ptruLeftIndex = uMinLeftNodeIndex;
*ptruRightIndex = uMinRightNodeIndex;
*ptrdLeftLength = dLeftLength;
*ptrdRightLength = dRightLength;
}
void Clust::JoinNodes(unsigned uLeftIndex, unsigned uRightIndex, float dLeftLength,
float dRightLength, unsigned uNodeIndex)
{
ClustNode &Parent = m_Nodes[uNodeIndex];
ClustNode &Left = m_Nodes[uLeftIndex];
ClustNode &Right = m_Nodes[uRightIndex];
Left.m_dLength = dLeftLength;
Right.m_dLength = dRightLength;
Parent.m_ptrLeft = &Left;
Parent.m_ptrRight = &Right;
Left.m_ptrParent = &Parent;
Right.m_ptrParent = &Parent;
const unsigned uLeftSize = Left.m_uSize;
const unsigned uRightSize = Right.m_uSize;
const unsigned uParentSize = uLeftSize + uRightSize;
Parent.m_uSize = uParentSize;
assert(0 == Parent.m_uLeafIndexes);
Parent.m_uLeafIndexes = new unsigned[uParentSize];
const unsigned uLeftBytes = uLeftSize*sizeof(unsigned);
const unsigned uRightBytes = uRightSize*sizeof(unsigned);
memcpy(Parent.m_uLeafIndexes, Left.m_uLeafIndexes, uLeftBytes);
memcpy(Parent.m_uLeafIndexes + uLeftSize, Right.m_uLeafIndexes, uRightBytes);
DeleteFromClusterList(uLeftIndex);
DeleteFromClusterList(uRightIndex);
AddToClusterList(uNodeIndex);
}
float Clust::Calc_r(unsigned uNodeIndex) const
{
const unsigned uClusterCount = GetClusterCount();
if (2 == uClusterCount)
return 0;
float dSum = 0;
for (unsigned i = GetFirstCluster(); i != uInsane; i = GetNextCluster(i))
{
if (i == uNodeIndex)
continue;
dSum += GetDist(uNodeIndex, i);
}
return dSum/(uClusterCount - 2);
}
float Clust::ComputeDist(unsigned uNewNodeIndex, unsigned uNodeIndex)
{
switch (m_CentroidStyle)
{
case LINKAGE_Avg:
return ComputeDistAverageLinkage(uNewNodeIndex, uNodeIndex);
case LINKAGE_Min:
return ComputeDistMinLinkage(uNewNodeIndex, uNodeIndex);
case LINKAGE_Max:
return ComputeDistMaxLinkage(uNewNodeIndex, uNodeIndex);
case LINKAGE_Biased:
return ComputeDistMAFFT(uNewNodeIndex, uNodeIndex);
case LINKAGE_NeighborJoining:
return ComputeDistNeighborJoining(uNewNodeIndex, uNodeIndex);
}
Quit("Clust::ComputeDist, invalid centroid style %u", m_CentroidStyle);
return (float) g_dNAN;
}
float Clust::ComputeDistMinLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex)
{
const unsigned uLeftNodeIndex = GetLeftIndex(uNewNodeIndex);
const unsigned uRightNodeIndex = GetRightIndex(uNewNodeIndex);
const float dDistL = GetDist(uLeftNodeIndex, uNodeIndex);
const float dDistR = GetDist(uRightNodeIndex, uNodeIndex);
return (dDistL < dDistR ? dDistL : dDistR);
}
float Clust::ComputeDistMaxLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex)
{
const unsigned uLeftNodeIndex = GetLeftIndex(uNewNodeIndex);
const unsigned uRightNodeIndex = GetRightIndex(uNewNodeIndex);
const float dDistL = GetDist(uLeftNodeIndex, uNodeIndex);
const float dDistR = GetDist(uRightNodeIndex, uNodeIndex);
return (dDistL > dDistR ? dDistL : dDistR);
}
float Clust::ComputeDistAverageLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -