📄 clust.cpp
字号:
{
const unsigned uLeftNodeIndex = GetLeftIndex(uNewNodeIndex);
const unsigned uRightNodeIndex = GetRightIndex(uNewNodeIndex);
const float dDistL = GetDist(uLeftNodeIndex, uNodeIndex);
const float dDistR = GetDist(uRightNodeIndex, uNodeIndex);
return (dDistL + dDistR)/2;
}
float Clust::ComputeDistNeighborJoining(unsigned uNewNodeIndex, unsigned uNodeIndex)
{
const unsigned uLeftNodeIndex = GetLeftIndex(uNewNodeIndex);
const unsigned uRightNodeIndex = GetRightIndex(uNewNodeIndex);
const float dDistLR = GetDist(uLeftNodeIndex, uRightNodeIndex);
const float dDistL = GetDist(uLeftNodeIndex, uNodeIndex);
const float dDistR = GetDist(uRightNodeIndex, uNodeIndex);
const float dDist = (dDistL + dDistR - dDistLR)/2;
return dDist;
}
// This is a mysterious variant of UPGMA reverse-engineered from MAFFT source.
float Clust::ComputeDistMAFFT(unsigned uNewNodeIndex, unsigned uNodeIndex)
{
const unsigned uLeftNodeIndex = GetLeftIndex(uNewNodeIndex);
const unsigned uRightNodeIndex = GetRightIndex(uNewNodeIndex);
const float dDistLR = GetDist(uLeftNodeIndex, uRightNodeIndex);
const float dDistL = GetDist(uLeftNodeIndex, uNodeIndex);
const float dDistR = GetDist(uRightNodeIndex, uNodeIndex);
const float dMinDistLR = (dDistL < dDistR ? dDistL : dDistR);
const float dSumDistLR = dDistL + dDistR;
const float dDist = dMinDistLR*(1 - g_dSUEFF) + dSumDistLR*g_dSUEFF/2;
return dDist;
}
unsigned Clust::GetClusterCount() const
{
return m_uClusterCount;
}
void Clust::LogMe() const
{
Log("Clust %u leaves, %u nodes, %u clusters.\n",
m_uLeafCount, m_uNodeCount, m_uClusterCount);
Log("Distance matrix\n");
const unsigned uNodeCount = GetNodeCount();
Log(" ");
for (unsigned i = 0; i < uNodeCount - 1; ++i)
Log(" %7u", i);
Log("\n");
Log(" ");
for (unsigned i = 0; i < uNodeCount - 1; ++i)
Log(" ------");
Log("\n");
for (unsigned i = 0; i < uNodeCount - 1; ++i)
{
Log("%4u: ", i);
for (unsigned j = 0; j < i; ++j)
Log(" %7.2g", GetDist(i, j));
Log("\n");
}
Log("\n");
Log("Node Size Prnt Left Rght Length Name\n");
Log("---- ---- ---- ---- ---- ------ ----\n");
for (unsigned uNodeIndex = 0; uNodeIndex < m_uNodeCount; ++uNodeIndex)
{
const ClustNode &Node = m_Nodes[uNodeIndex];
Log("%4u %4u", uNodeIndex, Node.m_uSize);
if (0 != Node.m_ptrParent)
Log(" %4u", Node.m_ptrParent->m_uIndex);
else
Log(" ");
if (0 != Node.m_ptrLeft)
Log(" %4u", Node.m_ptrLeft->m_uIndex);
else
Log(" ");
if (0 != Node.m_ptrRight)
Log(" %4u", Node.m_ptrRight->m_uIndex);
else
Log(" ");
if (uNodeIndex != m_uNodeCount - 1)
Log(" %7.3g", Node.m_dLength);
if (IsLeaf(uNodeIndex))
{
const char *ptrName = GetNodeName(uNodeIndex);
if (0 != ptrName)
Log(" %s", ptrName);
}
if (GetRootNodeIndex() == uNodeIndex)
Log(" [ROOT]");
Log("\n");
}
}
const ClustNode &Clust::GetNode(unsigned uNodeIndex) const
{
if (uNodeIndex >= m_uNodeCount)
Quit("ClustNode::GetNode(%u) %u", uNodeIndex, m_uNodeCount);
return m_Nodes[uNodeIndex];
}
bool Clust::IsLeaf(unsigned uNodeIndex) const
{
return uNodeIndex < m_uLeafCount;
}
unsigned Clust::GetClusterSize(unsigned uNodeIndex) const
{
const ClustNode &Node = GetNode(uNodeIndex);
return Node.m_uSize;
}
unsigned Clust::GetLeftIndex(unsigned uNodeIndex) const
{
const ClustNode &Node = GetNode(uNodeIndex);
if (0 == Node.m_ptrLeft)
Quit("Clust::GetLeftIndex: leaf");
return Node.m_ptrLeft->m_uIndex;
}
unsigned Clust::GetRightIndex(unsigned uNodeIndex) const
{
const ClustNode &Node = GetNode(uNodeIndex);
if (0 == Node.m_ptrRight)
Quit("Clust::GetRightIndex: leaf");
return Node.m_ptrRight->m_uIndex;
}
float Clust::GetLength(unsigned uNodeIndex) const
{
const ClustNode &Node = GetNode(uNodeIndex);
return Node.m_dLength;
}
void Clust::SetLeafCount(unsigned uLeafCount)
{
if (uLeafCount <= 1)
Quit("Clust::SetLeafCount(%u)", uLeafCount);
m_uLeafCount = uLeafCount;
const unsigned uNodeCount = GetNodeCount();
// Triangular matrix size excluding diagonal (all zeros in our case).
m_uTriangularMatrixSize = (uNodeCount*(uNodeCount - 1))/2;
m_dDist = new float[m_uTriangularMatrixSize];
}
unsigned Clust::GetLeafCount() const
{
return m_uLeafCount;
}
unsigned Clust::VectorIndex(unsigned uIndex1, unsigned uIndex2) const
{
const unsigned uNodeCount = GetNodeCount();
if (uIndex1 >= uNodeCount || uIndex2 >= uNodeCount)
Quit("DistVectorIndex(%u,%u) %u", uIndex1, uIndex2, uNodeCount);
unsigned v;
if (uIndex1 >= uIndex2)
v = uIndex2 + (uIndex1*(uIndex1 - 1))/2;
else
v = uIndex1 + (uIndex2*(uIndex2 - 1))/2;
assert(v < m_uTriangularMatrixSize);
return v;
}
float Clust::GetDist(unsigned uIndex1, unsigned uIndex2) const
{
unsigned v = VectorIndex(uIndex1, uIndex2);
return m_dDist[v];
}
void Clust::SetDist(unsigned uIndex1, unsigned uIndex2, float dDist)
{
unsigned v = VectorIndex(uIndex1, uIndex2);
m_dDist[v] = dDist;
}
float Clust::GetHeight(unsigned uNodeIndex) const
{
if (IsLeaf(uNodeIndex))
return 0;
const unsigned uLeftIndex = GetLeftIndex(uNodeIndex);
const unsigned uRightIndex = GetRightIndex(uNodeIndex);
const float dLeftLength = GetLength(uLeftIndex);
const float dRightLength = GetLength(uRightIndex);
const float dLeftHeight = dLeftLength + GetHeight(uLeftIndex);
const float dRightHeight = dRightLength + GetHeight(uRightIndex);
return (dLeftHeight + dRightHeight)/2;
}
const char *Clust::GetNodeName(unsigned uNodeIndex) const
{
if (!IsLeaf(uNodeIndex))
Quit("Clust::GetNodeName, is not leaf");
return m_ptrSet->GetLeafName(uNodeIndex);
}
unsigned Clust::GetNodeId(unsigned uNodeIndex) const
{
if (uNodeIndex >= GetLeafCount())
return 0;
return m_ptrSet->GetLeafId(uNodeIndex);
}
unsigned Clust::GetLeaf(unsigned uNodeIndex, unsigned uLeafIndex) const
{
const ClustNode &Node = GetNode(uNodeIndex);
const unsigned uLeafCount = Node.m_uSize;
if (uLeafIndex >= uLeafCount)
Quit("Clust::GetLeaf, invalid index");
const unsigned uIndex = Node.m_uLeafIndexes[uLeafIndex];
if (uIndex >= m_uNodeCount)
Quit("Clust::GetLeaf, index out of range");
return uIndex;
}
unsigned Clust::GetFirstCluster() const
{
if (0 == m_ptrClusterList)
return uInsane;
return m_ptrClusterList->m_uIndex;
}
unsigned Clust::GetNextCluster(unsigned uIndex) const
{
ClustNode *ptrNode = &m_Nodes[uIndex];
if (0 == ptrNode->m_ptrNextCluster)
return uInsane;
return ptrNode->m_ptrNextCluster->m_uIndex;
}
void Clust::DeleteFromClusterList(unsigned uNodeIndex)
{
assert(uNodeIndex < m_uNodeCount);
ClustNode *ptrNode = &m_Nodes[uNodeIndex];
ClustNode *ptrPrev = ptrNode->m_ptrPrevCluster;
ClustNode *ptrNext = ptrNode->m_ptrNextCluster;
if (0 != ptrNext)
ptrNext->m_ptrPrevCluster = ptrPrev;
if (0 == ptrPrev)
{
assert(m_ptrClusterList == ptrNode);
m_ptrClusterList = ptrNext;
}
else
ptrPrev->m_ptrNextCluster = ptrNext;
ptrNode->m_ptrNextCluster = 0;
ptrNode->m_ptrPrevCluster = 0;
}
void Clust::AddToClusterList(unsigned uNodeIndex)
{
assert(uNodeIndex < m_uNodeCount);
ClustNode *ptrNode = &m_Nodes[uNodeIndex];
if (0 != m_ptrClusterList)
m_ptrClusterList->m_ptrPrevCluster = ptrNode;
ptrNode->m_ptrNextCluster = m_ptrClusterList;
ptrNode->m_ptrPrevCluster = 0;
m_ptrClusterList = ptrNode;
}
float Clust::ComputeMetric(unsigned uIndex1, unsigned uIndex2) const
{
switch (m_JoinStyle)
{
case JOIN_NearestNeighbor:
return ComputeMetricNearestNeighbor(uIndex1, uIndex2);
case JOIN_NeighborJoining:
return ComputeMetricNeighborJoining(uIndex1, uIndex2);
}
Quit("Clust::ComputeMetric");
return 0;
}
float Clust::ComputeMetricNeighborJoining(unsigned i, unsigned j) const
{
float ri = Calc_r(i);
float rj = Calc_r(j);
float dij = GetDist(i, j);
float dMetric = dij - (ri + rj);
return (float) dMetric;
}
float Clust::ComputeMetricNearestNeighbor(unsigned i, unsigned j) const
{
return (float) GetDist(i, j);
}
float Clust::GetMinMetricBruteForce(unsigned *ptruIndex1, unsigned *ptruIndex2) const
{
unsigned uMinLeftNodeIndex = uInsane;
unsigned uMinRightNodeIndex = uInsane;
float dMinMetric = PLUS_INFINITY;
for (unsigned uLeftNodeIndex = GetFirstCluster(); uLeftNodeIndex != uInsane;
uLeftNodeIndex = GetNextCluster(uLeftNodeIndex))
{
for (unsigned uRightNodeIndex = GetNextCluster(uLeftNodeIndex);
uRightNodeIndex != uInsane;
uRightNodeIndex = GetNextCluster(uRightNodeIndex))
{
float dMetric = ComputeMetric(uLeftNodeIndex, uRightNodeIndex);
if (dMetric < dMinMetric)
{
dMinMetric = dMetric;
uMinLeftNodeIndex = uLeftNodeIndex;
uMinRightNodeIndex = uRightNodeIndex;
}
}
}
*ptruIndex1 = uMinLeftNodeIndex;
*ptruIndex2 = uMinRightNodeIndex;
return dMinMetric;
}
float Clust::GetMinMetric(unsigned *ptruIndex1, unsigned *ptruIndex2) const
{
return GetMinMetricBruteForce(ptruIndex1, ptruIndex2);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -