⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 clust.cpp

📁 unix,linux下编译。用于蛋白质
💻 CPP
📖 第 1 页 / 共 2 页
字号:
#include "muscle.h"
#include "clust.h"
#include "clustset.h"
#include <stdio.h>

#define TRACE		0

Clust::Clust()
	{
	m_Nodes = 0;
	m_uNodeCount = 0;
	m_uLeafCount = 0;
	m_uClusterCount = 0;
	m_JoinStyle = JOIN_Undefined;
	m_dDist = 0;
	m_uLeafCount = 0;
	m_ptrSet = 0;
	}

Clust::~Clust()
	{
	delete[] m_Nodes;
	delete[] m_dDist;
	delete[] m_ClusterIndexToNodeIndex;
	}

void Clust::Create(ClustSet &Set, CLUSTER Method)
	{
	m_ptrSet = &Set;

	SetLeafCount(Set.GetLeafCount());

	switch (Method)
		{
	case CLUSTER_UPGMA:
		m_JoinStyle = JOIN_NearestNeighbor;
		m_CentroidStyle = LINKAGE_Avg;
		break;

	case CLUSTER_UPGMAMax:
		m_JoinStyle = JOIN_NearestNeighbor;
		m_CentroidStyle = LINKAGE_Max;
		break;

	case CLUSTER_UPGMAMin:
		m_JoinStyle = JOIN_NearestNeighbor;
		m_CentroidStyle = LINKAGE_Min;
		break;

	case CLUSTER_UPGMB:
		m_JoinStyle = JOIN_NearestNeighbor;
		m_CentroidStyle = LINKAGE_Biased;
		break;

	case CLUSTER_NeighborJoining:
		m_JoinStyle = JOIN_NeighborJoining;
		m_CentroidStyle = LINKAGE_NeighborJoining;
		break;

	default:
		Quit("Clust::Create, invalid method %d", Method);
		}

	if (m_uLeafCount <= 1)
		Quit("Clust::Create: no leaves");

	m_uNodeCount = 2*m_uLeafCount - 1;
	m_Nodes = new ClustNode[m_uNodeCount];
	m_ClusterIndexToNodeIndex = new unsigned[m_uLeafCount];

	m_ptrClusterList = 0;
	for (unsigned uNodeIndex = 0; uNodeIndex < m_uNodeCount; ++uNodeIndex)
		{
		ClustNode &Node = m_Nodes[uNodeIndex];
		Node.m_uIndex = uNodeIndex;
		if (uNodeIndex < m_uLeafCount)
			{
			Node.m_uSize = 1;
			Node.m_uLeafIndexes = new unsigned[1];
			Node.m_uLeafIndexes[0] = uNodeIndex;
			AddToClusterList(uNodeIndex);
			}
		else
			Node.m_uSize = 0;
		}

// Compute initial distance matrix between leaves
	SetProgressDesc("Build dist matrix");
	unsigned uPairIndex = 0;
	const unsigned uPairCount = (m_uLeafCount*(m_uLeafCount - 1))/2;
	for (unsigned i = 0; i < m_uLeafCount; ++i)
		for (unsigned j = 0; j < i; ++j)
			{
			const float dDist = (float) m_ptrSet->ComputeDist(*this, i, j);
			SetDist(i, j, dDist);
			if (0 == uPairIndex%10000)
				Progress(uPairIndex, uPairCount);
			++uPairIndex;
			}
	ProgressStepsDone();

// Call CreateCluster once for each internal node in the tree
	SetProgressDesc("Build guide tree");
	m_uClusterCount = m_uLeafCount;
	const unsigned uInternalNodeCount = m_uNodeCount - m_uLeafCount;
	for (unsigned uNodeIndex = m_uLeafCount; uNodeIndex < m_uNodeCount; ++uNodeIndex)
		{
		unsigned i = uNodeIndex + 1 - m_uLeafCount;
		Progress(i, uInternalNodeCount);
		CreateCluster();
		}
	ProgressStepsDone();
	}

void Clust::CreateCluster()
	{
	unsigned uLeftNodeIndex;
	unsigned uRightNodeIndex;
	float dLeftLength;
	float dRightLength;
	ChooseJoin(&uLeftNodeIndex, &uRightNodeIndex, &dLeftLength, &dRightLength);

	const unsigned uNewNodeIndex = m_uNodeCount - m_uClusterCount + 1;

	JoinNodes(uLeftNodeIndex, uRightNodeIndex, dLeftLength, dRightLength,
	  uNewNodeIndex);

#if	TRACE
	Log("Merge New=%u L=%u R=%u Ld=%7.2g Rd=%7.2g\n",
	  uNewNodeIndex, uLeftNodeIndex, uRightNodeIndex, dLeftLength, dRightLength);
#endif

// Compute distances to other clusters
	--m_uClusterCount;
	for (unsigned uNodeIndex = GetFirstCluster(); uNodeIndex != uInsane;
	  uNodeIndex = GetNextCluster(uNodeIndex))
		{
		if (uNodeIndex == uLeftNodeIndex || uNodeIndex == uRightNodeIndex)
			continue;

		if (uNewNodeIndex == uNodeIndex)
			continue;

		const float dDist = ComputeDist(uNewNodeIndex, uNodeIndex);
		SetDist(uNewNodeIndex, uNodeIndex, dDist);
		}

	for (unsigned uNodeIndex = GetFirstCluster(); uNodeIndex != uInsane;
	  uNodeIndex = GetNextCluster(uNodeIndex))
		{
		if (uNodeIndex == uLeftNodeIndex || uNodeIndex == uRightNodeIndex)
			continue;

		if (uNewNodeIndex == uNodeIndex)
			continue;

#if	REDLACK
		const float dMetric = ComputeMetric(uNewNodeIndex, uNodeIndex);
		InsertMetric(uNewNodeIndex, uNodeIndex, dMetric);
#endif
		}
	}

void Clust::ChooseJoin(unsigned *ptruLeftIndex, unsigned *ptruRightIndex,
  float *ptrdLeftLength, float *ptrdRightLength)
	{
	switch (m_JoinStyle)
		{
	case JOIN_NearestNeighbor:
		ChooseJoinNearestNeighbor(ptruLeftIndex, ptruRightIndex, ptrdLeftLength,
		  ptrdRightLength);
		return;
	case JOIN_NeighborJoining:
		ChooseJoinNeighborJoining(ptruLeftIndex, ptruRightIndex, ptrdLeftLength,
		  ptrdRightLength);
		return;
		}
	Quit("Clust::ChooseJoin, Invalid join style %u", m_JoinStyle);
	}

void Clust::ChooseJoinNearestNeighbor(unsigned *ptruLeftIndex,
  unsigned *ptruRightIndex, float *ptrdLeftLength, float *ptrdRightLength)
	{
	const unsigned uClusterCount = GetClusterCount();

	unsigned uMinLeftNodeIndex;
	unsigned uMinRightNodeIndex;
	GetMinMetric(&uMinLeftNodeIndex, &uMinRightNodeIndex);

	float dMinDist = GetDist(uMinLeftNodeIndex, uMinRightNodeIndex);

	const float dLeftHeight = GetHeight(uMinLeftNodeIndex);
	const float dRightHeight = GetHeight(uMinRightNodeIndex);

	*ptruLeftIndex = uMinLeftNodeIndex;
	*ptruRightIndex = uMinRightNodeIndex;
	*ptrdLeftLength = dMinDist/2 - dLeftHeight;
	*ptrdRightLength = dMinDist/2 - dRightHeight;
	}

void Clust::ChooseJoinNeighborJoining(unsigned *ptruLeftIndex,
  unsigned *ptruRightIndex, float *ptrdLeftLength, float *ptrdRightLength)
	{
	const unsigned uClusterCount = GetClusterCount();

	//unsigned uMinLeftNodeIndex = uInsane;
	//unsigned uMinRightNodeIndex = uInsane;
	//float dMinD = PLUS_INFINITY;
	//for (unsigned i = GetFirstCluster(); i != uInsane; i = GetNextCluster(i))
	//	{
	//	const float ri = Calc_r(i);
	//	for (unsigned j = GetNextCluster(i); j != uInsane; j = GetNextCluster(j))
	//		{
	//		const float rj = Calc_r(j);
	//		const float dij = GetDist(i, j);
	//		const float Dij = dij - (ri + rj);
	//		if (Dij < dMinD)
	//			{
	//			dMinD = Dij;
	//			uMinLeftNodeIndex = i;
	//			uMinRightNodeIndex = j;
	//			}
	//		}
	//	}

	unsigned uMinLeftNodeIndex;
	unsigned uMinRightNodeIndex;
	GetMinMetric(&uMinLeftNodeIndex, &uMinRightNodeIndex);

	const float dDistLR = GetDist(uMinLeftNodeIndex, uMinRightNodeIndex);
	const float rL = Calc_r(uMinLeftNodeIndex);
	const float rR = Calc_r(uMinRightNodeIndex);

	const float dLeftLength = (dDistLR + rL - rR)/2;
	const float dRightLength = (dDistLR - rL + rR)/2;

	*ptruLeftIndex = uMinLeftNodeIndex;
	*ptruRightIndex = uMinRightNodeIndex;
	*ptrdLeftLength = dLeftLength;
	*ptrdRightLength = dRightLength;
	}

void Clust::JoinNodes(unsigned uLeftIndex, unsigned uRightIndex, float dLeftLength,
  float dRightLength, unsigned uNodeIndex)
	{
	ClustNode &Parent = m_Nodes[uNodeIndex];
	ClustNode &Left = m_Nodes[uLeftIndex];
	ClustNode &Right = m_Nodes[uRightIndex];

	Left.m_dLength = dLeftLength;
	Right.m_dLength = dRightLength;

	Parent.m_ptrLeft = &Left;
	Parent.m_ptrRight = &Right;

	Left.m_ptrParent = &Parent;
	Right.m_ptrParent = &Parent;

	const unsigned uLeftSize = Left.m_uSize;
	const unsigned uRightSize = Right.m_uSize;
	const unsigned uParentSize = uLeftSize + uRightSize;
	Parent.m_uSize = uParentSize;

	assert(0 == Parent.m_uLeafIndexes);
	Parent.m_uLeafIndexes = new unsigned[uParentSize];

	const unsigned uLeftBytes = uLeftSize*sizeof(unsigned);
	const unsigned uRightBytes = uRightSize*sizeof(unsigned);
	memcpy(Parent.m_uLeafIndexes, Left.m_uLeafIndexes, uLeftBytes);
	memcpy(Parent.m_uLeafIndexes + uLeftSize, Right.m_uLeafIndexes, uRightBytes);

	DeleteFromClusterList(uLeftIndex);
	DeleteFromClusterList(uRightIndex);
	AddToClusterList(uNodeIndex);
	}

float Clust::Calc_r(unsigned uNodeIndex) const
	{
	const unsigned uClusterCount = GetClusterCount();
	if (2 == uClusterCount)
		return 0;

	float dSum = 0;
	for (unsigned i = GetFirstCluster(); i != uInsane; i = GetNextCluster(i))
		{
		if (i == uNodeIndex)
			continue;
		dSum += GetDist(uNodeIndex, i);
		}
	return dSum/(uClusterCount - 2);
	}

float Clust::ComputeDist(unsigned uNewNodeIndex, unsigned uNodeIndex)
	{
	switch (m_CentroidStyle)
		{
	case LINKAGE_Avg:
		return ComputeDistAverageLinkage(uNewNodeIndex, uNodeIndex);

	case LINKAGE_Min:
		return ComputeDistMinLinkage(uNewNodeIndex, uNodeIndex);

	case LINKAGE_Max:
		return ComputeDistMaxLinkage(uNewNodeIndex, uNodeIndex);

	case LINKAGE_Biased:
		return ComputeDistMAFFT(uNewNodeIndex, uNodeIndex);

	case LINKAGE_NeighborJoining:
		return ComputeDistNeighborJoining(uNewNodeIndex, uNodeIndex);
		}
	Quit("Clust::ComputeDist, invalid centroid style %u", m_CentroidStyle);
	return (float) g_dNAN;
	}

float Clust::ComputeDistMinLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex)
	{
	const unsigned uLeftNodeIndex = GetLeftIndex(uNewNodeIndex);
	const unsigned uRightNodeIndex = GetRightIndex(uNewNodeIndex);
	const float dDistL = GetDist(uLeftNodeIndex, uNodeIndex);
	const float dDistR = GetDist(uRightNodeIndex, uNodeIndex);
	return (dDistL < dDistR ? dDistL : dDistR);
	}

float Clust::ComputeDistMaxLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex)
	{
	const unsigned uLeftNodeIndex = GetLeftIndex(uNewNodeIndex);
	const unsigned uRightNodeIndex = GetRightIndex(uNewNodeIndex);
	const float dDistL = GetDist(uLeftNodeIndex, uNodeIndex);
	const float dDistR = GetDist(uRightNodeIndex, uNodeIndex);
	return (dDistL > dDistR ? dDistL : dDistR);
	}

float Clust::ComputeDistAverageLinkage(unsigned uNewNodeIndex, unsigned uNodeIndex)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -