⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 refinesubfams.cpp

📁 unix,linux下编译。用于蛋白质
💻 CPP
字号:
#include "muscle.h"
#include "msa.h"
#include "tree.h"
#include "clust.h"
#include "profile.h"
#include "pwpath.h"

#define TRACE 0

static void ProgressiveAlignSubfams(const Tree &tree, const unsigned Subfams[],
  unsigned uSubfamCount, const MSA SubfamMSAs[], MSA &msa);

// Identify subfamilies in a tree.
// Returns array of internal node indexes, one for each subfamily.
// First try is to select groups by height (which should approximate
// minimum percent identity), if this gives too many subfamilies then
// we cut at a point that gives the maximum allowed number of subfams.
static void GetSubfams(const Tree &tree, double dMaxHeight,
  unsigned uMaxSubfamCount, unsigned **ptrptrSubfams, unsigned *ptruSubfamCount)
	{
	const unsigned uNodeCount = tree.GetNodeCount();

	unsigned *Subfams = new unsigned[uNodeCount];

	unsigned uSubfamCount;
	ClusterByHeight(tree, dMaxHeight, Subfams, &uSubfamCount);

	if (uSubfamCount > uMaxSubfamCount)
		ClusterBySubfamCount(tree, uMaxSubfamCount, Subfams, &uSubfamCount);

	*ptrptrSubfams = Subfams;
	*ptruSubfamCount = uSubfamCount;
	}

static void LogSubfams(const Tree &tree, const unsigned Subfams[],
  unsigned uSubfamCount)
	{
	const unsigned uNodeCount = tree.GetNodeCount();
	Log("%u subfamilies found\n", uSubfamCount);
	Log("Subfam  Sequence\n");
	Log("------  --------\n");
	unsigned *Leaves = new unsigned[uNodeCount];
	for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)
		{
		unsigned uSubfamNodeIndex = Subfams[uSubfamIndex];
		unsigned uLeafCount;
		GetLeaves(tree, uSubfamNodeIndex, Leaves, &uLeafCount);
		for (unsigned uLeafIndex = 0; uLeafIndex < uLeafCount; ++uLeafIndex)
			Log("%6u  %s\n", uSubfamIndex + 1, tree.GetLeafName(Leaves[uLeafIndex]));
		Log("\n");
		}
	delete[] Leaves;
	}

bool RefineSubfams(MSA &msa, const Tree &tree, unsigned uIters)
	{
	const unsigned uSeqCount = msa.GetSeqCount();
	if (uSeqCount < 3)
		return false;

	const double dMaxHeight = 0.6;
	const unsigned uMaxSubfamCount = 16;
	const unsigned uNodeCount = tree.GetNodeCount();

	unsigned *Subfams;
	unsigned uSubfamCount;
	GetSubfams(tree, dMaxHeight, uMaxSubfamCount, &Subfams, &uSubfamCount);
	assert(uSubfamCount <= uSeqCount);

	if (g_bVerbose)
		LogSubfams(tree, Subfams, uSubfamCount);

	MSA *SubfamMSAs = new MSA[uSubfamCount];
	unsigned *Leaves = new unsigned[uSeqCount];
	unsigned *Ids = new unsigned[uSeqCount];

	bool bAnyChanges = false;
	for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)
		{
		unsigned uSubfam = Subfams[uSubfamIndex];
		unsigned uLeafCount;
		GetLeaves(tree, uSubfam, Leaves, &uLeafCount);
		assert(uLeafCount <= uSeqCount);

		LeafIndexesToIds(tree, Leaves, uLeafCount, Ids);

		MSA &msaSubfam = SubfamMSAs[uSubfamIndex];
		MSASubsetByIds(msa, Ids, uLeafCount, msaSubfam);
		DeleteGappedCols(msaSubfam);

#if	TRACE
		Log("Subfam %u MSA=\n", uSubfamIndex);
		msaSubfam.LogMe();
#endif

		if (msaSubfam.GetSeqCount() <= 2)
			continue;

	// TODO /////////////////////////////////////////
	// Try using existing tree, may actually hurt to
	// re-estimate, may also be a waste of CPU & mem.
	/////////////////////////////////////////////////
		Tree SubfamTree;
		TreeFromMSA(msaSubfam, SubfamTree, g_Cluster2, g_Distance2, g_Root2);

		bool bAnyChangesThisSubfam;
		if (g_bAnchors)
			bAnyChangesThisSubfam = RefineVert(msaSubfam, SubfamTree, uIters);
		else
			bAnyChangesThisSubfam = RefineHoriz(msaSubfam, SubfamTree, uIters, false, false);
#if	TRACE
		Log("Subfam %u Changed %d\n", uSubfamIndex, bAnyChangesThisSubfam);
#endif
		if (bAnyChangesThisSubfam)
			bAnyChanges = true;
		}

	if (bAnyChanges)
		ProgressiveAlignSubfams(tree, Subfams, uSubfamCount, SubfamMSAs, msa);

	delete[] Leaves;
	delete[] Subfams;
	delete[] SubfamMSAs;

	return bAnyChanges;
	}

static void ProgressiveAlignSubfams(const Tree &tree, const unsigned Subfams[],
  unsigned uSubfamCount, const MSA SubfamMSAs[], MSA &msa)
	{
	const unsigned uNodeCount = tree.GetNodeCount();

	bool *Ready = new bool[uNodeCount];
	MSA **MSAs = new MSA *[uNodeCount];
	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		{
		Ready[uNodeIndex] = false;
		MSAs[uNodeIndex] = 0;
		}

	for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)
		{
		unsigned uNodeIndex = Subfams[uSubfamIndex];
		Ready[uNodeIndex] = true;
		MSA *ptrMSA = new MSA;
	// TODO: Wasteful copy, needs re-design
		ptrMSA->Copy(SubfamMSAs[uSubfamIndex]);
		MSAs[uNodeIndex] = ptrMSA;
		}

	for (unsigned uNodeIndex = tree.FirstDepthFirstNode();
	  NULL_NEIGHBOR != uNodeIndex;
	  uNodeIndex = tree.NextDepthFirstNode(uNodeIndex))
		{
		if (tree.IsLeaf(uNodeIndex))
			continue;

		unsigned uRight = tree.GetRight(uNodeIndex);
		unsigned uLeft = tree.GetLeft(uNodeIndex);
		if (!Ready[uRight] || !Ready[uLeft])
			continue;

		MSA *ptrLeft = MSAs[uLeft];
		MSA *ptrRight = MSAs[uRight];
		assert(ptrLeft != 0 && ptrRight != 0);

		MSA *ptrParent = new MSA;

		PWPath Path;
		AlignTwoMSAs(*ptrLeft, *ptrRight, *ptrParent, Path);

		MSAs[uNodeIndex] = ptrParent;
		Ready[uNodeIndex] = true;
		Ready[uLeft] = false;
		Ready[uRight] = false;

		delete MSAs[uLeft];
		delete MSAs[uRight];
		MSAs[uLeft] = 0;
		MSAs[uRight] = 0;
		}

#if	DEBUG
	{
	unsigned uReadyCount = 0;
	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
		{
		if (Ready[uNodeIndex])
			{
			assert(tree.IsRoot(uNodeIndex));
			++uReadyCount;
			assert(0 != MSAs[uNodeIndex]);
			}
		else
			assert(0 == MSAs[uNodeIndex]);
		}
	assert(1 == uReadyCount);
	}
#endif

	const unsigned uRoot = tree.GetRootNodeIndex();
	MSA *ptrRootAlignment = MSAs[uRoot];

	msa.Copy(*ptrRootAlignment);

	delete ptrRootAlignment;

#if	TRACE
	Log("After refine subfamilies, root alignment=\n");
	msa.LogMe();
#endif
	}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -