📄 refinesubfams.cpp
字号:
#include "muscle.h"
#include "msa.h"
#include "tree.h"
#include "clust.h"
#include "profile.h"
#include "pwpath.h"
#define TRACE 0
static void ProgressiveAlignSubfams(const Tree &tree, const unsigned Subfams[],
unsigned uSubfamCount, const MSA SubfamMSAs[], MSA &msa);
// Identify subfamilies in a tree.
// Returns array of internal node indexes, one for each subfamily.
// First try is to select groups by height (which should approximate
// minimum percent identity), if this gives too many subfamilies then
// we cut at a point that gives the maximum allowed number of subfams.
static void GetSubfams(const Tree &tree, double dMaxHeight,
unsigned uMaxSubfamCount, unsigned **ptrptrSubfams, unsigned *ptruSubfamCount)
{
const unsigned uNodeCount = tree.GetNodeCount();
unsigned *Subfams = new unsigned[uNodeCount];
unsigned uSubfamCount;
ClusterByHeight(tree, dMaxHeight, Subfams, &uSubfamCount);
if (uSubfamCount > uMaxSubfamCount)
ClusterBySubfamCount(tree, uMaxSubfamCount, Subfams, &uSubfamCount);
*ptrptrSubfams = Subfams;
*ptruSubfamCount = uSubfamCount;
}
static void LogSubfams(const Tree &tree, const unsigned Subfams[],
unsigned uSubfamCount)
{
const unsigned uNodeCount = tree.GetNodeCount();
Log("%u subfamilies found\n", uSubfamCount);
Log("Subfam Sequence\n");
Log("------ --------\n");
unsigned *Leaves = new unsigned[uNodeCount];
for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)
{
unsigned uSubfamNodeIndex = Subfams[uSubfamIndex];
unsigned uLeafCount;
GetLeaves(tree, uSubfamNodeIndex, Leaves, &uLeafCount);
for (unsigned uLeafIndex = 0; uLeafIndex < uLeafCount; ++uLeafIndex)
Log("%6u %s\n", uSubfamIndex + 1, tree.GetLeafName(Leaves[uLeafIndex]));
Log("\n");
}
delete[] Leaves;
}
bool RefineSubfams(MSA &msa, const Tree &tree, unsigned uIters)
{
const unsigned uSeqCount = msa.GetSeqCount();
if (uSeqCount < 3)
return false;
const double dMaxHeight = 0.6;
const unsigned uMaxSubfamCount = 16;
const unsigned uNodeCount = tree.GetNodeCount();
unsigned *Subfams;
unsigned uSubfamCount;
GetSubfams(tree, dMaxHeight, uMaxSubfamCount, &Subfams, &uSubfamCount);
assert(uSubfamCount <= uSeqCount);
if (g_bVerbose)
LogSubfams(tree, Subfams, uSubfamCount);
MSA *SubfamMSAs = new MSA[uSubfamCount];
unsigned *Leaves = new unsigned[uSeqCount];
unsigned *Ids = new unsigned[uSeqCount];
bool bAnyChanges = false;
for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)
{
unsigned uSubfam = Subfams[uSubfamIndex];
unsigned uLeafCount;
GetLeaves(tree, uSubfam, Leaves, &uLeafCount);
assert(uLeafCount <= uSeqCount);
LeafIndexesToIds(tree, Leaves, uLeafCount, Ids);
MSA &msaSubfam = SubfamMSAs[uSubfamIndex];
MSASubsetByIds(msa, Ids, uLeafCount, msaSubfam);
DeleteGappedCols(msaSubfam);
#if TRACE
Log("Subfam %u MSA=\n", uSubfamIndex);
msaSubfam.LogMe();
#endif
if (msaSubfam.GetSeqCount() <= 2)
continue;
// TODO /////////////////////////////////////////
// Try using existing tree, may actually hurt to
// re-estimate, may also be a waste of CPU & mem.
/////////////////////////////////////////////////
Tree SubfamTree;
TreeFromMSA(msaSubfam, SubfamTree, g_Cluster2, g_Distance2, g_Root2);
bool bAnyChangesThisSubfam;
if (g_bAnchors)
bAnyChangesThisSubfam = RefineVert(msaSubfam, SubfamTree, uIters);
else
bAnyChangesThisSubfam = RefineHoriz(msaSubfam, SubfamTree, uIters, false, false);
#if TRACE
Log("Subfam %u Changed %d\n", uSubfamIndex, bAnyChangesThisSubfam);
#endif
if (bAnyChangesThisSubfam)
bAnyChanges = true;
}
if (bAnyChanges)
ProgressiveAlignSubfams(tree, Subfams, uSubfamCount, SubfamMSAs, msa);
delete[] Leaves;
delete[] Subfams;
delete[] SubfamMSAs;
return bAnyChanges;
}
static void ProgressiveAlignSubfams(const Tree &tree, const unsigned Subfams[],
unsigned uSubfamCount, const MSA SubfamMSAs[], MSA &msa)
{
const unsigned uNodeCount = tree.GetNodeCount();
bool *Ready = new bool[uNodeCount];
MSA **MSAs = new MSA *[uNodeCount];
for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
{
Ready[uNodeIndex] = false;
MSAs[uNodeIndex] = 0;
}
for (unsigned uSubfamIndex = 0; uSubfamIndex < uSubfamCount; ++uSubfamIndex)
{
unsigned uNodeIndex = Subfams[uSubfamIndex];
Ready[uNodeIndex] = true;
MSA *ptrMSA = new MSA;
// TODO: Wasteful copy, needs re-design
ptrMSA->Copy(SubfamMSAs[uSubfamIndex]);
MSAs[uNodeIndex] = ptrMSA;
}
for (unsigned uNodeIndex = tree.FirstDepthFirstNode();
NULL_NEIGHBOR != uNodeIndex;
uNodeIndex = tree.NextDepthFirstNode(uNodeIndex))
{
if (tree.IsLeaf(uNodeIndex))
continue;
unsigned uRight = tree.GetRight(uNodeIndex);
unsigned uLeft = tree.GetLeft(uNodeIndex);
if (!Ready[uRight] || !Ready[uLeft])
continue;
MSA *ptrLeft = MSAs[uLeft];
MSA *ptrRight = MSAs[uRight];
assert(ptrLeft != 0 && ptrRight != 0);
MSA *ptrParent = new MSA;
PWPath Path;
AlignTwoMSAs(*ptrLeft, *ptrRight, *ptrParent, Path);
MSAs[uNodeIndex] = ptrParent;
Ready[uNodeIndex] = true;
Ready[uLeft] = false;
Ready[uRight] = false;
delete MSAs[uLeft];
delete MSAs[uRight];
MSAs[uLeft] = 0;
MSAs[uRight] = 0;
}
#if DEBUG
{
unsigned uReadyCount = 0;
for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
{
if (Ready[uNodeIndex])
{
assert(tree.IsRoot(uNodeIndex));
++uReadyCount;
assert(0 != MSAs[uNodeIndex]);
}
else
assert(0 == MSAs[uNodeIndex]);
}
assert(1 == uReadyCount);
}
#endif
const unsigned uRoot = tree.GetRootNodeIndex();
MSA *ptrRootAlignment = MSAs[uRoot];
msa.Copy(*ptrRootAlignment);
delete ptrRootAlignment;
#if TRACE
Log("After refine subfamilies, root alignment=\n");
msa.LogMe();
#endif
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -