⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 aligngivenpath.cpp

📁 unix,linux下编译。用于蛋白质
💻 CPP
字号:
#include "muscle.h"
#include "msa.h"
#include "pwpath.h"
#include "profile.h"

#define	TRACE	0

static void LogPP(const ProfPos &PP)
	{
	Log("ResidueGroup   %u\n", PP.m_uResidueGroup);
	Log("AllGaps      %d\n", PP.m_bAllGaps);
	Log("Occ          %.3g\n", PP.m_fOcc);
	Log("LL=%.3g LG=%.3g GL=%.3g GG=%.3g\n", PP.m_LL, PP.m_LG, PP.m_GL, PP.m_GG);
	Log("Freqs        ");
	for (unsigned i = 0; i < 20; ++i)
		if (PP.m_fcCounts[i] > 0)
			Log("%c=%.3g ", LetterToChar(i), PP.m_fcCounts[i]);
	Log("\n");
	}

static void AssertProfPosEq(const ProfPos *PA, const ProfPos *PB, unsigned i)
	{
	const ProfPos &PPA = PA[i];
	const ProfPos &PPB = PB[i];
#define	eq(x)	if (PPA.m_##x != PPB.m_##x) { LogPP(PPA); LogPP(PPB); Quit("AssertProfPosEq." #x); }
#define be(x)	if (!BTEq(PPA.m_##x, PPB.m_##x)) { LogPP(PPA); LogPP(PPB); Quit("AssertProfPosEq." #x); }
	eq(bAllGaps)
	eq(uResidueGroup)

	be(LL)
	be(LG)
	be(GL)
	be(GG)
	be(fOcc)
	be(scoreGapOpen)
	be(scoreGapClose)

	for (unsigned j = 0; j < 20; ++j)
		{
#define	eqj(x)	if (PPA.m_##x != PPB.m_##x) Quit("AssertProfPosEq j=%u " #x, j);
#define bej(x)	if (!BTEq(PPA.m_##x, PPB.m_##x)) Quit("AssertProfPosEq j=%u " #x, j);
		bej(fcCounts[j]);
//		eqj(uSortOrder[j]) // may differ due to ties, don't check?
		bej(AAScores[j])
#undef eqj
#undef bej
		}
#undef eq
#undef be
	}

void AssertProfsEq(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,
  unsigned uLengthB)
	{
	if (uLengthA != uLengthB)
		Quit("AssertProfsEq: lengths differ %u %u", uLengthA, uLengthB);
	for (unsigned i = 0; i < uLengthB; ++i)
		AssertProfPosEq(PA, PB, i);
	}

#if	DEBUG
static void ValidateProf(const ProfPos *Prof, unsigned uLength)
	{
	for (unsigned i = 0; i < uLength; ++i)
		{
		const ProfPos &PP = Prof[i];

		FCOUNT s1 = PP.m_LL + PP.m_LG + PP.m_GL + PP.m_GG;
		assert(BTEq(s1, 1.0));

		if (i > 0)
			{
			const ProfPos &PPPrev = Prof[i-1];
			FCOUNT s2 = PPPrev.m_LL + PPPrev.m_GL;
			FCOUNT s3 = PP.m_LL + PP.m_LG;
			assert(BTEq(s2, s3));
			}
		if (i < uLength - 1)
			{
			const ProfPos &PPNext = Prof[i+1];
			FCOUNT s4 = PP.m_LL + PP.m_GL;
			FCOUNT s5 = PPNext.m_LL + PPNext.m_LG;
			assert(BTEq(s4, s5));
			}
		}
	}
#else
#define ValidateProf(Prof, Length)	/* empty */
#endif

static void ScoresFromFreqsPos(ProfPos *Prof, unsigned uLength, unsigned uPos)
	{
	ProfPos &PP = Prof[uPos];
	SortCounts(PP.m_fcCounts, PP.m_uSortOrder);
	PP.m_uResidueGroup = ResidueGroupFromFCounts(PP.m_fcCounts);

// "Occupancy"
	PP.m_fOcc = PP.m_LL + PP.m_GL;

// Frequency of gap-opens in this position (i)
// Gap open 	= letter in i-1 and gap in i
//				= iff LG in i
	FCOUNT fcOpen = PP.m_LG;

// Frequency of gap-closes in this position
// Gap close	= gap in i and letter in i+1
//				= iff GL in i+1
	FCOUNT fcClose;
	if (uPos + 1 < uLength)
		fcClose = Prof[uPos + 1].m_GL;
	else
		fcClose = PP.m_GG + PP.m_LG;

	PP.m_scoreGapOpen = (SCORE) ((1.0 - fcOpen)*g_scoreGapOpen/2.0);
	PP.m_scoreGapClose = (SCORE) ((1.0 - fcClose)*g_scoreGapOpen/2.0);
#if	DOUBLE_AFFINE
	PP.m_scoreGapOpen2 = (SCORE) ((1.0 - fcOpen)*g_scoreGapOpen2/2.0);
	PP.m_scoreGapClose2 = (SCORE) ((1.0 - fcClose)*g_scoreGapOpen2/2.0);
#endif

	for (unsigned i = 0; i < g_AlphaSize; ++i)
		{
		SCORE scoreSum = 0;
		for (unsigned j = 0; j < g_AlphaSize; ++j)
			scoreSum += PP.m_fcCounts[j]*(*g_ptrScoreMatrix)[i][j];
		PP.m_AAScores[i] = scoreSum;
		}
	}

void ProfScoresFromFreqs(ProfPos *Prof, unsigned uLength)
	{
	for (unsigned i = 0; i < uLength; ++i)
		ScoresFromFreqsPos(Prof, uLength, i);
	}

static void AppendDelete(const MSA &msaA, unsigned &uColIndexA,
  unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,
  unsigned &uColIndexCombined)
	{
#if	TRACE
	Log("AppendDelete ColIxA=%u ColIxCmb=%u\n",
	  uColIndexA, uColIndexCombined);
#endif
	for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
		{
		char c = msaA.GetChar(uSeqIndexA, uColIndexA);
		msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c);
		}

	for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
		msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, '-');

	++uColIndexCombined;
	++uColIndexA;
	}

static void AppendInsert(const MSA &msaB, unsigned &uColIndexB,
  unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,
  unsigned &uColIndexCombined)
	{
#if	TRACE
	Log("AppendInsert ColIxB=%u ColIxCmb=%u\n",
	  uColIndexB, uColIndexCombined);
#endif
	for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
		msaCombined.SetChar(uSeqIndexA, uColIndexCombined, '-');

	for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
		{
		char c = msaB.GetChar(uSeqIndexB, uColIndexB);
		msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c);
		}

	++uColIndexCombined;
	++uColIndexB;
	}

static void AppendTplInserts(const MSA &msaA, unsigned &uColIndexA, unsigned uColCountA,
  const MSA &msaB, unsigned &uColIndexB, unsigned uColCountB, unsigned uSeqCountA,
  unsigned uSeqCountB, MSA &msaCombined, unsigned &uColIndexCombined)
	{
#if	TRACE
	Log("AppendTplInserts ColIxA=%u ColIxB=%u ColIxCmb=%u\n",
	  uColIndexA, uColIndexB, uColIndexCombined);
#endif
	const unsigned uLengthA = msaA.GetColCount();
	const unsigned uLengthB = msaB.GetColCount();

	unsigned uNewColCount = uColCountA;
	if (uColCountB > uNewColCount)
		uNewColCount = uColCountB;

	for (unsigned n = 0; n < uColCountA; ++n)
		{
		for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
			{
			char c = msaA.GetChar(uSeqIndexA, uColIndexA + n);
			c = UnalignChar(c);
			msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, c);
			}
		}
	for (unsigned n = uColCountA; n < uNewColCount; ++n)
		{
		for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
			msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, '.');
		}

	for (unsigned n = 0; n < uColCountB; ++n)
		{
		for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
			{
			char c = msaB.GetChar(uSeqIndexB, uColIndexB + n);
			c = UnalignChar(c);
			msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, c);
			}
		}
	for (unsigned n = uColCountB; n < uNewColCount; ++n)
		{
		for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
			msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, '.');
		}

	uColIndexCombined += uNewColCount;
	uColIndexA += uColCountA;
	uColIndexB += uColCountB;
	}

static void AppendMatch(const MSA &msaA, unsigned &uColIndexA, const MSA &msaB,
  unsigned &uColIndexB, unsigned uSeqCountA, unsigned uSeqCountB,
  MSA &msaCombined, unsigned &uColIndexCombined)
	{
#if	TRACE
	Log("AppendMatch ColIxA=%u ColIxB=%u ColIxCmb=%u\n",
	  uColIndexA, uColIndexB, uColIndexCombined);
#endif

	for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
		{
		char c = msaA.GetChar(uSeqIndexA, uColIndexA);
		msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c);
		}

	for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
		{
		char c = msaB.GetChar(uSeqIndexB, uColIndexB);
		msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c);
		}

	++uColIndexA;
	++uColIndexB;
	++uColIndexCombined;
	}

void AlignTwoMSAsGivenPath(const PWPath &Path, const MSA &msaA, const MSA &msaB,
  MSA &msaCombined)
	{
	msaCombined.Clear();

#if	TRACE
	Log("FastAlignProfiles\n");
	Log("Template A:\n");
	msaA.LogMe();
	Log("Template B:\n");
	msaB.LogMe();
#endif

	const unsigned uColCountA = msaA.GetColCount();
	const unsigned uColCountB = msaB.GetColCount();

	const unsigned uSeqCountA = msaA.GetSeqCount();
	const unsigned uSeqCountB = msaB.GetSeqCount();

	msaCombined.SetSeqCount(uSeqCountA + uSeqCountB);

// Copy sequence names into combined MSA
	for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)
		{
		msaCombined.SetSeqName(uSeqIndexA, msaA.GetSeqName(uSeqIndexA));
		msaCombined.SetSeqId(uSeqIndexA, msaA.GetSeqId(uSeqIndexA));
		}

	for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)
		{
		msaCombined.SetSeqName(uSeqCountA + uSeqIndexB, msaB.GetSeqName(uSeqIndexB));
		msaCombined.SetSeqId(uSeqCountA + uSeqIndexB, msaB.GetSeqId(uSeqIndexB));
		}

	unsigned uColIndexA = 0;
	unsigned uColIndexB = 0;
	unsigned uColIndexCombined = 0;
	const unsigned uEdgeCount = Path.GetEdgeCount();
	for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)
		{
		const PWEdge &Edge = Path.GetEdge(uEdgeIndex);
#if	TRACE
		Log("\nEdge %u %c%u.%u\n",
		  uEdgeIndex,
		  Edge.cType,
		  Edge.uPrefixLengthA,
		  Edge.uPrefixLengthB);
#endif
		const char cType = Edge.cType;
		const unsigned uPrefixLengthA = Edge.uPrefixLengthA;
		unsigned uColCountA = 0;
		if (uPrefixLengthA > 0)
			{
			const unsigned uNodeIndexA = uPrefixLengthA - 1;
			const unsigned uTplColIndexA = uNodeIndexA;
			if (uTplColIndexA > uColIndexA)
				uColCountA = uTplColIndexA - uColIndexA;
			}

		const unsigned uPrefixLengthB = Edge.uPrefixLengthB;
		unsigned uColCountB = 0;
		if (uPrefixLengthB > 0)
			{
			const unsigned uNodeIndexB = uPrefixLengthB - 1;
			const unsigned uTplColIndexB = uNodeIndexB;
			if (uTplColIndexB > uColIndexB)
				uColCountB = uTplColIndexB - uColIndexB;
			}

// TODO: This code looks like a hangover from HMM estimation -- can we delete it?
		assert(uColCountA == 0);
		assert(uColCountB == 0);
		AppendTplInserts(msaA, uColIndexA, uColCountA, msaB, uColIndexB, uColCountB,
		  uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);

		switch (cType)
			{
		case 'M':
			{
			assert(uPrefixLengthA > 0);
			assert(uPrefixLengthB > 0);
			const unsigned uColA = uPrefixLengthA - 1;
			const unsigned uColB = uPrefixLengthB - 1;
			assert(uColIndexA == uColA);
			assert(uColIndexB == uColB);
			AppendMatch(msaA, uColIndexA, msaB, uColIndexB, uSeqCountA, uSeqCountB,
			  msaCombined, uColIndexCombined);
			break;
			}
		case 'D':
			{
			assert(uPrefixLengthA > 0);
			const unsigned uColA = uPrefixLengthA - 1;
			assert(uColIndexA == uColA);
			AppendDelete(msaA, uColIndexA, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
			break;
			}
		case 'I':
			{
			assert(uPrefixLengthB > 0);
			const unsigned uColB = uPrefixLengthB - 1;
			assert(uColIndexB == uColB);
			AppendInsert(msaB, uColIndexB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);
			break;
			}
		default:
			assert(false);
			}
		}
	unsigned uInsertColCountA = uColCountA - uColIndexA;
	unsigned uInsertColCountB = uColCountB - uColIndexB;

// TODO: This code looks like a hangover from HMM estimation -- can we delete it?
	assert(uInsertColCountA == 0);
	assert(uInsertColCountB == 0);
	AppendTplInserts(msaA, uColIndexA, uInsertColCountA, msaB, uColIndexB,
	  uInsertColCountB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);

	assert(msaCombined.GetColCount() == uEdgeCount);
	}

static const ProfPos PPStart =
	{
	false,		//m_bAllGaps;
	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_uSortOrder[21];
	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_fcCounts[20];
	1.0,	// m_LL;
	0.0,	// m_LG;
	0.0,	// m_GL;
	0.0,	// m_GG;
	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_ALScores
	0,		// m_uResidueGroup;
	1.0,	// m_fOcc;
	0.0,	// m_fcStartOcc;
	0.0,	// m_fcEndOcc;
	0.0,	// m_scoreGapOpen;
	0.0,	// m_scoreGapClose;
	};

// MM
//  Ai

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -