📄 objscoreda.cpp
字号:
#include "muscle.h"
#include "msa.h"
#include "profile.h"
#include "objscore.h"
#if DOUBLE_AFFINE
#define TRACE 0
#define TEST_SPFAST 0
static SCORE GapPenalty(unsigned uLength, bool Term, SCORE g, SCORE e)
{
//if (Term)
// {
// switch (g_TermGap)
// {
// case TERMGAP_Full:
// return g + (uLength - 1)*e;
// case TERMGAP_Half:
// return g/2 + (uLength - 1)*e;
// case TERMGAP_Ext:
// return uLength*e;
// }
// Quit("Bad termgap");
// }
//else
// return g + (uLength - 1)*e;
//return MINUS_INFINITY;
return g + (uLength - 1)*e;
}
static SCORE GapPenalty(unsigned uLength, bool Term)
{
SCORE s1 = GapPenalty(uLength, Term, g_scoreGapOpen, g_scoreGapExtend);
#if DOUBLE_AFFINE
SCORE s2 = GapPenalty(uLength, Term, g_scoreGapOpen2, g_scoreGapExtend2);
if (s1 > s2)
return s1;
return s2;
#else
return s1;
#endif
}
static const MSA *g_ptrMSA1;
static const MSA *g_ptrMSA2;
static unsigned g_uSeqIndex1;
static unsigned g_uSeqIndex2;
static void LogGap(unsigned uStart, unsigned uEnd, unsigned uGapLength,
bool bNTerm, bool bCTerm)
{
Log("%16.16s ", "");
for (unsigned i = 0; i < uStart; ++i)
Log(" ");
unsigned uMyLength = 0;
for (unsigned i = uStart; i <= uEnd; ++i)
{
bool bGap1 = g_ptrMSA1->IsGap(g_uSeqIndex1, i);
bool bGap2 = g_ptrMSA2->IsGap(g_uSeqIndex2, i);
if (!bGap1 && !bGap2)
Quit("Error -- neither gapping");
if (bGap1 && bGap2)
Log(".");
else
{
++uMyLength;
Log("-");
}
}
SCORE s = GapPenalty(uGapLength, bNTerm || bCTerm);
Log(" L=%d N%d C%d s=%.3g", uGapLength, bNTerm, bCTerm, s);
Log("\n");
if (uMyLength != uGapLength)
Quit("Lengths differ");
}
static SCORE ScoreSeqPair(const MSA &msa1, unsigned uSeqIndex1,
const MSA &msa2, unsigned uSeqIndex2, SCORE *ptrLetters, SCORE *ptrGaps)
{
g_ptrMSA1 = &msa1;
g_ptrMSA2 = &msa2;
g_uSeqIndex1 = uSeqIndex1;
g_uSeqIndex2 = uSeqIndex2;
const unsigned uColCount = msa1.GetColCount();
const unsigned uColCount2 = msa2.GetColCount();
if (uColCount != uColCount2)
Quit("ScoreSeqPair, different lengths");
#if TRACE
Log("ScoreSeqPair\n");
Log("%16.16s ", msa1.GetSeqName(uSeqIndex1));
for (unsigned i = 0; i < uColCount; ++i)
Log("%c", msa1.GetChar(uSeqIndex1, i));
Log("\n");
Log("%16.16s ", msa2.GetSeqName(uSeqIndex2));
for (unsigned i = 0; i < uColCount; ++i)
Log("%c", msa1.GetChar(uSeqIndex2, i));
Log("\n");
#endif
SCORE scoreTotal = 0;
// Substitution scores
unsigned uFirstLetter1 = uInsane;
unsigned uFirstLetter2 = uInsane;
unsigned uLastLetter1 = uInsane;
unsigned uLastLetter2 = uInsane;
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
{
bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
bool bWildcard1 = msa1.IsWildcard(uSeqIndex1, uColIndex);
bool bWildcard2 = msa2.IsWildcard(uSeqIndex2, uColIndex);
if (!bGap1)
{
if (uInsane == uFirstLetter1)
uFirstLetter1 = uColIndex;
uLastLetter1 = uColIndex;
}
if (!bGap2)
{
if (uInsane == uFirstLetter2)
uFirstLetter2 = uColIndex;
uLastLetter2 = uColIndex;
}
if (bGap1 || bGap2 || bWildcard1 || bWildcard2)
continue;
unsigned uLetter1 = msa1.GetLetter(uSeqIndex1, uColIndex);
unsigned uLetter2 = msa2.GetLetter(uSeqIndex2, uColIndex);
SCORE scoreMatch = (*g_ptrScoreMatrix)[uLetter1][uLetter2];
scoreTotal += scoreMatch;
#if TRACE
Log("%c <-> %c = %7.1f %10.1f\n",
msa1.GetChar(uSeqIndex1, uColIndex),
msa2.GetChar(uSeqIndex2, uColIndex),
scoreMatch,
scoreTotal);
#endif
}
*ptrLetters = scoreTotal;
// Gap penalties
unsigned uGapLength = uInsane;
unsigned uGapStartCol = uInsane;
bool bGapping1 = false;
bool bGapping2 = false;
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
{
bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
if (bGap1 && bGap2)
continue;
if (bGapping1)
{
if (bGap1)
++uGapLength;
else
{
bGapping1 = false;
bool bNTerm = (uFirstLetter2 == uGapStartCol);
bool bCTerm = (uLastLetter2 + 1 == uColIndex);
SCORE scoreGap = GapPenalty(uGapLength, bNTerm || bCTerm);
scoreTotal += scoreGap;
#if TRACE
LogGap(uGapStartCol, uColIndex - 1, uGapLength, bNTerm, bCTerm);
Log("GAP %7.1f %10.1f\n",
scoreGap,
scoreTotal);
#endif
}
continue;
}
else
{
if (bGap1)
{
uGapStartCol = uColIndex;
bGapping1 = true;
uGapLength = 1;
continue;
}
}
if (bGapping2)
{
if (bGap2)
++uGapLength;
else
{
bGapping2 = false;
bool bNTerm = (uFirstLetter1 == uGapStartCol);
bool bCTerm = (uLastLetter1 + 1 == uColIndex);
SCORE scoreGap = GapPenalty(uGapLength, bNTerm || bCTerm);
scoreTotal += scoreGap;
#if TRACE
LogGap(uGapStartCol, uColIndex - 1, uGapLength, bNTerm, bCTerm);
Log("GAP %7.1f %10.1f\n",
scoreGap,
scoreTotal);
#endif
}
}
else
{
if (bGap2)
{
uGapStartCol = uColIndex;
bGapping2 = true;
uGapLength = 1;
}
}
}
if (bGapping1 || bGapping2)
{
SCORE scoreGap = GapPenalty(uGapLength, true);
scoreTotal += scoreGap;
#if TRACE
LogGap(uGapStartCol, uColCount - 1, uGapLength, false, true);
Log("GAP %7.1f %10.1f\n",
scoreGap,
scoreTotal);
#endif
}
*ptrGaps = scoreTotal - *ptrLetters;
return scoreTotal;
}
// The usual sum-of-pairs objective score: sum the score
// of the alignment of each pair of sequences.
SCORE ObjScoreDA(const MSA &msa, SCORE *ptrLetters, SCORE *ptrGaps)
{
const unsigned uSeqCount = msa.GetSeqCount();
SCORE scoreTotal = 0;
unsigned uPairCount = 0;
#if TRACE
msa.LogMe();
Log(" Score Weight Weight Total\n");
Log("---------- ------ ------ ----------\n");
#endif
SCORE TotalLetters = 0;
SCORE TotalGaps = 0;
for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
{
const WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1);
for (unsigned uSeqIndex2 = uSeqIndex1 + 1; uSeqIndex2 < uSeqCount; ++uSeqIndex2)
{
const WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2);
const WEIGHT w = w1*w2;
SCORE Letters;
SCORE Gaps;
SCORE scorePair = ScoreSeqPair(msa, uSeqIndex1, msa, uSeqIndex2,
&Letters, &Gaps);
scoreTotal += w1*w2*scorePair;
TotalLetters += w1*w2*Letters;
TotalGaps += w1*w2*Gaps;
++uPairCount;
#if TRACE
Log("%10.2f %6.3f %6.3f %10.2f %d=%s %d=%s\n",
scorePair,
w1,
w2,
scorePair*w1*w2,
uSeqIndex1,
msa.GetSeqName(uSeqIndex1),
uSeqIndex2,
msa.GetSeqName(uSeqIndex2));
#endif
}
}
*ptrLetters = TotalLetters;
*ptrGaps = TotalGaps;
return scoreTotal;
}
#endif // DOUBLE_AFFINE
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -