📄 objscore2.cpp
字号:
#include "muscle.h"
#include "msa.h"
#include "profile.h"
#include "objscore.h"
#define TRACE 0
#define TRACE_SEQPAIR 0
#define TEST_SPFAST 0
extern SCOREMATRIX VTML_LA;
extern SCOREMATRIX PAM200;
extern SCOREMATRIX PAM200NoCenter;
extern SCOREMATRIX VTML_SP;
extern SCOREMATRIX VTML_SPNoCenter;
extern SCOREMATRIX NUC_SP;
SCORE g_SPScoreLetters;
SCORE g_SPScoreGaps;
static SCORE TermGapScore(bool Gap)
{
switch (g_TermGaps)
{
case TERMGAPS_Full:
return 0;
case TERMGAPS_Half:
if (Gap)
return g_scoreGapOpen/2;
return 0;
case TERMGAPS_Ext:
if (Gap)
return g_scoreGapExtend;
return 0;
}
Quit("TermGapScore?!");
return 0;
}
SCORE ScoreSeqPairLetters(const MSA &msa1, unsigned uSeqIndex1,
const MSA &msa2, unsigned uSeqIndex2)
{
const unsigned uColCount = msa1.GetColCount();
const unsigned uColCount2 = msa2.GetColCount();
if (uColCount != uColCount2)
Quit("ScoreSeqPairLetters, different lengths");
#if TRACE_SEQPAIR
{
Log("\n");
Log("ScoreSeqPairLetters\n");
MSA msaTmp;
msaTmp.SetSize(2, uColCount);
msaTmp.CopySeq(0, msa1, uSeqIndex1);
msaTmp.CopySeq(1, msa2, uSeqIndex2);
msaTmp.LogMe();
}
#endif
SCORE scoreLetters = 0;
SCORE scoreGaps = 0;
bool bGapping1 = false;
bool bGapping2 = false;
unsigned uColStart = 0;
bool bLeftTermGap = false;
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
{
bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
if (!bGap1 || !bGap2)
{
if (bGap1 || bGap2)
bLeftTermGap = true;
uColStart = uColIndex;
break;
}
}
unsigned uColEnd = uColCount - 1;
bool bRightTermGap = false;
for (int iColIndex = (int) uColCount - 1; iColIndex >= 0; --iColIndex)
{
bool bGap1 = msa1.IsGap(uSeqIndex1, iColIndex);
bool bGap2 = msa2.IsGap(uSeqIndex2, iColIndex);
if (!bGap1 || !bGap2)
{
if (bGap1 || bGap2)
bRightTermGap = true;
uColEnd = (unsigned) iColIndex;
break;
}
}
#if TRACE_SEQPAIR
Log("LeftTermGap=%d RightTermGap=%d\n", bLeftTermGap, bRightTermGap);
#endif
for (unsigned uColIndex = uColStart; uColIndex <= uColEnd; ++uColIndex)
{
unsigned uLetter1 = msa1.GetLetterEx(uSeqIndex1, uColIndex);
if (uLetter1 >= g_AlphaSize)
continue;
unsigned uLetter2 = msa2.GetLetterEx(uSeqIndex2, uColIndex);
if (uLetter2 >= g_AlphaSize)
continue;
SCORE scoreMatch = (*g_ptrScoreMatrix)[uLetter1][uLetter2];
scoreLetters += scoreMatch;
}
return scoreLetters;
}
SCORE ScoreSeqPairGaps(const MSA &msa1, unsigned uSeqIndex1,
const MSA &msa2, unsigned uSeqIndex2)
{
const unsigned uColCount = msa1.GetColCount();
const unsigned uColCount2 = msa2.GetColCount();
if (uColCount != uColCount2)
Quit("ScoreSeqPairGaps, different lengths");
#if TRACE_SEQPAIR
{
Log("\n");
Log("ScoreSeqPairGaps\n");
MSA msaTmp;
msaTmp.SetSize(2, uColCount);
msaTmp.CopySeq(0, msa1, uSeqIndex1);
msaTmp.CopySeq(1, msa2, uSeqIndex2);
msaTmp.LogMe();
}
#endif
SCORE scoreGaps = 0;
bool bGapping1 = false;
bool bGapping2 = false;
unsigned uColStart = 0;
bool bLeftTermGap = false;
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
{
bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
if (!bGap1 || !bGap2)
{
if (bGap1 || bGap2)
bLeftTermGap = true;
uColStart = uColIndex;
break;
}
}
unsigned uColEnd = uColCount - 1;
bool bRightTermGap = false;
for (int iColIndex = (int) uColCount - 1; iColIndex >= 0; --iColIndex)
{
bool bGap1 = msa1.IsGap(uSeqIndex1, iColIndex);
bool bGap2 = msa2.IsGap(uSeqIndex2, iColIndex);
if (!bGap1 || !bGap2)
{
if (bGap1 || bGap2)
bRightTermGap = true;
uColEnd = (unsigned) iColIndex;
break;
}
}
#if TRACE_SEQPAIR
Log("LeftTermGap=%d RightTermGap=%d\n", bLeftTermGap, bRightTermGap);
#endif
for (unsigned uColIndex = uColStart; uColIndex <= uColEnd; ++uColIndex)
{
bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);
bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);
if (bGap1 && bGap2)
continue;
if (bGap1)
{
if (!bGapping1)
{
#if TRACE_SEQPAIR
Log("Gap open seq 1 col %d\n", uColIndex);
#endif
if (uColIndex == uColStart)
scoreGaps += TermGapScore(true);
else
scoreGaps += g_scoreGapOpen;
bGapping1 = true;
}
else
scoreGaps += g_scoreGapExtend;
continue;
}
else if (bGap2)
{
if (!bGapping2)
{
#if TRACE_SEQPAIR
Log("Gap open seq 2 col %d\n", uColIndex);
#endif
if (uColIndex == uColStart)
scoreGaps += TermGapScore(true);
else
scoreGaps += g_scoreGapOpen;
bGapping2 = true;
}
else
scoreGaps += g_scoreGapExtend;
continue;
}
bGapping1 = false;
bGapping2 = false;
}
if (bGapping1 || bGapping2)
{
scoreGaps -= g_scoreGapOpen;
scoreGaps += TermGapScore(true);
}
return scoreGaps;
}
// The usual sum-of-pairs objective score: sum the score
// of the alignment of each pair of sequences.
SCORE ObjScoreSP(const MSA &msa, SCORE MatchScore[])
{
#if TRACE
Log("==================ObjScoreSP==============\n");
Log("msa=\n");
msa.LogMe();
#endif
g_SPScoreLetters = 0;
g_SPScoreGaps = 0;
if (0 != MatchScore)
{
const unsigned uColCount = msa.GetColCount();
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
MatchScore[uColIndex] = 0;
}
const unsigned uSeqCount = msa.GetSeqCount();
SCORE scoreTotal = 0;
unsigned uPairCount = 0;
#if TRACE
Log("Seq1 Seq2 wt1 wt2 Letters Gaps Unwt.Score Wt.Score Total\n");
Log("---- ---- ------ ------ ---------- ---------- ---------- ---------- ----------\n");
#endif
for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
{
const WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1);
for (unsigned uSeqIndex2 = uSeqIndex1 + 1; uSeqIndex2 < uSeqCount; ++uSeqIndex2)
{
const WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2);
const WEIGHT w = w1*w2;
SCORE scoreLetters = ScoreSeqPairLetters(msa, uSeqIndex1, msa, uSeqIndex2);
SCORE scoreGaps = ScoreSeqPairGaps(msa, uSeqIndex1, msa, uSeqIndex2);
SCORE scorePair = scoreLetters + scoreGaps;
++uPairCount;
scoreTotal += w*scorePair;
g_SPScoreLetters += w*scoreLetters;
g_SPScoreGaps += w*scoreGaps;
#if TRACE
Log("%4d %4d %6.3f %6.3f %10.2f %10.2f %10.2f %10.2f %10.2f >%s >%s\n",
uSeqIndex1,
uSeqIndex2,
w1,
w2,
scoreLetters,
scoreGaps,
scorePair,
scorePair*w1*w2,
scoreTotal,
msa.GetSeqName(uSeqIndex1),
msa.GetSeqName(uSeqIndex2));
#endif
}
}
#if TEST_SPFAST
{
SCORE f = ObjScoreSPFast(msa);
Log("Fast = %.6g\n", f);
Log("Brute = %.6g\n", scoreTotal);
if (BTEq(f, scoreTotal))
Log("Agree\n");
else
Log("** DISAGREE **\n");
}
#endif
// return scoreTotal / uPairCount;
return scoreTotal;
}
// Objective score defined as the dynamic programming score.
// Input is two alignments, which must be of the same length.
// Result is the same profile-profile score that is optimized
// by dynamic programming.
SCORE ObjScoreDP(const MSA &msa1, const MSA &msa2, SCORE MatchScore[])
{
const unsigned uColCount = msa1.GetColCount();
if (msa2.GetColCount() != uColCount)
Quit("ObjScoreDP, must be same length");
const unsigned uColCount1 = msa1.GetColCount();
const unsigned uColCount2 = msa2.GetColCount();
const ProfPos *PA = ProfileFromMSA(msa1);
const ProfPos *PB = ProfileFromMSA(msa2);
return ObjScoreDP_Profs(PA, PB, uColCount1, MatchScore);
}
SCORE ObjScoreDP_Profs(const ProfPos *PA, const ProfPos *PB, unsigned uColCount,
SCORE MatchScore[])
{
//#if TRACE
// Log("Profile 1:\n");
// ListProfile(PA, uColCount, &msa1);
//
// Log("Profile 2:\n");
// ListProfile(PB, uColCount, &msa2);
//#endif
SCORE scoreTotal = 0;
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
{
const ProfPos &PPA = PA[uColIndex];
const ProfPos &PPB = PB[uColIndex];
SCORE scoreGap = 0;
SCORE scoreMatch = 0;
// If gapped column...
if (PPA.m_bAllGaps && PPB.m_bAllGaps)
scoreGap = 0;
else if (PPA.m_bAllGaps)
{
if (uColCount - 1 == uColIndex || !PA[uColIndex+1].m_bAllGaps)
scoreGap = PPB.m_scoreGapClose;
if (0 == uColIndex || !PA[uColIndex-1].m_bAllGaps)
scoreGap += PPB.m_scoreGapOpen;
//if (0 == scoreGap)
// scoreGap = PPB.m_scoreGapExtend;
}
else if (PPB.m_bAllGaps)
{
if (uColCount - 1 == uColIndex || !PB[uColIndex+1].m_bAllGaps)
scoreGap = PPA.m_scoreGapClose;
if (0 == uColIndex || !PB[uColIndex-1].m_bAllGaps)
scoreGap += PPA.m_scoreGapOpen;
//if (0 == scoreGap)
// scoreGap = PPA.m_scoreGapExtend;
}
else
scoreMatch = ScoreProfPos2(PPA, PPB);
if (0 != MatchScore)
MatchScore[uColIndex] = scoreMatch;
scoreTotal += scoreMatch + scoreGap;
extern bool g_bTracePPScore;
extern MSA *g_ptrPPScoreMSA1;
extern MSA *g_ptrPPScoreMSA2;
if (g_bTracePPScore)
{
const MSA &msa1 = *g_ptrPPScoreMSA1;
const MSA &msa2 = *g_ptrPPScoreMSA2;
const unsigned uSeqCount1 = msa1.GetSeqCount();
const unsigned uSeqCount2 = msa2.GetSeqCount();
for (unsigned n = 0; n < uSeqCount1; ++n)
Log("%c", msa1.GetChar(n, uColIndex));
Log(" ");
for (unsigned n = 0; n < uSeqCount2; ++n)
Log("%c", msa2.GetChar(n, uColIndex));
Log(" %10.3f", scoreMatch);
if (scoreGap != 0)
Log(" %10.3f", scoreGap);
Log("\n");
}
}
delete[] PA;
delete[] PB;
return scoreTotal;
}
// Objective score defined as the sum of profile-sequence
// scores for each sequence in the alignment. The profile
// is computed from the entire alignment, so this includes
// the score of each sequence against itself. This is to
// avoid recomputing the profile each time, so we reduce
// complexity but introduce a questionable approximation.
// The goal is to see if we can exploit the apparent
// improvement in performance of log-expectation score
// over the usual sum-of-pairs by optimizing this
// objective score in the iterative refinement stage.
SCORE ObjScorePS(const MSA &msa, SCORE MatchScore[])
{
if (g_PPScore != PPSCORE_LE)
Quit("FastScoreMSA_LASimple: LA");
const unsigned uSeqCount = msa.GetSeqCount();
const unsigned uColCount = msa.GetColCount();
const ProfPos *Prof = ProfileFromMSA(msa);
if (0 != MatchScore)
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
MatchScore[uColIndex] = 0;
SCORE scoreTotal = 0;
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
{
const WEIGHT weightSeq = msa.GetSeqWeight(uSeqIndex);
SCORE scoreSeq = 0;
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
{
const ProfPos &PP = Prof[uColIndex];
if (msa.IsGap(uSeqIndex, uColIndex))
{
bool bOpen = (0 == uColIndex ||
!msa.IsGap(uSeqIndex, uColIndex - 1));
bool bClose = (uColCount - 1 == uColIndex ||
!msa.IsGap(uSeqIndex, uColIndex + 1));
if (bOpen)
scoreSeq += PP.m_scoreGapOpen;
if (bClose)
scoreSeq += PP.m_scoreGapClose;
//if (!bOpen && !bClose)
// scoreSeq += PP.m_scoreGapExtend;
}
else if (msa.IsWildcard(uSeqIndex, uColIndex))
continue;
else
{
unsigned uLetter = msa.GetLetter(uSeqIndex, uColIndex);
const SCORE scoreMatch = PP.m_AAScores[uLetter];
if (0 != MatchScore)
MatchScore[uColIndex] += weightSeq*scoreMatch;
scoreSeq += scoreMatch;
}
}
scoreTotal += weightSeq*scoreSeq;
}
delete[] Prof;
return scoreTotal;
}
// The XP score is the sum of the score of each pair of
// sequences between two profiles which are aligned to
// each other. Notice that for two given profiles aligned
// in different ways, the difference in XP score must be
// the same as the difference in SP score because the
// score of a pair of sequences in one profile doesn't
// depend on the alignment.
SCORE ObjScoreXP(const MSA &msa1, const MSA &msa2)
{
const unsigned uColCount1 = msa1.GetColCount();
const unsigned uColCount2 = msa2.GetColCount();
if (uColCount1 != uColCount2)
Quit("ObjScoreXP, alignment lengths differ %u %u", uColCount1, uColCount2);
const unsigned uSeqCount1 = msa1.GetSeqCount();
const unsigned uSeqCount2 = msa2.GetSeqCount();
#if TRACE
Log(" Score Weight Weight Total\n");
Log("---------- ------ ------ ----------\n");
#endif
SCORE scoreTotal = 0;
unsigned uPairCount = 0;
for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount1; ++uSeqIndex1)
{
const WEIGHT w1 = msa1.GetSeqWeight(uSeqIndex1);
for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqCount2; ++uSeqIndex2)
{
const WEIGHT w2 = msa2.GetSeqWeight(uSeqIndex2);
const WEIGHT w = w1*w2;
SCORE scoreLetters = ScoreSeqPairLetters(msa1, uSeqIndex1, msa2, uSeqIndex2);
SCORE scoreGaps = ScoreSeqPairGaps(msa1, uSeqIndex1, msa2, uSeqIndex2);
SCORE scorePair = scoreLetters + scoreGaps;
scoreTotal += w1*w2*scorePair;
++uPairCount;
#if TRACE
Log("%10.2f %6.3f %6.3f %10.2f >%s >%s\n",
scorePair,
w1,
w2,
scorePair*w1*w2,
msa1.GetSeqName(uSeqIndex1),
msa2.GetSeqName(uSeqIndex2));
#endif
}
}
if (0 == uPairCount)
Quit("0 == uPairCount");
#if TRACE
Log("msa1=\n");
msa1.LogMe();
Log("msa2=\n");
msa2.LogMe();
Log("XP=%g\n", scoreTotal);
#endif
// return scoreTotal / uPairCount;
return scoreTotal;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -