📄 spfast.cpp
字号:
#include "muscle.h"
#include "profile.h"
#define TRACE 0
enum
{
LL = 0,
LG = 1,
GL = 2,
GG = 3,
};
static char *GapTypeToStr(int GapType)
{
switch (GapType)
{
case LL: return "LL";
case LG: return "LG";
case GL: return "GL";
case GG: return "GG";
}
Quit("Invalid gap type");
return "?";
}
static SCORE GapScoreMatrix[4][4];
static void InitGapScoreMatrix()
{
const SCORE t = (SCORE) 0.2;
GapScoreMatrix[LL][LL] = 0;
GapScoreMatrix[LL][LG] = g_scoreGapOpen;
GapScoreMatrix[LL][GL] = 0;
GapScoreMatrix[LL][GG] = 0;
GapScoreMatrix[LG][LL] = g_scoreGapOpen;
GapScoreMatrix[LG][LG] = 0;
GapScoreMatrix[LG][GL] = g_scoreGapOpen;
GapScoreMatrix[LG][GG] = t*g_scoreGapOpen; // approximation!
GapScoreMatrix[GL][LL] = 0;
GapScoreMatrix[GL][LG] = g_scoreGapOpen;
GapScoreMatrix[GL][GL] = 0;
GapScoreMatrix[GL][GG] = 0;
GapScoreMatrix[GG][LL] = 0;
GapScoreMatrix[GG][LG] = t*g_scoreGapOpen; // approximation!
GapScoreMatrix[GG][GL] = 0;
GapScoreMatrix[GG][GG] = 0;
for (int i = 0; i < 4; ++i)
for (int j = 0; j < i; ++j)
if (GapScoreMatrix[i][j] != GapScoreMatrix[j][i])
Quit("GapScoreMatrix not symmetrical");
}
static SCORE SPColBrute(const MSA &msa, unsigned uColIndex)
{
SCORE Sum = 0;
const unsigned uSeqCount = msa.GetSeqCount();
for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
{
const WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1);
unsigned uLetter1 = msa.GetLetterEx(uSeqIndex1, uColIndex);
if (uLetter1 >= 20)
continue;
for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqIndex1; ++uSeqIndex2)
{
const WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2);
unsigned uLetter2 = msa.GetLetterEx(uSeqIndex2, uColIndex);
if (uLetter2 >= 20)
continue;
SCORE t = w1*w2*(*g_ptrScoreMatrix)[uLetter1][uLetter2];
#if TRACE
Log("Check %c %c w1=%.3g w2=%.3g Mx=%.3g t=%.3g\n",
LetterToCharAmino(uLetter1),
LetterToCharAmino(uLetter2),
w1,
w2,
(*g_ptrScoreMatrix)[uLetter1][uLetter2],
t);
#endif
Sum += t;
}
}
return Sum;
}
static SCORE SPGapFreqs(const FCOUNT Freqs[])
{
#if TRACE
Log("Freqs=");
for (unsigned i = 0; i < 4; ++i)
if (Freqs[i] != 0)
Log(" %s=%.3g", GapTypeToStr(i), Freqs[i]);
Log("\n");
#endif
SCORE TotalOffDiag = 0;
SCORE TotalDiag = 0;
for (unsigned i = 0; i < 4; ++i)
{
const FCOUNT fi = Freqs[i];
if (0 == fi)
continue;
const float *Row = GapScoreMatrix[i];
SCORE diagt = fi*fi*Row[i];
TotalDiag += diagt;
#if TRACE
Log("SPFGaps %s %s + Mx=%.3g TotalDiag += %.3g\n",
GapTypeToStr(i),
GapTypeToStr(i),
Row[i],
diagt);
#endif
SCORE Sum = 0;
for (unsigned j = 0; j < i; ++j)
{
SCORE t = Freqs[j]*Row[j];
#if TRACE
if (Freqs[j] != 0)
Log("SPFGaps %s %s + Mx=%.3g Sum += %.3g\n",
GapTypeToStr(i),
GapTypeToStr(j),
Row[j],
fi*t);
#endif
Sum += t;
}
TotalOffDiag += fi*Sum;
}
#if TRACE
Log("SPFGap TotalOffDiag=%.3g + TotalDiag=%.3g = %.3g\n",
TotalOffDiag, TotalDiag, TotalOffDiag + TotalDiag);
#endif
return TotalOffDiag*2 + TotalDiag;
}
static SCORE SPFreqs(const FCOUNT Freqs[])
{
#if TRACE
Log("Freqs=");
for (unsigned i = 0; i < 20; ++i)
if (Freqs[i] != 0)
Log(" %c=%.3g", LetterToCharAmino(i), Freqs[i]);
Log("\n");
#endif
SCORE TotalOffDiag = 0;
SCORE TotalDiag = 0;
for (unsigned i = 0; i < 20; ++i)
{
const FCOUNT fi = Freqs[i];
if (0 == fi)
continue;
const float *Row = (*g_ptrScoreMatrix)[i];
SCORE diagt = fi*fi*Row[i];
TotalDiag += diagt;
#if TRACE
Log("SPF %c %c + Mx=%.3g TotalDiag += %.3g\n",
LetterToCharAmino(i),
LetterToCharAmino(i),
Row[i],
diagt);
#endif
SCORE Sum = 0;
for (unsigned j = 0; j < i; ++j)
{
SCORE t = Freqs[j]*Row[j];
#if TRACE
if (Freqs[j] != 0)
Log("SPF %c %c + Mx=%.3g Sum += %.3g\n",
LetterToCharAmino(i),
LetterToCharAmino(j),
Row[j],
fi*t);
#endif
Sum += t;
}
TotalOffDiag += fi*Sum;
}
#if TRACE
Log("SPF TotalOffDiag=%.3g + TotalDiag=%.3g = %.3g\n",
TotalOffDiag, TotalDiag, TotalOffDiag + TotalDiag);
#endif
return TotalOffDiag*2 + TotalDiag;
}
static SCORE ObjScoreSPCol(const MSA &msa, unsigned uColIndex)
{
FCOUNT Freqs[20];
FCOUNT GapFreqs[4];
memset(Freqs, 0, sizeof(Freqs));
memset(GapFreqs, 0, sizeof(GapFreqs));
const unsigned uSeqCount = msa.GetSeqCount();
#if TRACE
Log("Weights=");
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
Log(" %u=%.3g", uSeqIndex, msa.GetSeqWeight(uSeqIndex));
Log("\n");
#endif
SCORE SelfOverCount = 0;
SCORE GapSelfOverCount = 0;
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
{
WEIGHT w = msa.GetSeqWeight(uSeqIndex);
bool bGapThisCol = msa.IsGap(uSeqIndex, uColIndex);
bool bGapPrevCol = (uColIndex == 0 ? false : msa.IsGap(uSeqIndex, uColIndex - 1));
int GapType = bGapThisCol + 2*bGapPrevCol;
assert(GapType >= 0 && GapType < 4);
GapFreqs[GapType] += w;
SCORE gapt = w*w*GapScoreMatrix[GapType][GapType];
GapSelfOverCount += gapt;
if (bGapThisCol)
continue;
unsigned uLetter = msa.GetLetterEx(uSeqIndex, uColIndex);
if (uLetter >= 20)
continue;
Freqs[uLetter] += w;
SCORE t = w*w*(*g_ptrScoreMatrix)[uLetter][uLetter];
#if TRACE
Log("FastCol compute freqs & SelfOverCount %c w=%.3g M=%.3g SelfOverCount += %.3g\n",
LetterToCharAmino(uLetter), w, (*g_ptrScoreMatrix)[uLetter][uLetter], t);
#endif
SelfOverCount += t;
}
SCORE SPF = SPFreqs(Freqs);
SCORE Col = SPF - SelfOverCount;
SCORE SPFGaps = SPGapFreqs(GapFreqs);
SCORE ColGaps = SPFGaps - GapSelfOverCount;
#if TRACE
Log("SPF=%.3g - SelfOverCount=%.3g = %.3g\n", SPF, SelfOverCount, Col);
Log("SPFGaps=%.3g - GapsSelfOverCount=%.3g = %.3g\n", SPFGaps, GapSelfOverCount, ColGaps);
#endif
return Col + ColGaps;
}
SCORE ObjScoreSPDimer(const MSA &msa)
{
static bool bGapScoreMatrixInit = false;
if (!bGapScoreMatrixInit)
InitGapScoreMatrix();
SCORE Total = 0;
const unsigned uSeqCount = msa.GetSeqCount();
const unsigned uColCount = msa.GetColCount();
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
{
SCORE Col = ObjScoreSPCol(msa, uColIndex);
#if TRACE
{
SCORE ColCheck = SPColBrute(msa, uColIndex);
Log("FastCol=%.3g CheckCol=%.3g\n", Col, ColCheck);
}
#endif
Total += Col;
}
#if TRACE
Log("Total/2 = %.3g (final result from fast)\n", Total/2);
#endif
return Total/2;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -