📄 anchors.cpp
字号:
#include "muscle.h"
#include "msa.h"
#include "objscore.h"
#define TRACE 0
static void WindowSmooth(const SCORE Score[], unsigned uCount, unsigned uWindowLength,
SCORE SmoothScore[], double dCeil)
{
#define Ceil(x) ((SCORE) ((x) > dCeil ? dCeil : (x)))
if (1 != uWindowLength%2)
Quit("WindowSmooth=%u must be odd", uWindowLength);
if (uCount <= uWindowLength)
{
for (unsigned i = 0; i < uCount; ++i)
SmoothScore[i] = 0;
return;
}
const unsigned w2 = uWindowLength/2;
for (unsigned i = 0; i < w2; ++i)
{
SmoothScore[i] = 0;
SmoothScore[uCount - i - 1] = 0;
}
SCORE scoreWindowTotal = 0;
for (unsigned i = 0; i < uWindowLength; ++i)
{
scoreWindowTotal += Ceil(Score[i]);
}
for (unsigned i = w2; ; ++i)
{
SmoothScore[i] = scoreWindowTotal/uWindowLength;
if (i == uCount - w2 - 1)
break;
scoreWindowTotal -= Ceil(Score[i - w2]);
scoreWindowTotal += Ceil(Score[i + w2 + 1]);
}
#undef Ceil
}
// Find columns that score above the given threshold.
// A range of scores is defined between the average
// and the maximum. The threshold is a fraction 0.0 .. 1.0
// within that range, where 0.0 is the average score
// and 1.0 is the maximum score.
// "Grade" is by analogy with grading on a curve.
static void FindBestColsGrade(const SCORE Score[], unsigned uCount,
double dThreshold, unsigned BestCols[], unsigned *ptruBestColCount)
{
SCORE scoreTotal = 0;
for (unsigned uIndex = 0; uIndex < uCount; ++uIndex)
scoreTotal += Score[uIndex];
const SCORE scoreAvg = scoreTotal / uCount;
SCORE scoreMax = MINUS_INFINITY;
for (unsigned uIndex = 0; uIndex < uCount; ++uIndex)
if (Score[uIndex] > scoreMax)
scoreMax = Score[uIndex];
unsigned uBestColCount = 0;
for (unsigned uIndex = 0; uIndex < uCount; ++uIndex)
{
const SCORE s = Score[uIndex];
const double dHeight = (s - scoreAvg)/(scoreMax - scoreAvg);
if (dHeight >= dThreshold)
{
BestCols[uBestColCount] = uIndex;
++uBestColCount;
}
}
*ptruBestColCount = uBestColCount;
}
// Best col only if all following criteria satisfied:
// (1) Score >= min
// (2) Smoothed score >= min
// (3) No gaps.
static void FindBestColsCombo(const MSA &msa, const SCORE Score[],
const SCORE SmoothScore[], double dMinScore, double dMinSmoothScore,
unsigned BestCols[], unsigned *ptruBestColCount)
{
const unsigned uColCount = msa.GetColCount();
unsigned uBestColCount = 0;
for (unsigned uIndex = 0; uIndex < uColCount; ++uIndex)
{
if (Score[uIndex] < dMinScore)
continue;
if (SmoothScore[uIndex] < dMinSmoothScore)
continue;
if (msa.ColumnHasGap(uIndex))
continue;
BestCols[uBestColCount] = uIndex;
++uBestColCount;
}
*ptruBestColCount = uBestColCount;
}
static void ListBestCols(const MSA &msa, const SCORE Score[], const SCORE SmoothScore[],
unsigned BestCols[], unsigned uBestColCount)
{
const unsigned uColCount = msa.GetColCount();
const unsigned uSeqCount = msa.GetSeqCount();
Log("Col ");
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
Log("%u", uSeqIndex%10);
Log(" ");
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
{
Log("%3u ", uColIndex);
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
Log("%c", msa.GetChar(uSeqIndex, uColIndex));
Log(" %10.3f", Score[uColIndex]);
Log(" %10.3f", SmoothScore[uColIndex]);
for (unsigned i = 0; i < uBestColCount; ++i)
if (BestCols[i] == uColIndex)
Log(" <-- Best");
Log("\n");
}
}
// If two best columns are found within a window, choose
// the highest-scoring. If more than two, choose the one
// closest to the center of the window.
static void MergeBestCols(const SCORE Scores[], const unsigned BestCols[],
unsigned uBestColCount, unsigned uWindowLength, unsigned AnchorCols[],
unsigned *ptruAnchorColCount)
{
unsigned uAnchorColCount = 0;
for (unsigned n = 0; n < uBestColCount; /* update inside loop */)
{
unsigned uBestColIndex = BestCols[n];
unsigned uCountWithinWindow = 0;
for (unsigned i = n + 1; i < uBestColCount; ++i)
{
unsigned uBestColIndex2 = BestCols[i];
if (uBestColIndex2 - uBestColIndex >= uWindowLength)
break;
++uCountWithinWindow;
}
unsigned uAnchorCol = uBestColIndex;
if (1 == uCountWithinWindow)
{
unsigned uBestColIndex2 = BestCols[n+1];
if (Scores[uBestColIndex] > Scores[uBestColIndex2])
uAnchorCol = uBestColIndex;
else
uAnchorCol = uBestColIndex2;
}
else if (uCountWithinWindow > 1)
{
unsigned uWindowCenter = uBestColIndex + uWindowLength/2;
int iClosestDist = uWindowLength;
unsigned uClosestCol = uBestColIndex;
for (unsigned i = n + 1; i < n + uCountWithinWindow; ++i)
{
unsigned uColIndex = BestCols[i];
int iDist = uColIndex - uBestColIndex;
if (iDist < 0)
iDist = -iDist;
if (iDist < iClosestDist)
{
uClosestCol = uColIndex;
iClosestDist = iDist;
}
}
uAnchorCol = uClosestCol;
}
AnchorCols[uAnchorColCount] = uAnchorCol;
++uAnchorColCount;
n += uCountWithinWindow + 1;
}
*ptruAnchorColCount = uAnchorColCount;
}
void FindAnchorCols(const MSA &msa, unsigned AnchorCols[],
unsigned *ptruAnchorColCount)
{
const unsigned uColCount = msa.GetColCount();
if (uColCount < 16)
{
*ptruAnchorColCount = 0;
return;
}
SCORE *MatchScore = new SCORE[uColCount];
SCORE *SmoothScore = new SCORE[uColCount];
unsigned *BestCols = new unsigned[uColCount];
GetLetterScores(msa, MatchScore);
WindowSmooth(MatchScore, uColCount, g_uSmoothWindowLength, SmoothScore,
g_dSmoothScoreCeil);
unsigned uBestColCount;
FindBestColsCombo(msa, MatchScore, SmoothScore, g_dMinBestColScore, g_dMinSmoothScore,
BestCols, &uBestColCount);
#if TRACE
ListBestCols(msa, MatchScore, SmoothScore, BestCols, uBestColCount);
#endif
MergeBestCols(MatchScore, BestCols, uBestColCount, g_uAnchorSpacing, AnchorCols,
ptruAnchorColCount);
delete[] MatchScore;
delete[] SmoothScore;
delete[] BestCols;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -