⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 anchors.cpp

📁 unix,linux下编译。用于蛋白质
💻 CPP
字号:
#include "muscle.h"
#include "msa.h"
#include "objscore.h"

#define	TRACE	0

static void WindowSmooth(const SCORE Score[], unsigned uCount, unsigned uWindowLength,
  SCORE SmoothScore[], double dCeil)
	{
#define	Ceil(x)	((SCORE) ((x) > dCeil ? dCeil : (x)))

	if (1 != uWindowLength%2)
		Quit("WindowSmooth=%u must be odd", uWindowLength);

	if (uCount <= uWindowLength)
		{
		for (unsigned i = 0; i < uCount; ++i)
			SmoothScore[i] = 0;
		return;
		}

	const unsigned w2 = uWindowLength/2;
	for (unsigned i = 0; i < w2; ++i)
		{
		SmoothScore[i] = 0;
		SmoothScore[uCount - i - 1] = 0;
		}

	SCORE scoreWindowTotal = 0;
	for (unsigned i = 0; i < uWindowLength; ++i)
		{
		scoreWindowTotal += Ceil(Score[i]);
		}

	for (unsigned i = w2; ; ++i)
		{
		SmoothScore[i] = scoreWindowTotal/uWindowLength;
		if (i == uCount - w2 - 1)
			break;

		scoreWindowTotal -= Ceil(Score[i - w2]);
		scoreWindowTotal += Ceil(Score[i + w2 + 1]);
		}
#undef Ceil
	}

// Find columns that score above the given threshold.
// A range of scores is defined between the average
// and the maximum. The threshold is a fraction 0.0 .. 1.0
// within that range, where 0.0 is the average score
// and 1.0 is the maximum score.
// "Grade" is by analogy with grading on a curve.
static void FindBestColsGrade(const SCORE Score[], unsigned uCount,
  double dThreshold, unsigned BestCols[], unsigned *ptruBestColCount)
	{
	SCORE scoreTotal = 0;
	for (unsigned uIndex = 0; uIndex < uCount; ++uIndex)
		scoreTotal += Score[uIndex];
	const SCORE scoreAvg = scoreTotal / uCount;

	SCORE scoreMax = MINUS_INFINITY;
	for (unsigned uIndex = 0; uIndex < uCount; ++uIndex)
		if (Score[uIndex] > scoreMax)
			scoreMax = Score[uIndex];

	unsigned uBestColCount = 0;
	for (unsigned uIndex = 0; uIndex < uCount; ++uIndex)
		{
		const SCORE s = Score[uIndex];
		const double dHeight = (s - scoreAvg)/(scoreMax - scoreAvg);
		if (dHeight >= dThreshold)
			{
			BestCols[uBestColCount] = uIndex;
			++uBestColCount;
			}
		}
	*ptruBestColCount = uBestColCount;
	}

// Best col only if all following criteria satisfied:
// (1) Score >= min
// (2) Smoothed score >= min
// (3) No gaps.
static void FindBestColsCombo(const MSA &msa, const SCORE Score[],
  const SCORE SmoothScore[], double dMinScore, double dMinSmoothScore,
  unsigned BestCols[], unsigned *ptruBestColCount)
	{
	const unsigned uColCount = msa.GetColCount();

	unsigned uBestColCount = 0;
	for (unsigned uIndex = 0; uIndex < uColCount; ++uIndex)
		{
		if (Score[uIndex] < dMinScore)
			continue;
		if (SmoothScore[uIndex] < dMinSmoothScore)
			continue;
		if (msa.ColumnHasGap(uIndex))
			continue;
		BestCols[uBestColCount] = uIndex;
		++uBestColCount;
		}
	*ptruBestColCount = uBestColCount;
	}

static void ListBestCols(const MSA &msa, const SCORE Score[], const SCORE SmoothScore[],
  unsigned BestCols[], unsigned uBestColCount)
	{
	const unsigned uColCount = msa.GetColCount();
	const unsigned uSeqCount = msa.GetSeqCount();

	Log("Col  ");
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		Log("%u", uSeqIndex%10);
	Log("  ");

	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
		{
		Log("%3u  ", uColIndex);
		for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
			Log("%c", msa.GetChar(uSeqIndex, uColIndex));

		Log("  %10.3f", Score[uColIndex]);
		Log("  %10.3f", SmoothScore[uColIndex]);

		for (unsigned i = 0; i < uBestColCount; ++i)
			if (BestCols[i] == uColIndex)
				Log(" <-- Best");
		Log("\n");
		}
	}

// If two best columns are found within a window, choose
// the highest-scoring. If more than two, choose the one
// closest to the center of the window.
static void MergeBestCols(const SCORE Scores[], const unsigned BestCols[],
  unsigned uBestColCount, unsigned uWindowLength, unsigned AnchorCols[],
  unsigned *ptruAnchorColCount)
	{
	unsigned uAnchorColCount = 0;
	for (unsigned n = 0; n < uBestColCount; /* update inside loop */)
		{
		unsigned uBestColIndex = BestCols[n];
		unsigned uCountWithinWindow = 0;
		for (unsigned i = n + 1; i < uBestColCount; ++i)
			{
			unsigned uBestColIndex2 = BestCols[i];
			if (uBestColIndex2 - uBestColIndex >= uWindowLength)
				break;
			++uCountWithinWindow;
			}
		unsigned uAnchorCol = uBestColIndex;
		if (1 == uCountWithinWindow)
			{
			unsigned uBestColIndex2 = BestCols[n+1];
			if (Scores[uBestColIndex] > Scores[uBestColIndex2])
				uAnchorCol = uBestColIndex;
			else
				uAnchorCol = uBestColIndex2;
			}
		else if (uCountWithinWindow > 1)
			{
			unsigned uWindowCenter = uBestColIndex + uWindowLength/2;
			int iClosestDist = uWindowLength;
			unsigned uClosestCol = uBestColIndex;
			for (unsigned i = n + 1; i < n + uCountWithinWindow; ++i)
				{
				unsigned uColIndex = BestCols[i];
				int iDist = uColIndex - uBestColIndex;
				if (iDist < 0)
					iDist = -iDist;
				if (iDist < iClosestDist)
					{
					uClosestCol = uColIndex;
					iClosestDist = iDist;
					}
				}
			uAnchorCol = uClosestCol;
			}
		AnchorCols[uAnchorColCount] = uAnchorCol;
		++uAnchorColCount;
		n += uCountWithinWindow + 1;
		}
	*ptruAnchorColCount = uAnchorColCount;
	}

void FindAnchorCols(const MSA &msa, unsigned AnchorCols[],
  unsigned *ptruAnchorColCount)
	{
	const unsigned uColCount = msa.GetColCount();
	if (uColCount < 16)
		{
		*ptruAnchorColCount = 0;
		return;
		}

	SCORE *MatchScore = new SCORE[uColCount];
	SCORE *SmoothScore = new SCORE[uColCount];
	unsigned *BestCols = new unsigned[uColCount];

	GetLetterScores(msa, MatchScore);
	WindowSmooth(MatchScore, uColCount, g_uSmoothWindowLength, SmoothScore,
	  g_dSmoothScoreCeil);

	unsigned uBestColCount;
	FindBestColsCombo(msa, MatchScore, SmoothScore, g_dMinBestColScore, g_dMinSmoothScore,
	  BestCols, &uBestColCount);

#if	TRACE
	ListBestCols(msa, MatchScore, SmoothScore, BestCols, uBestColCount);
#endif

	MergeBestCols(MatchScore, BestCols, uBestColCount, g_uAnchorSpacing, AnchorCols,
	  ptruAnchorColCount);

	delete[] MatchScore;
	delete[] SmoothScore;
	delete[] BestCols;
	}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -