⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 scoredist.cpp

📁 unix,linux下编译。用于蛋白质
💻 CPP
字号:
#include <limits.h>
#include <math.h>
#include "muscle.h"
#include "msa.h"
#include "distfunc.h"
#include "msa.h"
#include "seqvect.h"
#include "pwpath.h"

// ScoreDist
// E. Sonnhammer & V. Hollich, Scoredist: A simple and robust protein sequence
// distance estimator, BMC Bioinformatics 2005, 6:108.

extern int BLOSUM62[20][20];
extern double BLOSUM62_Expected;

static const double Dayhoff_CalibrationFactor = 1.3370;
static const double JTT_CalibrationFactor = 1.2873;
static const double MV_CalibrationFactor = 1.1775;
static const double LARGE_D = 3.0;

static double CalibrationFactor = JTT_CalibrationFactor;


// Similarity score
static double Sigma(const MSA &msa, unsigned SeqIndex1, unsigned SeqIndex2,
  unsigned *ptrLength)
	{
	unsigned Length = 0;
	double Score = 0;
	const unsigned ColCount = msa.GetColCount();
	for (unsigned ColIndex = 0; ColIndex < ColCount; ++ColIndex)
		{
		unsigned Letter1 = msa.GetLetterEx(SeqIndex1, ColIndex);
		unsigned Letter2 = msa.GetLetterEx(SeqIndex2, ColIndex);
		if (Letter1 >= 20 || Letter2 >= 20)
			continue;
		++Length;
		Score += BLOSUM62[Letter1][Letter2];
		}

	*ptrLength = Length;
	return Score;
	}

// Normalized score
static double Sigma_N(const MSA &msa, unsigned SeqIndex1, unsigned SeqIndex2)
	{
	unsigned Length = UINT_MAX;
	double Score = Sigma(msa, SeqIndex1, SeqIndex2, &Length);
	double RandomScore = Length*BLOSUM62_Expected;
	return Score - RandomScore;
	}

// Upper limit
static double Sigma_U(const MSA &msa, unsigned SeqIndex1, unsigned SeqIndex2,
  unsigned *ptrLength)
	{
	double Score11 = Sigma(msa, SeqIndex1, SeqIndex1, ptrLength);
	double Score22 = Sigma(msa, SeqIndex2, SeqIndex2, ptrLength);
	return (Score11 + Score22)/2;
	}

// Normalized upper limit
static double Sigma_UN(const MSA &msa, unsigned SeqIndex1, unsigned SeqIndex2)
	{
	unsigned Length = UINT_MAX;
	double Score = Sigma_U(msa, SeqIndex1, SeqIndex2, &Length);
	double RandomScore = Length*BLOSUM62_Expected;
	return Score - RandomScore;
	}

double GetScoreDist(const MSA &msa, unsigned SeqIndex1, unsigned SeqIndex2)
	{
	if (g_Alpha != ALPHA_Amino)
		Quit("Scoredist is only for amino acid sequences");

	double s_N = Sigma_N(msa, SeqIndex1, SeqIndex2);
	double s_UN = Sigma_UN(msa, SeqIndex1, SeqIndex2);
	double d = 0.0;
	if (s_UN != 0)
		{
		double Ratio = s_N/s_UN;
		if (Ratio < 0.001)
			d = LARGE_D;
		else
			d =	-log(Ratio);
		}
	return d*CalibrationFactor;
	}

void DistPWScoreDist(const SeqVect &v, DistFunc &DF)
	{
	SEQWEIGHT SeqWeightSave = GetSeqWeightMethod();
	SetSeqWeightMethod(SEQWEIGHT_Henikoff);

	const unsigned uSeqCount = v.Length();
	DF.SetCount(uSeqCount);

	const unsigned uPairCount = (uSeqCount*(uSeqCount + 1))/2;
	unsigned uCount = 0;
	SetProgressDesc("PW ScoreDist");
	for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
		{
		const Seq &s1 = v.GetSeq(uSeqIndex1);
		MSA msa1;
		msa1.FromSeq(s1);
		for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqIndex1; ++uSeqIndex2)
			{
			if (0 == uCount%20)
				Progress(uCount, uPairCount);
			++uCount;
			const Seq &s2 = v.GetSeq(uSeqIndex2);
			MSA msa2;
			msa2.FromSeq(s2);
		
			PWPath Path;
			MSA msaOut;
			AlignTwoMSAs(msa1, msa2, msaOut, Path, false, false);

			float d = (float) GetScoreDist(msaOut, 0, 1);
			DF.SetDist(uSeqIndex1, uSeqIndex2, d);
			}
		}
	ProgressStepsDone();

	SetSeqWeightMethod(SeqWeightSave);
	}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -