⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 msa.cpp

📁 unix,linux下编译。用于蛋白质
💻 CPP
📖 第 1 页 / 共 2 页
字号:
#include "muscle.h"
#include "msa.h"
#include "textfile.h"
#include "seq.h"
#include <math.h>

const unsigned DEFAULT_SEQ_LENGTH = 500;

unsigned MSA::m_uIdCount = 0;

MSA::MSA()
	{
	m_uSeqCount = 0;
	m_uColCount = 0;

	m_szSeqs = 0;
	m_szNames = 0;
	m_Weights = 0;

	m_IdToSeqIndex = 0;
	m_SeqIndexToId = 0;

	m_uCacheSeqCount = 0;
	m_uCacheSeqLength = 0;
	}

MSA::~MSA()
	{
	Free();
	}

void MSA::Free()
	{
	for (unsigned n = 0; n < m_uSeqCount; ++n)
		{
		delete[] m_szSeqs[n];
		delete[] m_szNames[n];
		}

	delete[] m_szSeqs;
	delete[] m_szNames;
	delete[] m_Weights;
	delete[] m_IdToSeqIndex;
	delete[] m_SeqIndexToId;

	m_uSeqCount = 0;
	m_uColCount = 0;

	m_szSeqs = 0;
	m_szNames = 0;
	m_Weights = 0;

	m_IdToSeqIndex = 0;
	m_SeqIndexToId = 0;
	}

void MSA::SetSize(unsigned uSeqCount, unsigned uColCount)
	{
	Free();

	m_uSeqCount = uSeqCount;
	m_uCacheSeqLength = uColCount;
	m_uColCount = 0;

	if (0 == uSeqCount && 0 == uColCount)
		return;

	m_szSeqs = new char *[uSeqCount];
	m_szNames = new char *[uSeqCount];
	m_Weights = new WEIGHT[uSeqCount];

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		m_szSeqs[uSeqIndex] = new char[uColCount+1];
		m_szNames[uSeqIndex] = 0;
#if	DEBUG
		m_Weights[uSeqIndex] = BTInsane;
		memset(m_szSeqs[uSeqIndex], '?', uColCount);
#endif
		m_szSeqs[uSeqIndex][uColCount] = 0;
		}

	if (m_uIdCount > 0)
		{
		m_IdToSeqIndex = new unsigned[m_uIdCount];
		m_SeqIndexToId = new unsigned[m_uSeqCount];
#if	DEBUG
		memset(m_IdToSeqIndex, 0xff, m_uIdCount*sizeof(unsigned));
		memset(m_SeqIndexToId, 0xff, m_uSeqCount*sizeof(unsigned));
#endif
		}
	}

void MSA::LogMe() const
	{
	if (0 == GetColCount())
		{
		Log("MSA empty\n");
		return;
		}

	const unsigned uColsPerLine = 50;
	unsigned uLinesPerSeq = (GetColCount() - 1)/uColsPerLine + 1;
	for (unsigned n = 0; n < uLinesPerSeq; ++n)
		{
		unsigned i;
		unsigned iStart = n*uColsPerLine;
		unsigned iEnd = GetColCount();
		if (iEnd - iStart + 1 > uColsPerLine)
			iEnd = iStart + uColsPerLine;
		Log("                       ");
		for (i = iStart; i < iEnd; ++i)
			Log("%u", i%10);
		Log("\n");
		Log("                       ");
		for (i = iStart; i + 9 < iEnd; i += 10)
			Log("%-10u", i);
		if (n == uLinesPerSeq - 1)
			Log(" %-10u", GetColCount());
		Log("\n");
		for (unsigned uSeqIndex = 0; uSeqIndex < m_uSeqCount; ++uSeqIndex)
			{
			Log("%12.12s", m_szNames[uSeqIndex]);
			if (m_Weights[uSeqIndex] != BTInsane)
				Log(" (%5.3f)", m_Weights[uSeqIndex]);
			else
				Log("        ");
			Log("   ");
			for (i = iStart; i < iEnd; ++i)
				Log("%c", GetChar(uSeqIndex, i));
			if (0 != m_SeqIndexToId)
				Log(" [%5u]", m_SeqIndexToId[uSeqIndex]);
			Log("\n");
			}
		Log("\n\n");
		}
	}

char MSA::GetChar(unsigned uSeqIndex, unsigned uIndex) const
	{
// TODO: Performance cost?
	if (uSeqIndex >= m_uSeqCount || uIndex >= m_uColCount)
		Quit("MSA::GetChar(%u/%u,%u/%u)",
		  uSeqIndex, m_uSeqCount, uIndex, m_uColCount);

	char c = m_szSeqs[uSeqIndex][uIndex];
//	assert(IsLegalChar(c));
	return c;
	}

unsigned MSA::GetLetter(unsigned uSeqIndex, unsigned uIndex) const
	{
// TODO: Performance cost?
	char c = GetChar(uSeqIndex, uIndex);
	unsigned uLetter = CharToLetter(c);
	if (uLetter >= 20)
		{
		char c = ' ';
		if (uSeqIndex < m_uSeqCount && uIndex < m_uColCount)
			c = m_szSeqs[uSeqIndex][uIndex];
		Quit("MSA::GetLetter(%u/%u, %u/%u)='%c'/%u",
		  uSeqIndex, m_uSeqCount, uIndex, m_uColCount, c, uLetter);
		}
	return uLetter;
	}

unsigned MSA::GetLetterEx(unsigned uSeqIndex, unsigned uIndex) const
	{
// TODO: Performance cost?
	char c = GetChar(uSeqIndex, uIndex);
	unsigned uLetter = CharToLetterEx(c);
	return uLetter;
	}

void MSA::SetSeqName(unsigned uSeqIndex, const char szName[])
	{
	if (uSeqIndex >= m_uSeqCount)
		Quit("MSA::SetSeqName(%u, %s), count=%u", uSeqIndex, m_uSeqCount);
	delete[] m_szNames[uSeqIndex];
	int n = (int) strlen(szName) + 1;
	m_szNames[uSeqIndex] = new char[n];
	memcpy(m_szNames[uSeqIndex], szName, n);
	}

const char *MSA::GetSeqName(unsigned uSeqIndex) const
	{
	if (uSeqIndex >= m_uSeqCount)
		Quit("MSA::GetSeqName(%u), count=%u", uSeqIndex, m_uSeqCount);
	return m_szNames[uSeqIndex];
	}

bool MSA::IsGap(unsigned uSeqIndex, unsigned uIndex) const
	{
	char c = GetChar(uSeqIndex, uIndex);
	return IsGapChar(c);
	}

bool MSA::IsWildcard(unsigned uSeqIndex, unsigned uIndex) const
	{
	char c = GetChar(uSeqIndex, uIndex);
	return IsWildcardChar(c);
	}

void MSA::SetChar(unsigned uSeqIndex, unsigned uIndex, char c)
	{
	if (uSeqIndex >= m_uSeqCount || uIndex > m_uCacheSeqLength)
		Quit("MSA::SetChar(%u,%u)", uSeqIndex, uIndex);

	if (uIndex == m_uCacheSeqLength)
		{
		const unsigned uNewCacheSeqLength = m_uCacheSeqLength + DEFAULT_SEQ_LENGTH;
		for (unsigned n = 0; n < m_uSeqCount; ++n)
			{
			char *ptrNewSeq = new char[uNewCacheSeqLength+1];
			memcpy(ptrNewSeq, m_szSeqs[n], m_uCacheSeqLength);
			memset(ptrNewSeq + m_uCacheSeqLength, '?', DEFAULT_SEQ_LENGTH);
			ptrNewSeq[uNewCacheSeqLength] = 0;
			delete[] m_szSeqs[n];
			m_szSeqs[n] = ptrNewSeq;
			}

		m_uColCount = uIndex;
		m_uCacheSeqLength = uNewCacheSeqLength;
		}

	if (uIndex >= m_uColCount)
		m_uColCount = uIndex + 1;
	m_szSeqs[uSeqIndex][uIndex] = c;
	}

void MSA::GetSeq(unsigned uSeqIndex, Seq &seq) const
	{
	assert(uSeqIndex < m_uSeqCount);

	seq.Clear();

	for (unsigned n = 0; n < m_uColCount; ++n)
		if (!IsGap(uSeqIndex, n))
			{
			char c = GetChar(uSeqIndex, n);
			if (!isalpha(c))
				Quit("Invalid character '%c' in sequence", c);
			c = toupper(c);
			seq.push_back(c);
			}
	const char *ptrName = GetSeqName(uSeqIndex);
	seq.SetName(ptrName);
	}

bool MSA::HasGap() const
	{
	for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
		for (unsigned n = 0; n < GetColCount(); ++n)
			if (IsGap(uSeqIndex, n))
				return true;
	return false;
	}

bool MSA::IsLegalLetter(unsigned uLetter) const
	{
	return uLetter < 20;
	}

void MSA::SetSeqCount(unsigned uSeqCount)
	{
	Free();
	SetSize(uSeqCount, DEFAULT_SEQ_LENGTH);
	}

void MSA::CopyCol(unsigned uFromCol, unsigned uToCol)
	{
	assert(uFromCol < GetColCount());
	assert(uToCol < GetColCount());
	if (uFromCol == uToCol)
		return;

	for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
		{
		const char c = GetChar(uSeqIndex, uFromCol);
		SetChar(uSeqIndex, uToCol, c);
		}
	}

void MSA::Copy(const MSA &msa)
	{
	Free();
	const unsigned uSeqCount = msa.GetSeqCount();
	const unsigned uColCount = msa.GetColCount();
	SetSize(uSeqCount, uColCount);

	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		SetSeqName(uSeqIndex, msa.GetSeqName(uSeqIndex));
		const unsigned uId = msa.GetSeqId(uSeqIndex);
		SetSeqId(uSeqIndex, uId);
		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			{
			const char c = msa.GetChar(uSeqIndex, uColIndex);
			SetChar(uSeqIndex, uColIndex, c);
			}
		}
	}

bool MSA::IsGapColumn(unsigned uColIndex) const
	{
	assert(GetSeqCount() > 0);
	for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
		if (!IsGap(uSeqIndex, uColIndex))
			return false;
	return true;
	}

bool MSA::GetSeqIndex(const char *ptrSeqName, unsigned *ptruSeqIndex) const
	{
	for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
		if (0 == stricmp(ptrSeqName, GetSeqName(uSeqIndex)))
			{
			*ptruSeqIndex = uSeqIndex;
			return true;
			}
	return false;
	}

void MSA::DeleteCol(unsigned uColIndex)
	{
	assert(uColIndex < m_uColCount);
	size_t n = m_uColCount - uColIndex;
	if (n > 0)
		{
		for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
			{
			char *ptrSeq = m_szSeqs[uSeqIndex];
			memmove(ptrSeq + uColIndex, ptrSeq + uColIndex + 1, n);
			}
		}
	--m_uColCount;
	}

void MSA::DeleteColumns(unsigned uColIndex, unsigned uColCount)
	{
	for (unsigned n = 0; n < uColCount; ++n)
		DeleteCol(uColIndex);
	}

void MSA::FromFile(TextFile &File)
	{
	FromFASTAFile(File);
	}

// Weights sum to 1, WCounts sum to NIC
WEIGHT MSA::GetSeqWeight(unsigned uSeqIndex) const
	{
	assert(uSeqIndex < m_uSeqCount);
	WEIGHT w = m_Weights[uSeqIndex];
	if (w == wInsane)
		Quit("Seq weight not set");
	return w;
	}

void MSA::SetSeqWeight(unsigned uSeqIndex, WEIGHT w) const
	{
	assert(uSeqIndex < m_uSeqCount);
	m_Weights[uSeqIndex] = w;
	}

void MSA::NormalizeWeights(WEIGHT wDesiredTotal) const
	{
	WEIGHT wTotal = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < m_uSeqCount; ++uSeqIndex)
		wTotal += m_Weights[uSeqIndex];

	if (0 == wTotal)
		return;

	const WEIGHT f = wDesiredTotal/wTotal;
	for (unsigned uSeqIndex = 0; uSeqIndex < m_uSeqCount; ++uSeqIndex)
		m_Weights[uSeqIndex] *= f;
	}

void MSA::CalcWeights() const
	{
	Quit("Calc weights not implemented");
	}

static void FmtChar(char c, unsigned uWidth)
	{
	Log("%c", c);
	for (unsigned n = 0; n < uWidth - 1; ++n)
		Log(" ");
	}

static void FmtInt(unsigned u, unsigned uWidth)
	{
	static char szStr[1024];
	assert(uWidth < sizeof(szStr));
	if (u > 0)
		sprintf(szStr, "%u", u);
	else
		strcpy(szStr, ".");
	Log(szStr);
	unsigned n = (unsigned) strlen(szStr);
	if (n < uWidth)
		for (unsigned i = 0; i < uWidth - n; ++i)
			Log(" ");
	}

static void FmtInt0(unsigned u, unsigned uWidth)
	{
	static char szStr[1024];
	assert(uWidth < sizeof(szStr));
	sprintf(szStr, "%u", u);
	Log(szStr);
	unsigned n = (unsigned) strlen(szStr);
	if (n < uWidth)
		for (unsigned i = 0; i < uWidth - n; ++i)
			Log(" ");
	}

static void FmtPad(unsigned n)
	{
	for (unsigned i = 0; i < n; ++i)
		Log(" ");
	}

void MSA::FromSeq(const Seq &s)
	{

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -