⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 msa.cpp

📁 unix,linux下编译。用于蛋白质
💻 CPP
📖 第 1 页 / 共 2 页
字号:
	unsigned uSeqLength = s.Length();
	SetSize(1, uSeqLength);
	SetSeqName(0, s.GetName());
	if (0 != m_SeqIndexToId)
		SetSeqId(0, s.GetId());
	for (unsigned n = 0; n < uSeqLength; ++n)
		SetChar(0, n, s[n]);
	}

unsigned MSA::GetCharCount(unsigned uSeqIndex, unsigned uColIndex) const
	{
	assert(uSeqIndex < GetSeqCount());
	assert(uColIndex < GetColCount());

	unsigned uCol = 0;
	for (unsigned n = 0; n <= uColIndex; ++n)
		if (!IsGap(uSeqIndex, n))
			++uCol;
	return uCol;
	}

void MSA::CopySeq(unsigned uToSeqIndex, const MSA &msaFrom, unsigned uFromSeqIndex)
	{
	assert(uToSeqIndex < m_uSeqCount);
	const unsigned uColCount = msaFrom.GetColCount();
	assert(m_uColCount == uColCount ||
	  (0 == m_uColCount && uColCount <= m_uCacheSeqLength));

	memcpy(m_szSeqs[uToSeqIndex], msaFrom.GetSeqBuffer(uFromSeqIndex), uColCount);
	SetSeqName(uToSeqIndex, msaFrom.GetSeqName(uFromSeqIndex));
	if (0 == m_uColCount)
		m_uColCount = uColCount;
	}

const char *MSA::GetSeqBuffer(unsigned uSeqIndex) const
	{
	assert(uSeqIndex < m_uSeqCount);
	return m_szSeqs[uSeqIndex];
	}

void MSA::DeleteSeq(unsigned uSeqIndex)
	{
	assert(uSeqIndex < m_uSeqCount);

	delete m_szSeqs[uSeqIndex];
	delete m_szNames[uSeqIndex];

	const unsigned uBytesToMove = (m_uSeqCount - uSeqIndex)*sizeof(char *);
	if (uBytesToMove > 0)
		{
		memmove(m_szSeqs + uSeqIndex, m_szSeqs + uSeqIndex + 1, uBytesToMove);
		memmove(m_szNames + uSeqIndex, m_szNames + uSeqIndex + 1, uBytesToMove);
		}

	--m_uSeqCount;

	delete[] m_Weights;
	m_Weights = 0;
	}

bool MSA::IsEmptyCol(unsigned uColIndex) const
	{
	const unsigned uSeqCount = GetSeqCount();
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		if (!IsGap(uSeqIndex, uColIndex))
			return false;
	return true;
	}

//void MSA::DeleteEmptyCols(bool bProgress)
//	{
//	unsigned uColCount = GetColCount();
//	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
//		{
//		if (IsEmptyCol(uColIndex))
//			{
//			if (bProgress)
//				{
//				Log("Deleting col %u of %u\n", uColIndex, uColCount);
//				printf("Deleting col %u of %u\n", uColIndex, uColCount);
//				}
//			DeleteCol(uColIndex);
//			--uColCount;
//			}
//		}
//	}

unsigned MSA::AlignedColIndexToColIndex(unsigned uAlignedColIndex) const
	{
	Quit("MSA::AlignedColIndexToColIndex not implemented");
	return 0;
	}

WEIGHT MSA::GetTotalSeqWeight() const
	{
	WEIGHT wTotal = 0;
	const unsigned uSeqCount = GetSeqCount();
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		wTotal += m_Weights[uSeqIndex];
	return wTotal;
	}

bool MSA::SeqsEq(const MSA &a1, unsigned uSeqIndex1, const MSA &a2,
  unsigned uSeqIndex2)
	{
	Seq s1;
	Seq s2;

	a1.GetSeq(uSeqIndex1, s1);
	a2.GetSeq(uSeqIndex2, s2);

	s1.StripGaps();
	s2.StripGaps();

	return s1.EqIgnoreCase(s2);
	}

unsigned MSA::GetSeqLength(unsigned uSeqIndex) const
	{
	assert(uSeqIndex < GetSeqCount());

	const unsigned uColCount = GetColCount();
	unsigned uLength = 0;
	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
		if (!IsGap(uSeqIndex, uColIndex))
			++uLength;
	return uLength;
	}

void MSA::GetPWID(unsigned uSeqIndex1, unsigned uSeqIndex2, double *ptrPWID,
  unsigned *ptruPosCount) const
	{
	assert(uSeqIndex1 < GetSeqCount());
	assert(uSeqIndex2 < GetSeqCount());

	unsigned uSameCount = 0;
	unsigned uPosCount = 0;
	const unsigned uColCount = GetColCount();
	for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
		{
		char c1 = GetChar(uSeqIndex1, uColIndex);
		if (IsGapChar(c1))
			continue;
		char c2 = GetChar(uSeqIndex2, uColIndex);
		if (IsGapChar(c2))
			continue;
		++uPosCount;
		if (c1 == c2)
			++uSameCount;
		}
	*ptruPosCount = uPosCount;
	if (uPosCount > 0)
		*ptrPWID = 100.0 * (double) uSameCount / (double) uPosCount;
	else
		*ptrPWID = 0;
	}

void MSA::UnWeight()
	{
	for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
		m_Weights[uSeqIndex] = BTInsane;
	}

unsigned MSA::UniqueResidueTypes(unsigned uColIndex) const
	{
	assert(uColIndex < GetColCount());

	unsigned Counts[MAX_ALPHA];
	memset(Counts, 0, sizeof(Counts));
	const unsigned uSeqCount = GetSeqCount();
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		if (IsGap(uSeqIndex, uColIndex) || IsWildcard(uSeqIndex, uColIndex))
			continue;
		const unsigned uLetter = GetLetter(uSeqIndex, uColIndex);
		++(Counts[uLetter]);
		}
	unsigned uUniqueCount = 0;
	for (unsigned uLetter = 0; uLetter < g_AlphaSize; ++uLetter)
		if (Counts[uLetter] > 0)
			++uUniqueCount;
	return uUniqueCount;
	}

double MSA::GetOcc(unsigned uColIndex) const
	{
	unsigned uGapCount = 0;
	for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
		if (IsGap(uSeqIndex, uColIndex))
			++uGapCount;
	unsigned uSeqCount = GetSeqCount();
	return (double) (uSeqCount - uGapCount) / (double) uSeqCount;
	}

void MSA::ToFile(TextFile &File) const
	{
	if (g_bMSF)
		ToMSFFile(File);
	else if (g_bAln)
		ToAlnFile(File);
	else if (g_bHTML)
		ToHTMLFile(File);
	else if (g_bPHYS)
		ToPhySequentialFile(File);
	else if (g_bPHYI)
		ToPhyInterleavedFile(File);
	else
		ToFASTAFile(File);
	if (0 != g_pstrScoreFileName)
		WriteScoreFile(*this);
	}

bool MSA::ColumnHasGap(unsigned uColIndex) const
	{
	const unsigned uSeqCount = GetSeqCount();
	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
		if (IsGap(uSeqIndex, uColIndex))
			return true;
	return false;
	}

void MSA::SetIdCount(unsigned uIdCount)
	{
	//if (m_uIdCount != 0)
	//	Quit("MSA::SetIdCount: may only be called once");

	if (m_uIdCount > 0)
		{
		if (uIdCount > m_uIdCount)
			Quit("MSA::SetIdCount: cannot increase count");
		return;
		}
	m_uIdCount = uIdCount;
	}

void MSA::SetSeqId(unsigned uSeqIndex, unsigned uId)
	{
	assert(uSeqIndex < m_uSeqCount);
	assert(uId < m_uIdCount);
	if (0 == m_SeqIndexToId)
		{
		if (0 == m_uIdCount)
			Quit("MSA::SetSeqId, SetIdCount has not been called");
		m_IdToSeqIndex = new unsigned[m_uIdCount];
		m_SeqIndexToId = new unsigned[m_uSeqCount];

		memset(m_IdToSeqIndex, 0xff, m_uIdCount*sizeof(unsigned));
		memset(m_SeqIndexToId, 0xff, m_uSeqCount*sizeof(unsigned));
		}
	m_SeqIndexToId[uSeqIndex] = uId;
	m_IdToSeqIndex[uId] = uSeqIndex;
	}

unsigned MSA::GetSeqIndex(unsigned uId) const
	{
	assert(uId < m_uIdCount);
	assert(0 != m_IdToSeqIndex);
	unsigned uSeqIndex = m_IdToSeqIndex[uId];
	assert(uSeqIndex < m_uSeqCount);
	return uSeqIndex;
	}

bool MSA::GetSeqIndex(unsigned uId, unsigned *ptruIndex) const
	{
	for (unsigned uSeqIndex = 0; uSeqIndex < m_uSeqCount; ++uSeqIndex)
		{
		if (uId == m_SeqIndexToId[uSeqIndex])
			{
			*ptruIndex = uSeqIndex;
			return true;
			}
		}
	return false;
	}

unsigned MSA::GetSeqId(unsigned uSeqIndex) const
	{
	assert(uSeqIndex < m_uSeqCount);
	unsigned uId = m_SeqIndexToId[uSeqIndex];
	assert(uId < m_uIdCount);
	return uId;
	}

bool MSA::WeightsSet() const
	{
	return BTInsane != m_Weights[0];
	}

void MSASubsetByIds(const MSA &msaIn, const unsigned Ids[], unsigned uIdCount,
  MSA &msaOut)
	{
	const unsigned uColCount = msaIn.GetColCount();
	msaOut.SetSize(uIdCount, uColCount);
	for (unsigned uSeqIndexOut = 0; uSeqIndexOut < uIdCount; ++uSeqIndexOut)
		{
		const unsigned uId = Ids[uSeqIndexOut];

		const unsigned uSeqIndexIn = msaIn.GetSeqIndex(uId);
		const char *ptrName = msaIn.GetSeqName(uSeqIndexIn);

		msaOut.SetSeqId(uSeqIndexOut, uId);
		msaOut.SetSeqName(uSeqIndexOut, ptrName);

		for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
			{
			const char c = msaIn.GetChar(uSeqIndexIn, uColIndex);
			msaOut.SetChar(uSeqIndexOut, uColIndex, c);
			}
		}
	}

// Caller must allocate ptrSeq and ptrLabel as new char[n].
void MSA::AppendSeq(char *ptrSeq, unsigned uSeqLength, char *ptrLabel)
	{
	if (m_uSeqCount > m_uCacheSeqCount)
		Quit("Internal error MSA::AppendSeq");
	if (m_uSeqCount == m_uCacheSeqCount)
		ExpandCache(m_uSeqCount + 4, uSeqLength);
	m_szSeqs[m_uSeqCount] = ptrSeq;
	m_szNames[m_uSeqCount] = ptrLabel;
	++m_uSeqCount;
	}

void MSA::ExpandCache(unsigned uSeqCount, unsigned uColCount)
	{
	if (m_IdToSeqIndex != 0 || m_SeqIndexToId != 0 || uSeqCount < m_uSeqCount)
		Quit("Internal error MSA::ExpandCache");

	if (m_uSeqCount > 0 && uColCount != m_uColCount)
		Quit("Internal error MSA::ExpandCache, ColCount changed");

	char **NewSeqs = new char *[uSeqCount];
	char **NewNames = new char *[uSeqCount];
	WEIGHT *NewWeights = new WEIGHT[uSeqCount];

	for (unsigned uSeqIndex = 0; uSeqIndex < m_uSeqCount; ++uSeqIndex)
		{
		NewSeqs[uSeqIndex] = m_szSeqs[uSeqIndex];
		NewNames[uSeqIndex] = m_szNames[uSeqIndex];
		NewWeights[uSeqIndex] = m_Weights[uSeqIndex];
		}

	for (unsigned uSeqIndex = m_uSeqCount; uSeqIndex < uSeqCount; ++uSeqIndex)
		{
		char *Seq = new char[uColCount];
		NewSeqs[uSeqIndex] = Seq;
#if	DEBUG
		memset(Seq, '?', uColCount);
#endif
		}

	delete[] m_szSeqs;
	delete[] m_szNames;
	delete[] m_Weights;

	m_szSeqs = NewSeqs;
	m_szNames = NewNames;
	m_Weights = NewWeights;

	m_uCacheSeqCount = uSeqCount;
	m_uCacheSeqLength = uColCount;
	m_uColCount = uColCount;
	}

void MSA::FixAlpha()
	{
	ClearInvalidLetterWarning();
	for (unsigned uSeqIndex = 0; uSeqIndex < m_uSeqCount; ++uSeqIndex)
		{
		for (unsigned uColIndex = 0; uColIndex < m_uColCount; ++uColIndex)
			{
			char c = GetChar(uSeqIndex, uColIndex);
			if (!IsResidueChar(c) && !IsGapChar(c))
				{
				char w = GetWildcardChar();
				// Warning("Invalid letter '%c', replaced by '%c'", c, w);
				InvalidLetterWarning(c, w);
				SetChar(uSeqIndex, uColIndex, w);
				}
			}
		}
	ReportInvalidLetters();
	}

ALPHA MSA::GuessAlpha() const
	{
// If at least MIN_NUCLEO_PCT of the first CHAR_COUNT non-gap
// letters belong to the nucleotide alphabet, guess nucleo.
// Otherwise amino.
	const unsigned CHAR_COUNT = 100;
	const unsigned MIN_NUCLEO_PCT = 95;

	const unsigned uSeqCount = GetSeqCount();
	const unsigned uColCount = GetColCount();
	if (0 == uSeqCount)
		return ALPHA_Amino;

	unsigned uDNACount = 0;
	unsigned uRNACount = 0;
	unsigned uTotal = 0;
	unsigned i = 0;
	for (;;)
		{
		unsigned uSeqIndex = i/uColCount;
		if (uSeqIndex >= uSeqCount)
			break;
		unsigned uColIndex = i%uColCount;
		++i;
		char c = GetChar(uSeqIndex, uColIndex);
		if (IsGapChar(c))
			continue;
		if (IsDNA(c))
			++uDNACount;
		if (IsRNA(c))
			++uRNACount;
		++uTotal;
		if (uTotal >= CHAR_COUNT)
			break;
		}
	if (uTotal != 0 && ((uRNACount*100)/uTotal) >= MIN_NUCLEO_PCT)
		return ALPHA_RNA;
	if (uTotal != 0 && ((uDNACount*100)/uTotal) >= MIN_NUCLEO_PCT)
		return ALPHA_DNA;
	return ALPHA_Amino;
	}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -