📄 msa.cpp
字号:
#include "muscle.h"
#include "msa.h"
#include "textfile.h"
#include "seq.h"
#include <math.h>
const unsigned DEFAULT_SEQ_LENGTH = 500;
unsigned MSA::m_uIdCount = 0;
MSA::MSA()
{
m_uSeqCount = 0;
m_uColCount = 0;
m_szSeqs = 0;
m_szNames = 0;
m_Weights = 0;
m_IdToSeqIndex = 0;
m_SeqIndexToId = 0;
m_uCacheSeqCount = 0;
m_uCacheSeqLength = 0;
}
MSA::~MSA()
{
Free();
}
void MSA::Free()
{
for (unsigned n = 0; n < m_uSeqCount; ++n)
{
delete[] m_szSeqs[n];
delete[] m_szNames[n];
}
delete[] m_szSeqs;
delete[] m_szNames;
delete[] m_Weights;
delete[] m_IdToSeqIndex;
delete[] m_SeqIndexToId;
m_uSeqCount = 0;
m_uColCount = 0;
m_szSeqs = 0;
m_szNames = 0;
m_Weights = 0;
m_IdToSeqIndex = 0;
m_SeqIndexToId = 0;
}
void MSA::SetSize(unsigned uSeqCount, unsigned uColCount)
{
Free();
m_uSeqCount = uSeqCount;
m_uCacheSeqLength = uColCount;
m_uColCount = 0;
if (0 == uSeqCount && 0 == uColCount)
return;
m_szSeqs = new char *[uSeqCount];
m_szNames = new char *[uSeqCount];
m_Weights = new WEIGHT[uSeqCount];
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
{
m_szSeqs[uSeqIndex] = new char[uColCount+1];
m_szNames[uSeqIndex] = 0;
#if DEBUG
m_Weights[uSeqIndex] = BTInsane;
memset(m_szSeqs[uSeqIndex], '?', uColCount);
#endif
m_szSeqs[uSeqIndex][uColCount] = 0;
}
if (m_uIdCount > 0)
{
m_IdToSeqIndex = new unsigned[m_uIdCount];
m_SeqIndexToId = new unsigned[m_uSeqCount];
#if DEBUG
memset(m_IdToSeqIndex, 0xff, m_uIdCount*sizeof(unsigned));
memset(m_SeqIndexToId, 0xff, m_uSeqCount*sizeof(unsigned));
#endif
}
}
void MSA::LogMe() const
{
if (0 == GetColCount())
{
Log("MSA empty\n");
return;
}
const unsigned uColsPerLine = 50;
unsigned uLinesPerSeq = (GetColCount() - 1)/uColsPerLine + 1;
for (unsigned n = 0; n < uLinesPerSeq; ++n)
{
unsigned i;
unsigned iStart = n*uColsPerLine;
unsigned iEnd = GetColCount();
if (iEnd - iStart + 1 > uColsPerLine)
iEnd = iStart + uColsPerLine;
Log(" ");
for (i = iStart; i < iEnd; ++i)
Log("%u", i%10);
Log("\n");
Log(" ");
for (i = iStart; i + 9 < iEnd; i += 10)
Log("%-10u", i);
if (n == uLinesPerSeq - 1)
Log(" %-10u", GetColCount());
Log("\n");
for (unsigned uSeqIndex = 0; uSeqIndex < m_uSeqCount; ++uSeqIndex)
{
Log("%12.12s", m_szNames[uSeqIndex]);
if (m_Weights[uSeqIndex] != BTInsane)
Log(" (%5.3f)", m_Weights[uSeqIndex]);
else
Log(" ");
Log(" ");
for (i = iStart; i < iEnd; ++i)
Log("%c", GetChar(uSeqIndex, i));
if (0 != m_SeqIndexToId)
Log(" [%5u]", m_SeqIndexToId[uSeqIndex]);
Log("\n");
}
Log("\n\n");
}
}
char MSA::GetChar(unsigned uSeqIndex, unsigned uIndex) const
{
// TODO: Performance cost?
if (uSeqIndex >= m_uSeqCount || uIndex >= m_uColCount)
Quit("MSA::GetChar(%u/%u,%u/%u)",
uSeqIndex, m_uSeqCount, uIndex, m_uColCount);
char c = m_szSeqs[uSeqIndex][uIndex];
// assert(IsLegalChar(c));
return c;
}
unsigned MSA::GetLetter(unsigned uSeqIndex, unsigned uIndex) const
{
// TODO: Performance cost?
char c = GetChar(uSeqIndex, uIndex);
unsigned uLetter = CharToLetter(c);
if (uLetter >= 20)
{
char c = ' ';
if (uSeqIndex < m_uSeqCount && uIndex < m_uColCount)
c = m_szSeqs[uSeqIndex][uIndex];
Quit("MSA::GetLetter(%u/%u, %u/%u)='%c'/%u",
uSeqIndex, m_uSeqCount, uIndex, m_uColCount, c, uLetter);
}
return uLetter;
}
unsigned MSA::GetLetterEx(unsigned uSeqIndex, unsigned uIndex) const
{
// TODO: Performance cost?
char c = GetChar(uSeqIndex, uIndex);
unsigned uLetter = CharToLetterEx(c);
return uLetter;
}
void MSA::SetSeqName(unsigned uSeqIndex, const char szName[])
{
if (uSeqIndex >= m_uSeqCount)
Quit("MSA::SetSeqName(%u, %s), count=%u", uSeqIndex, m_uSeqCount);
delete[] m_szNames[uSeqIndex];
int n = (int) strlen(szName) + 1;
m_szNames[uSeqIndex] = new char[n];
memcpy(m_szNames[uSeqIndex], szName, n);
}
const char *MSA::GetSeqName(unsigned uSeqIndex) const
{
if (uSeqIndex >= m_uSeqCount)
Quit("MSA::GetSeqName(%u), count=%u", uSeqIndex, m_uSeqCount);
return m_szNames[uSeqIndex];
}
bool MSA::IsGap(unsigned uSeqIndex, unsigned uIndex) const
{
char c = GetChar(uSeqIndex, uIndex);
return IsGapChar(c);
}
bool MSA::IsWildcard(unsigned uSeqIndex, unsigned uIndex) const
{
char c = GetChar(uSeqIndex, uIndex);
return IsWildcardChar(c);
}
void MSA::SetChar(unsigned uSeqIndex, unsigned uIndex, char c)
{
if (uSeqIndex >= m_uSeqCount || uIndex > m_uCacheSeqLength)
Quit("MSA::SetChar(%u,%u)", uSeqIndex, uIndex);
if (uIndex == m_uCacheSeqLength)
{
const unsigned uNewCacheSeqLength = m_uCacheSeqLength + DEFAULT_SEQ_LENGTH;
for (unsigned n = 0; n < m_uSeqCount; ++n)
{
char *ptrNewSeq = new char[uNewCacheSeqLength+1];
memcpy(ptrNewSeq, m_szSeqs[n], m_uCacheSeqLength);
memset(ptrNewSeq + m_uCacheSeqLength, '?', DEFAULT_SEQ_LENGTH);
ptrNewSeq[uNewCacheSeqLength] = 0;
delete[] m_szSeqs[n];
m_szSeqs[n] = ptrNewSeq;
}
m_uColCount = uIndex;
m_uCacheSeqLength = uNewCacheSeqLength;
}
if (uIndex >= m_uColCount)
m_uColCount = uIndex + 1;
m_szSeqs[uSeqIndex][uIndex] = c;
}
void MSA::GetSeq(unsigned uSeqIndex, Seq &seq) const
{
assert(uSeqIndex < m_uSeqCount);
seq.Clear();
for (unsigned n = 0; n < m_uColCount; ++n)
if (!IsGap(uSeqIndex, n))
{
char c = GetChar(uSeqIndex, n);
if (!isalpha(c))
Quit("Invalid character '%c' in sequence", c);
c = toupper(c);
seq.push_back(c);
}
const char *ptrName = GetSeqName(uSeqIndex);
seq.SetName(ptrName);
}
bool MSA::HasGap() const
{
for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
for (unsigned n = 0; n < GetColCount(); ++n)
if (IsGap(uSeqIndex, n))
return true;
return false;
}
bool MSA::IsLegalLetter(unsigned uLetter) const
{
return uLetter < 20;
}
void MSA::SetSeqCount(unsigned uSeqCount)
{
Free();
SetSize(uSeqCount, DEFAULT_SEQ_LENGTH);
}
void MSA::CopyCol(unsigned uFromCol, unsigned uToCol)
{
assert(uFromCol < GetColCount());
assert(uToCol < GetColCount());
if (uFromCol == uToCol)
return;
for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
{
const char c = GetChar(uSeqIndex, uFromCol);
SetChar(uSeqIndex, uToCol, c);
}
}
void MSA::Copy(const MSA &msa)
{
Free();
const unsigned uSeqCount = msa.GetSeqCount();
const unsigned uColCount = msa.GetColCount();
SetSize(uSeqCount, uColCount);
for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
{
SetSeqName(uSeqIndex, msa.GetSeqName(uSeqIndex));
const unsigned uId = msa.GetSeqId(uSeqIndex);
SetSeqId(uSeqIndex, uId);
for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
{
const char c = msa.GetChar(uSeqIndex, uColIndex);
SetChar(uSeqIndex, uColIndex, c);
}
}
}
bool MSA::IsGapColumn(unsigned uColIndex) const
{
assert(GetSeqCount() > 0);
for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
if (!IsGap(uSeqIndex, uColIndex))
return false;
return true;
}
bool MSA::GetSeqIndex(const char *ptrSeqName, unsigned *ptruSeqIndex) const
{
for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
if (0 == stricmp(ptrSeqName, GetSeqName(uSeqIndex)))
{
*ptruSeqIndex = uSeqIndex;
return true;
}
return false;
}
void MSA::DeleteCol(unsigned uColIndex)
{
assert(uColIndex < m_uColCount);
size_t n = m_uColCount - uColIndex;
if (n > 0)
{
for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
{
char *ptrSeq = m_szSeqs[uSeqIndex];
memmove(ptrSeq + uColIndex, ptrSeq + uColIndex + 1, n);
}
}
--m_uColCount;
}
void MSA::DeleteColumns(unsigned uColIndex, unsigned uColCount)
{
for (unsigned n = 0; n < uColCount; ++n)
DeleteCol(uColIndex);
}
void MSA::FromFile(TextFile &File)
{
FromFASTAFile(File);
}
// Weights sum to 1, WCounts sum to NIC
WEIGHT MSA::GetSeqWeight(unsigned uSeqIndex) const
{
assert(uSeqIndex < m_uSeqCount);
WEIGHT w = m_Weights[uSeqIndex];
if (w == wInsane)
Quit("Seq weight not set");
return w;
}
void MSA::SetSeqWeight(unsigned uSeqIndex, WEIGHT w) const
{
assert(uSeqIndex < m_uSeqCount);
m_Weights[uSeqIndex] = w;
}
void MSA::NormalizeWeights(WEIGHT wDesiredTotal) const
{
WEIGHT wTotal = 0;
for (unsigned uSeqIndex = 0; uSeqIndex < m_uSeqCount; ++uSeqIndex)
wTotal += m_Weights[uSeqIndex];
if (0 == wTotal)
return;
const WEIGHT f = wDesiredTotal/wTotal;
for (unsigned uSeqIndex = 0; uSeqIndex < m_uSeqCount; ++uSeqIndex)
m_Weights[uSeqIndex] *= f;
}
void MSA::CalcWeights() const
{
Quit("Calc weights not implemented");
}
static void FmtChar(char c, unsigned uWidth)
{
Log("%c", c);
for (unsigned n = 0; n < uWidth - 1; ++n)
Log(" ");
}
static void FmtInt(unsigned u, unsigned uWidth)
{
static char szStr[1024];
assert(uWidth < sizeof(szStr));
if (u > 0)
sprintf(szStr, "%u", u);
else
strcpy(szStr, ".");
Log(szStr);
unsigned n = (unsigned) strlen(szStr);
if (n < uWidth)
for (unsigned i = 0; i < uWidth - n; ++i)
Log(" ");
}
static void FmtInt0(unsigned u, unsigned uWidth)
{
static char szStr[1024];
assert(uWidth < sizeof(szStr));
sprintf(szStr, "%u", u);
Log(szStr);
unsigned n = (unsigned) strlen(szStr);
if (n < uWidth)
for (unsigned i = 0; i < uWidth - n; ++i)
Log(" ");
}
static void FmtPad(unsigned n)
{
for (unsigned i = 0; i < n; ++i)
Log(" ");
}
void MSA::FromSeq(const Seq &s)
{
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -