📄 phyfromfile.cpp
字号:
#include "muscle.h"
#include "tree.h"
#include "textfile.h"
#define TRACE 0
// Tokens in Newick files are:
// ( ) : , ;
// string
// 'string'
// "string"
// [ comment ]
//
// We can't safely distinguish between identifiers and floating point
// numbers at the lexical level (because identifiers may be numeric,
// or start with digits), so both edge lengths and identifiers are
// returned as strings.
const char *Tree::NTTStr(NEWICK_TOKEN_TYPE NTT) const
{
switch (NTT)
{
#define c(x) case NTT_##x: return #x;
c(Unknown)
c(Lparen)
c(Rparen)
c(Colon)
c(Comma)
c(Semicolon)
c(String)
c(SingleQuotedString)
c(DoubleQuotedString)
c(Comment)
#undef c
}
return "??";
}
NEWICK_TOKEN_TYPE Tree::GetToken(TextFile &File, char szToken[], unsigned uBytes) const
{
// Skip leading white space
File.SkipWhite();
char c;
File.GetCharX(c);
// In case a single-character token
szToken[0] = c;
szToken[1] = 0;
unsigned uBytesCopied = 0;
NEWICK_TOKEN_TYPE TT;
switch (c)
{
case '(':
return NTT_Lparen;
case ')':
return NTT_Rparen;
case ':':
return NTT_Colon;
case ';':
return NTT_Semicolon;
case ',':
return NTT_Comma;
case '\'':
TT = NTT_SingleQuotedString;
File.GetCharX(c);
break;
case '"':
TT = NTT_DoubleQuotedString;
File.GetCharX(c);
break;
case '[':
TT = NTT_Comment;
break;
default:
TT = NTT_String;
break;
}
for (;;)
{
if (TT != NTT_Comment)
{
if (uBytesCopied < uBytes - 2)
{
szToken[uBytesCopied++] = c;
szToken[uBytesCopied] = 0;
}
else
Quit("Tree::GetToken: input buffer too small, token so far='%s'", szToken);
}
bool bEof = File.GetChar(c);
if (bEof)
return TT;
switch (TT)
{
case NTT_String:
if (0 != strchr("():;,", c))
{
File.PushBack(c);
return NTT_String;
}
if (isspace(c))
return NTT_String;
break;
case NTT_SingleQuotedString:
if ('\'' == c)
return NTT_String;
break;
case NTT_DoubleQuotedString:
if ('"' == c)
return NTT_String;
break;
case NTT_Comment:
if (']' == c)
return GetToken(File, szToken, uBytes);
break;
default:
Quit("Tree::GetToken, invalid TT=%u", TT);
}
}
}
// NOTE: this hack must come after definition of Tree::GetToken.
#if TRACE
#define GetToken GetTokenVerbose
#endif
void Tree::FromFile(TextFile &File)
{
// Assume rooted.
// If we discover that it is unrooted, will convert on the fly.
CreateRooted();
double dEdgeLength;
bool bEdgeLength = GetGroupFromFile(File, 0, &dEdgeLength);
// Next token should be either ';' for rooted tree or ',' for unrooted.
char szToken[16];
NEWICK_TOKEN_TYPE NTT = GetToken(File, szToken, sizeof(szToken));
// If rooted, all done.
if (NTT_Semicolon == NTT)
{
if (bEdgeLength)
Log(" *** Warning *** edge length on root group in Newick file %s\n",
File.GetFileName());
Validate();
return;
}
if (NTT_Comma != NTT)
Quit("Tree::FromFile, expected ';' or ',', got '%s'", szToken);
const unsigned uThirdNode = UnrootFromFile();
bEdgeLength = GetGroupFromFile(File, uThirdNode, &dEdgeLength);
if (bEdgeLength)
SetEdgeLength(0, uThirdNode, dEdgeLength);
Validate();
}
// Return true if edge length for this group.
bool Tree::GetGroupFromFile(TextFile &File, unsigned uNodeIndex,
double *ptrdEdgeLength)
{
char szToken[1024];
NEWICK_TOKEN_TYPE NTT = GetToken(File, szToken, sizeof(szToken));
// Group is either leaf name or (left, right).
if (NTT_String == NTT)
{
SetLeafName(uNodeIndex, szToken);
#if TRACE
Log("Group is leaf '%s'\n", szToken);
#endif
}
else if (NTT_Lparen == NTT)
{
const unsigned uLeft = AppendBranch(uNodeIndex);
const unsigned uRight = uLeft + 1;
// Left sub-group...
#if TRACE
Log("Got '(', group is compound, expect left sub-group\n");
#endif
double dEdgeLength;
bool bLeftLength = GetGroupFromFile(File, uLeft, &dEdgeLength);
#if TRACE
if (bLeftLength)
Log("Edge length for left sub-group: %.3g\n", dEdgeLength);
else
Log("No edge length for left sub-group\n");
#endif
if (bLeftLength)
SetEdgeLength(uNodeIndex, uLeft, dEdgeLength);
// ... then comma ...
#if TRACE
Log("Expect comma\n");
#endif
NTT = GetToken(File, szToken, sizeof(szToken));
if (NTT_Comma != NTT)
Quit("Tree::GetGroupFromFile, expected ',', got '%s'", szToken);
// ...then right sub-group...
#if TRACE
Log("Expect right sub-group\n");
#endif
bool bRightLength = GetGroupFromFile(File, uRight, &dEdgeLength);
if (bRightLength)
SetEdgeLength(uNodeIndex, uRight, dEdgeLength);
#if TRACE
if (bRightLength)
Log("Edge length for right sub-group: %.3g\n", dEdgeLength);
else
Log("No edge length for right sub-group\n");
#endif
// ... then closing parenthesis.
#if TRACE
Log("Expect closing parenthesis (or comma if > 2-ary)\n");
#endif
NTT = GetToken(File, szToken, sizeof(szToken));
if (NTT_Rparen == NTT)
;
else if (NTT_Comma == NTT)
{
File.PushBack(',');
return false;
}
else
Quit("Tree::GetGroupFromFile, expected ')' or ',', got '%s'", szToken);
}
else
Quit("Tree::GetGroupFromFile, expected '(' or leaf name, got '%s'",
szToken);
// Group may optionally be followed by edge length.
bool bEof = File.SkipWhiteX();
if (bEof)
return false;
char c;
File.GetCharX(c);
#if TRACE
Log("Character following group, could be colon, is '%c'\n", c);
#endif
if (':' == c)
{
NTT = GetToken(File, szToken, sizeof(szToken));
if (NTT_String != NTT)
Quit("Tree::GetGroupFromFile, expected edge length, got '%s'", szToken);
*ptrdEdgeLength = atof(szToken);
return true;
}
File.PushBack(c);
return false;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -