📄 lexer.cs
字号:
// <file>
// <copyright see="prj:///doc/copyright.txt"/>
// <license see="prj:///doc/license.txt"/>
// <owner name="Andrea Paatz" email="andrea@icsharpcode.net"/>
// <version>$Revision: 2639 $</version>
// </file>
using System;
using System.Globalization;
using System.IO;
using System.Text;
namespace ICSharpCode.NRefactory.Parser.CSharp
{
internal sealed class Lexer : AbstractLexer
{
/// <summary>
/// Creates a C# lexer over <paramref name="reader"/>; the reader is handed
/// unchanged to the <see cref="AbstractLexer"/> base constructor.
/// </summary>
/// <param name="reader">Source of the characters to tokenize.</param>
public Lexer(TextReader reader)
    : base(reader)
{
}
/// <summary>
/// Reads a preprocessing directive whose leading '#' has already been consumed
/// and registers it with the special tracker instead of producing a token.
/// </summary>
void ReadPreProcessingDirective()
{
    // The '#' was read by the caller, so the directive starts one column back.
    Location begin = new Location(Col - 1, Line);

    // ReadIdent stores its first character, so the name includes the '#'.
    bool unusedKeywordFlag;
    string name = ReadIdent('#', out unusedKeywordFlag);
    string rest = ReadToEndOfLine();

    // The end column is measured with the untrimmed argument text.
    int endColumn = begin.X + name.Length + rest.Length;
    Location end = new Location(endColumn, begin.Y);

    this.specialTracker.AddPreprocessingDirective(name, rest.Trim(), begin, end);
}
/// <summary>
/// Scans forward to the next token. Whitespace, line ends, comments and
/// preprocessing directives are consumed along the way without producing a token.
/// </summary>
/// <returns>
/// The next token, or a <c>Tokens.EOF</c> token once the reader is exhausted.
/// </returns>
protected override Token Next()
{
    // The very start of the document counts as "just after a line end", so a
    // line end read before any token is reported as a blank line.
    bool sawLineEnd = Line == 1 && Col == 1;

    for (int current = ReaderRead(); current != -1; current = ReaderRead()) {
        if (current == ' ' || current == '\t') {
            continue;
        }

        if (current == '\r' || current == '\n') {
            if (sawLineEnd) {
                // second line end before getting to a token -> here was a blank line
                specialTracker.AddEndOfLine(new Location(Col, Line));
            }
            HandleLineEnd((char)current);
            sawLineEnd = true;
            continue;
        }

        if (current == '#') {
            ReadPreProcessingDirective();
            continue;
        }

        Token result;
        if (current == '/') {
            int lookahead = ReaderPeek();
            if (lookahead == '/' || lookahead == '*') {
                ReadComment();
                continue;
            }
            result = ReadOperator('/');
        } else if (current == '"') {
            result = ReadString();
        } else if (current == '\'') {
            result = ReadChar();
        } else if (current == '@') {
            int afterAt = ReaderRead();
            if (afterAt == -1) {
                errors.Error(Line, Col, String.Format("EOF after @"));
                continue;
            }

            // Column of the character following '@'; the '@' itself is one further back.
            int followingColumn = Col - 1;
            int followingLine = Line;
            char following = (char)afterAt;

            if (following == '"') {
                result = ReadVerbatimString();
            } else if (Char.IsLetterOrDigit(following) || following == '_') {
                // @-prefixed identifier: always an identifier token, no keyword lookup
                bool ignoredKeywordFlag;
                result = new Token(Tokens.Identifier, followingColumn - 1, followingLine, ReadIdent(following, out ignoredKeywordFlag));
            } else {
                errors.Error(followingLine, followingColumn, String.Format("Unexpected char in Lexer.Next() : {0}", following));
                continue;
            }
        } else {
            char first = (char)current;
            if (Char.IsLetter(first) || first == '_' || first == '\\') {
                // Col was already advanced past the first character
                int startColumn = Col - 1;
                int startLine = Line;
                bool mayBeKeyword;
                string text = ReadIdent(first, out mayBeKeyword);
                if (mayBeKeyword) {
                    int keywordId = Keywords.GetToken(text);
                    if (keywordId >= 0) {
                        return new Token(keywordId, startColumn, startLine);
                    }
                }
                return new Token(Tokens.Identifier, startColumn, startLine, text);
            }

            if (Char.IsDigit(first)) {
                result = ReadDigit(first, Col - 1);
            } else {
                result = ReadOperator(first);
            }
        }

        // error recovery: a null token means "skip and continue with the next char"
        if (result != null) {
            return result;
        }
    }

    return new Token(Tokens.EOF, Col, Line, String.Empty);
}
// Identifiers are collected in a fixed-size char array rather than a string builder
// (original note: the C# compiler has a fixed identifier length, and this is faster).
const int MAX_IDENTIFIER_LENGTH = 512;
char[] identBuffer = new char[MAX_IDENTIFIER_LENGTH];

/// <summary>
/// Reads an identifier whose first character has already been consumed.
/// Unicode escape sequences inside the identifier are decoded.
/// </summary>
/// <param name="ch">The already-consumed first character of the identifier.</param>
/// <param name="canBeKeyword">
/// Set to false as soon as an escape sequence is encountered, true otherwise.
/// </param>
/// <returns>
/// The identifier text, cut off at <c>MAX_IDENTIFIER_LENGTH</c> characters.
/// </returns>
string ReadIdent(char ch, out bool canBeKeyword)
{
    int length = 0;
    canBeKeyword = true;

    for (;;) {
        if (ch == '\\') {
            int escapeKind = ReaderPeek();
            if (!(escapeKind == 'u' || escapeKind == 'U')) {
                errors.Error(Line, Col, "Identifiers can only contain unicode escape sequences");
            }
            canBeKeyword = false;

            string pair;
            ReadEscapeSequence(out ch, out pair);
            if (pair == null) {
                if (!IsIdentifierPart(ch)) {
                    errors.Error(Line, Col, "Unicode escape sequences in identifiers cannot be used to represent characters that are invalid in identifiers");
                }
            } else {
                if (!char.IsLetterOrDigit(pair, 0)) {
                    errors.Error(Line, Col, "Unicode escape sequences in identifiers cannot be used to represent characters that are invalid in identifiers");
                }
                // Store all but the last char here; the last one goes through
                // the common "append ch" path below.
                int lastIndex = pair.Length - 1;
                for (int i = 0; i < lastIndex; i++) {
                    if (length < MAX_IDENTIFIER_LENGTH) {
                        identBuffer[length++] = pair[i];
                    }
                }
                ch = pair[lastIndex];
            }
        }

        if (length >= MAX_IDENTIFIER_LENGTH) {
            errors.Error(Line, Col, String.Format("Identifier too long"));
            // swallow the remainder of the over-long identifier
            while (IsIdentifierPart(ReaderPeek())) {
                ReaderRead();
            }
            break;
        }
        identBuffer[length++] = ch;

        int lookahead = ReaderPeek();
        if (!IsIdentifierPart(lookahead) && lookahead != '\\') {
            break;
        }
        ch = (char)ReaderRead();
    }

    return new String(identBuffer, 0, length);
}
/// <summary>
/// Reads a numeric literal whose first character has already been consumed.
/// Handles decimal and hexadecimal ("0x") integers, literals with a fractional
/// part and/or exponent, and the type suffixes f, d, m, u, l, ul and lu.
/// </summary>
/// <param name="ch">
/// The already-consumed first character: a digit, or '.' for a literal that
/// begins with the decimal point.
/// </param>
/// <param name="x">Column reported as the start of the token.</param>
/// <returns>
/// A literal token carrying the parsed value (0 of the matching kind if parsing
/// failed and an error was reported). If a '.' that does not belong to the
/// number had to be consumed, the corresponding Dot token is attached via
/// <c>next</c> on every return path.
/// </returns>
Token ReadDigit(char ch, int x)
{
    unchecked { // prevent exception when ReaderPeek() = -1 is cast to char
        int y = Line;
        sb.Length = 0;
        sb.Append(ch);
        string prefix = null;
        string suffix = null;
        bool ishex = false;
        bool isunsigned = false;
        bool islong = false;
        bool isfloat = false;
        bool isdouble = false;
        bool isdecimal = false;

        char peek = (char)ReaderPeek();

        if (ch == '.') {
            isdouble = true;
            while (Char.IsDigit((char)ReaderPeek())) { // read decimal digits beyond the dot
                sb.Append((char)ReaderRead());
            }
            peek = (char)ReaderPeek();
        } else if (ch == '0' && (peek == 'x' || peek == 'X')) {
            ReaderRead(); // skip 'x'
            sb.Length = 0; // Remove '0' from 0x prefix from the stringvalue
            while (IsHex((char)ReaderPeek())) {
                sb.Append((char)ReaderRead());
            }
            if (sb.Length == 0) {
                sb.Append('0'); // dummy value to prevent exception
                errors.Error(y, x, "Invalid hexadecimal integer literal");
            }
            ishex = true;
            prefix = "0x";
            peek = (char)ReaderPeek();
        } else {
            while (Char.IsDigit((char)ReaderPeek())) {
                sb.Append((char)ReaderRead());
            }
            peek = (char)ReaderPeek();
        }

        Token nextToken = null; // set if we accidentally read a 'dot' that is not part of the number
        if (peek == '.') { // read floating point number
            ReaderRead();
            peek = (char)ReaderPeek();
            if (!Char.IsDigit(peek)) {
                // The dot is not followed by a digit, so it is a separate token.
                nextToken = new Token(Tokens.Dot, Col - 1, Line);
                peek = '.';
            } else {
                isdouble = true; // double is default
                if (ishex) {
                    errors.Error(y, x, "No hexadecimal floating point values allowed");
                }
                sb.Append('.');
                while (Char.IsDigit((char)ReaderPeek())) { // read decimal digits beyond the dot
                    sb.Append((char)ReaderRead());
                }
                peek = (char)ReaderPeek();
            }
        }

        if (peek == 'e' || peek == 'E') { // read exponent
            isdouble = true;
            sb.Append((char)ReaderRead());
            peek = (char)ReaderPeek();
            if (peek == '-' || peek == '+') {
                sb.Append((char)ReaderRead());
            }
            while (Char.IsDigit((char)ReaderPeek())) { // read exponent value
                sb.Append((char)ReaderRead());
            }
            peek = (char)ReaderPeek();
        }

        if (peek == 'f' || peek == 'F') { // float value
            ReaderRead();
            suffix = "f";
            isfloat = true;
        } else if (peek == 'd' || peek == 'D') { // double type suffix (obsolete, double is default)
            ReaderRead();
            suffix = "d";
            isdouble = true;
        } else if (peek == 'm' || peek == 'M') { // decimal value
            ReaderRead();
            suffix = "m";
            isdecimal = true;
        } else if (!isdouble) {
            if (peek == 'u' || peek == 'U') {
                ReaderRead();
                suffix = "u";
                isunsigned = true;
                peek = (char)ReaderPeek();
            }
            if (peek == 'l' || peek == 'L') {
                ReaderRead();
                peek = (char)ReaderPeek();
                islong = true;
                if (!isunsigned && (peek == 'u' || peek == 'U')) {
                    ReaderRead();
                    suffix = "lu";
                    isunsigned = true;
                } else {
                    suffix = isunsigned ? "ul" : "l";
                }
            }
        }

        string digit = sb.ToString();
        string stringValue = prefix + digit + suffix;

        // Every branch below only assigns 'token'; the pending Dot token is
        // attached once at the end so that no return path can lose it.
        Token token;
        if (isfloat) {
            float num;
            if (!float.TryParse(digit, NumberStyles.Any, CultureInfo.InvariantCulture, out num)) {
                errors.Error(y, x, String.Format("Can't parse float {0}", digit));
                num = 0f;
            }
            token = new Token(Tokens.Literal, x, y, stringValue, num);
        } else if (isdecimal) {
            decimal num;
            if (!decimal.TryParse(digit, NumberStyles.Any, CultureInfo.InvariantCulture, out num)) {
                errors.Error(y, x, String.Format("Can't parse decimal {0}", digit));
                num = 0m;
            }
            token = new Token(Tokens.Literal, x, y, stringValue, num);
        } else if (isdouble) {
            double num;
            if (!double.TryParse(digit, NumberStyles.Any, CultureInfo.InvariantCulture, out num)) {
                errors.Error(y, x, String.Format("Can't parse double {0}", digit));
                num = 0d;
            }
            token = new Token(Tokens.Literal, x, y, stringValue, num);
        } else {
            // Integral literal: parse once as ulong (culture-invariant), then pick
            // the narrowest type that the suffix and the magnitude allow.
            ulong result;
            NumberStyles integerStyle = ishex ? NumberStyles.HexNumber : NumberStyles.Integer;
            if (!ulong.TryParse(digit, integerStyle, CultureInfo.InvariantCulture, out result)) {
                if (ishex) {
                    errors.Error(y, x, String.Format("Can't parse hexadecimal constant {0}", digit));
                } else {
                    errors.Error(y, x, String.Format("Can't parse integral constant {0}", digit));
                }
                token = new Token(Tokens.Literal, x, y, stringValue, 0);
            } else {
                if (result > long.MaxValue) {
                    islong = true;
                    isunsigned = true;
                } else if (result > uint.MaxValue) {
                    islong = true;
                } else if (result > int.MaxValue) {
                    isunsigned = true;
                }

                // After the range check above the value is guaranteed to fit the
                // selected type, so a cast replaces a second round of parsing.
                if (islong) {
                    if (isunsigned) {
                        token = new Token(Tokens.Literal, x, y, stringValue, result);
                    } else {
                        token = new Token(Tokens.Literal, x, y, stringValue, (long)result);
                    }
                } else {
                    if (isunsigned) {
                        token = new Token(Tokens.Literal, x, y, stringValue, (uint)result);
                    } else {
                        token = new Token(Tokens.Literal, x, y, stringValue, (int)result);
                    }
                }
            }
        }

        token.next = nextToken;
        return token;
    }
}
/// <summary>
/// Reads a regular (non-verbatim) string literal whose opening quote has
/// already been consumed.
/// </summary>
/// <returns>
/// A literal token holding the source text of the literal (including quotes and
/// escape sequences as written) and its decoded string value. On an
/// unterminated literal an error is reported and the token contains what was
/// read so far.
/// </returns>
Token ReadString()
{
    int x = Col - 1; // the opening quote was already read
    int y = Line;

    sb.Length = 0;
    originalValue.Length = 0;
    originalValue.Append('"');

    int nextChar;
    while ((nextChar = ReaderRead()) != -1) {
        char ch = (char)nextChar;

        if (ch == '"') {
            originalValue.Append('"');
            break;
        }

        if (ch == '\\') {
            originalValue.Append('\\');
            string surrogatePair;
            originalValue.Append(ReadEscapeSequence(out ch, out surrogatePair));
            if (surrogatePair != null) {
                sb.Append(surrogatePair);
            } else {
                sb.Append(ch);
            }
        } else if (HandleLineEnd(ch)) {
            // Going through HandleLineEnd (as Next() and ReadVerbatimString do)
            // presumably keeps the line counter correct after this error and
            // also treats '\r' as a line end - HandleLineEnd is defined
            // outside this file section.
            errors.Error(y, x, "No new line is allowed inside a string literal");
            break;
        } else {
            originalValue.Append(ch);
            sb.Append(ch);
        }
    }

    // Only report EOF when the reader really ran dry; a line break inside the
    // literal has already been reported above with its own message.
    if (nextChar == -1) {
        errors.Error(y, x, "End of file reached inside string literal");
    }

    return new Token(Tokens.Literal, x, y, originalValue.ToString(), sb.ToString());
}
/// <summary>
/// Reads a verbatim string literal; the leading '@' and opening quote have
/// already been consumed. A doubled quote stands for a single quote character,
/// and line ends are stored as "\r\n".
/// </summary>
/// <returns>
/// A literal token with the source text of the literal and its decoded value.
/// </returns>
Token ReadVerbatimString()
{
    int startColumn = Col - 2; // @ and " already read
    int startLine = Line;

    sb.Length = 0;
    originalValue.Length = 0;
    originalValue.Append("@\"");

    bool terminated = false;
    for (int read = ReaderRead(); read != -1; read = ReaderRead()) {
        char current = (char)read;

        if (current != '"') {
            if (HandleLineEnd(current)) {
                sb.Append("\r\n");
                originalValue.Append("\r\n");
            } else {
                sb.Append(current);
                originalValue.Append(current);
            }
            continue;
        }

        if (ReaderPeek() == '"') {
            // doubled quote -> one literal quote character
            originalValue.Append("\"\"");
            sb.Append('"');
            ReaderRead();
            continue;
        }

        // single quote -> end of the literal
        originalValue.Append('"');
        terminated = true;
        break;
    }

    if (!terminated) {
        errors.Error(startLine, startColumn, String.Format("End of file reached inside verbatim string literal"));
    }

    return new Token(Tokens.Literal, startColumn, startLine, originalValue.ToString(), sb.ToString());
}
char[] escapeSequenceBuffer = new char[12];
/// <summary>
/// reads an escape sequence
/// </summary>
/// <param name="ch">The character represented by the escape sequence,
/// or '\0' if there was an error or the escape sequence represents a character that
/// can be represented only by a surrogate pair</param>
/// <param name="surrogatePair">Null, except when the character represented
/// by the escape sequence can only be represented by a surrogate pair (then the string
/// contains the surrogate pair)</param>
/// <returns>The escape sequence</returns>
string ReadEscapeSequence(out char ch, out string surrogatePair)
{
surrogatePair = null;
int nextChar = ReaderRead();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -