lexer.cs

来自「根据cs源码解析为codedom」· CS 代码 · 共 970 行 · 第 1/2 页
970 行
// <file>
//     <copyright see="prj:///doc/copyright.txt"/>
//     <license see="prj:///doc/license.txt"/>
//     <owner name="Andrea Paatz" email="andrea@icsharpcode.net"/>
//     <version>$Revision: 2639 $</version>
// </file>

using System;
using System.Globalization;
using System.IO;
using System.Text;

namespace ICSharpCode.NRefactory.Parser.CSharp
{
	internal sealed class Lexer : AbstractLexer
	{
		/// <summary>
		/// Creates a C# lexer that reads its input from <paramref name="reader"/>.
		/// The constructor does no work of its own; the reader is handed straight
		/// to the <c>AbstractLexer</c> base constructor.
		/// </summary>
		/// <param name="reader">Source text to tokenize; passed unchanged to the base class.</param>
		public Lexer(TextReader reader) : base(reader)
		{
		}
		
		/// <summary>
		/// Reads a preprocessing directive whose leading '#' has already been consumed
		/// and registers it with the special tracker. No token is produced.
		/// </summary>
		/// <remarks>
		/// C# allows whitespace between '#' and the directive name (for example
		/// "#  region Foo"), so blanks and tabs are skipped before the name is read.
		/// Without that step the directive would be reported as just "#" and the real
		/// name would end up inside the argument text.
		/// </remarks>
		void ReadPreProcessingDirective()
		{
			// Col has already moved past the '#', so step back one column for the start.
			Location start = new Location(Col - 1, Line);
			
			// Skip whitespace between '#' and the directive name, counting what was
			// skipped so that the end location still accounts for those characters.
			int skipped = 0;
			int peek;
			while ((peek = ReaderPeek()) == ' ' || peek == '\t') {
				ReaderRead();
				skipped++;
			}
			
			bool canBeKeyword; // required by ReadIdent; the value is not needed here
			string directive = ReadIdent('#', out canBeKeyword);
			string argument  = ReadToEndOfLine();
			
			// The end column is derived from the untrimmed text, one column per character read.
			Location end = new Location(start.X + skipped + directive.Length + argument.Length, start.Y);
			this.specialTracker.AddPreprocessingDirective(directive, argument.Trim(), start, end);
		}
		
		/// <summary>
		/// Scans forward from the current reader position and returns the next token.
		/// Blanks, tabs, line ends, comments and preprocessing directives are consumed
		/// on the way without producing a token.
		/// </summary>
		/// <returns>
		/// The next token, or an EOF token (with an empty string value) once the
		/// reader is exhausted.
		/// </returns>
		protected override Token Next()
		{
			// The very beginning of the document is treated as if a line end had just
			// been seen, so a line break found before any token counts as a blank line.
			bool sawLineEnd = Line == 1 && Col == 1;
			
			for (int code = ReaderRead(); code != -1; code = ReaderRead()) {
				Token found;
				
				if (code == ' ' || code == '\t') {
					// insignificant whitespace
					continue;
				} else if (code == '\r' || code == '\n') {
					if (sawLineEnd) {
						// a second line end without a token in between: blank line
						specialTracker.AddEndOfLine(new Location(Col, Line));
					}
					HandleLineEnd((char)code);
					sawLineEnd = true;
					continue;
				} else if (code == '/') {
					int lookahead = ReaderPeek();
					if (lookahead != '/' && lookahead != '*') {
						found = ReadOperator('/');
					} else {
						// "//" or "/*" starts a comment, which yields no token
						ReadComment();
						continue;
					}
				} else if (code == '#') {
					ReadPreProcessingDirective();
					continue;
				} else if (code == '"') {
					found = ReadString();
				} else if (code == '\'') {
					found = ReadChar();
				} else if (code == '@') {
					int afterAt = ReaderRead();
					if (afterAt == -1) {
						errors.Error(Line, Col, "EOF after @");
						continue;
					}
					
					int startX = Col - 1;
					int startY = Line;
					char c = (char)afterAt;
					if (c == '"') {
						// @"..." verbatim string
						found = ReadVerbatimString();
					} else if (Char.IsLetterOrDigit(c) || c == '_') {
						// @identifier: never a keyword; the token starts at the '@'
						bool ignoredKeywordFlag;
						found = new Token(Tokens.Identifier, startX - 1, startY, ReadIdent(c, out ignoredKeywordFlag));
					} else {
						errors.Error(startY, startX, String.Format("Unexpected char in Lexer.Next() : {0}", c));
						continue;
					}
				} else {
					char c = (char)code;
					if (Char.IsLetter(c) || c == '_' || c == '\\') {
						// Col already points past the first character; step back to its column.
						int startX = Col - 1;
						int startY = Line;
						bool keywordCandidate;
						string name = ReadIdent(c, out keywordCandidate);
						
						// Only identifiers that ReadIdent marks as possible keywords are looked up.
						int keywordId = keywordCandidate ? Keywords.GetToken(name) : -1;
						if (keywordId >= 0) {
							return new Token(keywordId, startX, startY);
						}
						return new Token(Tokens.Identifier, startX, startY, name);
					}
					
					if (Char.IsDigit(c)) {
						found = ReadDigit(c, Col - 1);
					} else {
						found = ReadOperator(c);
					}
				}
				
				// Error recovery: a null result means "nothing usable here", so keep
				// scanning from the next character.
				if (found != null) {
					return found;
				}
			}
			
			return new Token(Tokens.EOF, Col, Line, String.Empty);
		}
		
		// Identifiers are collected in a reusable, fixed-size char array instead of a
		// string builder. The original author's reasoning: the C# compiler limits the
		// length of identifiers anyway, and a plain array is faster.
		// MAX_IDENTIFIER_LENGTH is the capacity of that array, in chars.
		const int MAX_IDENTIFIER_LENGTH = 512;
		char[] identBuffer = new char[MAX_IDENTIFIER_LENGTH];
		
		/// <summary>
		/// Reads an identifier whose first character has already been consumed,
		/// collecting its characters in <c>identBuffer</c>.
		/// </summary>
		/// <param name="ch">The first character of the identifier. A backslash starts
		/// an escape sequence, which is decoded through ReadEscapeSequence.</param>
		/// <param name="canBeKeyword">Set to false as soon as the identifier contains
		/// an escape sequence, true otherwise.</param>
		/// <returns>The identifier text, at most MAX_IDENTIFIER_LENGTH chars long.</returns>
		string ReadIdent(char ch, out bool canBeKeyword)
		{
			int length = 0;
			canBeKeyword = true;
			
			for (;;) {
				if (ch == '\\') {
					int escapeKind = ReaderPeek();
					if (!(escapeKind == 'u' || escapeKind == 'U')) {
						errors.Error(Line, Col, "Identifiers can only contain unicode escape sequences");
					}
					canBeKeyword = false;
					
					string pair;
					ReadEscapeSequence(out ch, out pair);
					if (pair == null) {
						// the escape decoded to a single char, now held in ch
						if (!IsIdentifierPart(ch)) {
							errors.Error(Line, Col, "Unicode escape sequences in identifiers cannot be used to represent characters that are invalid in identifiers");
						}
					} else {
						if (!char.IsLetterOrDigit(pair, 0)) {
							errors.Error(Line, Col, "Unicode escape sequences in identifiers cannot be used to represent characters that are invalid in identifiers");
						}
						// Store every char of the pair except the last one here; the last
						// one goes through the regular append (and length check) below.
						int lastIndex = pair.Length - 1;
						for (int i = 0; i < lastIndex; i++) {
							if (length < MAX_IDENTIFIER_LENGTH) {
								identBuffer[length++] = pair[i];
							}
						}
						ch = pair[pair.Length - 1];
					}
				}
				
				if (length >= MAX_IDENTIFIER_LENGTH) {
					// Buffer full: report it once, then swallow the rest of the identifier.
					errors.Error(Line, Col, "Identifier too long");
					while (IsIdentifierPart(ReaderPeek())) {
						ReaderRead();
					}
					break;
				}
				identBuffer[length++] = ch;
				
				int lookahead = ReaderPeek();
				if (!IsIdentifierPart(lookahead) && lookahead != '\\') {
					break;
				}
				ch = (char)ReaderRead();
			}
			
			return new String(identBuffer, 0, length);
		}
		
		/// <summary>
		/// Reads a numeric literal (decimal or hexadecimal integer, or a real number)
		/// whose first character has already been consumed, and converts it to a
		/// typed value.
		/// </summary>
		/// <param name="ch">The first character of the literal: a digit, or '.' for a
		/// real literal that begins with the decimal point.</param>
		/// <param name="x">Column recorded as the start of the token; the line is
		/// taken from the current Line.</param>
		/// <returns>
		/// A Literal token whose value is a float, decimal, double, int, uint, long or
		/// ulong, depending on suffix and magnitude. If the text cannot be converted,
		/// an error is reported and the value is zero of the corresponding type.
		/// When a '.' after the number had to be consumed but turned out not to belong
		/// to it (as in "1.ToString()"), a Dot token is attached through the returned
		/// token's <c>next</c> field - on every path, including the error paths.
		/// </returns>
		Token ReadDigit(char ch, int x)
		{
			unchecked { // prevent exception when ReaderPeek() = -1 is cast to char
				int y = Line;
				sb.Length = 0;
				sb.Append(ch);
				string prefix = null;
				string suffix = null;
				
				bool ishex      = false;
				bool isunsigned = false;
				bool islong     = false;
				bool isfloat    = false;
				bool isdouble   = false;
				bool isdecimal  = false;
				
				char peek = (char)ReaderPeek();
				
				if (ch == '.')  {
					// literal of the form ".123"
					isdouble = true;
					
					while (Char.IsDigit((char)ReaderPeek())) { // read decimal digits beyond the dot
						sb.Append((char)ReaderRead());
					}
					peek = (char)ReaderPeek();
				} else if (ch == '0' && (peek == 'x' || peek == 'X')) {
					ReaderRead(); // skip 'x'
					sb.Length = 0; // Remove '0' from 0x prefix from the stringvalue
					while (IsHex((char)ReaderPeek())) {
						sb.Append((char)ReaderRead());
					}
					if (sb.Length == 0) {
						sb.Append('0'); // dummy value to prevent exception
						errors.Error(y, x, "Invalid hexadecimal integer literal");
					}
					ishex = true;
					prefix = "0x";
					peek = (char)ReaderPeek();
				} else {
					while (Char.IsDigit((char)ReaderPeek())) {
						sb.Append((char)ReaderRead());
					}
					peek = (char)ReaderPeek();
				}
				
				Token nextToken = null; // set if we accidentally read a 'dot' that is not part of the number
				if (peek == '.') { // read floating point number
					ReaderRead();
					peek = (char)ReaderPeek();
					if (!Char.IsDigit(peek)) {
						// The dot is already consumed, so it has to travel with the literal.
						nextToken = new Token(Tokens.Dot, Col - 1, Line);
						peek = '.'; // keeps the exponent/suffix checks below from matching
					} else {
						isdouble = true; // double is default
						if (ishex) {
							errors.Error(y, x, "No hexadecimal floating point values allowed");
						}
						sb.Append('.');
						
						while (Char.IsDigit((char)ReaderPeek())) { // read decimal digits beyond the dot
							sb.Append((char)ReaderRead());
						}
						peek = (char)ReaderPeek();
					}
				}
				
				if (peek == 'e' || peek == 'E') { // read exponent
					isdouble = true;
					sb.Append((char)ReaderRead());
					peek = (char)ReaderPeek();
					if (peek == '-' || peek == '+') {
						sb.Append((char)ReaderRead());
					}
					while (Char.IsDigit((char)ReaderPeek())) { // read exponent value
						sb.Append((char)ReaderRead());
					}
					peek = (char)ReaderPeek();
				}
				
				if (peek == 'f' || peek == 'F') { // float value
					ReaderRead();
					suffix = "f";
					isfloat = true;
				} else if (peek == 'd' || peek == 'D') { // double type suffix (obsolete, double is default)
					ReaderRead();
					suffix = "d";
					isdouble = true;
				} else if (peek == 'm' || peek == 'M') { // decimal value
					ReaderRead();
					suffix = "m";
					isdecimal = true;
				} else if (!isdouble) {
					// integer suffixes: u, l, ul, lu (either case)
					if (peek == 'u' || peek == 'U') {
						ReaderRead();
						suffix = "u";
						isunsigned = true;
						peek = (char)ReaderPeek();
					}
					
					if (peek == 'l' || peek == 'L') {
						ReaderRead();
						peek = (char)ReaderPeek();
						islong = true;
						if (!isunsigned && (peek == 'u' || peek == 'U')) {
							ReaderRead();
							suffix = "lu";
							isunsigned = true;
						} else {
							suffix = isunsigned ? "ul" : "l";
						}
					}
				}
				
				string digit       = sb.ToString();
				string stringValue = prefix + digit + suffix;
				
				// Every branch assigns 'token' and falls through to the single exit at
				// the bottom, so a pending Dot token is never lost.
				Token token;
				
				if (isfloat) {
					float num;
					if (!float.TryParse(digit, NumberStyles.Any, CultureInfo.InvariantCulture, out num)) {
						errors.Error(y, x, String.Format("Can't parse float {0}", digit));
						num = 0f;
					}
					token = new Token(Tokens.Literal, x, y, stringValue, num);
				} else if (isdecimal) {
					decimal num;
					if (!decimal.TryParse(digit, NumberStyles.Any, CultureInfo.InvariantCulture, out num)) {
						errors.Error(y, x, String.Format("Can't parse decimal {0}", digit));
						num = 0m;
					}
					token = new Token(Tokens.Literal, x, y, stringValue, num);
				} else if (isdouble) {
					double num;
					if (!double.TryParse(digit, NumberStyles.Any, CultureInfo.InvariantCulture, out num)) {
						errors.Error(y, x, String.Format("Can't parse double {0}", digit));
						num = 0d;
					}
					token = new Token(Tokens.Literal, x, y, stringValue, num);
				} else {
					// Integer literal: parse into the widest unsigned type first, then
					// pick the narrowest type that the suffix and the magnitude allow.
					ulong result;
					NumberStyles style = ishex ? NumberStyles.HexNumber : NumberStyles.Integer;
					if (!ulong.TryParse(digit, style, CultureInfo.InvariantCulture, out result)) {
						string format = ishex ? "Can't parse hexadecimal constant {0}" : "Can't parse integral constant {0}";
						errors.Error(y, x, String.Format(format, digit));
						token = new Token(Tokens.Literal, x, y, stringValue, 0);
					} else {
						if (result > long.MaxValue) {
							islong     = true;
							isunsigned = true;
						} else if (result > uint.MaxValue) {
							islong = true;
						} else if (result > int.MaxValue) {
							isunsigned = true;
						}
						
						// The range checks above guarantee that 'result' fits the chosen
						// type, so a cast is enough; re-parsing the text cannot fail here.
						if (islong) {
							if (isunsigned) {
								token = new Token(Tokens.Literal, x, y, stringValue, result);
							} else {
								token = new Token(Tokens.Literal, x, y, stringValue, (long)result);
							}
						} else {
							if (isunsigned) {
								token = new Token(Tokens.Literal, x, y, stringValue, (uint)result);
							} else {
								token = new Token(Tokens.Literal, x, y, stringValue, (int)result);
							}
						}
					}
				}
				
				token.next = nextToken;
				return token;
			}
		}
		
		/// <summary>
		/// Reads a regular (non-verbatim) string literal whose opening quote has
		/// already been consumed.
		/// </summary>
		/// <returns>
		/// A Literal token that carries the literal as written in the source
		/// (quotes and escape sequences included) and its decoded value.
		/// A line end or the end of input before the closing quote is reported as an
		/// error; the token then contains whatever was read up to that point.
		/// </returns>
		Token ReadString()
		{
			int x = Col - 1; // column of the opening quote
			int y = Line;
			
			sb.Length = 0;
			originalValue.Length = 0;
			originalValue.Append('"');
			int nextChar;
			while ((nextChar = ReaderRead()) != -1) {
				char ch = (char)nextChar;
				
				if (ch == '"') {
					originalValue.Append('"');
					break;
				}
				
				if (ch == '\\') {
					originalValue.Append('\\');
					string surrogatePair;
					originalValue.Append(ReadEscapeSequence(out ch, out surrogatePair));
					if (surrogatePair != null) {
						sb.Append(surrogatePair);
					} else {
						sb.Append(ch);
					}
				} else if (HandleLineEnd(ch)) {
					// Go through HandleLineEnd - the same helper Next() and
					// ReadVerbatimString() use for line ends - instead of comparing with
					// '\n' directly, so that position tracking stays correct for the
					// tokens that follow the broken literal.
					errors.Error(y, x, "No new line is allowed inside a string literal");
					break;
				} else {
					originalValue.Append(ch);
					sb.Append(ch);
				}
			}
			
			// Only a real end of input is reported as EOF; a line break inside the
			// literal has already produced its own error above.
			if (nextChar == -1) {
				errors.Error(y, x, "End of file reached inside string literal");
			}
			
			return new Token(Tokens.Literal, x, y, originalValue.ToString(), sb.ToString());
		}
		
		/// <summary>
		/// Reads a verbatim string literal (@"...") whose leading '@' and opening
		/// quote have already been consumed.
		/// </summary>
		/// <returns>
		/// A Literal token that carries the literal as written and its decoded value.
		/// A doubled quote stands for a single quote character in the value, and every
		/// line end is stored as "\r\n" in both texts. Reaching the end of input before
		/// the closing quote is reported as an error.
		/// </returns>
		Token ReadVerbatimString()
		{
			int startX = Col - 2; // @ and " already read
			int startY = Line;
			
			sb.Length            = 0;
			originalValue.Length = 0;
			originalValue.Append("@\"");
			
			for (;;) {
				int code = ReaderRead();
				if (code == -1) {
					errors.Error(startY, startX, "End of file reached inside verbatim string literal");
					break;
				}
				
				char c = (char)code;
				if (c != '"') {
					if (HandleLineEnd(c)) {
						sb.Append("\r\n");
						originalValue.Append("\r\n");
					} else {
						sb.Append(c);
						originalValue.Append(c);
					}
					continue;
				}
				
				if (ReaderPeek() != '"') {
					// a single quote closes the literal
					originalValue.Append('"');
					break;
				}
				
				// a doubled quote is an escaped quote character
				originalValue.Append("\"\"");
				sb.Append('"');
				ReaderRead();
			}
			
			return new Token(Tokens.Literal, startX, startY, originalValue.ToString(), sb.ToString());
		}
		
		// Scratch buffer with room for 12 chars. It is not referenced in the part of
		// the file visible here; presumably ReadEscapeSequence uses it to collect the
		// raw text of an escape sequence - TODO confirm.
		char[] escapeSequenceBuffer = new char[12];
		
		/// <summary>
		/// reads an escape sequence
		/// </summary>
		/// <param name="ch">The character represented by the escape sequence,
		/// or '\0' if there was an error or the escape sequence represents a character that
		/// can be represented only by a surrogate pair</param>
		/// <param name="surrogatePair">Null, except when the character represented
		/// by the escape sequence can only be represented by a surrogate pair (then the string
		/// contains the surrogate pair)</param>
		/// <returns>The escape sequence</returns>
		string ReadEscapeSequence(out char ch, out string surrogatePair)
		{
			surrogatePair = null;
			
			int nextChar = ReaderRead();
lexer.cs - 源码说明

本页面展示了「根据cs源码解析为codedom」中的 lexer.cs 源码文件，采用 CS 编程语言编写，共 970 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与codedom相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?