lexer.cs

来自「全功能c#编译器」· CS 代码 · 共 767 行 · 第 1/2 页
767 行
// <file>
//     <copyright see="prj:///doc/copyright.txt"/>
//     <license see="prj:///doc/license.txt"/>
//     <owner name="Andrea Paatz" email="andrea@icsharpcode.net"/>
//     <version value="$version"/>
// </file>

using System;
using System.IO;
using System.Collections;
using System.Drawing;
using System.Diagnostics;
using System.Globalization;
using System.Text;
using ICSharpCode.CsVbRefactory.Parser;

namespace ICSharpCode.CsVbRefactory.Parser.CSharp
{
	public class Lexer : AbstractLexer
	{
		/// <summary>
		/// Creates a C# lexer for the given reader. The reader is handed straight to the
		/// <c>AbstractLexer</c> base constructor; no additional setup is done here.
		/// </summary>
		/// <param name="reader">Text reader passed on to the base class.</param>
		public Lexer(TextReader reader) : base(reader)
		{
		}
		
		/// <summary>
		/// Scans the input for the next token. Whitespace, comments and preprocessor
		/// directives are consumed and skipped.
		/// </summary>
		/// <returns>
		/// A keyword, identifier, literal or operator token, or a <c>Tokens.EOF</c> token
		/// once the reader has no more characters.
		/// </returns>
		protected override Token Next()
		{
			int nextChar;
			while ((nextChar = reader.Read()) != -1) {
				char ch = (char)nextChar;
				// Every character consumed by this loop advances the column exactly once, here.
				++col;
				
				if (Char.IsWhiteSpace(ch)) {
					// The column was already advanced above; incrementing it a second time
					// made each whitespace character count as two columns.
					HandleLineEnd(ch);
					continue;
				}
				
				if (Char.IsLetter(ch) || ch == '_') {
					int x = col;
					int y = line;
					string s = ReadIdent(ch);
					int keyWordToken = Keywords.GetToken(s);
					if (keyWordToken >= 0) {
						return new Token(keyWordToken, x, y);
					}
					return new Token(Tokens.Identifier, x, y, s);
				}
				
				if (Char.IsDigit(ch)) {
					return ReadDigit(ch, col);
				}
				
				switch (ch) {
					case '/':
						int peek = reader.Peek();
						if (peek == '/' || peek == '*') {
							ReadComment();
							continue;
						}
						// a lone '/' is handled as an operator below
						break;
					case '#':
						// Preprocessor directives are read to the end of the line and dropped;
						// the values are only kept for the disabled tracker call below.
						Point start = new Point(col, line);
						string directive = ReadIdent('#');
						string argument  = ReadToEOL();
//						this.specialTracker.AddPreProcessingDirective(directive, argument, start, new Point(start.X + directive.Length + argument.Length, start.Y));
						continue;
					case '"':
						return ReadString();
					case '\'':
						return ReadChar();
					case '@':
						// '@' introduces either a verbatim string (@"...") or a verbatim identifier (@name).
						int next = reader.Read();
						++col;
						if (next == -1) {
							errors.Error(line, col, "EOF after @");
						} else {
							int x = col;
							int y = line;
							ch = (char)next;
							if (ch == '"') {
								return ReadVerbatimString();
							}
							// '_' may start an identifier too (e.g. @_value), matching the
							// plain identifier check above.
							if (Char.IsLetterOrDigit(ch) || ch == '_') {
								return new Token(Tokens.Identifier, x, y, ReadIdent(ch));
							}
							errors.Error(y, x, String.Format("Unexpected char in Lexer.Next() : {0}", ch));
						}
						// fall out to operator handling with the current ch
						break;
				}
				
				Token token = ReadOperator(ch);
				
				// Error recovery: when no operator could be read, keep scanning with the next
				// character. Looping instead of calling Next() recursively avoids growing the
				// call stack by one frame per unrecognised character.
				if (token == null) {
					continue;
				}
				return token;
			}
			
			return new Token(Tokens.EOF, col, line, String.Empty);
		}
		
		// The C# compiler limits the length of identifiers, so a fixed-size char array is
		// used to collect them; this is also faster than using a StringBuilder.
		const int MAX_IDENTIFIER_LENGTH = 512;
		char[] identBuffer = new char[MAX_IDENTIFIER_LENGTH];
		
		/// <summary>
		/// Reads the remainder of an identifier whose first character is <paramref name="ch"/>.
		/// Letters, digits and underscores are consumed from the reader, and the column is
		/// advanced for each of them.
		/// </summary>
		/// <param name="ch">The already consumed first character of the identifier.</param>
		/// <returns>
		/// The identifier text. At most MAX_IDENTIFIER_LENGTH characters are kept; when the
		/// identifier is longer, an error is reported once and the excess characters are
		/// consumed but discarded.
		/// </returns>
		string ReadIdent(char ch)
		{
			identBuffer[0] = ch;
			int length      = 1;
			bool overflowed = false;
			
			for (;;) {
				int lookahead = reader.Peek();
				if (lookahead == -1) {
					break;
				}
				ch = (char)lookahead;
				if (!Char.IsLetterOrDigit(ch) && ch != '_') {
					break;
				}
				
				// the character belongs to the identifier: consume it
				reader.Read();
				++col;
				
				if (overflowed) {
					// buffer is full, the rest of the identifier is only skipped
					continue;
				}
				if (length < MAX_IDENTIFIER_LENGTH) {
					identBuffer[length++] = ch;
				} else {
					errors.Error(line, col, String.Format("Identifier too long"));
					overflowed = true;
				}
			}
			return new String(identBuffer, 0, length);
		}
		
		/// <summary>
		/// Reads a numeric literal (decimal or hexadecimal integer, or a real number with
		/// optional fraction, exponent and type suffix) whose first character has already
		/// been consumed.
		/// </summary>
		/// <param name="ch">
		/// First character of the literal: a digit, or '.' for literals such as <c>.5</c>.
		/// It has been read by the caller; Next() has also already counted it in the column.
		/// </param>
		/// <param name="x">Column stored in the returned token.</param>
		/// <returns>
		/// A <c>Tokens.Literal</c> token carrying the literal text (prefix + digits + suffix)
		/// and the parsed value. When the text cannot be parsed, an error is reported and a
		/// zero value of the corresponding type is used.
		/// </returns>
		Token ReadDigit(char ch, int x)
		{
			int y = line;
			// No ++col for ch here: the caller counted it when it consumed the character.
			// From here on the column is advanced exactly once per character read.
			sb.Length = 0;
			sb.Append(ch);
			string prefix = null;
			string suffix = null;
			
			bool ishex      = false;
			bool isunsigned = false;
			bool islong     = false;
			bool isfloat    = false;
			bool isdouble   = false;
			bool isdecimal  = false;
			
			char peek = (char)reader.Peek();
			
			if (ch == '.')  {
				isdouble = true;
				
				while (Char.IsDigit((char)reader.Peek())) { // read decimal digits beyond the dot
					sb.Append((char)reader.Read());
					++col;
				}
				peek = (char)reader.Peek();
			} else if (ch == '0' && (peek == 'x' || peek == 'X')) {
				reader.Read(); // skip 'x'
				++col;
				while (IsHex((char)reader.Peek())) {
					sb.Append(Char.ToUpper((char)reader.Read()));
					++col;
				}
				ishex = true;
				prefix = "0x";
				peek = (char)reader.Peek();
			} else {
				while (Char.IsDigit((char)reader.Peek())) {
					sb.Append((char)reader.Read());
					++col;
				}
				peek = (char)reader.Peek();
			}
			
			if (peek == '.') { // read floating point number
				reader.Read();
				++col; // the dot has been consumed in either branch below
				peek = (char)reader.Peek();
				isdouble = true; // double is default
				if (Char.IsDigit(peek)) {
					if (ishex) {
						errors.Error(y, x, "No hexadecimal floating point values allowed");
					}
					sb.Append('.');
					
					while (Char.IsDigit((char)reader.Peek())) { // read decimal digits beyond the dot
						sb.Append((char)reader.Read());
						++col;
					}
					peek = (char)reader.Peek();
				}
				// TODO: when no digit follows, the '.' has been consumed and is dropped, so
				// input like "1.ToString()" loses its member-access dot. Fixing this needs
				// more than the single character of lookahead available here.
			}
			
			if (peek == 'e' || peek == 'E') { // read exponent
				isdouble = true;
				sb.Append((char)reader.Read());
				++col;
				peek = (char)reader.Peek();
				if (peek == '-' || peek == '+') {
					sb.Append((char)reader.Read());
					++col;
				}
				while (Char.IsDigit((char)reader.Peek())) { // read exponent value
					sb.Append((char)reader.Read());
					++col;
				}
				peek = (char)reader.Peek();
			}
			
			if (peek == 'f' || peek == 'F') { // float value
				reader.Read();
				suffix = "f";
				++col;
				isfloat = true;
			} else if (peek == 'd' || peek == 'D') { // explicit double suffix (redundant, double is default)
				reader.Read();
				suffix = "d";
				++col;
				isdouble = true;
			} else if (peek == 'm' || peek == 'M') { // decimal value
				reader.Read();
				suffix = "m";
				++col;
				isdecimal = true;
			} else if (!isdouble) {
				// integer suffixes: u, l, ul, lu (any casing)
				if (peek == 'u' || peek == 'U') {
					reader.Read();
					suffix = "u";
					++col;
					isunsigned = true;
					peek = (char)reader.Peek();
				}
				
				if (peek == 'l' || peek == 'L') {
					reader.Read();
					peek = (char)reader.Peek();
					++col;
					islong = true;
					if (!isunsigned && (peek == 'u' || peek == 'U')) {
						reader.Read();
						suffix = "lu";
						++col;
						isunsigned = true;
					} else {
						suffix = isunsigned ? "ul" : "l";
					}
				}
			}
			
			string digit       = sb.ToString();
			string stringValue = prefix + digit + suffix;
			
			// Source code literals are culture independent, so everything is parsed with
			// the invariant culture.
			if (isfloat) {
				try {
					return new Token(Tokens.Literal, x, y, stringValue, Single.Parse(digit, CultureInfo.InvariantCulture));
				} catch (Exception) {
					errors.Error(y, x, String.Format("Can't parse float {0}", digit));
					return new Token(Tokens.Literal, x, y, stringValue, 0f);
				}
			}
			if (isdecimal) {
				try {
					// NumberStyles.Float also accepts an exponent part (e.g. 1e2m)
					return new Token(Tokens.Literal, x, y, stringValue, Decimal.Parse(digit, NumberStyles.Float, CultureInfo.InvariantCulture));
				} catch (Exception) {
					errors.Error(y, x, String.Format("Can't parse decimal {0}", digit));
					return new Token(Tokens.Literal, x, y, stringValue, 0m);
				}
			}
			if (isdouble) {
				try {
					return new Token(Tokens.Literal, x, y, stringValue, Double.Parse(digit, CultureInfo.InvariantCulture));
				} catch (Exception) {
					errors.Error(y, x, String.Format("Can't parse double {0}", digit));
					return new Token(Tokens.Literal, x, y, stringValue, 0d);
				}
			}
			
			NumberStyles style = ishex ? NumberStyles.HexNumber : NumberStyles.Number;
			if (islong) {
				if (isunsigned) {
					try {
						return new Token(Tokens.Literal, x, y, stringValue, UInt64.Parse(digit, style, CultureInfo.InvariantCulture));
					} catch (Exception) {
						errors.Error(y, x, String.Format("Can't parse unsigned long {0}", digit));
						return new Token(Tokens.Literal, x, y, stringValue, 0UL);
					}
				} else {
					try {
						return new Token(Tokens.Literal, x, y, stringValue, Int64.Parse(digit, style, CultureInfo.InvariantCulture));
					} catch (Exception) {
						errors.Error(y, x, String.Format("Can't parse long {0}", digit));
						return new Token(Tokens.Literal, x, y, stringValue, 0L);
					}
				}
			} else {
				if (isunsigned) {
					try {
						return new Token(Tokens.Literal, x, y, stringValue, UInt32.Parse(digit, style, CultureInfo.InvariantCulture));
					} catch (Exception) {
						errors.Error(y, x, String.Format("Can't parse unsigned int {0}", digit));
						return new Token(Tokens.Literal, x, y, stringValue, 0U);
					}
				} else {
					try {
						return new Token(Tokens.Literal, x, y, stringValue, Int32.Parse(digit, style, CultureInfo.InvariantCulture));
					} catch (Exception) {
						errors.Error(y, x, String.Format("Can't parse int {0}", digit));
						return new Token(Tokens.Literal, x, y, stringValue, 0);
					}
				}
			}
		}
		
		/// <summary>
		/// Reads a regular (non-verbatim) string literal. The opening quote has already
		/// been consumed by the caller.
		/// </summary>
		/// <returns>
		/// A <c>Tokens.Literal</c> token holding the literal as written (including quotes
		/// and escape sequences) and its unescaped value. An error is reported when the
		/// literal is ended by a new line or by the end of the input; the token then
		/// contains what was read so far.
		/// </returns>
		Token ReadString()
		{
			int x = col;
			int y = line;
			
			sb.Length = 0;
			originalValue.Length = 0;
			originalValue.Append('"');
			bool doneNormally = false;
			bool hitLineEnd   = false;
			int nextChar;
			while ((nextChar = reader.Read()) != -1) {
				char ch = (char)nextChar;
				++col;
				
				if (ch == '"') {
					doneNormally = true;
					originalValue.Append('"');
					break;
				}
				
				if (ch == '\\') {
					originalValue.Append('\\');
					originalValue.Append(ReadEscapeSequence(out ch));
					sb.Append(ch);
				} else if (ch == '\n') {
					errors.Error(y, x, "No new line is allowed inside a string literal");
					// The newline has been consumed, so let the shared line-end handling
					// account for it (presumably it advances the line counter, as it is
					// used for that purpose by the whitespace handling in Next()).
					HandleLineEnd(ch);
					hitLineEnd = true;
					break;
				} else {
					originalValue.Append(ch);
					sb.Append(ch);
				}
			}
			
			// Only report end of file when the input really ran out; a literal cut off by
			// a new line has already been reported above.
			if (!doneNormally && !hitLineEnd) {
				errors.Error(y, x, "End of file reached inside string literal");
			}
			
			return new Token(Tokens.Literal, x, y, originalValue.ToString(), sb.ToString());
		}
		
		Token ReadVerbatimString()
		{
			int x = col;
			int y = line;
			int nextChar;
			sb.Length            = 0;
			originalValue.Length = 0;
			originalValue.Append("@\"");
			while ((nextChar = reader.Read()) != -1) {
				char ch = (char)nextChar;
				++col;
				
				if (ch == '"') {
					if (reader.Peek() != '"') {
						originalValue.Append('"');
						break;
					}
					originalValue.Append("\"\"");
					sb.Append('"');
					reader.Read();
				}
				if (HandleLineEnd(ch)) {
					sb.Append('\n');
					originalValue.Append('\n');
				} else {
					sb.Append(ch);
					originalValue.Append(ch);
lexer.cs - 源码说明

本页面展示了「全功能c#编译器」中的 lexer.cs 源码文件，采用 CS 编程语言编写，共 767 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与编译器相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?