📄 cs-tokenizer.cs
//
// cs-tokenizer.cs: The Tokenizer for the C# compiler
//                  This also implements the preprocessor
//
// Author: Miguel de Icaza (miguel@gnu.org)
//
// Licensed under the terms of the GNU GPL
//
// (C) 2001, 2002 Ximian, Inc (http://www.ximian.com)
// (C) 2004 Novell, Inc
//

/*
 * TODO:
 *   Make sure we accept the proper Unicode ranges, per the spec.
 *   Report error 1032
*/

using System;
using System.Text;
using System.Collections;
using System.IO;
using System.Globalization;
using System.Reflection;

namespace Mono.CSharp
{
    /// <summary>
    ///    Tokenizer for C# source code.
    /// </summary>
    public class Tokenizer : yyParser.yyInput
    {
        SeekableStreamReader reader;
        SourceFile ref_name;
        SourceFile file_name;
        int ref_line = 1;
        int line = 1;
        int col = 0;
        int previous_col;
        int current_token;
        bool handle_get_set = false;
        bool handle_remove_add = false;
        bool handle_assembly = false;
        Location current_location;
        Location current_comment_location = Location.Null;
        ArrayList escapedIdentifiers = new ArrayList ();

        //
        // XML documentation buffer. The save point is used to divide
        // comments on types and comments on members.
        //
        StringBuilder xml_comment_buffer;

        //
        // See comment on XmlCommentState enumeration.
        //
        XmlCommentState xmlDocState = XmlCommentState.Allowed;

        //
        // Whether tokens have been seen on this line
        //
        bool tokens_seen = false;

        //
        // Whether a token has been seen on the file
        // This is needed because `define' is not allowed to be used
        // after a token has been seen.
        //
        bool any_token_seen = false;

        static Hashtable tokenValues;

        private static Hashtable TokenValueName
        {
            get {
                if (tokenValues == null)
                    tokenValues = GetTokenValueNameHash ();

                return tokenValues;
            }
        }

        private static Hashtable GetTokenValueNameHash ()
        {
            Type t = typeof (Token);
            FieldInfo [] fields = t.GetFields ();
            Hashtable hash = new Hashtable ();
            foreach (FieldInfo field in fields) {
                if (field.IsLiteral && field.IsStatic && field.FieldType == typeof (int))
                    hash.Add (field.GetValue (null), field.Name);
            }
            return hash;
        }

        //
        // Returns a verbose representation of the current location
        //
        public string location {
            get {
                string det;

                if (current_token == Token.ERROR)
                    det = "detail: " + error_details;
                else
                    det = "";

                // return "Line: "+line+" Col: "+col + "\n" +
                //        "VirtLine: "+ref_line +
                //        " Token: "+current_token + " " + det;
                string current_token_name = TokenValueName [current_token] as string;
                if (current_token_name == null)
                    current_token_name = current_token.ToString ();

                return String.Format ("{0} ({1},{2}), Token: {3} {4}",
                                      ref_name.Name, ref_line, col,
                                      current_token_name, det);
            }
        }

        public bool PropertyParsing {
            get {
                return handle_get_set;
            }

            set {
                handle_get_set = value;
            }
        }

        public bool AssemblyTargetParsing {
            get {
                return handle_assembly;
            }

            set {
                handle_assembly = value;
            }
        }

        public bool EventParsing {
            get {
                return handle_remove_add;
            }

            set {
                handle_remove_add = value;
            }
        }

        public XmlCommentState doc_state {
            get { return xmlDocState; }
            set {
                if (value == XmlCommentState.Allowed) {
                    check_incorrect_doc_comment ();
                    reset_doc_comment ();
                }
                xmlDocState = value;
            }
        }

        public bool IsEscapedIdentifier (Location loc)
        {
            foreach (LocatedToken lt in escapedIdentifiers)
                if (lt.Location.Equals (loc))
                    return true;
            return false;
        }

        //
        // Class variables
        //
        static CharArrayHashtable[] keywords;
        static Hashtable keywordStrings = new Hashtable ();
        static NumberStyles styles;
        static NumberFormatInfo csharp_format_info;

        //
        // Values for the associated token returned
        //
        int putback_char;
        Object val;

        //
        // Pre-processor
        //
        Hashtable defines;

        const int TAKING        = 1;
        const int TAKEN_BEFORE  = 2;
        const int ELSE_SEEN     = 4;
        const int PARENT_TAKING = 8;
        const int REGION        = 16;

        //
        // pre-processor if stack state:
        //
        Stack ifstack;

        static System.Text.StringBuilder string_builder;

        const int max_id_size = 512;
        static char [] id_builder = new char [max_id_size];

        static CharArrayHashtable [] identifiers = new CharArrayHashtable [max_id_size + 1];

        const int max_number_size = 512;
        static char [] number_builder = new char [max_number_size];
        static int number_pos;

        //
        // Details about the error encountered by the tokenizer
        //
        string error_details;

        public string error {
            get {
                return error_details;
            }
        }

        public int Line {
            get {
                return ref_line;
            }
        }

        public int Col {
            get {
                return col;
            }
        }

        static void AddKeyword (string kw, int token)
        {
            keywordStrings.Add (kw, kw);
            if (keywords [kw.Length] == null) {
                keywords [kw.Length] = new CharArrayHashtable (kw.Length);
            }
            keywords [kw.Length] [kw.ToCharArray ()] = token;
        }

        static void InitTokens ()
        {
            keywords = new CharArrayHashtable [64];

            AddKeyword ("__arglist", Token.ARGLIST);
            AddKeyword ("abstract", Token.ABSTRACT);
            AddKeyword ("as", Token.AS);
            AddKeyword ("add", Token.ADD);
            AddKeyword ("assembly", Token.ASSEMBLY);
            AddKeyword ("base", Token.BASE);
            AddKeyword ("bool", Token.BOOL);
            AddKeyword ("break", Token.BREAK);
            AddKeyword ("byte", Token.BYTE);
            AddKeyword ("case", Token.CASE);
            AddKeyword ("catch", Token.CATCH);
            AddKeyword ("char", Token.CHAR);
            AddKeyword ("checked", Token.CHECKED);
            AddKeyword ("class", Token.CLASS);
            AddKeyword ("const", Token.CONST);
            AddKeyword ("continue", Token.CONTINUE);
            AddKeyword ("decimal", Token.DECIMAL);
            AddKeyword ("default", Token.DEFAULT);
            AddKeyword ("delegate", Token.DELEGATE);
            AddKeyword ("do", Token.DO);
            AddKeyword ("double", Token.DOUBLE);
            AddKeyword ("else", Token.ELSE);
            AddKeyword ("enum", Token.ENUM);
            AddKeyword ("event", Token.EVENT);
            AddKeyword ("explicit", Token.EXPLICIT);
            AddKeyword ("extern", Token.EXTERN);
            AddKeyword ("false", Token.FALSE);
            AddKeyword ("finally", Token.FINALLY);
            AddKeyword ("fixed", Token.FIXED);
            AddKeyword ("float", Token.FLOAT);
            AddKeyword ("for", Token.FOR);
            AddKeyword ("foreach", Token.FOREACH);
            AddKeyword ("goto", Token.GOTO);
            AddKeyword ("get", Token.GET);
            AddKeyword ("if", Token.IF);
            AddKeyword ("implicit", Token.IMPLICIT);
            AddKeyword ("in", Token.IN);
            AddKeyword ("int", Token.INT);
            AddKeyword ("interface", Token.INTERFACE);
            AddKeyword ("internal", Token.INTERNAL);
            AddKeyword ("is", Token.IS);
            AddKeyword ("lock", Token.LOCK);
            AddKeyword ("long", Token.LONG);
            AddKeyword ("namespace", Token.NAMESPACE);
            AddKeyword ("new", Token.NEW);
            AddKeyword ("null", Token.NULL);
            AddKeyword ("object", Token.OBJECT);
            AddKeyword ("operator", Token.OPERATOR);
            AddKeyword ("out", Token.OUT);
            AddKeyword ("override", Token.OVERRIDE);
            AddKeyword ("params", Token.PARAMS);
            AddKeyword ("private", Token.PRIVATE);
            AddKeyword ("protected", Token.PROTECTED);
            AddKeyword ("public", Token.PUBLIC);
            AddKeyword ("readonly", Token.READONLY);
            AddKeyword ("ref", Token.REF);
            AddKeyword ("remove", Token.REMOVE);
            AddKeyword ("return", Token.RETURN);
            AddKeyword ("sbyte", Token.SBYTE);
            AddKeyword ("sealed", Token.SEALED);
            AddKeyword ("set", Token.SET);
            AddKeyword ("short", Token.SHORT);
            AddKeyword ("sizeof", Token.SIZEOF);
            AddKeyword ("stackalloc", Token.STACKALLOC);
            AddKeyword ("static", Token.STATIC);
            AddKeyword ("string", Token.STRING);
            AddKeyword ("struct", Token.STRUCT);
            AddKeyword ("switch", Token.SWITCH);
            AddKeyword ("this", Token.THIS);
            AddKeyword ("throw", Token.THROW);
            AddKeyword ("true", Token.TRUE);
            AddKeyword ("try", Token.TRY);
AddKeyword ("typeof", Token.TYPEOF); AddKeyword ("uint", Token.UINT); AddKeyword ("ulong", Token.ULONG); AddKeyword ("unchecked", Token.UNCHECKED); AddKeyword ("unsafe", Token.UNSAFE); AddKeyword ("ushort", Token.USHORT); AddKeyword ("using", Token.USING); AddKeyword ("virtual", Token.VIRTUAL); AddKeyword ("void", Token.VOID); AddKeyword ("volatile", Token.VOLATILE); AddKeyword ("while", Token.WHILE); AddKeyword ("partial", Token.PARTIAL); } // // Class initializer // static Tokenizer () { InitTokens (); csharp_format_info = NumberFormatInfo.InvariantInfo; styles = NumberStyles.Float; string_builder = new System.Text.StringBuilder (); } int GetKeyword (char[] id, int id_len) { /* * Keywords are stored in an array of hashtables grouped by their * length. */ if ((id_len >= keywords.Length) || (keywords [id_len] == null)) return -1; object o = keywords [id_len] [id]; if (o == null) return -1; int res = (int) o; if (handle_get_set == false && (res == Token.GET || res == Token.SET)) return -1; if (handle_remove_add == false && (res == Token.REMOVE || res == Token.ADD)) return -1; if (handle_assembly == false && res == Token.ASSEMBLY) return -1; return res; } public Location Location { get { return current_location; } } void define (string def) { if (!RootContext.AllDefines.Contains (def)){ RootContext.AllDefines [def] = true; } if (defines.Contains (def)) return; defines [def] = true; } public Tokenizer (SeekableStreamReader input, SourceFile file, ArrayList defs) { this.ref_name = file; this.file_name = file; reader = input; putback_char = -1; if (defs != null){ defines = new Hashtable (); foreach (string def in defs) define (def); } xml_comment_buffer = new StringBuilder (); // // FIXME: This could be `Location.Push' but we have to // find out why the MS compiler allows this // Mono.CSharp.Location.Push (file, 0); } static bool is_identifier_start_character (char c) { return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_' || Char.IsLetter (c); } static bool is_identifier_part_character (char c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c >= '0' && c <= '9') || Char.IsLetter (c); } public static bool IsKeyword (string s) { return keywordStrings [s] != null; } public static bool IsValidIdentifier (string s) { if (s == null || s.Length == 0) return false; if (!is_identifier_start_character (s [0])) return false; for (int i = 1; i < s.Length; i ++) if (! is_identifier_part_character (s [i])) return false; return true; } int is_punct (char c, ref bool doread) { int d; int t; doread = false; switch (c){ case '{': val = Location; return Token.OPEN_BRACE; case '}': val = Location; return Token.CLOSE_BRACE; case '[': // To block doccomment inside attribute declaration. if (doc_state == XmlCommentState.Allowed) doc_state = XmlCommentState.NotAllowed; return Token.OPEN_BRACKET; case ']': return Token.CLOSE_BRACKET; case '(': return Token.OPEN_PARENS; case ')': { if (deambiguate_close_parens == 0) return Token.CLOSE_PARENS; --deambiguate_close_parens; // Save current position and parse next token. 
                int old = reader.Position;
                int old_ref_line = ref_line;
                int old_col = col;

                // disable preprocessing directives when peeking
                process_directives = false;
                int new_token = token ();
                process_directives = true;
                reader.Position = old;
                ref_line = old_ref_line;
                col = old_col;
                putback_char = -1;

                if (new_token == Token.OPEN_PARENS)
                    return Token.CLOSE_PARENS_OPEN_PARENS;
                else if (new_token == Token.MINUS)
                    return Token.CLOSE_PARENS_MINUS;
                else if (IsCastToken (new_token))
                    return Token.CLOSE_PARENS_CAST;
                else
                    return Token.CLOSE_PARENS_NO_CAST;
            }

            case ',':
                return Token.COMMA;
            case ';':
                val = Location;
                return Token.SEMICOLON;
            case '~':
                val = Location;
                return Token.TILDE;
            case '?':
                return Token.INTERR;
            }

            d = peekChar ();
            if (c == '+'){
                if (d == '+') {
                    val = Location;
                    t = Token.OP_INC;
                }
                else if (d == '=')
                    t = Token.OP_ADD_ASSIGN;
                else {
                    val = Location;
                    return Token.PLUS;
                }
                doread = true;
                return t;
            }

            if (c == '-'){
                if (d == '-') {
                    val = Location;
                    t = Token.OP_DEC;
                }
                else if (d == '=')
                    t = Token.OP_SUB_ASSIGN;
                else if (d == '>')
                    t = Token.OP_PTR;
                else {
                    val = Location;
                    return Token.MINUS;
                }
                doread = true;
                return t;
            }

            if (c == '!'){
                if (d == '='){
                    doread = true;
                    return Token.OP_NE;
                }
                val = Location;
                return Token.BANG;
            }

            if (c == '='){
                if (d == '='){
                    doread = true;
                    return Token.OP_EQ;
                }
                return Token.ASSIGN;
            }

            if (c == '&'){
                if (d == '&'){
                    doread = true;
                    return Token.OP_AND;
                } else if (d == '='){
                    doread = true;
                    return Token.OP_AND_ASSIGN;
                }
                val = Location;
                return Token.BITWISE_AND;
            }

            if (c == '|'){
                if (d == '|'){
                    doread = true;
                    return Token.OP_OR;
                } else if (d == '='){
                    doread = true;
                    return Token.OP_OR_ASSIGN;
                }
                return Token.BITWISE_OR;
            }

            if (c == '*'){
                if (d == '='){
                    doread = true;
                    return Token.OP_MULT_ASSIGN;
                }
                val = Location;
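
A note on the keyword lookup used by AddKeyword/GetKeyword above: keywords are bucketed into an array of hash tables indexed by keyword length, so a lookup only consults the table for strings of the matching length, and GetKeyword can work directly on the char[] id_builder buffer without allocating a string first. The following is a minimal standalone sketch of that scheme, not the real Mono.CSharp.Tokenizer: it substitutes Dictionary<string, int> for the compiler's CharArrayHashtable, and the class name, MaxLen bound, and token values are hypothetical, chosen only for illustration.

// Sketch only: length-bucketed keyword lookup, assuming Dictionary<string, int>
// in place of CharArrayHashtable and made-up token values.
using System;
using System.Collections.Generic;

static class KeywordTableSketch
{
    // One lookup table per keyword length; unused slots stay null.
    const int MaxLen = 16;
    static readonly Dictionary<string, int>[] keywords =
        new Dictionary<string, int>[MaxLen + 1];

    static void AddKeyword (string kw, int token)
    {
        if (keywords [kw.Length] == null)
            keywords [kw.Length] = new Dictionary<string, int> ();
        keywords [kw.Length] [kw] = token;
    }

    // Returns the token value, or -1 when `id' is not a keyword.
    static int GetKeyword (string id)
    {
        if (id.Length >= keywords.Length || keywords [id.Length] == null)
            return -1;
        int token;
        return keywords [id.Length].TryGetValue (id, out token) ? token : -1;
    }

    static void Main ()
    {
        AddKeyword ("if", 1);       // hypothetical token values
        AddKeyword ("foreach", 2);
        Console.WriteLine (GetKeyword ("foreach")); // prints 2
        Console.WriteLine (GetKeyword ("banana"));  // prints -1
    }
}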