parser.rb
来自「Harvestman-最新版本」· RB 代码 · 共 1,205 行 · 第 1/4 页
RB
1,205 行
# Master token table. A token's position in this array is used below as its
# numeric type code ($consts, $keywords and $assignOpsHash all store indexes
# into this array).
$tokens = [
  # End of source.
  "END",

  # Operators and punctuators. Some pair-wise order matters, e.g. (+, -)
  # and (UNARY_PLUS, UNARY_MINUS).
  "\n", ";",
  ",",
  "=",
  "?", ":", "CONDITIONAL",
  "||",
  "&&",
  "|",
  "^",
  "&",
  "==", "!=", "===", "!==",
  "<", "<=", ">=", ">",
  "<<", ">>", ">>>",
  "+", "-",
  "*", "/", "%",
  "!", "~", "UNARY_PLUS", "UNARY_MINUS",
  "++", "--",
  ".",
  "[", "]",
  "{", "}",
  "(", ")",

  # Nonterminal tree node type codes.
  "SCRIPT", "BLOCK", "LABEL", "FOR_IN", "CALL", "NEW_WITH_ARGS", "INDEX",
  "ARRAY_INIT", "OBJECT_INIT", "PROPERTY_INIT", "GETTER", "SETTER",
  "GROUP", "LIST",

  # Terminals.
  "IDENTIFIER", "NUMBER", "STRING", "REGEXP",

  # Keywords.
  "break",
  "case", "catch", "const", "continue",
  "debugger", "default", "delete", "do",
  "else", "enum",
  "false", "finally", "for", "function",
  "if", "in", "instanceof",
  "new", "null",
  "return",
  "switch",
  "this", "throw", "true", "try", "typeof",
  "var", "void",
  "while", "with",
]

# Operator and punctuator mapping from token to tree node type name.
$opTypeNames = {
  "\n"  => "NEWLINE",
  ';'   => "SEMICOLON",
  ','   => "COMMA",
  '?'   => "HOOK",
  ':'   => "COLON",
  '||'  => "OR",
  '&&'  => "AND",
  '|'   => "BITWISE_OR",
  '^'   => "BITWISE_XOR",
  '&'   => "BITWISE_AND",
  '===' => "STRICT_EQ",
  '=='  => "EQ",
  '='   => "ASSIGN",
  '!==' => "STRICT_NE",
  '!='  => "NE",
  '<<'  => "LSH",
  '<='  => "LE",
  '<'   => "LT",
  '>>>' => "URSH",
  '>>'  => "RSH",
  '>='  => "GE",
  '>'   => "GT",
  '++'  => "INCREMENT",
  '--'  => "DECREMENT",
  '+'   => "PLUS",
  '-'   => "MINUS",
  '*'   => "MUL",
  '/'   => "DIV",
  '%'   => "MOD",
  '!'   => "NOT",
  '~'   => "BITWISE_NOT",
  '.'   => "DOT",
  '['   => "LEFT_BRACKET",
  ']'   => "RIGHT_BRACKET",
  '{'   => "LEFT_CURLY",
  '}'   => "RIGHT_CURLY",
  '('   => "LEFT_PAREN",
  ')'   => "RIGHT_PAREN"
}

# Hash of keyword identifier to tokens index.
$keywords = {}

# Define const END, etc., based on the token names. Also map name to index.
$consts = {}
$tokens.each_with_index do |t, i|
  if /\A[a-z]/ =~ t
    # Lower-case entries are keywords: registered under their upper-cased
    # name in $consts and under their own spelling in $keywords.
    $consts[t.upcase] = i
    $keywords[t] = i
  elsif /\A\W/ =~ t
    # Entries starting with a non-word character are operators/punctuators:
    # registered under the node type name from $opTypeNames.
    $consts[$opTypeNames[t]] = i
  else
    # Everything else (END, CONDITIONAL, nonterminals, terminals) is
    # registered under its own name.
    $consts[t] = i
  end
end

# Map assignment operators to their indexes in the tokens array.
$assignOps = ['|', '^', '&', '<<', '>>', '>>>', '+', '-', '*', '/', '%']
$assignOpsHash = {}
$assignOps.each do |t|
  $assignOpsHash[t] = $consts[$opTypeNames[t]]
end

# Precedence level for each operator, keyed by node type name.
$opPrecedence = {
  "SEMICOLON" => 0,
  "COMMA" => 1,
  "ASSIGN" => 2,
  "HOOK" => 3, "COLON" => 3, "CONDITIONAL" => 3,
  "OR" => 4,
  "AND" => 5,
  "BITWISE_OR" => 6,
  "BITWISE_XOR" => 7,
  "BITWISE_AND" => 8,
  "EQ" => 9, "NE" => 9, "STRICT_EQ" => 9, "STRICT_NE" => 9,
  "LT" => 10, "LE" => 10, "GE" => 10, "GT" => 10, "IN" => 10, "INSTANCEOF" => 10,
  "LSH" => 11, "RSH" => 11, "URSH" => 11,
  "PLUS" => 12, "MINUS" => 12,
  "MUL" => 13, "DIV" => 13, "MOD" => 13,
  "DELETE" => 14, "VOID" => 14, "TYPEOF" => 14, # PRE_INCREMENT: 14, PRE_DECREMENT: 14,
  "NOT" => 14, "BITWISE_NOT" => 14, "UNARY_PLUS" => 14, "UNARY_MINUS" => 14,
  "INCREMENT" => 15, "DECREMENT" => 15, # postfix
  "NEW" => 16,
  "DOT" => 17
}

# Map operator type code to precedence.
# (Iterates over a snapshot of the keys, so adding the integer-keyed entries
# to the same hash during the loop is safe.)
$opPrecedence.keys.each do |i|
  $opPrecedence[$consts[i]] = $opPrecedence[i]
end

# Operand count for each operator / expression node, keyed by node type name.
$opArity = {
  "COMMA" => -2,
  "ASSIGN" => 2,
  "CONDITIONAL" => 3,
  "OR" => 2,
  "AND" => 2,
  "BITWISE_OR" => 2,
  "BITWISE_XOR" => 2,
  "BITWISE_AND" => 2,
  "EQ" => 2, "NE" => 2, "STRICT_EQ" => 2, "STRICT_NE" => 2,
  "LT" => 2, "LE" => 2, "GE" => 2, "GT" => 2, "IN" => 2, "INSTANCEOF" => 2,
  "LSH" => 2, "RSH" => 2, "URSH" => 2,
  "PLUS" => 2, "MINUS" => 2,
  "MUL" => 2, "DIV" => 2, "MOD" => 2,
  "DELETE" => 1, "VOID" => 1, "TYPEOF" => 1, # PRE_INCREMENT: 1, PRE_DECREMENT: 1,
  "NOT" => 1, "BITWISE_NOT" => 1, "UNARY_PLUS" => 1, "UNARY_MINUS" => 1,
  "INCREMENT" => 1, "DECREMENT" => 1, # postfix
  "NEW" => 1, "NEW_WITH_ARGS" => 2, "DOT" => 2, "INDEX" => 2, "CALL" => 2,
  "ARRAY_INIT" => 1, "OBJECT_INIT" => 1, "GROUP" => 1
}
# Map operator type code to
arity.$opArity.keys.each do |i| $opArity[$consts[i]] = $opArity[i]end# NB: superstring tokens (e.g., ++) must come before their substring token# counterparts (+ in the example), so that the $opRegExp regular expression# synthesized from this list makes the longest possible match.$ops = [';', ',', '?', ':', '||', '&&', '|', '^', '&', '===', '==', '=', '!==', '!=', '<<', '<=', '<', '>>>', '>>', '>=', '>', '++', '--', '+', '-', '*', '/', '%', '!', '~', '.', '[', ']', '{', '}', '(', ')']# Build a regexp that recognizes operators and punctuators (except newline).$opRegExpSrc = "\\A"$ops.length.times do |i| $opRegExpSrc += "|\\A" if $opRegExpSrc != "\\A" $opRegExpSrc += $ops[i].gsub(/([?|^&(){}\[\]+\-*\/\.])/) {|s| "\\" + s}end$opRegExp = Regexp.new($opRegExpSrc, Regexp::MULTILINE)# A regexp to match floating point literals (but not integer literals).$fpRegExp = Regexp.new("\\A\\d+\\.\\d*(?:[eE][-+]?\\d+)?|\\A\\d+(?:\\.\\d*)?[eE][-+]?\\d+|\\A\\.\\d+(?:[eE][-+]?\\d+)?", Regexp::MULTILINE)class Tokenizer attr_accessor :cursor, :source, :tokens, :tokenIndex, :lookahead attr_accessor :scanNewlines, :scanOperand, :filename, :lineno def initialize (source, filename, line) @cursor = 0 @source = source.to_s @tokens = [] @tokenIndex = 0 @lookahead = 0 @scanNewlines = false @scanOperand = true @filename = filename or "" @lineno = line or 1 end def input return @source.slice(@cursor, @source.length - @cursor) end def done return self.peek == $consts["END"]; end def token return @tokens[@tokenIndex]; end def match (tt) puts "Calling match of " + tt.to_s got = self.get puts "GOOT " + got.to_s + " " + tt.to_s #puts got #puts tt return got == tt || self.unget end def mustMatch (tt) print "Calling mustMatch " + tt.to_s raise SyntaxError.new("Missing " + $tokens[tt].downcase, self) unless self.match(tt) return self.token end def peek if @lookahead > 0 #tt = @tokens[(@tokenIndex + @lookahead)].type tt = @tokens[(@tokenIndex + @lookahead) & 3].type else tt = self.get self.unget end return 
tt end def peekOnSameLine @scanNewlines = true; tt = self.peek @scanNewlines = false; return tt end def get while @lookahead > 0 @lookahead -= 1 @tokenIndex = (@tokenIndex + 1) & 3 token = @tokens[@tokenIndex] return token.type if token.type != $consts["NEWLINE"] || @scanNewlines end while true input = self.input puts "Input => " + input if @scanNewlines puts "Scannewlines is true" match = /\A[ \t]+/.match(input) else match = /\A\s+/.match(input) end if match puts "A MATCH FOUND!" spaces = match[0] puts "Spaces => " + spaces.length.to_s @cursor += spaces.length puts "Newline count => " + spaces.count("\n").to_s @lineno += spaces.count("\n") input = self.input end puts "Input=> " + input + " " + input.length.to_s match = /\A\/(?:\*(?:.)*?\*\/|\/[^\n]*)/m.match(input) if !match: puts "BREAKING" break end puts "Cursor=> " + @cursor.to_s comment = match[0] puts "Comment => " + comment @cursor += comment.length puts "Cursor=> " + @cursor.to_s puts 'Comment length => ' + comment.length.to_s puts "Comment newline count => " + comment.count("\n").to_s @lineno += comment.count("\n") end #puts input
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?