newscanners.scala
来自「JAVA 语言的函数式编程扩展」· SCALA 代码 · 共 896 行 · 第 1/2 页
SCALA
896 行
/* NSC -- new Scala compiler
 * Copyright 2005-2007 LAMP/EPFL
 * @author Sean McDirmid
 */
// $Id: NewScanners.scala 14416 2008-03-19 01:17:25Z mihaylov $

package scala.tools.nsc.ast.parser

import scala.tools.nsc.util.SourceFile._
import scala.tools.nsc.util._

/** Scanner component bound to a `Global` instance: declares the character
 *  input abstractions and the scanners that turn characters into tokens.
 */
trait NewScanners {
  val global : Global
  import global._
  import Tokens._

  /** Character input with look-ahead (`peek`), a current `offset`, error
   *  reporting hooks and a scratch buffer that scanner code in this
   *  component accumulates text in.
   */
  trait CoreScannerInput extends BufferedIterator[Char] {
    /** Scratch buffer, visible only inside `NewScanners`. */
    private[NewScanners] val scratch = new StringBuilder

    /** Consumes the head character if it equals `c`.
     *
     *  @return true if a character was consumed
     */
    def readIfStartsWith(c : Char) : Boolean = {
      val matched = head == c
      if (matched) next
      matched
    }

    /** Consumes the next two characters if they are `c0` followed by `c1`.
     *
     *  @return true if both characters were consumed
     */
    def readIfStartsWith(c0 : Char, c1 : Char) : Boolean = {
      val matched = head == c0 && peek(1) == c1
      if (matched) { next; next }
      matched
    }

    /** Tests, without consuming anything, whether the next two characters
     *  are `c0` followed by `c1`.
     */
    def startsWith(c0: Char, c1 : Char) : Boolean =
      if (head != c0) false else peek(1) == c1

    def isUnicode : Boolean

    /** The character `idx` positions ahead of the head (see
     *  `DefaultInput`, where `head` is `peek(0)`).
     */
    def peek(idx : Int) : Char

    /** The current position in the input. */
    def offset : Int

    /** Reports `msg` as an error at `offset`. */
    def error(offset : Int, msg : String) : Unit

    /** Reports an error caused by incomplete input; forwards to `error`
     *  unless overridden.
     */
    def incompleteError(offset : Int, msg : String) : Unit = error(offset, msg)

    /** The input text between the offsets `from` and `until`. */
    def textFor(from : Int, until : Int) : RandomAccessSeq[Char]
  }

  /** A scanner input that can additionally be repositioned. */
  trait ScannerInput extends CoreScannerInput {
    def seek(offset : Int) : Unit
  }

  /** Scanner input that delegates to a `NewCharArrayReader`.
   *  Errors reported through this input are discarded.
   */
  class DefaultInput(in : NewCharArrayReader) extends ScannerInput {
    import scala.collection.mutable._
    def seek(offset : Int) = in.seek(offset)
    def offset = in.offset
    def head = peek(0)
    def next = in.next
    def isUnicode : Boolean = in.isUnicode
    def hasNext = in.hasNext

    /** Reads `idx + 1` characters from the reader, then seeks back to where
     *  reading started and returns the last character read.
     */
    def peek(idx : Int) = {
      val saved = in.offset
      var ch = in.next
      var remaining = idx
      while (remaining > 0) {
        remaining -= 1
        ch = in.next
      }
      in.seek(saved) // jump back to old position
      ch
    }

    /** Intentionally does nothing. */
    def error(offset : Int, msg : String) : Unit = {}

    def textFor(from : Int, until : Int) = in.buf.slice(from, until).mkString
  }

  /** Token scanner with one token of look-ahead: `current` holds the token
   *  being inspected and `next` an optional pre-scanned successor (its code
   *  is EMPTY when nothing is buffered).
   */
  abstract class ParserScanner extends BaseScanner {
    /** Scans the first token. */
    def init = nextToken

    /** Text of the most recently scanned comment that carried a value. */
    private var doc : String = ""

    /** Stack of token codes that close the currently open regions. */
    var sepRegions : List[Int] = Nil

    private val current = new TokenHolder
    private val    next = new TokenHolder
    implicit def in : ScannerInput

    /** Code of the last token that was not whitespace or a comment. */
    var lastCode = EMPTY
    next.code = EMPTY
    current.code = EMPTY

    /** True while characters remain, or while a buffered look-ahead token
     *  other than EOF is pending.
     */
    def hasNext = in.hasNext || !(next.code == EMPTY || next.code == EOF)

    def
flush : ScannerInput = {
      // Gives up the buffered tokens: rewinds the input to the start of the
      // current token, marks both token holders as EMPTY and returns the
      // underlying input. Requires that a current token exists.
      assert(current.code != EMPTY)
      in.seek(unadjust(current.offset))
      current.code = EMPTY
      next.code = EMPTY
      in
    }

    /** Repositions the input at `offset` (mapped through `unadjust`),
     *  records `lastCode` as the previous token code and scans a token.
     *  Requires that no current token is buffered.
     */
    def seek(offset : Int, lastCode : Int) = {
      assert(current.code == EMPTY)
      in.seek(unadjust(offset))
      this.lastCode = lastCode
      nextToken
    }

    /** Continues scanning from the input's present position, with
     *  `lastCode` as the previous token code.
     *  Requires that no current token is buffered.
     */
    def resume(lastCode : Int) = {
      assert(current.code == EMPTY)
      this.lastCode = lastCode
      nextToken
    }

    /** read next token and return last position
     */
    def skipToken: Int = {
      val p = current.offset
      nextToken
      // XXX: account for off by one error //???
      p
    }

    /** Offset of the current token; requires that one exists. */
    def currentPos = {
      assert(current.code != EMPTY)
      current.offset
    }

    /** Scans into `next` until it holds a token that is not a newline,
     *  whitespace or a comment. The value of a skipped comment (or "" when
     *  it has none) is remembered as the pending doc text.
     *
     *  @return true if a NEWLINE or NEWLINES token was skipped
     */
    def fillNext : Boolean = {
      assert(next.code == EMPTY)
      var hasNewline = false
      do {
        fill(next)
      } while (next.code match {
        case NEWLINE|NEWLINES|WHITESPACE|COMMENT =>
          // xmlOk is expected to be set after every skipped token except
          // a comment.
          assert((next.code != COMMENT) == (xmlOk))
          hasNewline = hasNewline || next.code == NEWLINE || next.code == NEWLINES
          if (next.code == COMMENT)
            doc = next.value.asInstanceOf[Option[String]].getOrElse("")
          true
        case _ => false
      })
      hasNewline
    }

    /** Returns the pending doc text and resets it to "". */
    def flushDoc = {
      val ret = doc
      doc = ""
      ret
    }

    /** Advances `current` to the next token the parser should see.
     *
     *  In order: updates `lastCode`, maintains the `sepRegions` stack from
     *  the token being left, takes the buffered look-ahead token (or scans
     *  a fresh one), and then post-processes the new token:
     *   - `case` followed by `object`/`class` becomes CASEOBJECT/CASECLASS;
     *   - a SEMI directly followed by ELSE is dropped;
     *   - whitespace and comments are skipped (comment text is kept as doc);
     *   - a newline token is kept only when the innermost open region is
     *     absent or closed by RBRACE and
     *     `inLastOfStat(lastCode) && inFirstOfStat(next.code)` holds;
     *     otherwise it is skipped.
     *  Does nothing once the current token is EOF.
     */
    def nextToken : Unit = {
      if (current.code == EOF) return // nothing more.

      // Whitespace, comments and the EMPTY placeholder never become the
      // "last" token code.
      lastCode = current.code match {
        case WHITESPACE|EMPTY|COMMENT => lastCode
        case code => code
      }
      // push on braces
      val pushOn = current.code match {
        case LBRACE => RBRACE
        case LPAREN => RPAREN
        case LBRACKET => RBRACKET
        case CASE => ARROW
        case RBRACE =>
          // Discard unclosed inner regions, then the brace region itself.
          while (!sepRegions.isEmpty && sepRegions.head != RBRACE)
            sepRegions = sepRegions.tail
          if (!sepRegions.isEmpty)
            sepRegions = sepRegions.tail
          EMPTY
        case ARROW if (!sepRegions.isEmpty && sepRegions.head == ARROW) =>
          sepRegions = sepRegions.tail
          EMPTY
        case code @ (RPAREN|RBRACKET) =>
          if (!sepRegions.isEmpty && sepRegions.head == code)
            sepRegions = sepRegions.tail
          EMPTY
        case _ => EMPTY
      }
      if (pushOn != EMPTY) sepRegions = pushOn :: sepRegions

      // Promote the look-ahead token if there is one, otherwise scan.
      if (next.code != EMPTY) {
        current.copy(next)
        next.code = EMPTY
      } else fill(current)

      // Drops the current token in favour of the buffered look-ahead token.
      def currentIsNext : Unit = {
        assert(next.code != EMPTY)
        nextToken
      }

      current.code match {
        case CASE|SEMI =>
          fillNext
          (current.code, next.code) match {
            case (CASE, OBJECT) =>
              current.code = CASEOBJECT
              next.code = EMPTY
            case (CASE, CLASS) =>
              current.code = CASECLASS
              next.code = EMPTY
            case (SEMI, ELSE) => currentIsNext
            case _ =>
          }
        case WHITESPACE|COMMENT =>
          if (current.code == COMMENT)
            doc = current.value.asInstanceOf[Option[String]].getOrElse("")
          nextToken
        case NEWLINE | NEWLINES =>
          assert(xmlOk)
          val headIsRBRACE = if (sepRegions.isEmpty) true else sepRegions.head == RBRACE
          // Always buffer the following significant token; its newline flag
          // is currently unused (it was once meant to turn the current
          // token into NEWLINES).
          fillNext
          // The alternative `|| next.code == EOF` is deliberately left out
          // of this condition, otherwise line continuation in the
          // interpreter will not work.
          // XXX: not sure how the IDE reacts with this left out.
          val keepNewline =
            headIsRBRACE && (inLastOfStat(lastCode) && inFirstOfStat(next.code))
          if (!keepNewline) currentIsNext
        case _ =>
      }
    }

    /** Code of the current token; requires that one exists. */
    def token = {
      assert(current.code != EMPTY)
      current.code
    }

    /** Code of the token after the current one, scanning it if needed. */
    def nextTokenCode = {
      if (next.code == EMPTY) fillNext
      next.code
    }

    /** Value of the current token as a `Name`; fails if it has no value. */
    def name = current.value.get.asInstanceOf[Name]

    /** Value of the current token as a `Char`; fails if it has no value. */
    def charVal = current.value.get.asInstanceOf[Char]

    /** Integer value of the current literal. The radix is the token's
     *  value when it has one, and 10 otherwise.
     */
    def intVal(negated : Boolean) : Long = {
      val base = current.value.asInstanceOf[Option[Int]].getOrElse(10)
      intVal(current.offset, current.code, current.nLit(this), negated, base)
    }
    def intVal : Long = intVal(false)

    /** Floating point value of the current literal. */
    def floatVal(negated: Boolean): Double = {
      floatVal(current.offset, current.code, current.nLit(this), negated)
    }
    def floatVal : Double = floatVal(false)

    /** Value of the current token as a `String`; fails if it has no value. */
    def stringVal = current.value.get.asInstanceOf[String]
  }

  /** Mutable record describing one token: where it starts, how long it is,
   *  its token code and an optional attached value.
   */
  class TokenHolder {
    var offset : Int = 0
    var code : Int = 0
    var length : Int = 0
    var value : Option[Any] = None

    /** Overwrites all fields with those of `from`. */
    def copy(from : TokenHolder) = {
      this.offset = from.offset
      this.code = from.code
      this.length = from.length
      this.value = from.value
    }

    /** Sets position and code, clearing any value. */
    def set(offset : Int, length : Int, code : Int) = {
      this.offset = offset
      this.length = length
      this.code = code
      this.value = None
    }

    /** Sets position and code, attaching `value`. */
    def set(offset : Int, length : Int, code : Int, value : Any) = {
      this.offset = offset
      this.length = length
      this.code = code
      this.value = Some(value)
    }

    /** Source text of this token, read back from the scanner's input
     *  between the unadjusted start and end offsets.
     */
    def nLit(implicit in : BaseScanner) =
      (in.in.textFor(in.unadjust(offset), in.unadjust(offset + length)))
  }

  /** Core scanner: turns characters from `in` into tokens via `fill`. */
  trait BaseScanner {
    implicit def in : CoreScannerInput
    ScannerConfiguration.hashCode // forces initialization
    import ScannerConfiguration._

    /** Whether an XML literal may start at the current position. */
    var xmlOk = true

    /** Iterator over the raw tokens produced by `fill`, each reported as
     *  the holder's (offset, length, code) after the call.
     */
    def iterator = new Iterator[(Int,Int,Int)] { // offset,length,code
      val current = new TokenHolder
      def hasNext = in.hasNext
      def next = {
        fill(current)
        (current.offset, current.length, current.code)
      }
    }

    // IDE hooks
    def adjust(offset : Int) = offset
    def unadjust(offset : Int) = offset
    def identifier(name : Name) = name

    protected def fill(current : TokenHolder) : Unit = {
      if (!in.hasNext) {
        current.offset = adjust(in.offset)
        current.code = EOF
return } val oldXmlOk = xmlOk xmlOk = false val offset = in.offset // call "after" next def escapeCode(offset : Int) : Char = in.next match { case c if simpleEscape.isDefinedAt(c) => simpleEscape(c) case c if isDigit(c) => val length = in.scratch.length try { assert(isDigit(c)) in.scratch append c while (isDigit(in.head)) in.scratch append in.next val n = Integer.parseInt(in.scratch.drop(length).mkString, 8) if (n > 0377) { in.error(offset, "malformed octal character code"); 0.toChar } else n.toChar } catch { case ex : Exception => in.error(offset, "malformed octal character code"); 0.toChar } finally { in.scratch.setLength(length) } case c => in.error(offset, "unrecognized escape code \'" + c + "\'"); c } def getIdentRest : Unit = in.readIf{ case '_' => in.scratch append '_' val c = in.head if (isOperatorPart(c)) getOperatorRest else getIdentRest case c if isIdentifierPart(c) => in.scratch append c; getIdentRest } val next = in.next // called after everything is read. def length = in.offset - offset def value(code : Int, value : Any) : Int = { current.value = Some(value) code } def doOperator(c : Char) = { in.scratch.setLength(0) in.scratch append(c) getOperatorRest val name : Name = global.newTermName(in.scratch.toString) value(name2token(name), (name)) } current.offset = adjust(offset) current.value = None current.code = next match { case ';' => (SEMI) case ',' => (COMMA) case '(' => xmlOk = true; (LPAREN) case ')' => (RPAREN) case '{' => xmlOk = true; (LBRACE) case '}' => (RBRACE) case '[' => (LBRACKET) case ']' => (RBRACKET) case SU => EOF case '\u21D2' => (ARROW) case '<' => if (oldXmlOk && (in.head match { case ('!' 
| '?') => true case c if xml.Parsing.isNameStart(c) => true case _ => false })) { in.next; XMLSTART } else doOperator('<') case ' ' | '\t' => in.readWhile(isSpace); xmlOk = true; (WHITESPACE) case '/' => if (in.readIfStartsWith('/')) { while (in.hasNext && !isNewLine(in.head)) in.next (COMMENT) } else if (in.readIfStartsWith('*')) { val emptyOrDoc = in.readIfStartsWith('*') val empty = emptyOrDoc && in.readIfStartsWith('/') val isDoc = emptyOrDoc && !empty if (isDoc) in.scratch setLength 0 var count = 0 if (!empty) while (count != -1) in.next match { case SU => in.incompleteError(offset, "unterminated comment"); count = -1 case '*' if in.readIfStartsWith('/') => count -= 1 case '/' if in.readIfStartsWith('*') => count += 1 case c => if (isDoc) in.scratch append c } if (!isDoc) (COMMENT) else value(COMMENT, in.scratch.toString) } else doOperator('/') case c @ ('~' | '!' | '@' | '#' | '%' | '^' | '*' | '+' | '-' | /* '<' | | '/' */ '>' | '?' | ':' | '=' | '&' | '|' | '\\') => doOperator(c) case c @ ('A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J' | 'K' | 'L' | 'M' | 'N' | 'O' | 'P' | 'Q' | 'R' | 'S' | 'T' | 'U' | 'V' | 'W' | 'X' | 'Y' | 'Z' | '$' | '_' | 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' | 'k' | 'l' | 'm' | 'n' | 'o' | 'p' | 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w' | 'x' | 'y' | 'z') => in.scratch.setLength(0) in.scratch.append(c : Char) getIdentRest val name = global.newTermName(in.scratch.toString) in.scratch.setLength(0) val code = name2token(name) if (code == IDENTIFIER) value(code, identifier(name)) else value(code, (name)) case '0' => if (in.head match { case 'x' | 'X' => true case _ => false }) { in.next; value(getNumber(offset, 16, "0x"), 16) } else value(getNumber(offset, 8, "0"), 8) case '1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9' => getNumber(offset, 10, "") case '.' 
=> val frac = getFraction(false) val code = (frac getOrElse DOT) code case '\'' => def endQ(cvalue : Char) : Int = { if (!in.readIfStartsWith('\'')) { in.error(offset, "missing terminating quote") } value(CHARLIT, cvalue) } in.next match { case CR|LF|FF|SU|EOF if !in.isUnicode => in.error(offset, "unterminated character literal") value(CHARLIT, 0.toChar) case '\'' => in.error(offset, "empty character literal") value(CHARLIT, 0.toChar) case '\\' => endQ(escapeCode(offset)) case c if (Character.isUnicodeIdentifierStart(c)) && in.head != '\'' => in.scratch.setLength(0) in.scratch append c getIdentRest if (in.readIfStartsWith('\'')) in.error(offset, "unexpected quote after symbol") value(SYMBOLLIT, in.scratch.toString) case c if isSpecial(c) && in.head != '\'' => in.scratch.setLength(0) in.scratch append(c) getOperatorRest if (in.readIfStartsWith('\'')) in.error(offset, "unexpected quote after symbol") value(SYMBOLLIT, in.scratch.toString) case c => endQ(c) } case '\"' => if (in.readIfStartsWith('\"')) { if (in.readIfStartsWith('\"')) { // multiline in.scratch setLength 0 while (in.next match { case SU if !in.isUnicode => in.incompleteError(offset, "unterminated multi-line string"); false case '\"' if in.readIfStartsWith('\"') => if (in.readIfStartsWith('\"')) false else { in.scratch append "\"\""; true } case '\\' if false => // XXX: not for multi-line strings? in.scratch append escapeCode(in.offset - 1) true case c => in.scratch append c; true }) {} val ret = value(STRINGLIT, in.scratch.toString) in.scratch setLength 0 ret } else value(STRINGLIT, "") } else { in.scratch setLength 0 while (in.next match { case '\"' => false case CR|LF|FF|SU if !in.isUnicode => in.error(offset, "unterminated string"); false case '\\' =>
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?