markupparser.scala
来自「JAVA 语言的函数式编程扩展」· SCALA 代码 · 共 1,216 行 · 第 1/3 页
SCALA
1,216 行
* see [66] */
  def xCharRef(ch: () => Char, nextch: () => Unit): String = {
    // Decoding is delegated to Utility.parseCharRef, with reportSyntaxError
    // passed as the error callback. The block below is an inline decoder that
    // is kept commented out.
    Utility.parseCharRef(ch, nextch, reportSyntaxError _)
    /*
    val hex = (ch() == 'x') && { nextch(); true };
    val base = if (hex) 16 else 10;
    var i = 0;
    while (ch() != ';') {
      ch() match {
        case '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' =>
          i = i * base + Character.digit( ch(), base );
        case 'a' | 'b' | 'c' | 'd' | 'e' | 'f'
           | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' =>
          if (! hex)
            reportSyntaxError("hex char not allowed in decimal char ref\n"
                         +"Did you mean to write &#x ?");
          else
            i = i * base + Character.digit(ch(), base);
        case _ =>
          reportSyntaxError("character '" + ch() + " not allowed in char ref\n");
      }
      nextch();
    }
    new String(Array(i.asInstanceOf[char]))
    */
  }

  /** Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
   *
   *  Consumes the two '-' characters that open the comment, accumulates the
   *  comment text, and hands it to `handle.comment` once the closing
   *  '-->' has been read.
   *
   *  see [15]
   *
   *  @return the node sequence produced by `handle.comment`
   *  @throws FatalError if the input ends before the comment is closed
   */
  def xComment: NodeSeq = {
    val sb: StringBuilder = new StringBuilder()
    xToken('-')
    xToken('-')
    // Loop on !eof (as xSpaceOpt / xText / content do) so that a truncated
    // document cannot keep this loop appending to sb forever.
    while (!eof) {
      // The first '-' is appended speculatively; if a second '-' follows it
      // was the start of the terminator and is removed again below.
      if (ch == '-' && { sb.append(ch); nextch; ch == '-' }) {
        sb.length = sb.length - 1
        nextch
        xToken('>')
        return handle.comment(pos, sb.toString())
      } else sb.append(ch)
      nextch
    }
    throw FatalError("unterminated comment: reached end of input before '-->'")
  }

  /* todo: move this into the NodeBuilder class */
  /** Appends character data to `ts`.
   *
   *  When `preserveWS` is set the text is added as a single text node;
   *  otherwise it is first run through `TextBuffer.fromString(txt).toText`
   *  and each resulting piece is added separately.
   *
   *  @param pos position passed on to `handle.text`
   *  @param ts  buffer that receives the text nodes
   *  @param txt raw character data
   */
  def appendText(pos: Int, ts: NodeBuffer, txt: String): Unit = {
    if (preserveWS)
      ts &+ handle.text(pos, txt);
    else
      for (t <- TextBuffer.fromString(txt).toText) {
        ts &+ handle.text(pos, t.text);
      }
  }

  /** '<' content1 ::= ...
   *
   *  Dispatches on the character following an already consumed '<':
   *  '!' introduces CDATA ('['), a doctype declaration ('D') or a comment;
   *  '?' introduces a processing instruction; anything else is parsed as a
   *  child element. Results are appended to `ts` (parseDTD appends nothing).
   *
   *  @param pscope namespace bindings in effect for a child element
   *  @param ts     buffer that receives the parsed nodes
   */
  def content1(pscope: NamespaceBinding, ts: NodeBuffer): Unit =
    ch match {
      case '!' =>
        nextch
        if ('[' == ch)                 // CDATA
          ts &+ xCharData
        else if ('D' == ch) // doctypedecl, parse DTD // @todo REMOVE HACK
          parseDTD()
        else // comment
          ts &+ xComment
      case '?' =>                      // PI
        nextch
        ts &+ xProcInstr
      case _   =>
        ts &+ element1(pscope)         // child
    }

  /** content1 ::= '<' content1 | '&' charref ...
   *
   *  Parses element content until end of input or until the "</" of an end
   *  tag has been seen (the '<' is consumed, the '/' is left in `ch`).
   *
   *  @param pscope namespace bindings in effect for child elements
   *  @return the parsed children as a NodeSeq
   */
  def content(pscope: NamespaceBinding): NodeSeq = {
    val ts = new NodeBuffer
    var exit = eof
    while (! exit) {
      //Console.println("in content, ch = '"+ch+"' line="+scala.io.Position.line(pos));
      /*      if( xEmbeddedBlock ) {
       ts.append( xEmbeddedExpr );
       } else {*/
      tmppos = pos;
      exit = eof;
      if(!eof)
        ch match {
          case '<' => // another tag
            //Console.println("before ch = '"+ch+"' line="+scala.io.Position.line(pos)+" pos="+pos);
            nextch;
            //Console.println("after ch = '"+ch+"' line="+scala.io.Position.line(pos)+" pos="+pos);
            if('/' ==ch)
              exit = true;                    // end tag
            else
              content1(pscope, ts)
          //case '{' =>
          /*  if( xCheckEmbeddedBlock ) {
              ts.appendAll(xEmbeddedExpr);
            } else {*/
          //    val str = new StringBuilder("{");
          //    str.append(xText);
          //    appendText(tmppos, ts, str.toString());
          /*}*/
          // postcond: xEmbeddedBlock == false!
          case '&' => // EntityRef or CharRef
            nextch;
            ch match {
              case '#' => // CharacterRef
                nextch;
                val theChar = handle.text( tmppos,
                                          xCharRef ({ ()=> ch },{ () => nextch }) );
                xToken(';');
                ts &+ theChar ;
              case _ => // EntityRef
                val n = xName
                xToken(';')
                n match {
                  case "lt"    => ts &+ '<'
                  case "gt"    => ts &+ '>'
                  case "amp"   => ts &+ '&'
                  case "quot"  => ts &+ '"'
                  case _ =>
                    /*
                     ts + handle.entityRef( tmppos, n ) ;
                     */
                    // any other entity name is handed to push(n)
                    push(n)
                }
            }
          case _ => // text content
            //Console.println("text content?? pos = "+pos);
            appendText(tmppos, ts, xText);
          // here xEmbeddedBlock might be true
        }
      /*}*/
    }
    val list = ts.toList
    // 2do: optimize seq repr.
    new NodeSeq {
      val theSeq = list
    }
  } // content(NamespaceBinding)

  /** externalID ::= SYSTEM S syslit
   *                 PUBLIC S pubid S syslit
   *
   *  Must be entered with `ch` on the leading 'S' or 'P'. The match has no
   *  default case, so any other character raises a MatchError; parseDTD
   *  checks for 'S' / 'P' before calling.
   *
   *  @return a SystemID or PublicID built from the parsed literals
   */
  def externalID(): ExternalID = ch match {
    case 'S' =>
      nextch
      xToken("YSTEM")
      xSpace
      val sysID = systemLiteral()
      new SystemID(sysID)
    case 'P' =>
      nextch; xToken("UBLIC")
      xSpace
      val pubID = pubidLiteral()
      xSpace
      val sysID = systemLiteral()
      new PublicID(pubID, sysID)
  }

  /** parses document type declaration and assigns it to instance variable
   *  dtd.
   *
   *  <! parseDTD ::= DOCTYPE name ... >
   *
   *  Reports a syntax error if a DTD has already been set. When an external
   *  ID is present and `isValidating` holds, the external subset is pushed
   *  and parsed before the optional internal subset. Also stores the DTD on
   *  `doc` when `doc` is non-null and finally calls `handle.endDTD`.
   */
  def parseDTD(): Unit = { // dirty but fast
    //Console.println("(DEBUG) parseDTD");
    var extID: ExternalID = null
    if (this.dtd ne null)
      reportSyntaxError("unexpected character (DOCTYPE already defined");
    xToken("DOCTYPE")
    xSpace
    val n = xName
    xSpace
    //external ID
    if ('S' == ch || 'P' == ch) {
      extID = externalID()
      xSpaceOpt
    }

    /* parse external subset of DTD
     */

    if ((null != extID) && isValidating) {

      pushExternal(extID.systemId)
      //val extSubsetSrc = externalSource( extID.systemId );

      extIndex = inpStack.length
      /*
       .indexOf(':') != -1) { // assume URI
         Source.fromFile(new java.net.URI(extID.systemLiteral));
       } else {
         Source.fromFile(extID.systemLiteral);
       }
      */
      //Console.println("I'll print it now");
      //val old = curInput;
      //tmppos = curInput.pos;
      //val oldch = ch;
      //curInput = extSubsetSrc;
      //pos = 0;
      //nextch;

      extSubset()

      pop()

      extIndex = -1

      //curInput = old;
      //pos = curInput.pos;
      //ch = curInput.ch;
      //eof = false;
      //while(extSubsetSrc.hasNext)
      //Console.print(extSubsetSrc.next);

      //Console.println("returned from external, current ch = "+ch )
    }

    if ('[' == ch) { // internal subset
      nextch
      /* TODO */
      //Console.println("hello");
      intSubset()
      //while(']' != ch)
      //  nextch;
      // TODO: do the DTD parsing?? ?!?!?!?!!
      xToken(']')
      xSpaceOpt
    }
    xToken('>')
    this.dtd = new DTD {
      /*override var*/ externalID = extID
      /*override val */decls      = handle.decls.reverse
    }
    //this.dtd.initializeEntities();
    if (doc ne null)
      doc.dtd = this.dtd

    handle.endDTD(n)
  }

  /** Parses an element, including its opening '<'.
   *
   *  @param pscope namespace bindings inherited from the parent
   *  @return the result of element1 for the element
   */
  def element(pscope: NamespaceBinding): NodeSeq = {
    xToken('<')
    element1(pscope)
  }

  /** '<' element ::= xmlTag1 '>'  { xmlExpr | '{' simpleExpr '}' } ETag
   *               | xmlTag1 '/' '>'
   *
   *  Parses an element whose '<' has already been consumed. The qualified
   *  name is split into prefix and local part (prefix is null when
   *  Utility.prefix finds none); `handle.elemStart`, `handle.elem` and
   *  `handle.elemEnd` are called in that order.
   *
   *  @param pscope namespace bindings inherited from the parent
   *  @return the value returned by `handle.elem`
   */
  def element1(pscope: NamespaceBinding): NodeSeq = {
    val pos = this.pos
    val Tuple3(qname, aMap, scope) = xTag(pscope)
    val Tuple2(pre, local) = Utility.prefix(qname) match {
      case Some(p) => (p,qname.substring(p.length+1, qname.length))
      case _       => (null,qname)
    }
    val ts = {
      if (ch == '/') {  // empty element
        xToken('/')
        xToken('>')
        handle.elemStart(pos, pre, local, aMap, scope)
        NodeSeq.Empty
      }
      else {            // element with content
        xToken('>')
        handle.elemStart(pos, pre, local, aMap, scope)
        val tmp = content(scope)
        xEndTag(qname)
        tmp
      }
    }
    val res = handle.elem(pos, pre, local, aMap, scope, ts)
    handle.elemEnd(pos, pre, local)
    res
  }

  //def xEmbeddedExpr: MarkupType;

  /** Name ::= (Letter | '_' | ':') (NameChar)*
   *
   *  see  [5] of XML 1.0 specification
   *
   *  @return the interned name, or "" after reporting a syntax error when
   *          `ch` is not a name start character
   */
  def xName: String = {
    if (isNameStart(ch)) {
      while (isNameChar(ch)) {
        putChar(ch)
        nextch
      }
      val n = cbuf.toString().intern()
      cbuf.length = 0   // reset the shared character buffer
      n
    } else {
      reportSyntaxError("name expected")
      ""
    }
  }

  /** scan [S] '=' [S]*/
  def xEQ = { xSpaceOpt; xToken('='); xSpaceOpt }

  /** skip optional space S? */
  def xSpaceOpt = while (isSpace(ch) && !eof) { nextch; }

  /** scan [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
  def xSpace =
    if (isSpace(ch)) { nextch; xSpaceOpt }
    else reportSyntaxError("whitespace expected")

  /** '<?' ProcInstr ::= Name [S ({Char} - ({Char}'?>' {Char})]'?>'
   *
   *  Parses a processing instruction whose '<?' has already been consumed.
   *  If whitespace follows the target name, everything up to the closing
   *  '?>' becomes the instruction text; otherwise '?>' must follow
   *  directly and the text is empty.
   *
   *  see [16]
   *
   *  @return the node sequence produced by `handle.procInstr`
   *  @throws FatalError if the input ends inside the instruction text
   */
  def xProcInstr: NodeSeq = {
    val sb:StringBuilder = new StringBuilder()
    val n = xName
    if (isSpace(ch)) {
      xSpace
      // Loop on !eof so a truncated document cannot spin here forever.
      while (!eof) {
        // '?' is appended speculatively and dropped again when it turns out
        // to be the first character of the terminator.
        if (ch == '?' && { sb.append( ch ); nextch; ch == '>' }) {
          sb.length = sb.length - 1;
          nextch;
          return handle.procInstr(tmppos, n, sb.toString);
        } else
          sb.append(ch);
        nextch
      }
      throw FatalError("unterminated processing instruction: reached end of input before '?>'")
    };
    xToken('?')
    xToken('>')
    handle.procInstr(tmppos, n, sb.toString)
  }

  /** parse character data.
   *   precondition: xEmbeddedBlock == false (we are not in a scala block)
   *
   *  Always consumes at least the current character, then continues until
   *  end of input or until `ch` is '<' or '&'.
   *
   *  @return the collected text
   */
  def xText: String = {
    //if( xEmbeddedBlock ) throw FatalError("internal error: encountered embedded block"); // assert

    /*if( xCheckEmbeddedBlock )
      return ""
    else {*/
    //Console.println("in xText! ch = '"+ch+"'");
    var exit = false;
    while (! exit) {
      //Console.println("LOOP in xText! ch = '"+ch+"' + pos="+pos);
      putChar(ch);
      val opos = pos;
      nextch;

      //Console.println("STILL LOOP in xText! ch = '"+ch+"' + pos="+pos+" opos="+opos);

      exit = eof || /*{ nextch; xCheckEmbeddedBlock }||*/( ch == '<' ) || ( ch == '&' );
    }
    val str = cbuf.toString();
    cbuf.length = 0;
    str
    /*}*/
  }

  /** system literal, delimited by either ' or ".
   *
   *  [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
   *
   *  A syntax error is reported when `ch` is not a quote; scanning then
   *  still proceeds, using that character as the terminator. Scanning also
   *  stops at end of input so an unterminated literal cannot loop forever.
   *
   *  @return the text between the delimiters
   */
  def systemLiteral(): String = {
    val endch = ch
    if (ch != '\'' && ch != '"')
      reportSyntaxError("quote ' or \" expected");
    nextch
    while (ch != endch && !eof) {
      putChar(ch)
      nextch
    }
    nextch
    val str = cbuf.toString()
    cbuf.length = 0
    str
  }

  /* [12]       PubidLiteral ::=        '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" */
  def pubidLiteral(): String = {
    val endch = ch
    if (ch!='\'' && ch != '"')
      reportSyntaxError("quote ' or \" expected");
    nextch
    while (ch != endch) {
      putChar(ch)
      //Console.println("hello '"+ch+"'"+isPubIDChar(ch));
      if (!isPubIDChar(ch))
        reportSyntaxError("char '"+ch+"' is not allowed in public id");
      nextch
    }
    nextch
    val str = cbuf.toString()
    cbuf.length = 0
    str
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?