markupparser.scala
来自「JAVA 语言的函数式编程扩展」· SCALA 代码 · 共 1,216 行 · 第 1/3 页
SCALA
1,216 行
/*                     __                                               *\
**     ________ ___   / /  ___     Scala API                            **
**    / __/ __// _ | / /  / _ |    (c) 2003-2007, LAMP/EPFL             **
**  __\ \/ /__/ __ |/ /__/ __ |    http://scala-lang.org/               **
** /____/\___/_/ |_/____/_/ | |                                         **
**                          |/                                          **
\*                                                                      */

// $Id: MarkupParser.scala 14560 2008-04-09 09:57:07Z emir $

package scala.xml.parsing

import scala.io.Source
import scala.xml.dtd._

/**
 * An XML parser.
 *
 * Parses XML 1.0, invokes callback methods of a MarkupHandler
 * and returns whatever the markup handler returns. Use
 * <code>ConstructingParser</code> if you just want to parse XML to
 * construct instances of <code>scala.xml.Node</code>.
 *
 * While XML elements are returned, DTD declarations - if handled - are
 * collected using side-effects.
 *
 * @author  Burak Emir
 * @version 1.0
 */
trait MarkupParser extends AnyRef with TokenTests { self: MarkupParser with MarkupHandler =>

  /** the source this parser starts reading from (initial value of <code>curInput</code>) */
  val input: Source

  /** if true, does not remove surplus whitespace */
  val preserveWS: Boolean

  /** supplies a source for the given system literal
   *  (NOTE(review): presumably used for external entities / the external
   *  DTD subset - the callers are outside this part of the file)
   */
  def externalSource(systemLiteral: String): Source

  //
  // variables, values
  //

  /** the source characters are currently read from, see <code>nextch</code> */
  var curInput: Source = input

  /** the handler of the markup, returns this */
  private val handle: MarkupHandler = this

  /** stack of inputs */
  var inpStack: List[Source] = Nil

  /** holds the position in the source file */
  var pos: Int = _

  /* used when reading external subset */
  var extIndex = -1

  /** holds temporary values of pos */
  var tmppos: Int = _

  /** holds the next character */
  var ch: Char = _

  /** character buffer, for names */
  protected val cbuf = new StringBuilder()

  /** reset to null at the start of <code>document()</code> */
  var dtd: DTD = null

  /** the document under construction, created by <code>document()</code> */
  protected var doc: Document = null

  /** set to true by <code>nextch</code> once no further character is available */
  var eof: Boolean = false

  //
  // methods
  //

  /** <? prolog ::= xml S ... ?>
   *
   *  Expects the current character to be the 'x' of "xml", i.e. the
   *  leading "<?" has already been consumed by the caller. Consumes
   *  everything up to and including the closing "?>".
   *
   *  @return the pseudo-attributes of the declaration
   */
  def xmlProcInstr(): MetaData = {
    xToken("xml")
    xSpace
    val (md, scp) = xAttributes(TopScope)
    // a namespace declaration would have produced a scope other than TopScope
    if (scp != TopScope)
      reportSyntaxError("no xmlns definitions here, please.")
    xToken('?')
    xToken('>')
    md
  }

  /** <? prolog ::= xml S?
   *  // this is a bit more lenient than necessary...
*/
  // Parses the XML declaration and returns (version, encoding, standalone);
  // a component is None when its pseudo-attribute is absent or was rejected
  // with a syntax error.
  def prolog(): Tuple3[Option[String], Option[String], Option[Boolean]] = {

    //Console.println("(DEBUG) prolog")
    var n = 0 // number of pseudo-attributes that were recognised and accepted
    var info_ver: Option[String] = None
    var info_enc: Option[String] = None
    var info_stdl: Option[Boolean] = None

    val m = xmlProcInstr()

    xSpaceOpt

    m("version") match {
      case null        => ()
      case Text("1.0") => info_ver = Some("1.0"); n += 1
      case _           => reportSyntaxError("cannot deal with versions != 1.0")
    }

    m("encoding") match {
      case null => ()
      case Text(enc) =>
        if (!isValidIANAEncoding(enc))
          reportSyntaxError("\"" + enc + "\" is not a valid encoding")
        else {
          info_enc = Some(enc)
          n += 1
        }
    }

    m("standalone") match {
      case null        => ()
      case Text("yes") => info_stdl = Some(true);  n += 1
      case Text("no")  => info_stdl = Some(false); n += 1
      case _           => reportSyntaxError("either 'yes' or 'no' expected")
    }

    // every pseudo-attribute must have been accounted for above
    if (m.length - n != 0) {
      reportSyntaxError("VersionInfo EncodingDecl? SDDecl? or '?>' expected!")
    }
    //Console.println("[MarkupParser::prolog] finished parsing prolog!");
    Tuple3(info_ver, info_enc, info_stdl)
  }

  /** prolog, but without standalone
   *
   *  @return (version, encoding); a component is None when its
   *          pseudo-attribute is absent or was rejected with a syntax error
   */
  def textDecl(): Tuple2[Option[String],Option[String]] = {

    var info_ver: Option[String] = None
    var info_enc: Option[String] = None

    val m = xmlProcInstr()
    var n = 0 // number of pseudo-attributes that were recognised and accepted

    m("version") match {
      case null        => ()
      case Text("1.0") => info_ver = Some("1.0"); n += 1
      case _           => reportSyntaxError("cannot deal with versions != 1.0")
    }

    m("encoding") match {
      case null => ()
      case Text(enc) =>
        if (!isValidIANAEncoding(enc))
          reportSyntaxError("\"" + enc + "\" is not a valid encoding")
        else {
          info_enc = Some(enc)
          n += 1
        }
    }

    // every pseudo-attribute must have been accounted for above
    if (m.length - n != 0) {
      reportSyntaxError("VersionInfo EncodingDecl? or '?>' expected!")
    }
    //Console.println("[MarkupParser::textDecl] finished parsing textdecl");
    Tuple2(info_ver, info_enc)
  }

  /**
   *[22]        prolog      ::= XMLDecl? Misc* (doctypedecl Misc*)?
   *[23]        XMLDecl     ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
   *[24]        VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
   *[25]        Eq          ::= S? '=' S?
*[26]        VersionNum  ::= '1.0'
   *[27]        Misc        ::= Comment | PI | S
   *
   *  Parses a whole document. Resets <code>doc</code> and <code>dtd</code>,
   *  parses an optional XML declaration followed by the content, and checks
   *  that the top level consists of exactly one element (plus comments,
   *  processing instructions and whitespace-only text).
   *
   *  @return the parsed document, or null if the input does not start with '<'
   */
  def document(): Document = {

    //Console.println("(DEBUG) document")
    doc = new Document()
    this.dtd = null

    if ('<' != ch) {
      reportSyntaxError("< expected")
      return null
    }

    nextch // is prolog ?
    val children: NodeSeq =
      if ('?' == ch) {
        //Console.println("[MarkupParser::document] starts with xml declaration");
        nextch
        val info_prolog = prolog()
        doc.version    = info_prolog._1
        doc.encoding   = info_prolog._2
        doc.standAlone = info_prolog._3

        content(TopScope) // DTD handled as side effect
      } else {
        //Console.println("[MarkupParser::document] does not start with xml declaration");
        //
        // the '<' consumed above already belongs to the first markup item,
        // which content1 parses before the remaining content is appended
        val ts = new NodeBuffer()
        content1(TopScope, ts) // DTD handled as side effect
        ts &+ content(TopScope)
        NodeSeq.fromSeq(ts)
      }
    //Console.println("[MarkupParser::document] children now: "+children.toList);

    // An element counts 1 and non-blank text counts 2, so the total is
    // exactly 1 only for a single element without stray top-level text.
    var elemCount = 0
    var theNode: Node = null
    for (c <- children) c match {
      case _:ProcInstr => ()
      case _:Comment   => ()
      case _:EntityRef => // todo: fix entities, shouldn't be "special"
        reportSyntaxError("no entity references allowed here")
      case s:SpecialNode =>
        if (s.toString().trim().length > 0) //non-empty text nodes not allowed
          elemCount = elemCount + 2
      case m:Node =>
        elemCount = elemCount + 1
        theNode = m
    }
    if (1 != elemCount) {
      reportSyntaxError("document must contain exactly one element")
    }

    doc.children = children
    doc.docElem = theNode
    doc
  }

  /** append Unicode character to name buffer*/
  protected def putChar(c: Char) = cbuf.append(c)

  //var xEmbeddedBlock = false;

  /** Assigns the next character to <code>ch</code> and advances in the input.
   *
   *  When the current input is exhausted, either pops back to an enclosing
   *  input (see <code>inpStack</code>) or, if there is none to return to,
   *  sets <code>eof</code> and <code>ch</code> to the NUL character.
   */
  def nextch {
    if (curInput.hasNext) {
      ch = curInput.next
      pos = curInput.pos
    } else {
      val ilen = inpStack.length
      //Console.println("  ilen = "+ilen+ " extIndex = "+extIndex);
      if ((ilen != extIndex) && (ilen > 0)) {
        /** for external source, inpStack == Nil ! need notify of eof! */
        pop()
      } else {
        eof = true
        ch = 0.asInstanceOf[Char]
      }
    }
  }

  //final val enableEmbeddedExpressions: Boolean = false;

  /** munch expected XML token, report syntax error for unexpected
   *
   *  Aborts via <code>error("FATAL")</code> after reporting when the
   *  current character is not <code>that</code>.
   */
  def xToken(that: Char) {
    if (ch == that)
      nextch
    else {
      reportSyntaxError("'" + that + "' expected instead of '" + ch + "'")
      error("FATAL")
    }
  }

  /** munch a sequence of expected characters, one <code>xToken</code> each */
  def xToken(that: Seq[Char]): Unit = {
    val it = that.elements
    while (it.hasNext)
      xToken(it.next)
  }

  /** parse attribute and create namespace scope, metadata
   *  [41] Attributes    ::= { S Name Eq AttValue }
   *
   *  Attributes named <code>xmlns</code> or <code>xmlns:prefix</code> extend
   *  the namespace scope instead of being added to the attribute list.
   *
   *  @param pscope the namespace scope of the enclosing element
   *  @return       the parsed attributes and the possibly extended scope
   */
  def xAttributes(pscope:NamespaceBinding): (MetaData,NamespaceBinding) = {
    var scope: NamespaceBinding = pscope
    var aMap: MetaData = Null
    while (isNameStart(ch)) {
      val qname = xName
      xEQ
      val value = xAttributeValue()

      Utility.prefix(qname) match {
        case Some("xmlns") =>
          val prefix = qname.substring(6 /*xmlns:*/ , qname.length)
          scope = new NamespaceBinding(prefix, value, scope)

        case Some(prefix) =>
          val key = qname.substring(prefix.length+1, qname.length)
          aMap = new PrefixedAttribute(prefix, key, Text(value), aMap)

        case _ =>
          if (qname == "xmlns")
            scope = new NamespaceBinding(null, value, scope)
          else
            aMap = new UnprefixedAttribute(qname, Text(value), aMap)
      }

      // whitespace is required before a further attribute, but not before
      // the end of the tag or declaration
      if ((ch != '/') && (ch != '>') && ('?' != ch))
        xSpace
    }

    if (!aMap.wellformed(scope))
      reportSyntaxError( "double attribute")

    (aMap,scope)
  }

  /** attribute value, terminated by either ' or ". value may not contain <.
   *       AttValue     ::= `'` { _ } `'`
   *                      | `"` { _ } `"`
   *
   *  The current character is taken as the delimiter. Reading stops at the
   *  matching delimiter or, with a syntax error, at the end of the input.
   */
  def xAttributeValue(): String = {
    val endch = ch
    nextch
    // stop at end of input as well: an unterminated value must not spin forever
    while (ch != endch && !eof) {
      if ('<' == ch)
        reportSyntaxError( "'<' not allowed in attrib value" )
      putChar(ch)
      nextch
    }
    if (eof)
      reportSyntaxError("unterminated attribute value, closing " + endch + " expected")
    else
      nextch // skip the closing delimiter
    val str = cbuf.toString()
    cbuf.length = 0

    // well-formedness constraint
    normalizeAttributeValue(str)
  }

  /** entity value, terminated by either ' or ". value may not contain <.
*       AttValue     ::= `'` { _ } `'`
   *                      | `"` { _ } `"`
   *
   *  The current character is taken as the delimiter. Reading stops at the
   *  matching delimiter or, with a syntax error, at the end of the input.
   *  Unlike <code>xAttributeValue</code> the result is returned verbatim.
   */
  def xEntityValue(): String = {
    val endch = ch
    nextch
    // stop at end of input as well: an unterminated value must not spin forever
    while (ch != endch && !eof) {
      putChar(ch)
      nextch
    }
    if (eof)
      reportSyntaxError("unterminated entity value, closing " + endch + " expected")
    else
      nextch // skip the closing delimiter
    val str = cbuf.toString()
    cbuf.length = 0
    str
  }

  /** parse a start or empty tag.
   *  [40] STag         ::= '<' Name { S Attribute } [S]
   *  [44] EmptyElemTag ::= '<' Name { S Attribute } [S]
   *
   *  Starts at the element name; does not consume the closing '>' or '/>'.
   *
   *  @param pscope the namespace scope of the enclosing element
   *  @return       qualified name, attributes and namespace scope of the tag
   */
  protected def xTag(pscope:NamespaceBinding): Tuple3[String, MetaData, NamespaceBinding] = {
    val qname = xName

    xSpaceOpt
    val (aMap: MetaData, scope: NamespaceBinding) = {
      if (isNameStart(ch))
        xAttributes(pscope)
      else
        (Null, pscope)
    }
    (qname, aMap, scope)
  }

  /** [42]  '<' xmlEndTag ::=  '<' '/' Name S? '>'
   *
   *  Starts at the '/'; reports a syntax error if the name read differs
   *  from <code>n</code>.
   *
   *  @param n the qualified name of the element being closed
   */
  def xEndTag(n: String) = {
    xToken('/')
    val m = xName
    if (n != m)
      reportSyntaxError("expected closing tag of " + n/* +", not "+m*/)
    xSpaceOpt
    xToken('>')
  }

  /** '<! CharData ::= [CDATA[ ( {char} - {char}"]]>"{char} ) ']]>'
   *
   * see [15]
   *
   *  Starts at the '[' following "<!" and consumes the section up to and
   *  including the terminating "]]>". Reports a syntax error and returns
   *  the text read so far if the input ends before the terminator.
   */
  def xCharData: NodeSeq = {
    xToken("[CDATA[")
    val pos1 = pos
    val sb: StringBuilder = new StringBuilder()
    // number of consecutive ']' appended to sb immediately before ch; needed
    // so that "]]]>" ends the section with content "]" instead of being missed
    var brackets = 0
    var closed = false
    while (!closed && !eof) {
      if (ch == '>' && brackets >= 2) {
        sb.length = sb.length - 2 // drop the "]]" of the terminator
        closed = true
      } else {
        brackets = if (ch == ']') brackets + 1 else 0
        sb.append(ch)
      }
      nextch
    }
    if (!closed)
      reportSyntaxError("unterminated CDATA section, ']]>' expected")
    handle.text(pos1, sb.toString())
  }

  /** CharRef ::= "&#" '0'..'9' {'0'..'9'} ";"
   *            | "&#x" '0'..'9'|'A'..'F'|'a'..'f' { hexdigit } ";"
   *
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?