markupparser.scala

来自「JAVA 语言的函数式编程扩展」· SCALA 代码 · 共 1,216 行 · 第 1/3 页

SCALA
1,216
字号
/*                     __                                               *\
**     ________ ___   / /  ___     Scala API                            **
**    / __/ __// _ | / /  / _ |    (c) 2003-2007, LAMP/EPFL             **
**  __\ \/ /__/ __ |/ /__/ __ |    http://scala-lang.org/               **
** /____/\___/_/ |_/____/_/ | |                                         **
**                          |/                                          **
\*                                                                      */

// $Id: MarkupParser.scala 14560 2008-04-09 09:57:07Z emir $

package scala.xml.parsing

import scala.io.Source
import scala.xml.dtd._

/**
 * An XML parser.
 *
 * Parses XML 1.0, invokes callback methods of a MarkupHandler
 * and returns whatever the markup handler returns. Use
 * <code>ConstructingParser</code> if you just want to parse XML to
 * construct instances of <code>scala.xml.Node</code>.
 *
 * While XML elements are returned, DTD declarations - if handled - are
 * collected using side-effects.
 *
 * @author  Burak Emir
 * @version 1.0
 */
trait MarkupParser extends AnyRef with TokenTests { self:  MarkupParser with MarkupHandler =>

  /** the initial input; <code>curInput</code> starts out as this source */
  val input: Source

  /** if true, does not remove surplus whitespace */
  val preserveWS: Boolean

  /** supplies a source for the given system literal (implemented by the concrete parser) */
  def externalSource(systemLiteral: String): Source

  //
  // variables, values
  //

  /** the source that <code>nextch</code> currently reads from */
  var curInput: Source = input

  /** the handler of the markup, returns this */
  private val handle: MarkupHandler = this

  /** stack of inputs */
  var inpStack: List[Source] = Nil

  /** holds the position in the source file */
  var pos: Int = _

  /* used when reading external subset */
  var extIndex = -1

  /** holds temporary values of pos */
  var tmppos: Int = _

  /** holds the next character */
  var ch: Char = _

  /** character buffer, for names */
  protected val cbuf = new StringBuilder()

  /** the DTD collected so far; reset to null by <code>document()</code> */
  var dtd: DTD = null

  /** the document under construction; assigned by <code>document()</code> */
  protected var doc: Document = null

  /** set to true by <code>nextch</code> once no further input is available */
  var eof: Boolean = false

  //
  // methods
  //

  /** &lt;? prolog ::= xml S ... 
?&gt;
   *
   *  Consumes the token <code>xml</code>, mandatory whitespace, the
   *  attribute list and the closing <code>?&gt;</code>. A syntax error is
   *  reported if the attributes introduced a namespace binding.
   *
   *  @return the attributes of the declaration
   */
  def xmlProcInstr(): MetaData = {
    xToken("xml")
    xSpace
    val (md, scp) = xAttributes(TopScope)
    if (scp != TopScope)
      reportSyntaxError("no xmlns definitions here, please.");
    xToken('?')
    xToken('>')
    md
  }

  /** &lt;? prolog ::= xml S?
   *  // this is a bit more lenient than necessary...
   *
   *  Reads the XML declaration via <code>xmlProcInstr</code> and extracts
   *  the <code>version</code>, <code>encoding</code> and
   *  <code>standalone</code> attributes. Invalid values and attributes
   *  other than these three are reported as syntax errors.
   *
   *  @return (version, encoding, standalone); each is None when the
   *          attribute is absent or was rejected
   */
  def prolog(): Tuple3[Option[String], Option[String], Option[Boolean]] = {
    // n counts the attributes that were recognised and accepted
    var n = 0
    var info_ver: Option[String] = None
    var info_enc: Option[String] = None
    var info_stdl: Option[Boolean] = None

    val m = xmlProcInstr()

    xSpaceOpt

    m("version") match {
      case null  => ;
      case Text("1.0") => info_ver = Some("1.0"); n += 1
      case _     => reportSyntaxError("cannot deal with versions != 1.0")
    }

    m("encoding") match {
      case null => ;
      case Text(enc) =>
        if (!isValidIANAEncoding(enc))
          reportSyntaxError("\"" + enc + "\" is not a valid encoding")
        else {
          info_enc = Some(enc)
          n += 1
        }
    }

    m("standalone") match {
      case null => ;
      case Text("yes") => info_stdl = Some(true);  n += 1
      case Text("no")  => info_stdl = Some(false); n += 1
      case _     => reportSyntaxError("either 'yes' or 'no' expected")
    }

    // any attribute that was not accepted above is an error
    if (m.length - n != 0) {
      reportSyntaxError("VersionInfo EncodingDecl? SDDecl? or '?>' expected!");
    }
    Tuple3(info_ver, info_enc, info_stdl)
  }

  /** prolog, but without standalone
   *
   *  @return (version, encoding); each is None when the attribute is
   *          absent or was rejected
   */
  def textDecl(): Tuple2[Option[String],Option[String]] = {
    var info_ver: Option[String] = None
    var info_enc: Option[String] = None

    val m = xmlProcInstr()
    // n counts the attributes that were recognised and accepted
    var n = 0

    m("version") match {
      case null => ;
      case Text("1.0") => info_ver = Some("1.0"); n += 1
      case _     => reportSyntaxError("cannot deal with versions != 1.0")
    }

    m("encoding") match {
      case null => ;
      case Text(enc)  =>
        if (!isValidIANAEncoding(enc))
          reportSyntaxError("\"" + enc + "\" is not a valid encoding")
        else {
          info_enc = Some(enc)
          n += 1
        }
    }

    if (m.length - n != 0) {
      reportSyntaxError("VersionInfo EncodingDecl? or '?>' expected!");
    }
    Tuple2(info_ver, info_enc);
  }

  /**
   *[22]        prolog     ::=          XMLDecl? Misc* (doctypedecl Misc*)?
   *[23]        XMLDecl    ::=          '&lt;?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
   *[24]        VersionInfo        ::=          S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
   *[25]        Eq         ::=          S? '=' S?
   *[26]        VersionNum         ::=          '1.0'
   *[27]        Misc       ::=          Comment | PI | S
   *
   *  Parses a whole document. Resets <code>doc</code> and <code>dtd</code>,
   *  reads the optional XML declaration and then the content, and checks
   *  that the top level holds exactly one element.
   *
   *  @return the document, or <code>null</code> if the current character
   *          is not '&lt;'
   */
  def document(): Document = {
    doc = new Document()
    this.dtd = null

    if ('<' != ch) {
      reportSyntaxError("< expected")
      return null
    }

    nextch // is prolog ?
    val children: NodeSeq =
      if ('?' == ch) {
        // document starts with an xml declaration
        nextch;
        val info_prolog = prolog()
        doc.version    = info_prolog._1
        doc.encoding   = info_prolog._2
        doc.standAlone = info_prolog._3

        content(TopScope) // DTD handled as side effect
      } else {
        // no xml declaration: the '<' already consumed starts the content
        val ts = new NodeBuffer();
        content1(TopScope, ts); // DTD handled as side effect
        ts &+ content(TopScope);
        NodeSeq.fromSeq(ts);
      }

    var elemCount = 0;
    var theNode: Node = null;
    for (c <- children) c match {
      case _:ProcInstr => ;
      case _:Comment => ;
      case _:EntityRef => // todo: fix entities, shouldn't be "special"
        reportSyntaxError("no entity references allowed here");
      case s:SpecialNode =>
        // non-empty text nodes are not allowed: adding 2 guarantees the
        // "exactly one element" check below fails
        if (s.toString().trim().length > 0)
          elemCount = elemCount + 2;
      case m:Node =>
        elemCount = elemCount + 1;
        theNode = m;
    }
    if (1 != elemCount) {
      reportSyntaxError("document must contain exactly one element")
      Console.println(children.toList)
    }

    doc.children = children
    doc.docElem = theNode
    doc
  }

  /** append Unicode character to name buffer*/
  protected def putChar(c: Char) = cbuf.append(c)

  /** this method assign the next character to ch and advances in input
   *
   *  When the current input is exhausted, either the input stack is popped
   *  or, if nothing is left to pop, <code>eof</code> is set and
   *  <code>ch</code> becomes the NUL character.
   */
  def nextch: Unit = {
    if (curInput.hasNext) {
      ch = curInput.next
      pos = curInput.pos
    } else {
      val ilen = inpStack.length;
      if ((ilen != extIndex) && (ilen > 0)) {
        /** for external source, inpStack == Nil ! need notify of eof! */
        pop()
      } else {
        eof = true
        ch = 0.toChar
      }
    }
  }

  /** munch expected XML token, report syntax error for unexpected
   *
   *  On a mismatch the syntax error is reported and parsing is aborted
   *  through <code>error("FATAL")</code>.
   */
  def xToken(that: Char): Unit = {
    if (ch == that)
      nextch
    else  {
      reportSyntaxError("'" + that + "' expected instead of '" + ch + "'")
      error("FATAL")
    }
  }

  /** munch each character of the expected token in turn, see xToken(Char) */
  def xToken(that: Seq[Char]): Unit = {
    for (c <- that)
      xToken(c)
  }

  /** parse attribute and create namespace scope, metadata
   *  [41] Attributes    ::= { S Name Eq AttValue }
   *
   *  Attributes named <code>xmlns</code> or <code>xmlns:prefix</code>
   *  extend the namespace scope instead of the attribute list.
   *
   *  @param pscope the namespace scope in effect before these attributes
   *  @return the attributes read and the resulting namespace scope
   */
  def xAttributes(pscope:NamespaceBinding): (MetaData,NamespaceBinding) = {
    var scope: NamespaceBinding = pscope
    var aMap: MetaData = Null
    while (isNameStart(ch)) {
      val qname = xName
      xEQ
      val value = xAttributeValue()

      Utility.prefix(qname) match {
        case Some("xmlns") =>
          val prefix = qname.substring(6 /*xmlns:*/ , qname.length);
          scope = new NamespaceBinding(prefix, value, scope);

        case Some(prefix)       =>
          val key = qname.substring(prefix.length+1, qname.length);
          aMap = new PrefixedAttribute(prefix, key, Text(value), aMap);

        case _             =>
          if( qname == "xmlns" )
            scope = new NamespaceBinding(null, value, scope);
          else
            aMap = new UnprefixedAttribute(qname, Text(value), aMap);
      }

      // whitespace is required unless the tag or declaration ends here
      if ((ch != '/') && (ch != '>') && ('?' != ch))
        xSpace;
    }

    if(!aMap.wellformed(scope))
        reportSyntaxError( "double attribute");

    (aMap,scope)
  }

  /** attribute value, terminated by either ' or ". value may not contain &lt;.   
*       AttValue     ::= `'` { _  } `'`
   *                      | `"` { _ } `"`
   *
   *  The character in <code>ch</code> on entry is taken as the delimiter.
   *  Reading also stops at end of input, in which case a syntax error is
   *  reported and the characters read so far are used.
   *
   *  @return the value after <code>normalizeAttributeValue</code>
   */
  def xAttributeValue(): String = {
    val endch = ch
    nextch
    // nextch keeps yielding NUL once input is exhausted, so without the
    // eof guard an unterminated value would never leave this loop
    while (ch != endch && !eof) {
      if ('<' == ch)
        reportSyntaxError( "'<' not allowed in attrib value" );
      putChar(ch)
      nextch
    }
    if (eof)
      reportSyntaxError("unexpected end of input in attribute value")
    else
      nextch // skip the closing delimiter
    val str = cbuf.toString()
    cbuf.length = 0

    // well-formedness constraint
    normalizeAttributeValue(str)
  }

  /** entity value, terminated by either ' or ".
   *       EntityValue  ::= `'` { _  } `'`
   *                      | `"` { _ } `"`
   *
   *  The character in <code>ch</code> on entry is taken as the delimiter;
   *  the characters between the delimiters are returned unchanged.
   *  Reading also stops at end of input, in which case a syntax error is
   *  reported and the characters read so far are returned.
   */
  def xEntityValue(): String = {
    val endch = ch
    nextch
    // same eof guard as in xAttributeValue
    while (ch != endch && !eof) {
      putChar(ch)
      nextch
    }
    if (eof)
      reportSyntaxError("unexpected end of input in entity value")
    else
      nextch // skip the closing delimiter
    val str = cbuf.toString()
    cbuf.length = 0
    str
  }

  /** parse a start or empty tag.
   *  [40] STag         ::= '&lt;' Name { S Attribute } [S]
   *  [44] EmptyElemTag ::= '&lt;' Name { S Attribute } [S]
   *
   *  @param pscope the namespace scope of the enclosing element
   *  @return the qualified name, the attributes and the namespace scope
   *          that results from the tag's xmlns attributes
   */
  protected def xTag(pscope:NamespaceBinding): Tuple3[String, MetaData, NamespaceBinding] = {
    val qname = xName

    xSpaceOpt
    val (aMap: MetaData, scope: NamespaceBinding) = {
      if (isNameStart(ch))
        xAttributes(pscope)
      else
        (Null, pscope)
    }
    (qname, aMap, scope)
  }

  /** [42]  '&lt;' xmlEndTag ::=  '&lt;' '/' Name S? '&gt;'
   *
   *  Reports a syntax error if the name read differs from <code>n</code>.
   *
   *  @param n the name of the element being closed
   */
  def xEndTag(n: String) = {
    xToken('/')
    val m = xName
    if (n != m)
      reportSyntaxError("expected closing tag of " + n/* +", not "+m*/);
    xSpaceOpt
    xToken('>')
  }

  /** '&lt;! 
CharData ::= [CDATA[ ( {char} - {char}"]]&gt;"{char} ) ']]&gt;'
   *
   * see [15]
   *
   *  Reads up to and including the first <code>]]&gt;</code> and passes the
   *  text before it to <code>handle.text</code>. If the input ends first, a
   *  syntax error is reported and the text read so far is passed on.
   */
  def xCharData: NodeSeq = {
    xToken("[CDATA[")
    val pos1 = pos
    val sb: StringBuilder = new StringBuilder()
    // number of consecutive ']' characters read immediately before ch;
    // counting them (rather than matching exactly two) also recognises
    // the terminator after a longer run such as "]]]>"
    var brackets = 0
    var closed = false
    while (!closed && !eof) {
      if (ch == '>' && brackets >= 2) {
        sb.length = sb.length - 2 // drop the "]]" that belongs to the terminator
        closed = true
      } else {
        if (ch == ']') brackets += 1 else brackets = 0
        sb.append(ch)
      }
      nextch
    }
    if (!closed)
      reportSyntaxError("CDATA section not terminated by ']]>'")
    handle.text(pos1, sb.toString())
  }

  /** CharRef ::= "&amp;#" '0'..'9' {'0'..'9'} ";"
   *            | "&amp;#x" '0'..'9'|'A'..'F'|'a'..'f' { hexdigit } ";"
   *

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?