markupparser.scala

来自「JAVA 语言的函数式编程扩展」· SCALA 代码 · 共 1,216 行 · 第 1/3 页

SCALA
1,216
字号
   * see [66]
   */
  def xCharRef(ch: () => Char, nextch: () => Unit): String = {
    // Parsing is delegated to the shared helper in Utility; syntax errors are
    // routed through this parser's reportSyntaxError.
    Utility.parseCharRef(ch, nextch, reportSyntaxError _)
  }

  /** Comment ::= '&lt;!--' ((Char - '-') | ('-' (Char - '-')))* '--&gt;'
   *
   *  Consumes the two opening '-' characters, accumulates the comment text
   *  and hands it to handle.comment once the closing '--&gt;' has been read.
   *
   * see [15]
   */
  def xComment: NodeSeq = {
    val sb: StringBuilder = new StringBuilder()
    xToken('-')
    xToken('-')
    while (true) {
      // A '-' is appended tentatively; if the following character is also
      // '-', the comment is closing and the tentative '-' is dropped again.
      if (ch == '-' && { sb.append(ch); nextch; ch == '-' }) {
        sb.length = sb.length - 1
        nextch
        xToken('>')
        return handle.comment(pos, sb.toString())
      } else sb.append(ch)
      nextch
    }
    // The loop above can only be left through the `return`.
    throw FatalError("this cannot happen")
  }

  /** Appends character data to `ts` as text node(s) created by handle.text.
   *
   *  With preserveWS set, `txt` is appended unchanged as a single text node;
   *  otherwise it is first passed through TextBuffer and every resulting
   *  text is appended separately.
   *
   *  todo: move this into the NodeBuilder class
   *
   *  @param pos  position passed on to handle.text
   *  @param ts   buffer receiving the text nodes
   *  @param txt  raw character data
   */
  def appendText(pos: Int, ts: NodeBuffer, txt: String): Unit = {
    if (preserveWS)
      ts &+ handle.text(pos, txt)
    else
      for (t <- TextBuffer.fromString(txt).toText) {
        ts &+ handle.text(pos, t.text)
      }
  }

  /** '&lt;' content1 ::=  ...
   *
   *  Called after a '&lt;' has been consumed. Dispatches on the current
   *  character: '!' introduces CDATA, a DOCTYPE declaration or a comment,
   *  '?' a processing instruction, anything else a child element.
   *  Parsed nodes are appended to `ts`.
   */
  def content1(pscope: NamespaceBinding, ts: NodeBuffer): Unit =
    ch match {
      case '!' =>
        nextch
        if ('[' == ch)        // CDATA
          ts &+ xCharData
        else if ('D' == ch)   // doctypedecl, parse DTD // @todo REMOVE HACK
          parseDTD()
        else                  // comment
          ts &+ xComment
      case '?' =>             // PI
        nextch
        ts &+ xProcInstr
      case _ =>
        ts &+ element1(pscope) // child
    }

  /** content1 ::=  '&lt;' content1 | '&amp;' charref ...
   *
   *  Parses element content until end of input or until the "&lt;/" that
   *  starts an end tag (the '&lt;' is consumed, the '/' is left as the
   *  current character).
   *
   *  The five entities predefined by XML (lt, gt, amp, quot, apos) are
   *  expanded inline; every other entity name is handed to push(n).
   *
   *  @param pscope namespace bindings in effect for child elements
   *  @return the parsed child nodes
   */
  def content(pscope: NamespaceBinding): NodeSeq = {
    val ts = new NodeBuffer
    var exit = eof
    while (!exit) {
      tmppos = pos
      exit = eof
      if (!eof)
        ch match {
          case '<' => // another tag
            nextch
            if ('/' == ch)
              exit = true // end tag
            else
              content1(pscope, ts)
          case '&' => // EntityRef or CharRef
            nextch
            ch match {
              case '#' => // CharacterRef
                nextch
                val theChar = handle.text(tmppos, xCharRef({ () => ch }, { () => nextch }))
                xToken(';')
                ts &+ theChar
              case _ => // EntityRef
                val n = xName
                xToken(';')
                n match {
                  case "lt"   => ts &+ '<'
                  case "gt"   => ts &+ '>'
                  case "amp"  => ts &+ '&'
                  case "quot" => ts &+ '"'
                  // apos is predefined as well and needs no declaration
                  case "apos" => ts &+ '\''
                  case _      => push(n)
                }
            }
          case _ => // text content
            appendText(tmppos, ts, xText)
        }
    }
    val list = ts.toList
    // 2do: optimize seq repr.
    new NodeSeq {
      val theSeq = list
    }
  } // content(NamespaceBinding)

  /** externalID ::= SYSTEM S syslit
   *                 PUBLIC S pubid S syslit
   *
   *  The current character must be 'S' or 'P'; the match has no other case.
   */
  def externalID(): ExternalID = ch match {
    case 'S' =>
      nextch
      xToken("YSTEM")
      xSpace
      val sysID = systemLiteral()
      new SystemID(sysID)
    case 'P' =>
      nextch
      xToken("UBLIC")
      xSpace
      val pubID = pubidLiteral()
      xSpace
      val sysID = systemLiteral()
      new PublicID(pubID, sysID)
  }

  /** parses document type declaration and assigns it to instance variable
   *  dtd.
   *
   *  &lt;! parseDTD ::= DOCTYPE name ... 
>
   */
  def parseDTD(): Unit = { // dirty but fast
    // A repeated DOCTYPE is reported, but parsing carries on regardless.
    if (this.dtd ne null)
      reportSyntaxError("unexpected character (DOCTYPE already defined")
    xToken("DOCTYPE")
    xSpace
    val rootName = xName
    xSpace

    // Optional external ID (SYSTEM ... / PUBLIC ...); null when absent.
    val extID: ExternalID =
      if ('S' == ch || 'P' == ch) {
        val id = externalID()
        xSpaceOpt
        id
      } else null

    // The external subset is only read when an external ID was given and
    // the parser is validating.
    if ((null != extID) && isValidating) {
      pushExternal(extID.systemId)
      extIndex = inpStack.length
      extSubset()
      pop()
      extIndex = -1
    }

    // Internal subset, enclosed in '[' ... ']'.
    if ('[' == ch) {
      nextch
      intSubset()
      xToken(']')
      xSpaceOpt
    }
    xToken('>')

    this.dtd = new DTD {
      externalID = extID
      decls = handle.decls.reverse
    }
    if (doc ne null)
      doc.dtd = this.dtd

    handle.endDTD(rootName)
  }

  /** Parses an element, including its leading '&lt;'. */
  def element(pscope: NamespaceBinding): NodeSeq = {
    xToken('<')
    element1(pscope)
  }

  /** '&lt;' element ::= xmlTag1 '&gt;'  { xmlExpr | '{' simpleExpr '}' } ETag
   *               | xmlTag1 '/' '&gt;'
   *
   *  Called once the leading '&lt;' has been consumed. Notifies the handler
   *  through elemStart, elem and elemEnd and returns what handle.elem built.
   */
  def element1(pscope: NamespaceBinding): NodeSeq = {
    val startPos = this.pos
    val (qname, aMap, scope) = xTag(pscope)

    // Split the qualified name into prefix (null when there is none) and
    // local part.
    val (pre, local) = Utility.prefix(qname) match {
      case Some(p) => (p, qname.substring(p.length + 1))
      case _       => (null, qname)
    }

    // "/>" closes an empty element, ">" opens one with content.
    val emptyElem = ch == '/'
    if (emptyElem)
      xToken('/')
    xToken('>')
    handle.elemStart(startPos, pre, local, aMap, scope)

    val ts =
      if (emptyElem) NodeSeq.Empty
      else {
        val children = content(scope)
        xEndTag(qname)
        children
      }

    val res = handle.elem(startPos, pre, local, aMap, scope, ts)
    handle.elemEnd(startPos, pre, local)
    res
  }

  //def xEmbeddedExpr: MarkupType;

  /** Name ::= (Letter | '_' | ':') (NameChar)*
   *
   *  Returns the interned name and clears cbuf. If the current character
   *  cannot start a name, a syntax error is reported and "" is returned.
   *
   *  see  [5] of XML 1.0 specification
   */
  def xName: String = {
    if (!isNameStart(ch)) {
      reportSyntaxError("name expected")
      ""
    } else {
      while (isNameChar(ch)) {
        putChar(ch)
        nextch
      }
      val name = cbuf.toString().intern()
      cbuf.length = 0
      name
    }
  }

  /** scan [S] '=' [S]*/
  def xEQ = {
    xSpaceOpt
    xToken('=')
    xSpaceOpt
  }

  /** skip optional space S? */
  def xSpaceOpt = while (isSpace(ch) && !eof) nextch

  /** scan [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
  def xSpace =
    if (!isSpace(ch)) reportSyntaxError("whitespace expected")
    else {
      nextch
      xSpaceOpt
    }

  /** '&lt;?' 
ProcInstr ::= Name [S ({Char} - ({Char}'&gt;?' {Char})]'?&gt;'
   *
   *  Called after "&lt;?" has been consumed. Reads the target name and, if
   *  whitespace follows, the instruction data up to the closing "?&gt;".
   *  The result is created through handle.procInstr.
   *
   * see [16]
   */
  def xProcInstr: NodeSeq = {
    val sb: StringBuilder = new StringBuilder()
    val n = xName
    if (isSpace(ch)) {
      xSpace
      while (true) {
        if (ch == '?') {
          nextch
          if (ch == '>') {
            nextch
            return handle.procInstr(tmppos, n, sb.toString)
          }
          // The '?' was ordinary data. The character after it is deliberately
          // not consumed here, so that it is examined again on the next
          // iteration: data ending in '?' (as in "??&gt;") still terminates.
          sb.append('?')
        } else {
          sb.append(ch)
          nextch
        }
      }
    }
    // No data part: the target must be followed directly by "?>".
    xToken('?')
    xToken('>')
    handle.procInstr(tmppos, n, sb.toString)
  }

  /** parse character data.
   *   precondition: xEmbeddedBlock == false (we are not in a scala block)
   *
   *  The current character is always taken; further characters are taken
   *  until end of input, '&lt;' or '&amp;'. cbuf is cleared before returning.
   */
  def xText: String = {
    do {
      putChar(ch)
      nextch
    } while (!(eof || (ch == '<') || (ch == '&')))
    val str = cbuf.toString()
    cbuf.length = 0
    str
  }

  /** attribute value, terminated by either ' or ". value may not contain &lt;.   
*       AttValue     ::= `'` { _ } `'`
   *                      | `"` { _ } `"`
   */
  def systemLiteral(): String = {
    // Whatever character is current is used as the closing delimiter, even
    // when it is not a quote (in which case an error is reported first).
    val quote = ch
    if (!(ch == '\'' || ch == '"'))
      reportSyntaxError("quote ' or \" expected")
    nextch
    while (ch != quote) {
      putChar(ch)
      nextch
    }
    nextch // skip the closing delimiter
    val literal = cbuf.toString()
    cbuf.length = 0
    literal
  }

  /* [12]       PubidLiteral ::=        '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" */
  def pubidLiteral(): String = {
    val endch = ch
    if (ch!='\'' && ch != '"')
      reportSyntaxError("quote ' or \" expected");
    nextch
    while (ch != endch) {
      putChar(ch)
      //Console.println("hello '"+ch+"'"+isPubIDChar(ch));
      if (!isPubIDChar(ch))
        reportSyntaxError("char '"+ch+"' is not allowed in public id");
      nextch
    }
    nextch
    val str = cbuf.toString()
    cbuf.length = 0
    str

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?