utility.scala
来自「JAVA 语言的函数式编程扩展」· SCALA 代码 · 共 486 行
SCALA
486 行
/*                     __                                               *\
**     ________ ___   / /  ___     Scala API                            **
**    / __/ __// _ | / /  / _ |    (c) 2003-2007, LAMP/EPFL             **
**  __\ \/ /__/ __ |/ /__/ __ |    http://scala-lang.org/               **
** /____/\___/_/ |_/____/_/ | |                                         **
**                          |/                                          **
\*                                                                      */

// $Id: Utility.scala 13343 2007-11-25 20:43:41Z emir $

package scala.xml

import collection.mutable.{Set, HashSet}

/**
 * The <code>Utility</code> object provides utility functions for processing
 * instances of bound and not bound XML classes, as well as escaping text nodes.
 *
 * @author Burak Emir
 */
object Utility extends AnyRef with parsing.TokenTests {

  /** Trims an element - call this method when you know that it is an
   *  element (and not a text node), so you know that it will not be trimmed
   *  away. With this assumption, the function can return a <code>Node</code>
   *  rather than a <code>Seq[Node]</code>. If you don't know, call
   *  <code>trimProper</code> and account for the fact that you may get back
   *  an empty sequence of nodes.
   *
   *  precondition: node is not a text node (it might be trimmed); any
   *  node that is not an <code>Elem</code> fails the match.
   *
   *  @param x the element to trim
   *  @return  a copy of <code>x</code> whose children were passed through
   *           <code>trimProper</code>
   */
  def trim(x: Node): Node = x match {
    case Elem(pre, lab, md, scp, child @ _*) =>
      Elem(pre, lab, md, scp, (child flatMap trimProper): _*)
  }

  /** Trims a child of an element. Attribute values and Atom nodes that
   *  are not Text nodes are unaffected.
   *
   *  @param x the node to trim
   *  @return  for an element, the element with recursively trimmed children;
   *           for a text node, whatever <code>TextBuffer</code> makes of its
   *           text; otherwise <code>x</code> itself
   */
  def trimProper(x: Node): Seq[Node] = x match {
    case Elem(pre, lab, md, scp, child @ _*) =>
      Elem(pre, lab, md, scp, (child flatMap trimProper): _*)
    case Text(s) =>
      new TextBuffer().append(s).toText
    case _ =>
      x
  }

  /** Returns a sorted attribute list.
   *
   *  The head of the list is used as pivot: attributes with a smaller key
   *  are sorted and placed before it, those with a greater key after it.
   *
   *  @param md the attribute list
   *  @return   the attributes ordered by key
   */
  def sort(md: MetaData): MetaData =
    if ((md eq Null) || (md.next eq Null)) md
    else {
      val key = md.key
      val smaller = sort(md.filter { m => m.key < key })
      val greater = sort(md.filter { m => m.key > key })
      smaller.append(Null).append(md.copy(greater))
    }

  /** Returns the node with its attribute list sorted alphabetically
   *  (prefixes are ignored). Children of elements are sorted recursively;
   *  nodes that are not elements are returned unchanged.
   *
   *  @param n the node
   *  @return  the node with sorted attributes
   */
  def sort(n: Node): Node = n match {
    case Elem(pre, lab, md, scp, child @ _*) =>
      Elem(pre, lab, sort(md), scp, (child map sort): _*)
    case _ =>
      n
  }

  /** @deprecated a string might also be Atom(s) - define your own conversion
   */
  @deprecated
  def view(s: String): Text = Text(s)

  /**
   * Escapes the characters &lt; &gt; &amp; and &quot; from string.
   *
   * @param text the text to escape
   * @return     a new string in which those four characters are replaced by
   *             their predefined entity references
   */
  final def escape(text: String): String =
    escape(text, new StringBuilder()).toString()

  /**
   * Appends escaped string to <code>s</code>.
   *
   * @param text the text to escape
   * @param s    the buffer to append to
   * @return     the buffer <code>s</code>
   */
  final def escape(text: String, s: StringBuilder): StringBuilder = {
    for (c <- text.elements) c match {
      case '<' => s.append("&lt;")
      case '>' => s.append("&gt;")
      case '&' => s.append("&amp;")
      case '"' => s.append("&quot;")
      //case '\'' => s.append("&apos;") // is valid xhtml but not html, and IE doesn't know it, says jweb
      case _   => s.append(c)
    }
    s
  }

  /**
   * Appends unescaped string to <code>s</code>: <code>amp</code> becomes
   * <code>&amp;</code>, <code>lt</code> becomes <code>&lt;</code> etc.
   *
   * @param ref the entity name, without the leading ampersand and the
   *            trailing semicolon
   * @param s   the buffer to append to
   * @return    the buffer <code>s</code>, or <code>null</code> if
   *            <code>ref</code> was not a predefined entity (in which case
   *            nothing is appended).
   */
  final def unescape(ref: String, s: StringBuilder): StringBuilder = ref match {
    case "lt"   => s.append('<')
    case "gt"   => s.append('>')
    case "amp"  => s.append('&')
    case "quot" => s.append('"')
    case "apos" => s.append('\'')
    case _      => null
  }

  /**
   * Returns a set of all namespaces used in a sequence of nodes
   * and all their descendants, including the empty namespaces.
   *
   * @param nodes the nodes to inspect
   * @return      a fresh mutable set holding the collected namespaces
   */
  def collectNamespaces(nodes: Seq[Node]): Set[String] = {
    val m = new HashSet[String]()
    val it = nodes.elements
    while (it.hasNext)
      collectNamespaces(it.next, m);
    m
  }

  /**
   * Adds all namespaces in node to set.
   *
   * Only nodes with a non-negative <code>typeTag$</code> are inspected; for
   * those, the node's own namespace, the namespaces of its prefixed
   * attributes and the namespaces of all its descendants are added.
   *
   * @param n   the node to inspect
   * @param set the set that receives the namespaces
   */
  def collectNamespaces(n: Node, set: Set[String]) {
    if (n.typeTag$ >= 0) {
      set += n.namespace
      for (a <- n.attributes) a match {
        case _: PrefixedAttribute =>
          set += a.getNamespace(n)
        case _ =>
      }
      for (i <- n.child)
        collectNamespaces(i, set)
    }
  }

  /**
   * Returns the string representation of an XML node, with comments
   * stripped.
   *
   * @param n the XML node
   * @return  the string representation of node <code>n</code>.
   *
   * @see "toXML(Node, Boolean)"
   */
  def toXML(n: Node): String = toXML(n, true)

  /**
   * Return the string representation of a Node. uses namespace mapping from
   * <code>defaultPrefixes(n)</code>.
   *
   * @param n            the XML node
   * @param stripComment if true, comments are left out of the result
   * @return             the string representation of node <code>n</code>
   *
   * @todo define a way to escape literal characters to &amp;xx; references
   */
  def toXML(n: Node, stripComment: Boolean): String = {
    val sb = new StringBuilder()
    toXML(n, TopScope, sb, stripComment)
    sb.toString()
  }

  /**
   * Appends a tree to the given stringbuffer within given namespace scope.
   *
   * @param x            the node
   * @param pscope       the parent scope
   * @param sb           stringbuffer to append to
   * @param stripComment if true, strip comments
   */
  def toXML(x: Node, pscope: NamespaceBinding, sb: StringBuilder, stripComment: Boolean) {
    x match {
      // Comments get their own unguarded case: with a guard, a comment that
      // should be stripped would fall through to the SpecialNode case below
      // (Comment is a SpecialNode in scala.xml) and be printed anyway.
      case c: Comment =>
        if (!stripComment) c.toString(sb)

      case x: SpecialNode =>
        x.toString(sb)

      case g: Group =>
        for (c <- g.nodes) toXML(c, x.scope, sb, stripComment)

      case _ =>
        // print tag with namespace declarations
        sb.append('<')
        x.nameToString(sb)
        if (x.attributes ne null) x.attributes.toString(sb)
        x.scope.toString(sb, pscope)
        sb.append('>')
        sequenceToXML(x.child, x.scope, sb, stripComment)
        sb.append("</")
        x.nameToString(sb)
        sb.append('>')
    }
  }

  /**
   * Appends a sequence of sibling nodes to the given stringbuffer.
   *
   * If every child is an <code>Atom</code> that is not a <code>Text</code>,
   * the children are separated by single spaces; otherwise they are
   * appended one after another without separator.
   *
   * @param children     the nodes to append
   * @param pscope       the scope of the parent of <code>children</code>
   * @param sb           stringbuffer to append to
   * @param stripComment if true, strip comments
   */
  def sequenceToXML(children: Seq[Node], pscope: NamespaceBinding,
                    sb: StringBuilder, stripComment: Boolean) {
    if (children.isEmpty) {
      return
    } else if (children forall {
      case y: Atom[_] => !y.isInstanceOf[Text]
      case _          => false
    }) { // add space
      val it = children.elements
      val f = it.next
      toXML(f, pscope, sb, stripComment)
      while (it.hasNext) {
        val x = it.next
        sb.append(' ')
        toXML(x, pscope, sb, stripComment)
      }
    } else {
      for (c <- children) toXML(c, pscope, sb, stripComment)
    }
  }

  /**
   * Returns prefix of qualified name if any.
   *
   * @param name the (possibly qualified) name
   * @return     <code>Some(p)</code> where <code>p</code> is the part of
   *             <code>name</code> before the first colon, or
   *             <code>None</code> if there is no colon
   */
  final def prefix(name: String): Option[String] = {
    val i = name.indexOf(':'.asInstanceOf[Int])
    if (i != -1) Some(name.substring(0, i)) else None
  }

  /**
   * Returns a hashcode for the given constituents of a node
   *
   * @param pre            the prefix, may be <code>null</code>
   * @param label          the label
   * @param attribHashCode the hashcode of the attributes
   * @param scpeHash       the hashcode of the scope
   * @param children       the child nodes
   */
  def hashCode(pre: String, label: String, attribHashCode: Int, scpeHash: Int, children: Seq[Node]) = {
    ( if(pre ne null) {41 * pre.hashCode() % 7} else {0})
    + label.hashCode() * 53
    + attribHashCode * 7
    + scpeHash * 31
    + children.hashCode()
  }

  /*
   * Former variant, kept for reference:
   *
   * Returns a hashcode for the given constituents of a node
   *
   * @param uri
   * @param label
   * @param attribs
   * @param children
  def hashCode(uri: String, label: String,
               attribs: scala.collection.mutable.HashMap[(String,String),String],
               scpe: Int, children: Seq[Node]): Int = {
    41 * uri.hashCode() % 7 +
    label.hashCode() +
    attribs.toList.hashCode() +
    scpe +
    children.hashCode()
  }
   */

  /**
   * Renders a system literal as <code>SYSTEM</code> followed by the quoted
   * literal.
   *
   * @param s the system literal
   * @return  the rendered string
   */
  def systemLiteralToString(s: String): String = {
    val sb = new StringBuilder()
    systemLiteralToString(sb, s)
    sb.toString()
  }

  /**
   * Appends <code>SYSTEM</code>, a space and the quoted literal to
   * <code>sb</code>.
   *
   * @param sb the buffer to append to
   * @param s  the system literal
   * @return   the buffer <code>sb</code>
   */
  def systemLiteralToString(sb: StringBuilder, s: String): StringBuilder = {
    sb.append("SYSTEM ")
    appendQuoted(s, sb)
  }

  /**
   * Renders a public literal as <code>PUBLIC</code> followed by the literal
   * in double quotes.
   *
   * @param s the public literal
   * @return  the rendered string
   */
  def publicLiteralToString(s: String): String = {
    val sb = new StringBuilder()
    // delegate to the PUBLIC variant (this used to call the SYSTEM one)
    publicLiteralToString(sb, s)
    sb.toString()
  }

  /**
   * Appends <code>PUBLIC</code>, a space and the literal in double quotes
   * to <code>sb</code>.
   *
   * @param sb the buffer to append to
   * @param s  the public literal
   * @return   the buffer <code>sb</code>
   */
  def publicLiteralToString(sb: StringBuilder, s: String): StringBuilder = {
    sb.append("PUBLIC \"").append(s).append('"')
  }

  /**
   * Appends &quot;s&quot; if string <code>s</code> does not contain &quot;,
   * 's' otherwise.
   *
   * @param s  the string to quote
   * @param sb the buffer to append to
   * @return   the buffer <code>sb</code>
   */
  def appendQuoted(s: String, sb: StringBuilder) = {
    val ch = if (s.indexOf('"'.asInstanceOf[Int]) == -1) '"' else '\'';
    sb.append(ch).append(s).append(ch)
  }

  /**
   * Appends &quot;s&quot; and escapes every &quot; in <code>s</code>
   * with \&quot;
   *
   * @param s  the string to quote
   * @param sb the buffer to append to
   * @return   the buffer <code>sb</code>
   */
  def appendEscapedQuoted(s: String, sb: StringBuilder): StringBuilder = {
    sb.append('"')
    for (c <- s) c match {
      case '"' => sb.append('\\'); sb.append('"')
      case _   => sb.append(c)
    }
    sb.append('"')
  }

  /**
   * Reads a name from <code>s</code>, starting at position
   * <code>index</code>.
   *
   * @param s     the string to read from
   * @param index the position of the first character of the name
   * @return      the longest run of name characters starting at
   *              <code>index</code>; the empty string if the character at
   *              <code>index</code> cannot start a name; <code>null</code>
   *              if <code>index</code> is not smaller than the length of
   *              <code>s</code>
   */
  def getName(s: String, index: Int): String = {
    var i = index;
    val sb = new StringBuilder();
    if (i < s.length) {
      var c = s.charAt(i);
      if (isNameStart(s.charAt(i)))
        while (i < s.length && { c = s.charAt(i); isNameChar(c) }) {
          sb.append(c)
          i = i + 1
        }
      sb.toString()
    } else null
  }

  /**
   * Returns <code>null</code> if the value is a correct attribute value,
   * error message if it isn't.
   *
   * @param value the attribute value to check
   * @return      <code>null</code>, or a description of the first problem
   *              found
   */
  def checkAttributeValue(value: String): String = {
    var i = 0
    while (i < value.length) {
      value.charAt(i) match {
        case '<' =>
          return "< not allowed in attribute value";
        case '&' =>
          val n = getName(value, i+1)
          if (n eq null)
            return "malformed entity reference in attribute value ["+value+"]";
          // skip the name; i must now sit on the terminating ';'
          i = i + n.length + 1
          if (i >= value.length || value.charAt(i) != ';')
            return "malformed entity reference in attribute value ["+value+"]";
        case _ =>
      }
      i = i + 1
    }
    null
  }

  /**
   * Parses an attribute value into a sequence of nodes.
   *
   * Character references and predefined entity references are replaced by
   * the characters they stand for and merged into the surrounding text;
   * every other entity reference becomes an <code>EntityRef</code> node
   * carrying the name of the entity.
   *
   * The value is expected to be well-formed: a reference that is not
   * terminated by a semicolon makes the underlying iterator run past the
   * end of the input, and a malformed character reference raises a
   * <code>RuntimeException</code>.
   *
   * @param value the attribute value
   * @return      a single <code>Text</code> node if the value contains no
   *              unknown entity reference, otherwise the buffer holding
   *              the text and entity reference nodes
   */
  def parseAttributeValue(value: String): Seq[Node] = {
    val zs: Seq[Char] = value
    val sb  = new StringBuilder
    var rfb: StringBuilder = null
    val nb = new NodeBuffer()
    val it = zs.elements
    while (it.hasNext) {
      var c = it.next
      c match {
        case '&' => // entity! flush buffer into text node
          it.next match {
            case '#' =>
              c = it.next
              val theChar = parseCharRef ({ ()=> c },{ () => c = it.next },{s => throw new RuntimeException(s)})
              sb.append(theChar)

            case x =>
              if (rfb eq null) rfb = new StringBuilder()
              rfb.append(x)
              c = it.next
              while (c != ';') {
                rfb.append(c)
                c = it.next
              }
              val ref = rfb.toString()
              rfb.setLength(0)
              unescape(ref, sb) match {
                case null =>
                  if (sb.length > 0) {  // flush buffer
                    nb += Text(sb.toString())
                    sb.setLength(0)
                  }
                  // the node is named after the entity, not after the
                  // (by now flushed) text buffer
                  nb += EntityRef(ref) // add entityref
                case _ =>
              }
          }
        case x =>
          sb.append(x)
      }
    }
    if (sb.length > 0) { // flush buffer
      val x = Text(sb.toString())
      if (nb.length == 0)
        return x
      else
        nb += x
    }
    return nb
  }

  /**
   * <pre>
   *   CharRef ::= "&amp;#" '0'..'9' {'0'..'9'} ";"
   *             | "&amp;#x" '0'..'9'|'A'..'F'|'a'..'f' { hexdigit } ";"
   * </pre>
   * <p>
   *   see [66]
   * <p>
   *
   * On entry, the leading <code>&amp;#</code> must already have been
   * consumed, so that <code>ch()</code> yields either <code>x</code> or the
   * first digit. On exit, <code>ch()</code> yields the terminating
   * semicolon.
   *
   * @param ch                yields the current character
   * @param nextch            advances to the next character
   * @param reportSyntaxError called with a message for every character that
   *                          is not allowed; parsing continues afterwards
   *                          unless this function throws
   * @return                  the referenced character as a string
   */
  def parseCharRef(ch: () => Char, nextch: () => Unit, reportSyntaxError: (String) => Unit): String = {
    val hex  = (ch() == 'x') && { nextch(); true }
    val base = if (hex) 16 else 10
    var i = 0
    while (ch() != ';') {
      ch() match {
        case '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' =>
          i = i * base + ch().asDigit
        case 'a' | 'b' | 'c' | 'd' | 'e' | 'f'
           | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' =>
          if (! hex)
            reportSyntaxError("hex char not allowed in decimal char ref\n" +
                              "Did you mean to write &#x ?")
          else
            i = i * base + ch().asDigit
        case _ =>
          reportSyntaxError("character '" + ch() + "' not allowed in char ref\n")
      }
      nextch()
    }
    new String(io.UTF8Codec.encode(i), "utf8")
  }

}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?