📄 xmlutil.java
字号:
/* Sesame - Storage and Querying architecture for RDF and RDF Schema * Copyright (C) 2001-2005 Aduna * * Contact: * Aduna * Prinses Julianaplein 14 b * 3817 CS Amersfoort * The Netherlands * tel. +33 (0)33 465 99 87 * fax. +33 (0)33 465 99 87 * * http://aduna.biz/ * http://www.openrdf.org/ * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */package org.openrdf.util.xml;import org.openrdf.util.StringUtil;public class XmlUtil { /** * Replaces the special XML-characters &, <, >, " and ' with * their entities. * @see #resolveEntities **/ public static String escapeChars(String text) { String result = StringUtil.gsub("&", "&", text); result = StringUtil.gsub("<", "<", result); result = StringUtil.gsub(">", ">", result); result = StringUtil.gsub("\"", """, result); result = StringUtil.gsub("'", "'", result); return result; } /** * Replaces all XML character entities with the character they represent. 
* @see #escapeChars **/ public static String resolveEntities(String text) { StringBuffer result = new StringBuffer(); int ampIndex = text.indexOf("&"); int prevIndex = 0; while (ampIndex >= 0) { result.append(text.substring(prevIndex, ampIndex)); int colonIndex = text.indexOf(";", ampIndex); result.append( resolveEntity( text.substring(ampIndex + 1, colonIndex) ) ); prevIndex = colonIndex + 1; ampIndex = text.indexOf("&", prevIndex); } result.append(text.substring(prevIndex)); return result.toString(); } /** * Resolves an entity reference or character reference to its value. * * @param entName The 'name' of the reference. This is the string between * & and ;, e.g. amp, quot, #65 or #x41. * @return The value of the supplied reference, or the reference itself * if it could not be resolved. **/ public static String resolveEntity(String entName) { if (entName.startsWith("#")) { // character reference StringBuffer result = new StringBuffer(); if (entName.charAt(1) == 'x') { // Hex-notation result.append((char)Integer.parseInt(entName.substring(2), 16)); } else { // Dec-notation result.append((char)Integer.parseInt(entName.substring(1))); } return result.toString(); } else if (entName.equals("apos")) { return "'"; } else if (entName.equals("quot")) { return "\""; } else if (entName.equals("gt")) { return ">"; } else if (entName.equals("lt")) { return "<"; } else if (entName.equals("amp")) { return "&"; } else { return entName; } } /** * Tries to find a point in the supplied URI where this URI can be * safely split into a namespace part and a local name. According * to the XML specifications, a local name must start with a letter * or underscore and can be followed by zero or more 'NCName' * characters. * * @param uri The URI to split. * @return The index of the first character of the local name, or * <tt>-1</tt> if the URI can not be split into a namespace and * local name. 
**/
    public static int findSplitIndex(String uri) {
        final int length = uri.length();

        // Walk backwards to the last character that cannot be part of an
        // NCName. '#' and '/' are tested first because they terminate a
        // namespace very frequently.
        int boundary = length - 1;
        for (; boundary >= 0; boundary--) {
            char ch = uri.charAt(boundary);
            if (ch == '#' || ch == '/' || !XmlUtil.isNCNameChar(ch)) {
                break;
            }
        }

        // The characters right after that boundary may be NCName characters
        // that are not allowed to start a local name; the first letter or
        // underscore from there on is the split point.
        for (int start = boundary + 1; start < length; start++) {
            char ch = uri.charAt(start);
            if (ch == '_' || XmlUtil.isLetter(ch)) {
                return start;
            }
        }

        // Ran off the end of the URI: no legal split point exists
        return -1;
    }

    /**
     * Tells whether the supplied String is an NCName as specified at
     * <a href="http://www.w3.org/TR/REC-xml-names/#NT-NCName">
     * http://www.w3.org/TR/REC-xml-names/#NT-NCName</a>: it must be
     * non-empty, start with a letter or underscore, and every following
     * character must be accepted by {@link #isNCNameChar}.
     *
     * @param name The String to check.
     * @return <tt>true</tt> if the String is an NCName, <tt>false</tt>
     * otherwise.
     **/
    public static final boolean isNCName(String name) {
        final int length = name.length();
        if (length == 0) {
            return false;
        }

        // The first character is restricted to letters and underscore
        char first = name.charAt(0);
        if (first != '_' && !isLetter(first)) {
            return false;
        }

        // Every remaining character must be an NCName character
        for (int pos = 1; pos < length; pos++) {
            if (!isNCNameChar(name.charAt(pos))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tells whether the supplied character may appear in an NCName. The
     * character is accepted if it is '.', '-' or '_', or if any of the
     * character-class checks called below accepts it.
     *
     * @param c The character to check.
     * @return <tt>true</tt> if the character is accepted, <tt>false</tt>
     * otherwise.
     **/
    public static final boolean isNCNameChar(char c) {
        // Checks are made in this order so the common ASCII cases
        // are decided first
        if (_isAsciiBaseChar(c) || _isAsciiDigit(c)) {
            return true;
        }
        if (c == '.' || c == '-' || c == '_') {
            return true;
        }
        return _isNonAsciiBaseChar(c)
            || _isNonAsciiDigit(c)
            || isIdeographic(c)
            || isCombiningChar(c)
            || isExtender(c);
    }

    /**
     * Tells whether the supplied character is a letter, i.e. whether it is
     * accepted by the ASCII base character check, the non-ASCII base
     * character check or the ideographic check.
     *
     * @param c The character to check.
     * @return <tt>true</tt> if the character is a letter, <tt>false</tt>
     * otherwise.
     **/
    public static final boolean isLetter(char c) {
        if (_isAsciiBaseChar(c)) {
            return true;
        }
        if (_isNonAsciiBaseChar(c)) {
            return true;
        }
        return isIdeographic(c);
    }

    /**
     * Tells whether the supplied character lies in one of the ranges
     * 0x0041-0x005A ('A'-'Z') or 0x0061-0x007A ('a'-'z'), as decided by
     * <tt>_charInRange</tt>.
     **/
    private static final boolean _isAsciiBaseChar(char c) {
        if (_charInRange(c, 0x0041, 0x005A)) {
            return true;
        }
        return _charInRange(c, 0x0061, 0x007A);
    }

    private static final boolean _isNonAsciiBaseChar(char c) { return _charInRange(c, 0x00C0, 0x00D6) || _charInRange(c, 0x00D8, 0x00F6) || _charInRange(c, 0x00F8, 0x00FF) || _charInRange(c, 0x0100, 0x0131) || _charInRange(c, 0x0134, 0x013E) || _charInRange(c, 0x0141, 0x0148) || _charInRange(c, 0x014A, 0x017E) || _charInRange(c, 0x0180, 0x01C3) || _charInRange(c, 0x01CD, 0x01F0) || _charInRange(c, 0x01F4, 0x01F5) || _charInRange(c, 0x01FA, 0x0217) || _charInRange(c, 0x0250, 0x02A8) || _charInRange(c, 0x02BB, 0x02C1) || c == 0x0386 || _charInRange(c, 0x0388, 0x038A) || c == 0x038C || _charInRange(c, 0x038E, 0x03A1) || _charInRange(c, 0x03A3, 0x03CE) || _charInRange(c, 0x03D0, 0x03D6) || c == 0x03DA || c == 0x03DC || c == 0x03DE || c == 0x03E0 || _charInRange(c, 0x03E2, 0x03F3) || _charInRange(c, 0x0401, 0x040C) || _charInRange(c, 0x040E, 0x044F) || _charInRange(c, 0x0451, 0x045C) || _charInRange(c, 0x045E, 0x0481) || _charInRange(c, 0x0490, 0x04C4) || _charInRange(c, 0x04C7, 0x04C8) || _charInRange(c, 0x04CB, 0x04CC) || _charInRange(c, 0x04D0, 0x04EB) || _charInRange(c, 0x04EE, 0x04F5) || _charInRange(c, 0x04F8, 0x04F9) || _charInRange(c, 0x0531, 0x0556) || c == 0x0559 || _charInRange(c, 0x0561, 0x0586) || _charInRange(c, 0x05D0, 0x05EA) || _charInRange(c, 0x05F0, 0x05F2) || _charInRange(c, 0x0621, 0x063A) || _charInRange(c, 0x0641, 0x064A) || _charInRange(c, 0x0671, 0x06B7) || _charInRange(c, 0x06BA, 0x06BE) || _charInRange(c, 0x06C0, 0x06CE) || _charInRange(c, 0x06D0, 0x06D3) || c == 0x06D5 || _charInRange(c, 0x06E5, 0x06E6) || _charInRange(c, 0x0905, 0x0939) || c == 0x093D ||
_charInRange(c, 0x0958, 0x0961) || _charInRange(c, 0x0985, 0x098C) || _charInRange(c, 0x098F, 0x0990) || _charInRange(c, 0x0993, 0x09A8) || _charInRange(c, 0x09AA, 0x09B0) || c == 0x09B2 || _charInRange(c, 0x09B6, 0x09B9) || _charInRange(c, 0x09DC, 0x09DD) || _charInRange(c, 0x09DF, 0x09E1) || _charInRange(c, 0x09F0, 0x09F1) || _charInRange(c, 0x0A05, 0x0A0A) || _charInRange(c, 0x0A0F, 0x0A10) || _charInRange(c, 0x0A13, 0x0A28) || _charInRange(c, 0x0A2A, 0x0A30) || _charInRange(c, 0x0A32, 0x0A33) || _charInRange(c, 0x0A35, 0x0A36) || _charInRange(c, 0x0A38, 0x0A39) ||
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -