xmlutil.java

来自「这是外国一个开源推理机」· Java 代码 · 共 582 行 · 第 1/2 页
JAVA
582 行
/*  Sesame - Storage and Querying architecture for RDF and RDF Schema
 *  Copyright (C) 2001-2005 Aduna
 *
 *  Contact: 
 *  	Aduna
 *  	Prinses Julianaplein 14 b
 *  	3817 CS Amersfoort
 *  	The Netherlands
 *  	tel. +33 (0)33 465 99 87
 *  	fax. +33 (0)33 465 99 87
 *
 *  	http://aduna.biz/
 *  	http://www.openrdf.org/
 *  
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

package org.openrdf.util.xml;

import org.openrdf.util.StringUtil;

public class XmlUtil {

	/**
	 * Escapes the five characters that are special in XML: &amp;, &lt;,
	 * &gt;, " and ' are replaced with &amp;amp;, &amp;lt;, &amp;gt;,
	 * &amp;quot; and &amp;apos; respectively.
	 *
	 * @param text The text to escape.
	 * @return The text after all five substitutions have been applied.
	 * @see #resolveEntities
	 **/
	public static String escapeChars(String text) {
		// The ampersand pair comes first: every other replacement introduces
		// an ampersand of its own, which must not be escaped a second time
		// (assuming StringUtil.gsub substitutes every occurrence).
		String[][] substitutions = {
			{ "&", "&amp;" },
			{ "<", "&lt;" },
			{ ">", "&gt;" },
			{ "\"", "&quot;" },
			{ "'", "&apos;" }
		};

		String escaped = text;
		for (int i = 0; i < substitutions.length; i++) {
			escaped = StringUtil.gsub(substitutions[i][0], substitutions[i][1], escaped);
		}
		return escaped;
	}

	/**
	 * Replaces all XML character entities with the character they represent.
* @see #escapeChars	 **/	public static String resolveEntities(String text) {		StringBuffer result = new StringBuffer();		int ampIndex = text.indexOf("&");		int prevIndex = 0;		while (ampIndex >= 0) {			result.append(text.substring(prevIndex, ampIndex));			int colonIndex = text.indexOf(";", ampIndex);			result.append(				resolveEntity( text.substring(ampIndex + 1, colonIndex) )			);			prevIndex = colonIndex + 1;			ampIndex = text.indexOf("&", prevIndex);		}		result.append(text.substring(prevIndex));		return result.toString();	}	/**	 * Resolves an entity reference or character reference to its value. 	 *	 * @param entName The 'name' of the reference. This is the string between	 * &amp; and ;, e.g. amp, quot, #65 or #x41.	 * @return The value of the supplied reference, or the reference itself	 * if it could not be resolved.	 **/	public static String resolveEntity(String entName) {		if (entName.startsWith("#")) {			// character reference			StringBuffer result = new StringBuffer();			if (entName.charAt(1) == 'x') {				// Hex-notation				result.append((char)Integer.parseInt(entName.substring(2), 16));			}			else {				// Dec-notation				result.append((char)Integer.parseInt(entName.substring(1)));			}			return result.toString();		}		else if (entName.equals("apos")) {			return "'";		}		else if (entName.equals("quot")) {			return "\"";		}		else if (entName.equals("gt")) {			return ">";		}		else if (entName.equals("lt")) {			return "<";		}		else if (entName.equals("amp")) {			return "&";		}		else {			return entName;		}	}	/**	 * Tries to find a point in the supplied URI where this URI can be	 * safely split into a namespace part and a local name. According	 * to the XML specifications, a local name must start with a letter	 * or underscore and can be followed by zero or more 'NCName'	 * characters.	 *	 * @param uri The URI to split.	 * @return The index of the first character of the local name, or	 * <tt>-1</tt> if the URI can not be split into a namespace and	 * local name.	 
**/
	public static int findSplitIndex(String uri) {
		int uriLength = uri.length();

		// Search last character that is not an NCName character
		int i = uriLength - 1;
		while (i >= 0) {
			char c = uri.charAt(i);
			// Check for # and / characters explicitly as these
			// are used as the end of a namespace very frequently
			if (c == '#' || c == '/' || !XmlUtil.isNCNameChar(c)) {
				// Found it at index i
				break;
			}
			i--;
		}

		// Character after the just found non-NCName character could
		// be an NCName character, but not a letter or underscore.
		// Skip characters that are not letters or underscores.
		i++;
		while (i < uriLength) {
			char c = uri.charAt(i);
			if (c == '_' || XmlUtil.isLetter(c)) {
				break;
			}
			i++;
		}

		// Check that a legal split point has been found; running off the
		// end means no character can start a local name.
		if (i == uriLength) {
			i = -1;
		}

		return i;
	}

	/**
	 * Checks whether the supplied String is an NCName (non-colonized name)
	 * as specified at <a href="http://www.w3.org/TR/REC-xml-names/#NT-NCName">
	 * http://www.w3.org/TR/REC-xml-names/#NT-NCName</a>.
	 *
	 * @param name The string to check.
	 * @return <tt>true</tt> if the string is not empty, starts with a letter
	 * or underscore and all following characters are NCName characters;
	 * <tt>false</tt> otherwise.
	 **/
	public static final boolean isNCName(String name) {
		int nameLength = name.length();
		if (nameLength == 0) {
			return false;
		}

		// Check first character
		char c = name.charAt(0);
		if (c != '_' && !isLetter(c)) {
			return false;
		}

		// Check the rest of the characters
		for (int i = 1; i < nameLength; i++) {
			c = name.charAt(i);
			if (!isNCNameChar(c)) {
				return false;
			}
		}

		// All characters have been checked
		return true;
	}

	/**
	 * Checks whether the supplied character is allowed inside an NCName:
	 * a base character, a digit, '.', '-', '_', an ideographic character,
	 * a combining character or an extender.
	 **/
	public static final boolean isNCNameChar(char c) {
		return
			_isAsciiBaseChar(c) ||
			_isAsciiDigit(c) ||
			c == '.'
|| c == '-' || c == '_' ||
			_isNonAsciiBaseChar(c) ||
			_isNonAsciiDigit(c) ||
			isIdeographic(c) ||
			isCombiningChar(c) ||
			isExtender(c);
	}

	/**
	 * Checks whether the supplied character counts as a letter, i.e. whether
	 * it is an ASCII base character, a non-ASCII base character or an
	 * ideographic character.
	 *
	 * @param c The character to check.
	 * @return <tt>true</tt> if one of the three checks accepts the character.
	 **/
	public static final boolean isLetter(char c) {
		return
			_isAsciiBaseChar(c) ||
			_isNonAsciiBaseChar(c) ||
			isIdeographic(c);
	}

	/**
	 * Checks the supplied character against the ranges 0x0041-0x005A
	 * ('A'-'Z') and 0x0061-0x007A ('a'-'z').
	 * NOTE(review): _charInRange is defined outside this part of the file;
	 * presumably both bounds are inclusive - confirm.
	 **/
	private static final boolean _isAsciiBaseChar(char c) {
		return
			_charInRange(c, 0x0041, 0x005A) ||
			_charInRange(c, 0x0061, 0x007A);
	}

	/**
	 * Checks the supplied character against a list of ranges and single
	 * code points above the ASCII range.
	 * NOTE(review): the list looks like the non-ASCII part of the BaseChar
	 * production of XML 1.0 - verify against the specification.
	 **/
	private static final boolean _isNonAsciiBaseChar(char c) {
		return
			_charInRange(c, 0x00C0, 0x00D6) ||
			_charInRange(c, 0x00D8, 0x00F6) ||
			_charInRange(c, 0x00F8, 0x00FF) ||
			_charInRange(c, 0x0100, 0x0131) ||
			_charInRange(c, 0x0134, 0x013E) ||
			_charInRange(c, 0x0141, 0x0148) ||
			_charInRange(c, 0x014A, 0x017E) ||
			_charInRange(c, 0x0180, 0x01C3) ||
			_charInRange(c, 0x01CD, 0x01F0) ||
			_charInRange(c, 0x01F4, 0x01F5) ||
			_charInRange(c, 0x01FA, 0x0217) ||
			_charInRange(c, 0x0250, 0x02A8) ||
			_charInRange(c, 0x02BB, 0x02C1) ||
			c == 0x0386 ||
			_charInRange(c, 0x0388, 0x038A) ||
			c == 0x038C ||
			_charInRange(c, 0x038E, 0x03A1) ||
			_charInRange(c, 0x03A3, 0x03CE) ||
			_charInRange(c, 0x03D0, 0x03D6) ||
			c == 0x03DA ||
			c == 0x03DC ||
			c == 0x03DE ||
			c == 0x03E0 ||
			_charInRange(c, 0x03E2, 0x03F3) ||
			_charInRange(c, 0x0401, 0x040C) ||
			_charInRange(c, 0x040E, 0x044F) ||
			_charInRange(c, 0x0451, 0x045C) ||
			_charInRange(c, 0x045E, 0x0481) ||
			_charInRange(c, 0x0490, 0x04C4) ||
			_charInRange(c, 0x04C7, 0x04C8) ||
			_charInRange(c, 0x04CB, 0x04CC) ||
			_charInRange(c, 0x04D0, 0x04EB) ||
			_charInRange(c, 0x04EE, 0x04F5) ||
			_charInRange(c, 0x04F8, 0x04F9) ||
			_charInRange(c, 0x0531, 0x0556) ||
			c == 0x0559 ||
			_charInRange(c, 0x0561, 0x0586) ||
			_charInRange(c, 0x05D0, 0x05EA) ||
			_charInRange(c, 0x05F0, 0x05F2) ||
			_charInRange(c, 0x0621, 0x063A) ||
			_charInRange(c, 0x0641, 0x064A) ||
			_charInRange(c, 0x0671, 0x06B7) ||
			_charInRange(c, 0x06BA, 0x06BE) ||
			_charInRange(c, 0x06C0, 0x06CE) ||
			_charInRange(c, 0x06D0,
0x06D3) ||			c == 0x06D5 ||			_charInRange(c, 0x06E5, 0x06E6) ||			_charInRange(c, 0x0905, 0x0939) ||			c == 0x093D ||			_charInRange(c, 0x0958, 0x0961) ||			_charInRange(c, 0x0985, 0x098C) ||			_charInRange(c, 0x098F, 0x0990) ||			_charInRange(c, 0x0993, 0x09A8) ||			_charInRange(c, 0x09AA, 0x09B0) ||			c == 0x09B2 ||			_charInRange(c, 0x09B6, 0x09B9) ||			_charInRange(c, 0x09DC, 0x09DD) ||			_charInRange(c, 0x09DF, 0x09E1) ||			_charInRange(c, 0x09F0, 0x09F1) ||			_charInRange(c, 0x0A05, 0x0A0A) ||			_charInRange(c, 0x0A0F, 0x0A10) ||			_charInRange(c, 0x0A13, 0x0A28) ||			_charInRange(c, 0x0A2A, 0x0A30) ||			_charInRange(c, 0x0A32, 0x0A33) ||			_charInRange(c, 0x0A35, 0x0A36) ||			_charInRange(c, 0x0A38, 0x0A39) ||
xmlutil.java - 源码说明

本页面展示了「这是外国一个开源推理机」中的 xmlutil.java 源码文件，采用 Java 编程语言编写，共 582 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与开源相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?