xmlchars.java

来自「Mobile 应用程序使用 Java Micro Edition (Java ME) 平台」· Java 代码 · 共 428 行
JAVA
428 行
/* * $Id: XmlChars.java,v 1.1.1.1 2000/11/23 01:53:35 edwingo Exp $ * * The Apache Software License, Version 1.1 * * * Copyright (c) 2000 The Apache Software Foundation.  All rights  * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright *    notice, this list of conditions and the following disclaimer.  * * 2. Redistributions in binary form must reproduce the above copyright *    notice, this list of conditions and the following disclaimer in *    the documentation and/or other materials provided with the *    distribution. * * 3. The end-user documentation included with the redistribution, *    if any, must include the following acknowledgment:   *       "This product includes software developed by the *        Apache Software Foundation (http://www.apache.org/)." *    Alternately, this acknowledgment may appear in the software itself, *    if and wherever such third-party acknowledgments normally appear. * * 4. The names "Crimson" and "Apache Software Foundation" must *    not be used to endorse or promote products derived from this *    software without prior written permission. For written  *    permission, please contact apache@apache.org. * * 5. Products derived from this software may not be called "Apache", *    nor may "Apache" appear in their name, without prior written *    permission of the Apache Software Foundation. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED.  
IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation and was * originally based on software copyright (c) 1999, Sun Microsystems, Inc.,  * http://www.sun.com.  For more information on the Apache Software  * Foundation, please see <http://www.apache.org/>. */package com.sun.imageio.metadata;/** * Methods in this class are used to determine whether characters may * appear in certain roles in XML documents.  Such methods are used * both to parse and to create such documents. * * @version 1.8 * @author David Brownell */public class XmlChars{    // can't construct instances    private XmlChars () { }    /**     * Returns true if the argument, a UCS-4 character code, is valid in     * XML documents.  Unicode characters fit into the low sixteen     * bits of a UCS-4 character, and pairs of Unicode <em>surrogate     * characters</em> can be combined to encode UCS-4 characters in     * documents containing only Unicode.  (The <code>char</code> datatype     * in the Java Programming Language represents Unicode characters,     * including unpaired surrogates.)     *     * <P> In XML, UCS-4 characters can also be encoded by the use of     * <em>character references</em> such as <b>&amp;#x12345678;</b>, which     * happens to refer to a character that is disallowed in XML documents.     
* UCS-4 characters allowed in XML documents can be expressed with     * one or two Unicode characters.     *     * @param ucs4char The 32-bit UCS-4 character being tested.     */    static public boolean isChar (int ucs4char)    {	// [2] Char ::= #x0009 | #x000A | #x000D	//			| [#x0020-#xD7FF]	//	... surrogates excluded!	//			| [#xE000-#xFFFD]	// 			| [#x10000-#x10ffff]	return ((ucs4char >= 0x0020 && ucs4char <= 0xD7FF)		|| ucs4char == 0x000A || ucs4char == 0x0009		|| ucs4char == 0x000D		|| (ucs4char >= 0xE000 && ucs4char <= 0xFFFD)		|| (ucs4char >= 0x10000 && ucs4char <= 0x10ffff));    }    /**     * Returns true if the character is allowed to be a non-initial     * character in names according to the XML recommendation.     * @see #isNCNameChar     * @see #isLetter     */    public static boolean isNameChar (char c)    {	// [4] NameChar ::= Letter | Digit | '.' | '_' | ':'	//			| CombiningChar | Extender	if (isLetter2 (c))	    return true;	else if (c == '>')	    return false;	else if (c == '.' || c == '-' || c == '_' || c == ':'		|| isExtender (c))	    return true;	else	    return false;    }    /**     * Returns true if the character is allowed to be a non-initial     * character in unscoped names according to the rules of the XML     * Namespaces proposed recommendation.  Except for precluding     * the colon (used to separate names from their scopes) these     * characters are just as allowed by the XML recommendation.     * @see #isNameChar     * @see #isLetter     */    public static boolean isNCNameChar (char c)    {	// [NC 5] NCNameChar ::= Letter | Digit | '.' | '_' 	//			| CombiningChar | Extender	return c != ':' && isNameChar (c);    }    /**     * Returns true if the character is allowed where XML supports     * whitespace characters, false otherwise.     
*/    public static boolean isSpace (char c)    {	return c == ' ' || c == '\t' || c == '\n' || c == '\r';    }    /*     * NOTE:  java.lang.Character.getType() values are:     *     * UNASSIGNED                    = 0,     *     * UPPERCASE_LETTER            = 1,    // Lu     * LOWERCASE_LETTER            = 2,    // Ll     * TITLECASE_LETTER            = 3,    // Lt     * MODIFIER_LETTER             = 4,    // Lm     * OTHER_LETTER                = 5,    // Lo     * NON_SPACING_MARK            = 6,    // Mn     * ENCLOSING_MARK              = 7,    // Me     * COMBINING_SPACING_MARK      = 8,    // Mc     * DECIMAL_DIGIT_NUMBER        = 9,    // Nd     * LETTER_NUMBER               = 10,   // Nl     * OTHER_NUMBER                = 11,   // No     * SPACE_SEPARATOR             = 12,   // Zs     * LINE_SEPARATOR              = 13,   // Zl     * PARAGRAPH_SEPARATOR         = 14,   // Zp     * CONTROL                     = 15,   // Cc     * FORMAT                      = 16,   // Cf     *                         // 17 reserved for proposed Ci category     * PRIVATE_USE                 = 18,   // Co     * SURROGATE                   = 19,   // Cs     * DASH_PUNCTUATION            = 20,   // Pd     * START_PUNCTUATION           = 21,   // Ps     * END_PUNCTUATION             = 22,   // Pe     * CONNECTOR_PUNCTUATION       = 23,   // Pc     * OTHER_PUNCTUATION           = 24,   // Po     * MATH_SYMBOL                 = 25,   // Sm     * CURRENCY_SYMBOL             = 26,   // Sc     * MODIFIER_SYMBOL             = 27,   // Sk     * OTHER_SYMBOL                = 28;   // So     */    /**     * Returns true if the character is an XML "letter".  XML Names must     * start with Letters or a few other characters, but other characters     * in names must only satisfy the <em>isNameChar</em> predicate.     *     * @see #isNameChar     * @see #isNCNameChar     */    public static boolean isLetter (char c)    {	// [84] Letter ::= BaseChar | Ideographic	// [85] BaseChar ::= ... 
too much to repeat	// [86] Ideographic ::= ... too much to repeat	//	// Optimize the typical case.	//	if (c >= 'a' && c <= 'z')	    return true;	if (c == '/')	    return false;	if (c >= 'A' && c <= 'Z')	    return true;	//	// Since the tables are too ridiculous to use in code,	// we're using the footnotes here to drive this test.	//	switch (Character.getType (c)) {	    // app. B footnote says these are 'name start'	    // chars' ...	  case Character.LOWERCASE_LETTER:		// Ll	  case Character.UPPERCASE_LETTER:		// Lu	  case Character.OTHER_LETTER:			// Lo	  case Character.TITLECASE_LETTER:		// Lt	  case Character.LETTER_NUMBER:			// Nl	    // OK, here we just have some exceptions to check...	    return !isCompatibilityChar (c)		    // per "5.14 of Unicode", rule out some combiners		&& !(c >= 0x20dd && c <= 0x20e0);	  default:	    // check for some exceptions:  these are "alphabetic"	    return ((c >= 0x02bb && c <=  0x02c1)		    || c == 0x0559 || c == 0x06e5 || c == 0x06e6);	}    }    //    // XML 1.0 discourages "compatibility" characters in names; these    // were defined to permit passing through some information stored in    // older non-Unicode character sets.  These always have alternative    // representations in Unicode, e.g. using combining chars.    //    private static boolean isCompatibilityChar (char c)    {	// the numerous comparisions here seem unavoidable,	// but the switch can reduce the number which must	// actually be executed.	
switch ((c >> 8) & 0x0ff) {	  case 0x00:	    // ISO Latin/1 has a few compatibility characters	    return c == 0x00aa || c == 0x00b5 || c == 0x00ba;	  case 0x01:	    // as do Latin Extended A and (parts of) B	    return (c >= 0x0132 && c <= 0x0133)		|| (c >= 0x013f && c <= 0x0140)		|| c == 0x0149		|| c == 0x017f		|| (c >= 0x01c4 && c <= 0x01cc)		|| (c >= 0x01f1 && c <= 0x01f3) ;	  case 0x02:		   // some spacing modifiers	    return (c >= 0x02b0 && c <= 0x02b8)		|| (c >= 0x02e0 && c <= 0x02e4);	  	  case 0x03:	    return c == 0x037a;			// Greek	  case 0x05:	    return c == 0x0587;			// Armenian	  case 0x0e:	    return c >= 0x0edc && c <= 0x0edd;	// Laotian	  case 0x11:	    // big chunks of Hangul Jamo are all "compatibility"	    return c == 0x1101		|| c == 0x1104		|| c == 0x1108		|| c == 0x110a		|| c == 0x110d		|| (c >= 0x1113 && c <= 0x113b)		|| c == 0x113d		|| c == 0x113f		|| (c >= 0x1141 && c <= 0x114b)		|| c == 0x114d		|| c == 0x114f		|| (c >= 0x1151 && c <= 0x1153)		|| (c >= 0x1156 && c <= 0x1158)		|| c == 0x1162		|| c == 0x1164		|| c == 0x1166		|| c == 0x1168		|| (c >= 0x116a && c <= 0x116c)		|| (c >= 0x116f && c <= 0x1171)		|| c == 0x1174		|| (c >= 0x1176 && c <= 0x119d)		|| (c >= 0x119f && c <= 0x11a2)		|| (c >= 0x11a9 && c <= 0x11aa)		|| (c >= 0x11ac && c <= 0x11ad)		|| (c >= 0x11b0 && c <= 0x11b6)		|| c == 0x11b9		|| c == 0x11bb		|| (c >= 0x11c3 && c <= 0x11ea)		|| (c >= 0x11ec && c <= 0x11ef)		|| (c >= 0x11f1 && c <= 0x11f8)		;	  case 0x20:	    return c == 0x207f;			// superscript	  case 0x21:	    return		// various letterlike symbols		   c == 0x2102		|| c == 0x2107		|| (c >= 0x210a && c <= 0x2113)		|| c == 0x2115		|| (c >= 0x2118 && c <= 0x211d)		|| c == 0x2124		|| c == 0x2128		|| (c >= 0x212c && c <= 0x212d)		|| (c >= 0x212f && c <= 0x2138)		    // most Roman numerals (less 1K, 5K, 10K)		|| (c >= 0x2160 && c <= 0x217f)		;	  case 0x30:	    // some Hiragana	    return c >= 0x309b && c <= 0x309c;	  case 0x31:	    // all Hangul Compatibility Jamo	    return 
c >= 0x3131 && c <= 0x318e;	  case 0xf9:	  case 0xfa:	  case 0xfb:	  case 0xfc:	  case 0xfd:	  case 0xfe:	  case 0xff:	    // the whole "compatibility" area is for that purpose!	    return true;		  default:	    // most of Unicode isn't flagged as being for compatibility	    return false;	}    }    // guts of isNameChar/isNCNameChar    private static boolean isLetter2 (char c)    {	// [84] Letter ::= BaseChar | Ideographic	// [85] BaseChar ::= ... too much to repeat	// [86] Ideographic ::= ... too much to repeat	// [87] CombiningChar ::= ... too much to repeat	//	// Optimize the typical case.	//	if (c >= 'a' && c <= 'z')	    return true;	if (c == '>')	    return false;	if (c >= 'A' && c <= 'Z')	    return true;	//	// Since the tables are too ridiculous to use in code,	// we're using the footnotes here to drive this test.	//	switch (Character.getType (c)) {	    // app. B footnote says these are 'name start'	    // chars' ...	  case Character.LOWERCASE_LETTER:		// Ll	  case Character.UPPERCASE_LETTER:		// Lu	  case Character.OTHER_LETTER:			// Lo	  case Character.TITLECASE_LETTER:		// Lt	  case Character.LETTER_NUMBER:			// Nl	    // ... and these are name characters 'other	    // than name start characters'	  case Character.COMBINING_SPACING_MARK:	// Mc	  case Character.ENCLOSING_MARK:		// Me	  case Character.NON_SPACING_MARK:		// Mn	  case Character.MODIFIER_LETTER:		// Lm	  case Character.DECIMAL_DIGIT_NUMBER:		// Nd	    // OK, here we just have some exceptions to check...	    return !isCompatibilityChar (c)		    // per "5.14 of Unicode", rule out some combiners		&& !(c >= 0x20dd && c <= 0x20e0);	  default:		// added a character ...	    return c == 0x0387;	}    }    private static boolean isDigit (char c)    {	// [88] Digit ::= ... 	//	// java.lang.Character.isDigit is correct from the XML point	// of view except that it allows "fullwidth" digits.	//	return Character.isDigit (c)		&& ! 
( (c >= 0xff10) && (c <= 0xff19));    }    private static boolean isExtender (char c)    {	// [89] Extender ::= ... 	return c == 0x00b7 || c == 0x02d0 || c == 0x02d1 || c == 0x0387		|| c == 0x0640 || c == 0x0e46 || c == 0x0ec6		|| c == 0x3005 || (c >= 0x3031 && c <= 0x3035)		|| (c >= 0x309d && c <= 0x309e)		|| (c >= 0x30fc && c <= 0x30fe)		;    }}
xmlchars.java - 源码说明

本页面展示了「Mobile 应用程序使用 Java Micro Edition (Java ME) 平台」中的 xmlchars.java 源码文件，采用 Java 编程语言编写，共 428 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与Java ME相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?