📄 character.java
字号:
/* java.lang.Character -- Wrapper class for char, and Unicode subsets
   Copyright (C) 1998, 1999, 2001, 2002, 2005, 2006 Free Software Foundation, Inc.

This file is part of GNU Classpath.

GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.

Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.

As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */

/*
 * Note: This class must not be merged with Classpath.
Gcj uses C-style * arrays (see include/java-chartables.h) to store the Unicode character * database, whereas Classpath uses Java objects (char[] extracted from * String constants) in gnu.java.lang.CharData. Gcj's approach is more * efficient, because there is no vtable or data relocation to worry about. * However, despite the difference in the database interface, the two * versions share identical algorithms. */package java.lang;import java.io.Serializable;import java.text.Collator;import java.util.Locale;/** * Wrapper class for the primitive char data type. In addition, this class * allows one to retrieve property information and perform transformations * on the 57,707 defined characters in the Unicode Standard, Version 3.0.0. * java.lang.Character is designed to be very dynamic, and as such, it * retrieves information on the Unicode character set from a separate * database, gnu.java.lang.CharData, which can be easily upgraded. * * <p>For predicates, boundaries are used to describe * the set of characters for which the method will return true. * This syntax uses fairly normal regular expression notation. * See 5.13 of the Unicode Standard, Version 3.0, for the * boundary specification. * * <p>See <a href="http://www.unicode.org">http://www.unicode.org</a> * for more information on the Unicode Standard. * * @author Tom Tromey (tromey@cygnus.com) * @author Paul N. Fisher * @author Jochen Hoenicke * @author Eric Blake (ebb9@email.byu.edu) * @since 1.0 * @status updated to 1.4 */public final class Character implements Serializable, Comparable{ /** * A subset of Unicode blocks. * * @author Paul N. Fisher * @author Eric Blake (ebb9@email.byu.edu) * @since 1.2 */ public static class Subset { /** The name of the subset. */ private final String name; /** * Construct a new subset of characters. * * @param name the name of the subset * @throws NullPointerException if name is null */ protected Subset(String name) { // Note that name.toString() is name, unless name was null. 
this.name = name.toString(); } /** * Compares two Subsets for equality. This is <code>final</code>, and * restricts the comparison on the <code>==</code> operator, so it returns * true only for the same object. * * @param o the object to compare * @return true if o is this */ public final boolean equals(Object o) { return o == this; } /** * Makes the original hashCode of Object final, to be consistent with * equals. * * @return the hash code for this object */ public final int hashCode() { return super.hashCode(); } /** * Returns the name of the subset. * * @return the name */ public final String toString() { return name; } } // class Subset /** * A family of character subsets in the Unicode specification. A character * is in at most one of these blocks. * * This inner class was generated automatically from * <code>libjava/gnu/gcj/convert/Blocks-3.txt</code>, by some perl scripts. * This Unicode definition file can be found on the * <a href="http://www.unicode.org">http://www.unicode.org</a> website. * JDK 1.4 uses Unicode version 3.0.0. * * @author scripts/unicode-blocks.pl (written by Eric Blake) * @since 1.2 */ public static final class UnicodeBlock extends Subset { /** The start of the subset. */ private final int start; /** The end of the subset. */ private final int end; /** The canonical name of the block according to the Unicode standard. */ private final String canonicalName; /** Constants for the <code>forName()</code> method */ private static final int CANONICAL_NAME = 0; private static final int NO_SPACES_NAME = 1; private static final int CONSTANT_NAME = 2; /** * Constructor for strictly defined blocks. 
* * @param start the start character of the range * @param end the end character of the range * @param name the block name */ private UnicodeBlock(int start, int end, String name, String canonicalName) { super(name); this.start = start; this.end = end; this.canonicalName = canonicalName; } /** * Returns the Unicode character block which a character belongs to. * <strong>Note</strong>: This method does not support the use of * supplementary characters. For such support, <code>of(int)</code> * should be used instead. * * @param ch the character to look up * @return the set it belongs to, or null if it is not in one */ public static UnicodeBlock of(char ch) { return of((int) ch); } /** * Returns the Unicode character block which a code point belongs to. * * @param codePoint the character to look up * @return the set it belongs to, or null if it is not in one. * @throws IllegalArgumentException if the specified code point is * invalid. * @since 1.5 */ public static UnicodeBlock of(int codePoint) { if (codePoint > MAX_CODE_POINT) throw new IllegalArgumentException("The supplied integer value is " + "too large to be a codepoint."); // Simple binary search for the correct block. int low = 0; int hi = sets.length - 1; while (low <= hi) { int mid = (low + hi) >> 1; UnicodeBlock b = sets[mid]; if (codePoint < b.start) hi = mid - 1; else if (codePoint > b.end) low = mid + 1; else return b; } return null; } /** * <p> * Returns the <code>UnicodeBlock</code> with the given name, as defined * by the Unicode standard. The version of Unicode in use is defined by * the <code>Character</code> class, and the names are given in the * <code>Blocks-<version>.txt</code> file corresponding to that version. * The name may be specified in one of three ways: * </p> * <ol> * <li>The canonical, human-readable name used by the Unicode standard. * This is the name with all spaces and hyphens retained. 
For example, * `Basic Latin' retrieves the block, UnicodeBlock.BASIC_LATIN.</li> * <li>The canonical name with all spaces removed e.g. `BasicLatin'.</li> * <li>The name used for the constants specified by this class, which * is the canonical name with all spaces and hyphens replaced with * underscores e.g. `BASIC_LATIN'</li> * </ol> * <p> * The names are compared case-insensitively using the case comparison * associated with the U.S. English locale. The method recognises the * previous names used for blocks as well as the current ones. At * present, this simply means that the deprecated `SURROGATES_AREA' * will be recognised by this method (the <code>of()</code> methods * only return one of the three new surrogate blocks). * </p> * * @param blockName the name of the block to look up. * @return the specified block. * @throws NullPointerException if the <code>blockName</code> is * <code>null</code>. * @throws IllegalArgumentException if the name does not match any Unicode * block. * @since 1.5 */ public static final UnicodeBlock forName(String blockName) { int type; if (blockName.indexOf(' ') != -1) type = CANONICAL_NAME; else if (blockName.indexOf('_') != -1) type = CONSTANT_NAME; else type = NO_SPACES_NAME; Collator usCollator = Collator.getInstance(Locale.US); usCollator.setStrength(Collator.PRIMARY); /* Special case for deprecated blocks not in sets */ switch (type) { case CANONICAL_NAME: if (usCollator.compare(blockName, "Surrogates Area") == 0) return SURROGATES_AREA; break; case NO_SPACES_NAME: if (usCollator.compare(blockName, "SurrogatesArea") == 0) return SURROGATES_AREA; break; case CONSTANT_NAME: if (usCollator.compare(blockName, "SURROGATES_AREA") == 0) return SURROGATES_AREA; break; } /* Other cases */ int setLength = sets.length; switch (type) { case CANONICAL_NAME: for (int i = 0; i < setLength; i++) { UnicodeBlock block = sets[i]; if (usCollator.compare(blockName, block.canonicalName) == 0) return block; } break; case NO_SPACES_NAME: for (int i = 0; 
i < setLength; i++) { UnicodeBlock block = sets[i]; String nsName = block.canonicalName.replaceAll(" ",""); if (usCollator.compare(blockName, nsName) == 0) return block; } break; case CONSTANT_NAME: for (int i = 0; i < setLength; i++) { UnicodeBlock block = sets[i]; if (usCollator.compare(blockName, block.toString()) == 0) return block; } break; } throw new IllegalArgumentException("No Unicode block found for " + blockName + "."); } /** * Basic Latin. * 0x0000 - 0x007F. */ public static final UnicodeBlock BASIC_LATIN = new UnicodeBlock(0x0000, 0x007F, "BASIC_LATIN", "Basic Latin"); /** * Latin-1 Supplement. * 0x0080 - 0x00FF. */ public static final UnicodeBlock LATIN_1_SUPPLEMENT = new UnicodeBlock(0x0080, 0x00FF, "LATIN_1_SUPPLEMENT", "Latin-1 Supplement"); /** * Latin Extended-A. * 0x0100 - 0x017F. */ public static final UnicodeBlock LATIN_EXTENDED_A = new UnicodeBlock(0x0100, 0x017F, "LATIN_EXTENDED_A", "Latin Extended-A"); /** * Latin Extended-B. * 0x0180 - 0x024F. */ public static final UnicodeBlock LATIN_EXTENDED_B = new UnicodeBlock(0x0180, 0x024F, "LATIN_EXTENDED_B", "Latin Extended-B"); /** * IPA Extensions. * 0x0250 - 0x02AF. */ public static final UnicodeBlock IPA_EXTENSIONS = new UnicodeBlock(0x0250, 0x02AF, "IPA_EXTENSIONS", "IPA Extensions"); /** * Spacing Modifier Letters. * 0x02B0 - 0x02FF. */ public static final UnicodeBlock SPACING_MODIFIER_LETTERS = new UnicodeBlock(0x02B0, 0x02FF, "SPACING_MODIFIER_LETTERS", "Spacing Modifier Letters"); /** * Combining Diacritical Marks. * 0x0300 - 0x036F. */ public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = new UnicodeBlock(0x0300, 0x036F, "COMBINING_DIACRITICAL_MARKS", "Combining Diacritical Marks"); /** * Greek. * 0x0370 - 0x03FF. */ public static final UnicodeBlock GREEK = new UnicodeBlock(0x0370, 0x03FF, "GREEK", "Greek"); /** * Cyrillic. * 0x0400 - 0x04FF. */ public static final UnicodeBlock CYRILLIC = new UnicodeBlock(0x0400, 0x04FF, "CYRILLIC", "Cyrillic"); /** * Cyrillic Supplementary. 
* 0x0500 - 0x052F.
     * @since 1.5
     */
    public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY
      = new UnicodeBlock(0x0500, 0x052F,
                         "CYRILLIC_SUPPLEMENTARY",
                         "Cyrillic Supplementary");

    /**
     * Armenian.
     * 0x0530 - 0x058F.
     */
    public static final UnicodeBlock ARMENIAN
      = new UnicodeBlock(0x0530, 0x058F,
                         "ARMENIAN",
                         "Armenian");

    /**
     * Hebrew.
     * 0x0590 - 0x05FF.
     */
    public static final UnicodeBlock HEBREW
      = new UnicodeBlock(0x0590, 0x05FF,
                         "HEBREW",
                         "Hebrew");

    /**
     * Arabic.
     * 0x0600 - 0x06FF.
     */
    public static final UnicodeBlock ARABIC
      = new UnicodeBlock(0x0600, 0x06FF,
                         "ARABIC",
                         "Arabic");

    /**
     * Syriac.
     * 0x0700 - 0x074F.
     * @since 1.4
     */
    public static final UnicodeBlock SYRIAC
      = new UnicodeBlock(0x0700, 0x074F,
                         "SYRIAC",
                         "Syriac");

    /**
     * Thaana.
     * 0x0780 - 0x07BF.
     * Note that this range does not start directly after the Syriac range
     * declared before it (which ends at 0x074F).
     * @since 1.4
     */
    public static final UnicodeBlock THAANA
      = new UnicodeBlock(0x0780, 0x07BF,
                         "THAANA",
                         "Thaana");

    /**
     * Devanagari.
     * 0x0900 - 0x097F.
     * Note that this range does not start directly after the Thaana range
     * declared before it (which ends at 0x07BF).
     */
    public static final UnicodeBlock DEVANAGARI
      = new UnicodeBlock(0x0900, 0x097F,
                         "DEVANAGARI",
                         "Devanagari");

    /**
     * Bengali.
     * 0x0980 - 0x09FF.
     */
    public static final UnicodeBlock BENGALI
      = new UnicodeBlock(0x0980, 0x09FF,
                         "BENGALI",
                         "Bengali");

    /**
     * Gurmukhi.
     * 0x0A00 - 0x0A7F.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -