character.java

来自「纯java操作系统jnode,安装简单和操作简单的个人使用的Java操作系统」· Java 代码 · 共 2,178 行 · 第 1/4 页

JAVA
2,178
字号

	/**
	 * Determines if a character can follow the first letter in
	 * a Java identifier.  This is the combination of isJavaLetter (isLetter,
	 * type of LETTER_NUMBER, currency, connecting punctuation) and digit,
	 * numeric letter (like Roman numerals), combining marks, non-spacing marks,
	 * or isIdentifierIgnorable.
	 *
	 * @param ch character to test
	 * @return true if ch can follow the first letter in a Java identifier
	 * @deprecated Replaced by {@link #isJavaIdentifierPart(char)}
	 * @see #isJavaLetter(char)
	 * @see #isJavaIdentifierStart(char)
	 * @see #isJavaIdentifierPart(char)
	 * @see #isLetter(char)
	 * @see #isLetterOrDigit(char)
	 * @see #isUnicodeIdentifierPart(char)
	 * @see #isIdentifierIgnorable(char)
	 */
	public static boolean isJavaLetterOrDigit(char ch) {
		// Deprecated alias: the answer is exactly what isJavaIdentifierPart
		// reports for the same character.
		final boolean identifierPart = isJavaIdentifierPart(ch);
		return identifierPart;
	}

	/**
	 * Determines if a character can start a Java identifier. This is the
	 * combination of isLetter, any character where getType returns
	 * LETTER_NUMBER, currency symbols (like '$'), and connecting punctuation
	 * (like '_').
	 * <br>
	 * Java identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]
	 *
	 * @param ch character to test
	 * @return true if ch can start a Java identifier, else false
	 * @see #isJavaIdentifierPart(char)
	 * @see #isLetter(char)
	 * @see #isUnicodeIdentifierStart(char)
	 * @since 1.1
	 */
	public static boolean isJavaIdentifierStart(char ch) {
		// One bit per general category that is allowed to begin a Java
		// identifier: the letter categories, letter numbers, currency symbols
		// and connector punctuation.
		final int startCategories =
			(1 << UPPERCASE_LETTER)
			| (1 << LOWERCASE_LETTER)
			| (1 << TITLECASE_LETTER)
			| (1 << MODIFIER_LETTER)
			| (1 << OTHER_LETTER)
			| (1 << LETTER_NUMBER)
			| (1 << CURRENCY_SYMBOL)
			| (1 << CONNECTOR_PUNCTUATION);

		// Turn the character's category into a single bit and test membership.
		final int categoryBit = 1 << getType(ch);
		return (categoryBit & startCategories) != 0;
	}

	/**
	 * Determines if a character can follow the first letter in
	 * a Java identifier.  This is the combination of isJavaLetter (isLetter,
	 * type of LETTER_NUMBER, currency, connecting punctuation) and digit,
	 * numeric letter (like Roman numerals), combining marks, non-spacing marks,
	 * or isIdentifierIgnorable.
	 * <br>
	 * Java identifier extender =
	 *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Sc]|[Pc]|[Mn]|[Mc]|[Nd]|[Cf]
	 *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
	 *
	 * @param ch character to test
	 * @return true if ch can follow the first letter in a Java identifier
	 * @see #isIdentifierIgnorable(char)
	 * @see #isJavaIdentifierStart(char)
	 * @see #isLetterOrDigit(char)
	 * @see #isUnicodeIdentifierPart(char)
	 * @since 1.1
	 */
	public static boolean isJavaIdentifierPart(char ch) {
		final int type = getType(ch);

		// Categories that are accepted outright after the first character.
		final int partCategories =
			(1 << UPPERCASE_LETTER)
			| (1 << LOWERCASE_LETTER)
			| (1 << TITLECASE_LETTER)
			| (1 << MODIFIER_LETTER)
			| (1 << OTHER_LETTER)
			| (1 << NON_SPACING_MARK)
			| (1 << COMBINING_SPACING_MARK)
			| (1 << DECIMAL_DIGIT_NUMBER)
			| (1 << LETTER_NUMBER)
			| (1 << CURRENCY_SYMBOL)
			| (1 << CONNECTOR_PUNCTUATION)
			| (1 << FORMAT);

		if (((1 << type) & partCategories) != 0) {
			return true;
		}

		// Control characters qualify only when they are identifier-ignorable.
		return type == CONTROL && isIdentifierIgnorable(ch);
	}

	/**
	 * Determines if a character can start a Unicode identifier.  Only
	 * letters can start a Unicode identifier, but this includes characters
	 * in LETTER_NUMBER.
	 * <br>
	 * Unicode identifier start = [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]
	 *
	 * @param ch character to test
	 * @return true if ch can start a Unicode identifier, else false
	 * @see #isJavaIdentifierStart(char)
	 * @see #isLetter(char)
	 * @see #isUnicodeIdentifierPart(char)
	 * @since 1.1
	 */
	public static boolean isUnicodeIdentifierStart(char ch) {
		// One bit per general category that may begin a Unicode identifier:
		// the letter categories plus letter numbers.
		final int startCategories =
			(1 << UPPERCASE_LETTER)
			| (1 << LOWERCASE_LETTER)
			| (1 << TITLECASE_LETTER)
			| (1 << MODIFIER_LETTER)
			| (1 << OTHER_LETTER)
			| (1 << LETTER_NUMBER);

		final int categoryBit = 1 << getType(ch);
		return (categoryBit & startCategories) != 0;
	}

	/**
	 * Determines if a character can follow the first letter in
	 * a Unicode identifier. This includes letters, connecting punctuation,
	 * digits, numeric letters, combining marks, non-spacing marks, and
	 * isIdentifierIgnorable.
	 * <br>
	 * Unicode identifier extender =
	 *   [Lu]|[Ll]|[Lt]|[Lm]|[Lo]|[Nl]|[Mn]|[Mc]|[Nd]|[Pc]|[Cf]
	 *   |U+0000-U+0008|U+000E-U+001B|U+007F-U+009F
	 *
	 * @param ch character to test
	 * @return true if ch can follow the first letter in a Unicode identifier
	 * @see #isIdentifierIgnorable(char)
	 * @see #isJavaIdentifierPart(char)
	 * @see #isLetterOrDigit(char)
	 * @see #isUnicodeIdentifierStart(char)
	 * @since 1.1
	 */
	public static boolean isUnicodeIdentifierPart(char ch) {
		final int type = getType(ch);

		// Categories that are accepted outright after the first character.
		// Unlike the Java variant, currency symbols are not in this set.
		final int partCategories =
			(1 << UPPERCASE_LETTER)
			| (1 << LOWERCASE_LETTER)
			| (1 << TITLECASE_LETTER)
			| (1 << MODIFIER_LETTER)
			| (1 << OTHER_LETTER)
			| (1 << NON_SPACING_MARK)
			| (1 << COMBINING_SPACING_MARK)
			| (1 << DECIMAL_DIGIT_NUMBER)
			| (1 << LETTER_NUMBER)
			| (1 << CONNECTOR_PUNCTUATION)
			| (1 << FORMAT);

		if (((1 << type) & partCategories) != 0) {
			return true;
		}

		// Control characters qualify only when they are identifier-ignorable.
		return type == CONTROL && isIdentifierIgnorable(ch);
	}

	/**
	 * Determines if a character is ignorable in a Unicode identifier. This
	 * includes the non-whitespace ISO control characters (<code>'\u0000'</code>
	 * through <code>'\u0008'</code>, <code>'\u000E'</code> through
	 * <code>'\u001B'</code>, and <code>'\u007F'</code> through
	 * <code>'\u009F'</code>), and FORMAT characters.
	 * <br>
	 * Unicode identifier ignorable = [Cf]|U+0000-U+0008|U+000E-U+001B
	 *    |U+007F-U+009F
	 *
	 * @param ch character to test
	 * @return true if ch is ignorable in a Unicode or Java identifier
	 * @see #isJavaIdentifierPart(char)
	 * @see #isUnicodeIdentifierPart(char)
	 * @since 1.1
	 */
	public static boolean isIdentifierIgnorable(char ch) {
		// Cheap range checks first: the ignorable ISO controls all live at or
		// below U+009F, in three bands (below TAB, U+000E..U+001B, and
		// U+007F and up).
		if (ch <= '\u009F') {
			final boolean belowTab = ch < '\t';
			final boolean deleteAndC1 = ch >= '\u007F';
			final boolean shiftOutToEscape = ch >= '\u000E' && ch <= '\u001B';
			if (belowTab || deleteAndC1 || shiftOutToEscape) {
				return true;
			}
		}

		// Everything else is ignorable only if it is a FORMAT character.
		return getType(ch) == FORMAT;
	}

	/**
	 * Converts a Unicode character into its lowercase equivalent mapping.
	 * If a mapping does not exist, then the character passed is returned.
	 * Note that isLowerCase(toLowerCase(ch)) does not always return true.
	 *
	 * @param ch character to convert to lowercase
	 * @return lowercase mapping of ch, or ch if lowercase mapping does
	 *         not exist
	 * @see #isLowerCase(char)
	 * @see #isUpperCase(char)
	 * @see #toTitleCase(char)
	 * @see #toUpperCase(char)
	 */
	public static char toLowerCase(char ch) {
		// The attribute word's bits above the low seven select the entry in
		// the lowercase table.
		final int attrIndex = readChar(ch) >> 7;

		// The table entry is added to ch; the sum is truncated back to char,
		// so the signedness of the entry is irrelevant.
		return (char) (ch + lower[attrIndex]);
	}

	/**
	 * Converts a Unicode character into its uppercase equivalent mapping.
	 * If a mapping does not exist, then the character passed is returned.
	 * Note that isUpperCase(toUpperCase(ch)) does not always return true.
	 *
	 * @param ch character to convert to uppercase
	 * @return uppercase mapping of ch, or ch if uppercase mapping does
	 *         not exist
	 * @see #isLowerCase(char)
	 * @see #isUpperCase(char)
	 * @see #toLowerCase(char)
	 * @see #toTitleCase(char)
	 */
	public static char toUpperCase(char ch) {
		// The attribute word's bits above the low seven select the entry in
		// the uppercase table.
		final int attrIndex = readChar(ch) >> 7;

		// The table entry is added to ch; the sum is truncated back to char,
		// so the signedness of the entry is irrelevant.
		return (char) (ch + upper[attrIndex]);
	}

	/**
	 * Converts a Unicode character into its titlecase equivalent mapping.
	 * If a mapping does not exist, then the character passed is returned.
	 * Note that isTitleCase(toTitleCase(ch)) does not always return true.
	 *
	 * @param ch character to convert to titlecase
	 * @return titlecase mapping of ch, or ch if titlecase mapping does
	 *         not exist
	 * @see #isTitleCase(char)
	 * @see #toLowerCase(char)
	 * @see #toUpperCase(char)
	 */
	public static char toTitleCase(char ch) {
		// title is scanned as (character, mapping) pairs, last pair first.
		// The table is short, so a linear scan is acceptable.
		int pair = title.length - 2;
		while (pair >= 0) {
			if (title[pair] == ch) {
				return title[pair + 1];
			}
			pair -= 2;
		}

		// No dedicated titlecase entry: fall back to the uppercase mapping.
		return toUpperCase(ch);
	}

	/**
	 * Converts a character into a digit of the specified radix. If the radix
	 * is less than MIN_RADIX or greater than MAX_RADIX, or if the numeric value
	 * of ch is not less than the radix, or if ch is not a decimal digit or in
	 * the case insensitive set of 'a'-'z', the result is -1.
	 * <br>
	 * character argument boundary = [Nd]|U+0041-U+005A|U+0061-U+007A
	 *    |U+FF21-U+FF3A|U+FF41-U+FF5A
	 *
	 * @param ch character to convert into a digit
	 * @param radix radix in which ch is a digit
	 * @return digit which ch represents in radix, or -1 not a valid digit
	 * @see #MIN_RADIX
	 * @see #MAX_RADIX
	 * @see #forDigit(int, int)
	 * @see #isDigit(char)
	 * @see #getNumericValue(char)
	 */
	public static int digit(char ch, int radix) {
		// Reject unsupported radices before touching the character tables.
		if (radix < MIN_RADIX || radix > MAX_RADIX) {
			return -1;
		}

		final char attr = readChar(ch);

		// Only upper/lowercase letters and decimal digits can be digits.
		final int digitCategories =
			(1 << UPPERCASE_LETTER)
			| (1 << LOWERCASE_LETTER)
			| (1 << DECIMAL_DIGIT_NUMBER);
		final int categoryBit = 1 << (attr & TYPE_MASK);
		if ((categoryBit & digitCategories) == 0) {
			return -1;
		}

		// Signedness of the table entry does not matter here: 0xffff and -1
		// both fail the range test below.
		final int numeric = numValue[attr >> 7];
		if (numeric >= 0 && numeric < radix) {
			return numeric;
		}
		return -1;
	}

	/**
	 * Returns the Unicode numeric value property of a character. For example,
	 * <code>'\\u216C'</code> (the Roman numeral fifty) returns 50.
	 *
	 * <p>This method also returns values for the letters A through Z, (not
	 * specified by Unicode), in these ranges: <code>'\u0041'</code>
	 * through <code>'\u005A'</code> (uppercase); <code>'\u0061'</code>
	 * through <code>'\u007A'</code> (lowercase); and <code>'\uFF21'</code>
	 * through <code>'\uFF3A'</code>, <code>'\uFF41'</code> through
	 * <code>'\uFF5A'</code> (full width variants).
	 *
	 * <p>If the character lacks a numeric value property, -1 is returned.
	 * If the character has a numeric value property which is not representable
	 * as a nonnegative integer, such as a fraction, -2 is returned.
	 *
	 * character argument boundary = [Nd]|[Nl]|[No]|U+0041-U+005A|U+0061-U+007A
	 *    |U+FF21-U+FF3A|U+FF41-U+FF5A
	 *
	 * @param ch character from which the numeric value property will
	 *        be retrieved
	 * @return the numeric value property of ch, or -1 if it does not exist, or
	 *         -2 if it is not representable as a nonnegative integer
	 * @see #forDigit(int, int)
	 * @see #digit(char, int)
	 * @see #isDigit(char)
	 * @since 1.1
	 */
	public static int getNumericValue(char ch) {
		final int attrIndex = readChar(ch) >> 7;

		// Reinterpret the table entry as a signed 16-bit value so that the
		// negative sentinel results come out negative.
		final short signedValue = (short) numValue[attrIndex];
		return signedValue;
	}

	/**
	 * Determines if a character is an ISO-LATIN-1 space. This is only the five
	 * characters <code>'\t'</code>, <code>'\n'</code>, <code>'\f'</code>,
	 * <code>'\r'</code>, and <code>' '</code>.
	 * <br>
	 * Java space = U+0020|U+0009|U+000A|U+000C|U+000D
	 *
	 * @param ch character to test
	 * @return true if ch is a space, else false
	 * @deprecated Replaced by {@link #isWhitespace(char)}
	 * @see #isSpaceChar(char)
	 * @see #isWhitespace(char)
	 */
	public static boolean isSpace(char ch) {
		// Exactly five characters count: space, tab, line feed, form feed and
		// carriage return.
		//
		// Plain comparisons are used on purpose instead of a shifted bit mask.
		// Java uses only the low five bits of an int shift distance, so a
		// "decrement, then shift" test wraps U+0000 around to bit 31 (the bit
		// that stands for ' ') and wrongly reports the NUL character as a space.
		switch (ch) {
			case ' ':
			case '\t':
			case '\n':
			case '\f':
			case '\r':
				return true;
			default:
				return false;
		}
	}

	/**
	 * Determines if a character is a Unicode space character. This includes
	 * SPACE_SEPARATOR, LINE_SEPARATOR, and PARAGRAPH_SEPARATOR.
	 * <br>
	 * Unicode space = [Zs]|[Zp]|[Zl]
	 *
	 * @param ch character to test
	 * @return true if ch is a Unicode space, else false
	 * @see #isWhitespace(char)
	 * @since 1.1
	 */
	public static boolean isSpaceChar(char ch) {
		// The three separator categories, one bit each.
		final int separatorCategories =
			(1 << SPACE_SEPARATOR)
			| (1 << LINE_SEPARATOR)
			| (1 << PARAGRAPH_SEPARATOR);

		final int categoryBit = 1 << getType(ch);
		return (categoryBit & separatorCategories) != 0;
	}

	/**
	 * Determines if a character is Java whitespace. This includes Unicode
	 * space characters (SPACE_SEPARATOR, LINE_SEPARATOR, and
	 * PARAGRAPH_SEPARATOR) except the non-breaking spaces
	 * (<code>'\u00A0'</code>, <code>'\u2007'</code>, and <code>'\u202F'</code>);
	 * and these characters: <code>'\u0009'</code>, <code>'\u000A'</code>,
	 * <code>'\u000B'</code>, <code>'\u000C'</code>, <code>'\u000D'</code>,
	 * <code>'\u001C'</code>, <code>'\u001D'</code>, <code>'\u001E'</code>,
	 * and <code>'\u001F'</code>.
	 * <br>
	 * Java whitespace = ([Zs] not Nb)|[Zl]|[Zp]|U+0009-U+000D|U+001C-U+001F
	 *
	 * @param ch character to test
	 * @return true if ch is Java whitespace, else false
	 * @see #isSpaceChar(char)
	 * @since 1.1
	 */
	public static boolean isWhitespace(char ch) {
		final int attr = readChar(ch);

		// Case 1: a separator-category character whose no-break flag is clear.
		final int separatorCategories =
			(1 << SPACE_SEPARATOR)
			| (1 << LINE_SEPARATOR)
			| (1 << PARAGRAPH_SEPARATOR);
		final boolean separator =
			((1 << (attr & TYPE_MASK)) & separatorCategories) != 0;
		if (separator && (attr & NO_BREAK_MASK) == 0) {
			return true;
		}

		// Case 2: one of the nine whitespace control characters, all of which
		// are at or below U+001F, so the shift below stays within an int.
		if (ch > '\u001F') {
			return false;
		}
		final int whitespaceControls =
			(1 << '\t')
			| (1 << '\n')
			| (1 << '\u000B')
			| (1 << '\u000C')
			| (1 << '\r')
			| (1 << '\u001C')
			| (1 << '\u001D')
			| (1 << '\u001E')
			| (1 << '\u001F');
		return ((1 << ch) & whitespaceControls) != 0;
	}

	/**
	 * Determines if a character has the ISO Control property.
	 * <br>
	 * ISO Control = [Cc]
	 *
	 * @param ch character to test
	 * @return true if ch is an ISO Control character, else false
	 * @see #isSpaceChar(char)
	 * @see #isWhitespace(char)
	 * @since 1.1
	 */
	public static boolean isISOControl(char ch) {
		// A character is an ISO control exactly when its category is CONTROL.
		final int category = getType(ch);
		return category == CONTROL;
	}

	/**
	 * Returns the Unicode general category property of a character.
	 *
	 * @param ch character from which the general category property will
	 *        be retrieved
	 * @return the character category property of ch as an integer
	 * @see #UNASSIGNED
	 * @see #UPPERCASE_LETTER
	 * @see #LOWERCASE_LETTER
	 * @see #TITLECASE_LETTER
	 * @see #MODIFIER_LETTER
	 * @see #OTHER_LETTER
	 * @see #NON_SPACING_MARK
	 * @see #ENCLOSING_MARK
	 * @see #COMBINING_SPACING_MARK
	 * @see #DECIMAL_DIGIT_NUMBER
	 * @see #LETTER_NUMBER
	 * @see #OTHER_NUMBER
	 * @see #SPACE_SEPARATOR
	 * @see #LINE_SEPARATOR
	 * @see #PARAGRAPH_SEPARATOR
	 * @see #CONTROL
	 * @see #FORMAT
	 * @see #PRIVATE_USE
	 * @see #SURROGATE
	 * @see #DASH_PUNCTUATION
	 * @see #START_PUNCTUATION
	 * @see #END_PUNCTUATION
	 * @see #CONNECTOR_PUNCTUATION
	 * @see #OTHER_PUNCTUATION
	 * @see #MATH_SYMBOL
	 * @see #CURRENCY_SYMBOL
	 * @see #MODIFIER_SYMBOL
	 * @see #INITIAL_QUOTE_PUNCTUATION
	 * @see #FINAL_QUOTE_PUNCTUATION
	 * @since 1.1
	 */
	public static int getType(char ch) {
		// The category is the part of the attribute word selected by TYPE_MASK.
		final int attr = readChar(ch);
		return attr & TYPE_MASK;
	}

	/**
	 * Converts a digit into a character which represents that digit
	 * in a specified radix. If the radix is less than MIN_RADIX or greater
	 * than MAX_RADIX, or the digit is negative or not less than the radix,
	 * then the null character <code>'\0'</code> is returned.  Otherwise the
	 * return value is in '0'-'9' and 'a'-'z'.
	 * <br>
	 * return value boundary = U+0030-U+0039|U+0061-U+007A
	 *
	 * @param digit digit to be converted into a character
	 * @param radix radix of digit
	 * @return character representing digit in radix, or '\0'
	 * @see #MIN_RADIX
	 * @see #MAX_RADIX
	 * @see #digit(char, int)
	 */
	public static char forDigit(int digit, int radix) {
		// Both the radix and the digit must be in range; otherwise the null
		// character signals failure.
		final boolean radixInRange = radix >= MIN_RADIX && radix <= MAX_RADIX;
		final boolean digitInRange = digit >= 0 && digit < radix;
		if (radixInRange && digitInRange) {
			return Number.digits[digit];
		}
		return '\0';
	}

	/**
	 * Returns the Unicode directionality property of the character. This
	 * is used in the visual ordering of text.
	 *
	 * @param ch the character to look up
	 * @return the directionality constant, or DIRECTIONALITY_UNDEFINED
	 * @see #DIRECTIONALITY_UNDEFINED
	 * @see #DIRECTIONALITY_LEFT_TO_RIGHT
	 * @see #DIRECTIONALITY_RIGHT_TO_LEFT
	 * @see #DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
	 * @see #DIRECTIONALITY_EUROPEAN_NUMBER
	 * @see #DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
	 * @see #DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
	 * @see #DIRECTIONALITY_ARABIC_NUMBER
	 * @see #DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
	 * @see #DIRECTIONALITY_NONSPACING_MARK
	 * @see #DIRECTIONALITY_BOUNDARY_NEUTRAL
	 * @see #DIRECTIONALITY_PARAGRAPH_SEPARATOR
	 * @see #DIRECTIONALITY_SEGMENT_SEPARATOR
	 * @see #DIRECTIONALITY_WHITESPACE
	 * @see #DIRECTIONALITY_OTHER_NEUTRALS
	 * @see #DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
	 * @see #DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
	 * @see #DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
	 * @see #DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
	 * @see #DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
	 * @since 1.4
	 */
	public static byte getDirectionality(char ch) {
		final int attrIndex = readChar(ch) >> 7;

		// The table entry is shifted right by two bits to drop its low bits;
		// narrowing to byte then yields a correctly signed result.
		return (byte) (direction[attrIndex] >> 2);
	}

	/**
	 * Determines whether the character is mirrored according to Unicode. For
	 * example, <code>\u0028</code> (LEFT PARENTHESIS) appears as '(' in
	 * left-to-right text, but ')' in right-to-left text.
	 *
	 * @param ch the character to look up
	 * @return true if the character is mirrored
	 * @since 1.4
	 */
	public static boolean isMirrored(char ch) {
		// Mirrored characters have the MIRROR_MASK bit set in their attributes.
		final int attr = readChar(ch);
		return (attr & MIRROR_MASK) != 0;
	}

	/**
	 * Compares another Character to this Character, numerically.
	 *
	 * @param anotherCharacter Character to compare with this Character
	 * @return a negative integer if this Character is less than
	 *         anotherCharacter, zero if this Character is equal, and
	 *         a positive integer if this Character is greater
	 * @throws NullPointerException if anotherCharacter is null
	 * @since 1.2
	 */
	public int compareTo(Character anotherCharacter) {
		// Dereferencing the argument throws NullPointerException for null.
		final int other = anotherCharacter.value;

		// Plain difference of the two wrapped values: negative, zero or
		// positive according to their numeric order.
		return value - other;
	}

	/**
	 * Compares an object to this Character.  Assuming the object is a
	 * Character object, this method performs the same comparison as
	 * compareTo(Character).
	 *
	 * @param o object to compare
	 * @return the comparison value
	 * @throws ClassCastException if o is not a Character object
	 * @throws NullPointerException if o is null
	 * @see #compareTo(Character)
	 * @since 1.2
	 */
	public int compareTo(Object o) {
		// The cast throws ClassCastException for any non-Character argument;
		// the typed overload then performs the actual comparison.
		final Character other = (Character) o;
		return compareTo(other);
	}
} // class Character

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?