📄 wordbreakdata.java
字号:
// 1 (byte)(SI+6), (byte)(SI+2), (byte)(SI+3), (byte)(SI+4), (byte)(SI+5), (byte)(SI+6), (byte)(SI+7), (byte)(SI+7), (byte)(SI+5), (byte)(SI+8), (byte)(SI+8), (byte)(SI+9), (byte)(SI+10), (byte)(SI+12), (byte)(SI+11), (byte)(SI+8), (byte)(1), STOP, // 2 STOP, (byte)(SI+2), (byte)(SI+3), (byte)(4), (byte)(4), STOP, STOP, STOP, (byte)(4), STOP, STOP, STOP, STOP, STOP, STOP, STOP, (byte)(2), STOP, // 3 STOP, (byte)(SI+2), (byte)(SI+3), STOP, (byte)(7), SI_STOP, STOP, (byte)(7), (byte)(SI+7), STOP, STOP, STOP, STOP, STOP, STOP, STOP, (byte)(3), STOP, // 4 STOP, (byte)(SI+2), STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, (byte)(4), STOP, // 5 STOP, (byte)(SI+2), (byte)(SI+3), STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, (byte)(5), STOP, // 6 STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, (byte)(6), STOP, // 7 STOP, STOP, (byte)(SI+3), STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, (byte)(7), STOP, // 8 STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, (byte)(SI+8), (byte)(SI+8), STOP, STOP, STOP, STOP, (byte)(SI+8), (byte)(8), STOP, // 9 STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, (byte)(SI+9), STOP, STOP, (byte)(9), STOP, (byte)(9), STOP, // 10 STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, (byte)(SI+10),STOP, (byte)(10), STOP, (byte)(10), STOP, // 11 STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, (byte)(SI+9), (byte)(SI+10), STOP, (byte)(SI+11), STOP, (byte)(11), STOP, // 12 STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, STOP, (byte)(SI+12), STOP, STOP, (byte)(12), STOP }; private static final WordBreakTable kWordBackward = new WordBreakTable(COL_COUNT, kWordBackwardData); private static final int kRawMapping[] = { BREAK, // UNASSIGNED = 0, letter, // UPPERCASE_LETTER = 1, letter, // LOWERCASE_LETTER = 2, letter, // TITLECASE_LETTER = 3, letter, // MODIFIER_LETTER = 4, letter, // OTHER_LETTER = 5, nsm, // NON_SPACING_MARK = 6, nsm, // ENCLOSING_MARK = 7, BREAK, // COMBINING_SPACING_MARK = 8, number, // DECIMAL_DIGIT_NUMBER = 9, letter, // LETTER_NUMBER = 10, number, // OTHER_NUMBER = 11, blank, // SPACE_SEPARATOR = 12, BREAK, // LINE_SEPARATOR = 13, BREAK, // PARAGRAPH_SEPARATOR = 14, BREAK, // CONTROL = 15, BREAK, // FORMAT = 16 BREAK, // ???? = 17, BREAK, // PRIVATE_USE = 18, BREAK, // SURROGATE = 19, midLetter, // DASH_PUNCTUATION = 20, BREAK, // START_PUNCTUATION = 21, BREAK, // END_PUNCTUATION = 22, BREAK, // CONNECTOR_PUNCTUATION = 23, BREAK, // OTHER_PUNCTUATION = 24, BREAK, // MATH_SYMBOL = 25, preNum, // CURRENCY_SYMBOL = 26, BREAK, // MODIFIER_SYMBOL = 27, BREAK, // OTHER_SYMBOL = 28, BREAK, // INITIAL_QUOTE_PUNCTUATION = 29, BREAK, // FINAL_QUOTE_PUNCTUATION = 30, }; private static final SpecialMapping kExceptionChar[] = { //note: the ranges in this table must be sorted in ascending order //as required by the UnicodeClassMapping class. new SpecialMapping(ASCII_HORIZONTAL_TABULATION, blank), new SpecialMapping(ASCII_LINEFEED, lf), new SpecialMapping(ASCII_FORM_FEED, lf), new SpecialMapping(ASCII_CARRIAGE_RETURN, cr), new SpecialMapping(ASCII_QUOTATION_MARK, midLetNum), new SpecialMapping(ASCII_NUMBER_SIGN, preNum), new SpecialMapping(ASCII_PERCENT, postNum), new SpecialMapping(ASCII_AMPERSAND, postNum), new SpecialMapping(ASCII_APOSTROPHE, midLetNum), new SpecialMapping(ASCII_COMMA, midNum), new SpecialMapping(ASCII_FULL_STOP, preMidNum), new SpecialMapping(ASCII_CENT_SIGN, postNum), new SpecialMapping(LATIN1_SOFTHYPHEN, midLetter), new SpecialMapping(ARABIC_PERCENT_SIGN, postNum), new SpecialMapping(ARABIC_DECIMAL_SEPARATOR, midNum), new SpecialMapping(PUNCTUATION_HYPHENATION_POINT, midLetter), new SpecialMapping(PUNCTUATION_LINE_SEPARATOR, PUNCTUATION_PARAGRAPH_SEPARATOR, lf), new SpecialMapping(PER_MILLE_SIGN, postNum), new SpecialMapping(PER_TEN_THOUSAND_SIGN, postNum), new SpecialMapping(IDEOGRAPHIC_ITERATION_MARK, kanji), new SpecialMapping(HIRAGANA_LETTER_SMALL_A, HIRAGANA_LETTER_VU, hira), new SpecialMapping(COMBINING_KATAKANA_HIRAGANA_VOICED_SOUND_MARK, HIRAGANA_SEMIVOICED_SOUND_MARK, diacrit), new SpecialMapping(HIRAGANA_ITERATION_MARK, HIRAGANA_VOICED_ITERATION_MARK, hira), new SpecialMapping(KATAKANA_LETTER_SMALL_A, KATAKANA_LETTER_SMALL_KE, kata), new SpecialMapping(KATAKANA_HIRAGANA_PROLONGED_SOUND_MARK, diacrit), new SpecialMapping(KATAKANA_ITERATION_MARK, KATAKANA_VOICED_ITERATION_MARK, kata), new SpecialMapping(UNICODE_LOW_BOUND_HAN, UNICODE_HIGH_BOUND_HAN, kanji), new SpecialMapping(HANGUL_SYL_LOW, HANGUL_SYL_HIGH, letter), new SpecialMapping(CJK_COMPATIBILITY_F900, CJK_COMPATIBILITY_FA2D, kanji), new SpecialMapping(END_OF_STRING, EOS) }; private static final boolean WordExceptionFlags[] = { false, // kNonCharacter = 0, false, // kUppercaseLetter = 1, false, // kLowercaseLetter = 2, false, // kTitlecaseLetter = 3, true, // kModifierLetter = 4, true, // kOtherLetter = 5, true, // kNonSpacingMark = 6, false, // kEnclosingMark = 7, false, // kCombiningSpacingMark = 8, false, // kDecimalNumber = 9, false, // kLetterNumber = 10, false, // kOtherNumber = 11, false, // kSpaceSeparator = 12, true, // kLineSeparator = 13, true, // kParagraphSeparator = 14, true, // kControlCharacter = 15, false, // kFormatCharacter = 16, false, // UNDEFINED = 17, false, // kPrivateUseCharacter = 18, false, // kSurrogate = 19, true, // kDashPunctuation = 20, false, // kOpenPunctuation = 21, false, // kClosePunctuation = 22, false, // kConnectorPunctuation = 23, true, // kOtherPunctuation = 24, false, // kMathSymbol = 25, true, // kCurrencySymbol = 26, false, // kModifierSymbol = 27, false, // kOtherSymbol = 28, false, // kInitialQuotePunctuation = 29, false, // kFinalQuotePunctuation = 30, }; private static final int kWordAsciiValues[] = { // null soh stx etx eot enq ask bell BREAK, BREAK, BREAK, BREAK, BREAK, BREAK, BREAK, BREAK, // bs ht lf vt ff cr so si BREAK, blank, lf, BREAK, lf, cr, BREAK, BREAK, // dle dc1 dc2 dc3 dc4 nak syn etb BREAK, BREAK, BREAK, BREAK, BREAK, BREAK, BREAK, BREAK, // can em sub esc fs gs rs us BREAK, BREAK, BREAK, BREAK, BREAK, BREAK, BREAK, BREAK, // sp ! " # $ % & ' blank, BREAK, midLetNum, preNum, preNum, postNum, postNum, midLetNum, // ( ) * + , - . / BREAK, BREAK, BREAK, BREAK, midNum, midLetter, preMidNum, BREAK, // 0 1 2 3 4 5 6 7 number, number, number, number, number, number, number, number, // 8 9 : ; < = > ? number, number, BREAK, BREAK, BREAK, BREAK, BREAK, BREAK, // @ A B C D E F G BREAK, letter, letter, letter, letter, letter, letter, letter, // H I J K L M N O letter, letter, letter, letter, letter, letter, letter, letter, // P Q R S T U V W letter, letter, letter, letter, letter, letter, letter, letter, // X Y Z [ \ ] ^ _ letter, letter, letter, BREAK, BREAK, BREAK, BREAK, BREAK, // ` a b c d e f g BREAK, letter, letter, letter, letter, letter, letter, letter, // h i j k l m n o letter, letter, letter, letter, letter, letter, letter, letter, // p q r s t u v w letter, letter, letter, letter, letter, letter, letter, letter, // x y z { | } ~ del letter, letter, letter, BREAK, BREAK, BREAK, BREAK, BREAK, // ctrl ctrl ctrl ctrl ctrl ctrl ctrl ctrl BREAK, BREAK, BREAK, BREAK, BREAK, BREAK, BREAK, BREAK, // ctrl ctrl ctrl ctrl ctrl ctrl ctrl ctrl BREAK, BREAK, BREAK, BREAK, BREAK, BREAK, BREAK, BREAK, // ctrl ctrl ctrl ctrl ctrl ctrl ctrl ctrl BREAK, BREAK, BREAK, BREAK, BREAK, BREAK, BREAK, BREAK, // ctrl ctrl ctrl ctrl ctrl ctrl ctrl ctrl BREAK, BREAK, BREAK, BREAK, BREAK, BREAK, BREAK, BREAK, // nbsp inv-! cents pounds currency yen broken-bar section blank, BREAK, postNum, preNum, preNum, preNum, BREAK, BREAK, // umlaut copyright super-a gui-left not soft-hyph registered macron BREAK, BREAK, letter, BREAK, BREAK, midLetter, BREAK, BREAK, // degree +/- super-2 super-3 acute micro paragraph bullet BREAK, BREAK, number, number, BREAK, letter, BREAK, BREAK, // cedilla super-1 super-o gui-right 1/4 1/2 3/4 inv-? BREAK, letter, BREAK, BREAK, number, number, number, BREAK, // A-grave A-acute A-hat A-tilde A-umlaut A-ring AE C-cedilla letter, letter, letter, letter, letter, letter, letter, letter, // E-grave E-acute E-hat E-umlaut I-grave I-acute I-hat I-umlaut letter, letter, letter, letter, letter, letter, letter, letter, // Edh N-tilde O-grave O-acute O-hat O-tilde O-umlaut times letter, letter, letter, letter, letter, letter, letter, BREAK, // O-slash U-grave U-acute U-hat U-umlaut Y-acute Thorn ess-zed letter, letter, letter, letter, letter, letter, letter, letter, // a-grave a-acute a-hat a-tilde a-umlaut a-ring ae c-cedilla letter, letter, letter, letter, letter, letter, letter, letter, // e-grave e-acute e-hat e-umlaut i-grave i-acute i-hat i-umlaut letter, letter, letter, letter, letter, letter, letter, letter, // edh n-tilde o-grave o-acute o-hat o-tilde o-umlaut over letter, letter, letter, letter, letter, letter, letter, BREAK, // o-slash u-grave u-acute u-hat u-umlaut y-acute thorn y-umlaut letter, letter, letter, letter, letter, letter, letter, letter }; private static final UnicodeClassMapping kWordMap = new UnicodeClassMapping(kRawMapping, kExceptionChar, WordExceptionFlags, kWordAsciiValues);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -