sentencebreakdata.java
来自「《移动Agent技术》一书的所有章节源代码。」· Java 代码 · 共 330 行 · 第 1/2 页
JAVA
330 行
(byte)(SI+2), (byte)(SI+2), (byte)(SI+2), (byte)(SI+2),
(byte)(SI+2), (byte)(SI+2), (byte)(SI+3), STOP,
(byte)(SI+2), (byte)(SI+3), (byte)(SI+2), (byte)(SI+2),
(byte)(SI+2), (byte)(SI+1), STOP,
// 2
(byte)(SI+2), (byte)(SI+2), (byte)(SI+2), (byte)(SI+2),
(byte)(SI+2), (byte)(SI+2), (byte)(SI+3), STOP,
(byte)(SI+2), (byte)(SI+3), (byte)(SI+2), (byte)(SI+2),
STOP, (byte)(SI+2), STOP,
// 3
(byte)(SI+2), (byte)(SI+4), (byte)(SI+2), (byte)(SI+2),
(byte)(SI+2), (byte)(SI+2), (byte)(SI+3), STOP,
(byte)(SI+3), (byte)(SI+2), (byte)(SI+2), (byte)(SI+2),
STOP, (byte)(SI+3), STOP,
// 4
(byte)(SI+2), (byte)(SI+4), SI_STOP, SI_STOP,
(byte)(SI+2), (byte)(SI+2), (byte)(SI+3), STOP,
(byte)(SI+2), (byte)(SI+3), (byte)(SI+2), (byte)(SI+2),
STOP, (byte)(SI+4), STOP
};
// Break table constructed from the kSentenceBackwardData array declared just
// above; COL_COUNT is passed as the first constructor argument.
// NOTE(review): judging by the names, this is the table used when scanning
// backward and COL_COUNT is the row width -- confirm against WordBreakTable.
private static final WordBreakTable kSentenceBackward
= new WordBreakTable(COL_COUNT, kSentenceBackwardData);
// Default sentence-break category for each general character type.
// The trailing comment on every entry gives the array index it occupies and
// the type name that index stands for. The names and numbers follow the
// java.lang.Character general-category constants, so the table is presumably
// indexed by Character.getType() -- confirm against the code that consumes it.
// The order of the entries is significant; do not reorder or insert.
// NOTE(review): the table has 29 entries (indices 0..28). Newer versions of
// java.lang.Character also define INITIAL_QUOTE_PUNCTUATION = 29 and
// FINAL_QUOTE_PUNCTUATION = 30; if this table really is indexed by
// Character.getType(), those codes would fall outside it -- verify.
private static final int kRawMapping[] =
{
other, // UNASSIGNED = 0,
upperCase, // UPPERCASE_LETTER = 1,
lowerCase, // LOWERCASE_LETTER = 2,
other, // TITLECASE_LETTER = 3,
other, // MODIFIER_LETTER = 4,
other, // OTHER_LETTER = 5,
nsm, // NON_SPACING_MARK = 6,
nsm, // ENCLOSING_MARK = 7,
other, // COMBINING_SPACING_MARK = 8,
number, // DECIMAL_DIGIT_NUMBER = 9,
number, // LETTER_NUMBER = 10,
number, // OTHER_NUMBER = 11,
space, // SPACE_SEPARATOR = 12,
space, // LINE_SEPARATOR = 13,
// The original author marked the next entry as uncertain ("???????").
// kExceptionChar separately maps PUNCTUATION_PARAGRAPH_SEPARATOR to paragraphBreak.
space, // PARAGRAPH_SEPARATOR = 14,
other, // CONTROL = 15,
// Labels for 16..18 corrected to the java.lang.Character numbering (they
// previously read PRIVATE_USE = 16, FORMAT = 17, ???? = 18). All three
// slots hold `other`, so only the comments were wrong.
other, // FORMAT = 16,
other, // (unused type code) = 17,
other, // PRIVATE_USE = 18,
other, // SURROGATE = 19,
other, // DASH_PUNCTUATION = 20,
openBracket, // START_PUNCTUATION = 21,
closeBracket, // END_PUNCTUATION = 22,
other, // CONNECTOR_PUNCTUATION = 23,
other, // OTHER_PUNCTUATION = 24,
other, // MATH_SYMBOL = 25,
other, // CURRENCY_SYMBOL = 26,
other, // MODIFIER_SYMBOL = 27,
other, // OTHER_SYMBOL = 28;
};
// Sentence-break categories assigned to individual characters (two-argument
// entries) or to inclusive-looking character ranges (three-argument entries),
// listed separately from the per-type table kRawMapping.
//
// Declared final, like the other static tables in this class, so the shared
// reference cannot be reassigned after class initialization. Note that final
// protects only the reference; the array elements themselves stay writable.
private static final SpecialMapping kExceptionChar[] =
{
    // Note: the ranges in this table must be sorted in ascending order
    // as required by the UnicodeClassMapping class.

    // ASCII / Latin-1 controls and punctuation
    new SpecialMapping(ASCII_HORIZONTAL_TABULATION, space),
    new SpecialMapping(ASCII_LINEFEED, space),
    new SpecialMapping(ASCII_FORM_FEED, terminator),
    new SpecialMapping(ASCII_CARRIAGE_RETURN, space),
    new SpecialMapping(ASCII_EXCLAMATION_MARK, terminator),
    new SpecialMapping(ASCII_QUOTATION_MARK, quote),
    new SpecialMapping(ASCII_APOSTROPHE, quote),
    new SpecialMapping(ASCII_FULL_STOP, ambiguosTerm),
    new SpecialMapping(ASCII_QUESTION_MARK, terminator),
    new SpecialMapping(ASCII_NONBREAKING_SPACE, other),

    // Separators and ideographic punctuation
    new SpecialMapping(PUNCTUATION_LINE_SEPARATOR, space),
    new SpecialMapping(PUNCTUATION_PARAGRAPH_SEPARATOR, paragraphBreak),
    new SpecialMapping(PUNCTUATION_IDEOGRAPHIC_FULL_STOP, terminator),

    // Ranges mapped to the cjk category
    new SpecialMapping(HIRAGANA_LETTER_SMALL_A, HIRAGANA_LETTER_VU, cjk),
    new SpecialMapping(COMBINING_KATAKANA_HIRAGANA_VOICED_SOUND_MARK,
                       HIRAGANA_SEMIVOICED_SOUND_MARK, cjk),
    new SpecialMapping(KATAKANA_LETTER_SMALL_A, KATAKANA_LETTER_SMALL_KE,
                       cjk),
    new SpecialMapping(UNICODE_LOW_BOUND_HAN, UNICODE_HIGH_BOUND_HAN, cjk),
    new SpecialMapping(CJK_COMPATIBILITY_F900, CJK_COMPATIBILITY_FA2D, cjk),

    // Trailing single-character entries
    new SpecialMapping(UNICODE_ZERO_WIDTH_NON_BREAKING_SPACE, other),
    new SpecialMapping(END_OF_STRING, EOS)
};
// One boolean per general character type, labelled 0..28 in the same index
// order as kRawMapping. The order of the entries is significant.
// NOTE(review): presumably `true` marks a type that has at least one character
// listed in kExceptionChar, so that table must be consulted before falling
// back to kRawMapping -- confirm against the UnicodeClassMapping class.
// NOTE(review): like kRawMapping this table stops at index 28; newer versions
// of java.lang.Character also define type codes 29 and 30 -- verify that the
// consumer never indexes past the end.
private static final boolean SentenceExceptionFlags[] = {
false, // kNonCharacter = 0,
false, // kUppercaseLetter = 1,
false, // kLowercaseLetter = 2,
false, // kTitlecaseLetter = 3,
false, // kModifierLetter = 4,
true, // kOtherLetter = 5,
true, // kNonSpacingMark = 6,
false, // kEnclosingMark = 7,
false, // kCombiningSpacingMark = 8,
false, // kDecimalNumber = 9,
false, // kLetterNumber = 10,
false, // kOtherNumber = 11,
true, // kSpaceSeparator = 12,
true, // kLineSeparator = 13,
true, // kParagraphSeparator = 14,
true, // kControlCharacter = 15,
true, // kFormatCharacter = 16,
false, // UNDEFINED = 17,
false, // kPrivateUseCharacter = 18,
false, // kSurrogate = 19,
false, // kDashPunctuation = 20,
false, // kOpenPunctuation = 21,
false, // kClosePunctuation = 22,
false, // kConnectorPunctuation = 23,
true, // kOtherPunctuation = 24,
false, // kMathSymbol = 25,
false, // kCurrencySymbol = 26,
false, // kModifierSymbol = 27,
false // kOtherSymbol = 28
};
private static final int kSentenceAsciiValues[] = {
// null soh stx etx eot enq ask bell
other, other, other, other, other, other, other, other,
// bs ht lf vt ff cr so si
other, space, space, other, terminator, space, other, other,
// dle dc1 dc2 dc3 dc4 nak syn etb
other, other, other, other, other, other, other, other,
// can em sub esc fs gs rs us
other, other, other, other, other, other, other, other,
// sp ! " # $ % & '
space, terminator, quote, other, other, other, other, quote,
// ( ) * + , - . /
openBracket, closeBracket, other, other, other, other, ambiguosTerm, other,
// 0 1 2 3 4 5 6 7
number, number, number, number, number, number, number, number,
// 8 9 : ; < = > ?
number, number, other, other, other, other, other, terminator,
// @ A B C D E F G
other, upperCase, upperCase, upperCase, upperCase, upperCase, upperCase, upperCase,
// H I J K L M N O
upperCase, upperCase, upperCase, upperCase, upperCase, upperCase, upperCase, upperCase,
// P Q R S T U V W
upperCase, upperCase, upperCase, upperCase, upperCase, upperCase, upperCase, upperCase,
// X Y Z [ \ ] ^ _
upperCase, upperCase, upperCase, openBracket, other, closeBracket, other, other,
// ` a b c d e f g
other, lowerCase, lowerCase, lowerCase, lowerCase, lowerCase, lowerCase, lowerCase,
// h i j k l m n o
lowerCase, lowerCase, lowerCase, lowerCase, lowerCase, lowerCase, lowerCase, lowerCase,
// p q r s t u v w
lowerCase, lowerCase, lowerCase, lowerCase, lowerCase, lowerCase, lowerCase, lowerCase,
// x y z { | } ~ del
lowerCase, lowerCase, lowerCase, openBracket, other, closeBracket, other, other,
// ctrl ctrl ctrl ctrl ctrl ctrl ctrl ctrl
other, other, other, other, other, other, other, other,
// ctrl ctrl ctrl ctrl ctrl ctrl ctrl ctrl
other, other, other, other, other, other, other, other,
// ctrl ctrl ctrl ctrl ctrl ctrl ctrl ctrl
other, other, other, other, other, other, other, other,
// ctrl ctrl ctrl ctrl ctrl ctrl ctrl ctrl
other, other, other, other, other, other, other, other,
// nbsp
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?