📄 sentencebreakdata.java

📁 java源代码请看看啊提点宝贵的意见
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
    //    (the PB column)    // 3) If you see a closing punctuation mark or a Kanji character preceded    //    by whitespace, we can turn around and seek forward when we see a    //    sentence terminator.    private static final byte kSentenceBackwardData[] =    {        // other       space          terminator     ambTerm        // open        close          CJK            PB        // lower       upper          digit          quote        // nsm            EOS        // 0        STOP,          STOP,          STOP,          STOP,        STOP,          STOP,          STOP,          STOP,        STOP,          STOP,          STOP,          STOP,        STOP,          STOP,        // 1        (byte)(SI+2),  (byte)(SI+2),  (byte)(SI+2),  (byte)(SI+2),        (byte)(SI+2),  (byte)(SI+2),  (byte)(SI+3),  STOP,        (byte)(SI+2),  (byte)(SI+3),  (byte)(SI+2),  (byte)(SI+2),        (byte)(SI+1),  STOP,        // 2        (byte)(SI+2),  (byte)(SI+2),  (byte)(SI+2),  (byte)(SI+2),        (byte)(SI+2),  (byte)(SI+2),  (byte)(SI+3),  STOP,        (byte)(SI+2),  (byte)(SI+3),  (byte)(SI+2),  (byte)(SI+2),        (byte)(SI+2),  STOP,        // 3        (byte)(SI+2),  (byte)(SI+4),  (byte)(SI+2),  (byte)(SI+2),        (byte)(SI+2),  (byte)(SI+2),  (byte)(SI+3),  STOP,        (byte)(SI+3),  (byte)(SI+2),  (byte)(SI+2),  (byte)(SI+2),        (byte)(SI+3),  STOP,        // 4        (byte)(SI+2),  (byte)(SI+4),  SI_STOP,       SI_STOP,        (byte)(SI+2),  (byte)(SI+2),  (byte)(SI+3),  STOP,        (byte)(SI+2),  (byte)(SI+3),  (byte)(SI+2),  (byte)(SI+2),        (byte)(SI+4),  STOP    };    private static final WordBreakTable kSentenceBackward        = new WordBreakTable(COL_COUNT, kSentenceBackwardData);    private static final int kRawMapping[] =    {        other,        // UNASSIGNED             = 0,        upperCase,    // UPPERCASE_LETTER       = 1,        lowerCase,    // LOWERCASE_LETTER       = 2,        other,        // TITLECASE_LETTER       = 3,        other,        // MODIFIER_LETTER        = 4,        other,        // OTHER_LETTER           = 5,        nsm,          // NON_SPACING_MARK       = 6,        nsm,          // ENCLOSING_MARK         = 7,        other,        // COMBINING_SPACING_MARK = 8,        number,       // DECIMAL_DIGIT_NUMBER   = 9,        number,       // LETTER_NUMBER          = 10,        number,       // OTHER_NUMBER           = 11,        space,        // SPACE_SEPARATOR        = 12,        space,        // LINE_SEPARATOR         = 13,        space,        // PARAGRAPH_SEPARATOR    = 14,            ???????        other,        // CONTROL                = 15,        other,        // PRIVATE_USE            = 16,        other,        // FORMAT                 = 17,        other,        // ????                   = 18,        other,        // SURROGATE              = 19,        other,        // DASH_PUNCTUATION       = 20,        openBracket,  // START_PUNCTUATION      = 21,        closeBracket, // END_PUNCTUATION        = 22,        other,        // CONNECTOR_PUNCTUATION  = 23,        other,        // OTHER_PUNCTUATION      = 24,        other,        // MATH_SYMBOL            = 25,        other,        // CURRENCY_SYMBOL        = 26,        other,        // MODIFIER_SYMBOL        = 27,        other,        // OTHER_SYMBOL           = 28;        openBracket,  // INITIAL_QUOTE_PUNCTUATION = 29,        closeBracket, // FINAL_QUOTE_PUNCTUATION = 30,    };    private static final SpecialMapping kExceptionChar[] =    {        //note: the ranges in this table must be sorted in ascending order        //as required by the UnicodeClassMapping class.        new SpecialMapping(ASCII_HORIZONTAL_TABULATION, space),        new SpecialMapping(ASCII_LINEFEED, space),        new SpecialMapping(ASCII_FORM_FEED, terminator),        new SpecialMapping(ASCII_CARRIAGE_RETURN, space),        new SpecialMapping(ASCII_EXCLAMATION_MARK, terminator),        new SpecialMapping(ASCII_QUOTATION_MARK, quote),        new SpecialMapping(ASCII_APOSTROPHE, quote),        new SpecialMapping(ASCII_FULL_STOP, ambiguosTerm),        new SpecialMapping(ASCII_QUESTION_MARK, terminator),        new SpecialMapping(ASCII_NONBREAKING_SPACE, other),        new SpecialMapping(PUNCTUATION_LINE_SEPARATOR, space),        new SpecialMapping(PUNCTUATION_PARAGRAPH_SEPARATOR, paragraphBreak),        new SpecialMapping(PUNCTUATION_IDEOGRAPHIC_FULL_STOP, terminator),        new SpecialMapping(HIRAGANA_LETTER_SMALL_A, HIRAGANA_LETTER_VU, cjk),        new SpecialMapping(COMBINING_KATAKANA_HIRAGANA_VOICED_SOUND_MARK,                           HIRAGANA_SEMIVOICED_SOUND_MARK, cjk),         // cjk        new SpecialMapping(KATAKANA_LETTER_SMALL_A, KATAKANA_LETTER_SMALL_KE,                           cjk),   // cjk        new SpecialMapping(UNICODE_LOW_BOUND_HAN, UNICODE_HIGH_BOUND_HAN, cjk),        new SpecialMapping(CJK_COMPATIBILITY_F900, CJK_COMPATIBILITY_FA2D,cjk),        new SpecialMapping(UNICODE_ZERO_WIDTH_NON_BREAKING_SPACE, other),        new SpecialMapping(FULLWIDTH_EXCLAMATION_MARK, terminator),        new SpecialMapping(FULLWIDTH_FULL_STOP, terminator),        new SpecialMapping(FULLWIDTH_QUESTION_MARK, terminator),        new SpecialMapping(END_OF_STRING, EOS)    };    private static final boolean SentenceExceptionFlags[] = {        false,            // kNonCharacter         = 0,        false,            // kUppercaseLetter      = 1,        false,            // kLowercaseLetter      = 2,        false,            // kTitlecaseLetter      = 3,        false,            // kModifierLetter       = 4,        true,             // kOtherLetter          = 5,        true,             // kNonSpacingMark       = 6,        false,            // kEnclosingMark        = 7,        false,            // kCombiningSpacingMark = 8,        false,            // kDecimalNumber        = 9,        false,            // kLetterNumber         = 10,        false,            // kOtherNumber          = 11,        true,             // kSpaceSeparator       = 12,        true,             // kLineSeparator        = 13,        true,             // kParagraphSeparator   = 14,        true,             // kControlCharacter     = 15,        true,             // kFormatCharacter      = 16,        false,            // UNDEFINED             = 17,        false,            // kPrivateUseCharacter  = 18,        false,            // kSurrogate            = 19,        false,            // kDashPunctuation      = 20,        false,            // kOpenPunctuation      = 21,        false,            // kClosePunctuation     = 22,        false,            // kConnectorPunctuation = 23,        true,             // kOtherPunctuation     = 24,        false,            // kMathSymbol           = 25,        false,            // kCurrencySymbol       = 26,        false,            // kModifierSymbol       = 27,        false,            // kOtherSymbol          = 28,        false,            // kInitialQuotePunctuation = 29,        false,            // kFinalQuotePunctuation = 30,    };    private static final int kSentenceAsciiValues[] = {        //  null    soh     stx     etx     eot     enq     ask     bell            other,  other,  other,  other,  other,  other,  other,  other,        //  bs      ht      lf     vt     ff          cr     so     si            other,  space,  space, other, terminator, space, other, other,        //  dle     dc1     dc2     dc3     dc4     nak     syn     etb            other,  other,  other,  other,  other,  other,  other,  other,        //  can     em      sub     esc     fs      gs      rs      us            other,  other,  other,  other,  other,  other,  other,  other,        //  sp      !           "      #      $      %      &      '            space,  terminator, quote, other, other, other, other, quote,        //  (            )             *      +      ,      -      .             /            openBracket, closeBracket, other, other, other, other, ambiguosTerm, other,        //  0       1       2       3       4       5       6       7            number, number, number, number, number, number, number, number,        //  8       9       :       ;       <       =       >       ?            number, number, other,  other,  other,  other,  other,  terminator,        //  @       A          B          C          D          E          F          G            other,  upperCase, upperCase, upperCase, upperCase, upperCase, upperCase, upperCase,        //  H          I          J          K          L          M          N          O            upperCase, upperCase, upperCase, upperCase, upperCase, upperCase, upperCase, upperCase,        //  P          Q          R          S          T          U          V          W            upperCase, upperCase, upperCase, upperCase, upperCase, upperCase, upperCase, upperCase,        //  X          Y          Z          [            \      ]             ^      _            upperCase, upperCase, upperCase, openBracket, other, closeBracket, other, other,        //  `       a          b          c          d          e          f          g            other,  lowerCase, lowerCase, lowerCase, lowerCase, lowerCase, lowerCase, lowerCase,        //  h          i          j          k          l          m          n          o            lowerCase, lowerCase, lowerCase, lowerCase, lowerCase, lowerCase, lowerCase, lowerCase,        //  p          q          r          s          t          u          v          w            lowerCase, lowerCase, lowerCase, lowerCase, lowerCase, lowerCase, lowerCase, lowerCase,        //  x          y          z          {            |      }             ~      del            lowerCase, lowerCase, lowerCase, openBracket, other, closeBracket, other, other,        //  ctrl    ctrl    ctrl    ctrl    ctrl    ctrl    ctrl    ctrl            other,  other,  other,  other,  other,  other,  other,  other,        //  ctrl    ctrl    ctrl    ctrl    ctrl    ctrl    ctrl    ctrl            other,  other,  other,  other,  other,  other,  other,  other,        //  ctrl    ctrl    ctrl    ctrl    ctrl    ctrl    ctrl    ctrl            other,  other,  other,  other,  other,  other,  other,  other,        //  ctrl    ctrl    ctrl    ctrl    ctrl    ctrl    ctrl    ctrl            other,  other,  other,  other,  other,  other,  other,  other,        //  nbsp      inv-!     cents     pounds    currency  yen       broken-bar  section            other,  other,  other,  other,  other,  other,  other,  other,        //  umlaut    copyright super-a   gui-left  not       soft-hyph registered  macron            other,  other,  lowerCase, openBracket, other, other, other, other,        //  degree    +/-       super-2   super-3   acute     micro     paragraph  bullet            other,  other,  number, number, other,  lowerCase, other, other,        //  cedilla   super-1   super-o   gui-right 1/4       1/2       3/4      inv-?            other,  lowerCase, other, closeBracket, number, number, number, other,        //  A-grave   A-acute   A-hat     A-tilde   A-umlaut A-ring    AE        C-cedilla            upperCase, upperCase, upperCase, upperCase, upperCase, upperCase, upperCase, upperCase,        //  E-grave   E-acute   E-hat     E-umlaut  I-grave   I-acute   I-hat    I-umlaut            upperCase, upperCase, upperCase, upperCase, upperCase, upperCase, upperCase, upperCase,        //  Edh       N-tilde   O-grave   O-acute   O-hat     O-tilde   O-umlaut times            upperCase, upperCase, upperCase, upperCase, upperCase, upperCase, upperCase, other,        //  O=slash   U-grave   U-acute   U-hat     U-umlaut  Y-acute   Thorn    ess-zed            upperCase, upperCase, upperCase, upperCase, upperCase, upperCase, upperCase, lowerCase,        //  a-grave   a-acute   a-hat     a-tilde   a-umlaut  a-ring    ae       c-cedilla            lowerCase, lowerCase, lowerCase, lowerCase, lowerCase, lowerCase, lowerCase, lowerCase,        //  e-grave   e-acute   e-hat     e-umlaut  i-grave   i-acute   i-hat    i-umlaut            lowerCase, lowerCase, lowerCase, lowerCase, lowerCase, lowerCase, lowerCase, lowerCase,        //  edh       n-tilde   o-grave   o-acute   o-hat     o-tilde   o-umlaut  over            lowerCase, lowerCase, lowerCase, lowerCase, lowerCase, lowerCase, lowerCase, other,        //  o-slash   u-grave   u-acute   u-hat     u-umlaut  y-acute   thorn    y=umlaut            lowerCase, lowerCase, lowerCase, lowerCase, lowerCase, lowerCase, lowerCase, lowerCase    };    private static final UnicodeClassMapping kSentenceMap        = new UnicodeClassMapping(kRawMapping, kExceptionChar, SentenceExceptionFlags,        kSentenceAsciiValues);}
上一页 12
💿 文件大小 245 K
👤 上传用户 liu2000dz
📂 所属分类 Java编程
🏷️ 相关标签

#java #源代码
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -