sentencebreakdata.java

来自「《移动Agent技术》一书的所有章节源代码。」· Java 代码 · 共 330 行 · 第 1/2 页

JAVA
330
字号
/*
 * @(#)SentenceBreakData.java	1.9 98/03/05
 *
 * (C) Copyright Taligent, Inc. 1996 - All Rights Reserved
 * (C) Copyright IBM Corp. 1996 - All Rights Reserved
 *
 * Portions copyright (c) 1996 Sun Microsystems, Inc. All Rights Reserved.
 *
 *   The original version of this source code and documentation is copyrighted
 * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
 * materials are provided under terms of a License Agreement between Taligent
 * and Sun. This technology is protected by multiple US and International
 * patents. This notice and attribution to Taligent may not be removed.
 *   Taligent is a registered trademark of Taligent, Inc.
 *
 * Permission to use, copy, modify, and distribute this software
 * and its documentation for NON-COMMERCIAL purposes and without
 * fee is hereby granted provided that this copyright notice
 * appears in all copies. Please refer to the file "copyright.html"
 * for further important copyright and licensing information.
 *
 * SUN MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF
 * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
 * TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
 * PARTICULAR PURPOSE, OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR
 * ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR
 * DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
 *
 */

package java.text;

/**
 * The SentenceBreakData contains data used by SimpleTextBoundary
 * to determine sentence breaks.
 * @see BreakIterator
 */
final class SentenceBreakData extends TextBoundaryData
{
    // Character category codes. The values 0..14 follow the same order as the
    // column-header comments on the state tables in this class, so each code
    // doubles as a column index into a table row.

    // Any other character (original note: "lower case letters, digits...",
    // although lowerCase and number have their own categories below).
    private static final byte other = 0;
    // Spaces.
    private static final byte space = 1;
    // Sentence terminators: period, question mark...
    private static final byte terminator = 2;
    // Ambiguous terminator. (The identifier keeps its original spelling.)
    private static final byte ambiguosTerm = 3;
    // Open brackets.
    private static final byte openBracket = 4;
    // Close brackets.
    private static final byte closeBracket = 5;
    // Characters where the previous sentence does not have a space
    // after a terminator. Common in Japanese, Chinese, and Korean.
    private static final byte cjk = 6;
    // Paragraph break.
    private static final byte paragraphBreak = 7;
    // Lower case.
    private static final byte lowerCase = 8;
    // Upper case.
    private static final byte upperCase = 9;
    // Digit (labelled "digit" in the table column headers).
    private static final byte number = 10;

    // Quote (labelled "Quote"/"quote" in the table column headers).
    private static final byte quote = 11;

    // Labelled "cr" in the table column headers; presumably carriage return -- confirm.
    private static final byte sent_cr = 12;
    // Presumably "non-spacing mark" -- TODO confirm against the character mapping.
    private static final byte nsm = 13;
    // Presumably end of string/text -- TODO confirm against the character mapping.
    private static final byte EOS = 14;

    // Number of category codes above (0..14); every state-table row holds
    // this many entries.
    private static final int COL_COUNT = 15;

    // High bit (0x80) that the tables add to a state number, as in (byte)(SI+n).
    // NOTE(review): how the flag is interpreted is decided by the table
    // consumer (WordBreakTable), which is not visible here.
    private static final byte SI = (byte)0x80;
    // State number 0.
    private static final byte STOP = (byte) 0;
    // State 0 with the SI bit set. The cast applies to SI only; the sum is an
    // int constant expression whose value (-128) fits in a byte, so the
    // assignment compiles without a further cast.
    private static final byte SI_STOP = (byte)SI + STOP;

    // Flattened state-transition table handed to kSentenceForward.
    // Layout: 11 rows (states 0..10, marked by the "// n" comments), each
    // with COL_COUNT (15) entries, one per character category in the column
    // order given just below. Each entry is a state number (STOP is 0),
    // in most cases with the SI bit added.
    private static final byte kSentenceForwardData[] =
    {
        // other       space          terminator     ambTerm
        // open        close          CJK            PB
        // lower       upper          digit          Quote
        // cr          nsm            EOS

        // 0
        STOP,          STOP,          STOP,          STOP,
        STOP,          STOP,          STOP,          STOP,
        STOP,          STOP,          STOP,          STOP,
        STOP,          STOP,          STOP,

        // 1
        (byte)(SI+1),  (byte)(SI+1),  (byte)(SI+2),  (byte)(SI+5),
        (byte)(SI+1),  (byte)(SI+1),  (byte)(SI+1),  (byte)(SI+4),
        (byte)(SI+1),  (byte)(SI+8),  (byte)(SI+9),  (byte)(SI+1),
        (byte)(SI+10), (byte)(SI+1),  SI_STOP,

        // 2
        SI_STOP,       (byte)(SI+3),  (byte)(SI+2),  (byte)(SI+5),
        (byte)(SI+1),  (byte)(SI+2),  SI_STOP,       (byte)(SI+4),
        SI_STOP,       SI_STOP,       SI_STOP,       (byte)(SI+2),
        (byte)(SI+10), (byte)(SI+2),  SI_STOP,

        // 3
        SI_STOP,       (byte)(SI+3),  SI_STOP,       SI_STOP,
        SI_STOP,       SI_STOP,       SI_STOP,       (byte)(SI+4),
        SI_STOP,       SI_STOP,       SI_STOP,       SI_STOP,
        (byte)(SI+10), (byte)(SI+3),  SI_STOP,

        // 4
        SI_STOP,       SI_STOP,       SI_STOP,       SI_STOP,
        SI_STOP,       SI_STOP,       SI_STOP,       SI_STOP,
        SI_STOP,       SI_STOP,       SI_STOP,       SI_STOP,
        SI_STOP,       SI_STOP,       SI_STOP,

        // 5
        SI_STOP,       (byte)(SI+6),  (byte)(SI+2),  (byte)(SI+5),
        (byte)(SI+1),  (byte)(SI+5),  SI_STOP,       (byte)(SI+4),
        (byte)(SI+1),  SI_STOP,       SI_STOP,       (byte)(SI+5),
        (byte)(SI+10), (byte)(SI+5),  SI_STOP,

        // 6
        SI_STOP,       (byte)(SI+6),  SI_STOP,       SI_STOP,
        (byte)(SI+7),  (byte)(SI+1),  SI_STOP,       (byte)(SI+4),
        (byte)(SI+1),  SI_STOP,       (byte)(SI+1),  SI_STOP,
        (byte)(SI+10), (byte)(SI+6),  SI_STOP,

        // 7
        // NOTE(review): outside row 0, the "open" entry (7) and the "upper"
        // entry (STOP) in this row are the only values in the table without
        // the SI bit -- confirm this is intentional before touching them.
        SI_STOP,       SI_STOP,       SI_STOP,       SI_STOP,
        (byte)(7),     SI_STOP,       SI_STOP,       SI_STOP,
        (byte)(SI+1),  STOP,          SI_STOP,       SI_STOP,
        SI_STOP,       (byte)(SI+7),  SI_STOP,

        // 8
        (byte)(SI+1),  (byte)(SI+1),  (byte)(SI+2),  (byte)(SI+8),
        (byte)(SI+1),  (byte)(SI+5),  SI_STOP,       (byte)(SI+4),
        (byte)(SI+1),  (byte)(SI+8),  (byte)(SI+9),  (byte)(SI+5),
        (byte)(SI+10), (byte)(SI+8),  SI_STOP,

        // 9
        (byte)(SI+1),  (byte)(SI+1),  (byte)(SI+2),  (byte)(SI+9),
        (byte)(SI+1),  (byte)(SI+5),  SI_STOP,       (byte)(SI+4),
        (byte)(SI+1),  (byte)(SI+1),  (byte)(SI+9),  (byte)(SI+5),
        (byte)(SI+10), (byte)(SI+9),  SI_STOP,

        // 10
        SI_STOP,       SI_STOP,       SI_STOP,       SI_STOP,
        SI_STOP,       SI_STOP,       SI_STOP,       (byte)(SI+4),
        SI_STOP,       SI_STOP,       SI_STOP,       SI_STOP,
        SI_STOP,       SI_STOP,       SI_STOP
    };

    // Forward sentence-break table object, built from COL_COUNT and the
    // flattened kSentenceForwardData array.
    // NOTE(review): the first constructor argument is presumably the number
    // of columns per row -- confirm against WordBreakTable.
    private static final WordBreakTable kSentenceForward
        = new WordBreakTable(COL_COUNT, kSentenceForwardData);

    private static final byte kSentenceBackwardData[] =
    {
        // other       space          terminator     ambTerm
        // open        close          CJK            PB
        // lower       upper          digit          quote
        // cr          nsm            EOS

        // 0
        STOP,          STOP,          STOP,          STOP,
        STOP,          STOP,          STOP,          STOP,
        STOP,          STOP,          STOP,          STOP,
        STOP,          STOP,          STOP,

        // 1

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?