sentencebreakdata.java
来自「《移动Agent技术》一书的所有章节源代码。」· Java 代码 · 共 330 行 · 第 1/2 页
JAVA
330 行
/*
* @(#)SentenceBreakData.java 1.9 98/03/05
*
* (C) Copyright Taligent, Inc. 1996 - All Rights Reserved
* (C) Copyright IBM Corp. 1996 - All Rights Reserved
*
* Portions copyright (c) 1996 Sun Microsystems, Inc. All Rights Reserved.
*
* The original version of this source code and documentation is copyrighted
* and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
* materials are provided under terms of a License Agreement between Taligent
* and Sun. This technology is protected by multiple US and International
* patents. This notice and attribution to Taligent may not be removed.
* Taligent is a registered trademark of Taligent, Inc.
*
* Permission to use, copy, modify, and distribute this software
* and its documentation for NON-COMMERCIAL purposes and without
* fee is hereby granted provided that this copyright notice
* appears in all copies. Please refer to the file "copyright.html"
* for further important copyright and licensing information.
*
* SUN MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF
* THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
* TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
* PARTICULAR PURPOSE, OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR
* ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR
* DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
*
*/
package java.text;
/**
* The SentenceBreakData contains data used by SimpleTextBoundary
* to determine sentence breaks.
* @see BreakIterator
*/
final class SentenceBreakData extends TextBoundaryData
{
// Character categories. The numeric values are the column positions used by
// the state tables in this class (see the column-order comment at the top of
// each table), so they must stay in step with the table layout.

// Everything else: lower case letters, digits... (wording of the original comment)
private static final byte other = 0;
// Spaces...
private static final byte space = 1;
// Sentence terminators: period, question mark...
private static final byte terminator = 2;
// Ambiguous terminator (identifier spelling kept as in the original)
private static final byte ambiguosTerm = 3;
// Open brackets
private static final byte openBracket = 4;
// Close brackets
private static final byte closeBracket = 5;
// Characters where the previous sentence does not have a space after a
// terminator. Common in Japanese, Chinese, and Korean.
private static final byte cjk = 6;
// Paragraph break
private static final byte paragraphBreak = 7;
// Lower case
private static final byte lowerCase = 8;
// Upper case
private static final byte upperCase = 9;
// Digit
private static final byte number = 10;
// Quote
private static final byte quote = 11;
// Labelled "cr" in the table headers
private static final byte sent_cr = 12;
// Labelled "nsm" in the table headers (presumably non-spacing marks -- confirm)
private static final byte nsm = 13;
// Labelled "EOS" in the table headers; the last category
private static final byte EOS = 14;

// Number of categories, i.e. entries per table row: 15.
private static final int COL_COUNT = EOS + 1;

// Flag and sentinel values used to build the table entries below.
// SI is the high bit of a byte, STOP is zero, and SI_STOP is STOP with the
// SI bit set.
private static final byte SI = (byte) 0x80;
private static final byte STOP = 0;
private static final byte SI_STOP = (byte) (SI + STOP);
// Flat transition data for the forward sentence-break table; it is wrapped in
// a WordBreakTable together with COL_COUNT right after this array.
//
// Layout: 11 rows (labelled "// 0" .. "// 10" below) of COL_COUNT (15)
// entries each. Within a row the entries follow the column order given in
// the header comment, which matches the category constants declared above
// (other = 0 ... EOS = 14).
//
// Each entry is STOP (0), SI_STOP (0x80), or (byte)(SI+n), i.e. the value n
// with the high bit set. Row 7 additionally holds one plain (byte)(7).
// NOTE(review): presumably n is the next row (state) and the SI bit is a
// per-transition flag read by WordBreakTable -- confirm there before editing.
private static final byte kSentenceForwardData[] =
{
// Column order (four lines = one row of 15 entries):
// other space terminator ambTerm
// open close CJK PB
// lower upper digit Quote
// cr nsm EOS
// 0 -- every entry is STOP
STOP, STOP, STOP, STOP,
STOP, STOP, STOP, STOP,
STOP, STOP, STOP, STOP,
STOP, STOP, STOP,
// 1
(byte)(SI+1), (byte)(SI+1), (byte)(SI+2), (byte)(SI+5),
(byte)(SI+1), (byte)(SI+1), (byte)(SI+1), (byte)(SI+4),
(byte)(SI+1), (byte)(SI+8), (byte)(SI+9), (byte)(SI+1),
(byte)(SI+10), (byte)(SI+1), SI_STOP,
// 2
SI_STOP, (byte)(SI+3), (byte)(SI+2), (byte)(SI+5),
(byte)(SI+1), (byte)(SI+2), SI_STOP, (byte)(SI+4),
SI_STOP, SI_STOP, SI_STOP, (byte)(SI+2),
(byte)(SI+10), (byte)(SI+2), SI_STOP,
// 3
SI_STOP, (byte)(SI+3), SI_STOP, SI_STOP,
SI_STOP, SI_STOP, SI_STOP, (byte)(SI+4),
SI_STOP, SI_STOP, SI_STOP, SI_STOP,
(byte)(SI+10), (byte)(SI+3), SI_STOP,
// 4 -- every entry is SI_STOP
SI_STOP, SI_STOP, SI_STOP, SI_STOP,
SI_STOP, SI_STOP, SI_STOP, SI_STOP,
SI_STOP, SI_STOP, SI_STOP, SI_STOP,
SI_STOP, SI_STOP, SI_STOP,
// 5
SI_STOP, (byte)(SI+6), (byte)(SI+2), (byte)(SI+5),
(byte)(SI+1), (byte)(SI+5), SI_STOP, (byte)(SI+4),
(byte)(SI+1), SI_STOP, SI_STOP, (byte)(SI+5),
(byte)(SI+10), (byte)(SI+5), SI_STOP,
// 6
SI_STOP, (byte)(SI+6), SI_STOP, SI_STOP,
(byte)(SI+7), (byte)(SI+1), SI_STOP, (byte)(SI+4),
(byte)(SI+1), SI_STOP, (byte)(SI+1), SI_STOP,
(byte)(SI+10), (byte)(SI+6), SI_STOP,
// 7
// NOTE(review): this row holds the only entries outside row 0 that lack the
// SI bit: (byte)(7) in the "open" column and plain STOP in the "upper"
// column. They look deliberate; confirm against WordBreakTable before
// "normalizing" them to (byte)(SI+7) / SI_STOP.
SI_STOP, SI_STOP, SI_STOP, SI_STOP,
(byte)(7), SI_STOP, SI_STOP, SI_STOP,
(byte)(SI+1), STOP, SI_STOP, SI_STOP,
SI_STOP, (byte)(SI+7), SI_STOP,
// 8
(byte)(SI+1), (byte)(SI+1), (byte)(SI+2), (byte)(SI+8),
(byte)(SI+1), (byte)(SI+5), SI_STOP, (byte)(SI+4),
(byte)(SI+1), (byte)(SI+8), (byte)(SI+9), (byte)(SI+5),
(byte)(SI+10), (byte)(SI+8), SI_STOP,
// 9
(byte)(SI+1), (byte)(SI+1), (byte)(SI+2), (byte)(SI+9),
(byte)(SI+1), (byte)(SI+5), SI_STOP, (byte)(SI+4),
(byte)(SI+1), (byte)(SI+1), (byte)(SI+9), (byte)(SI+5),
(byte)(SI+10), (byte)(SI+9), SI_STOP,
// 10 -- SI_STOP everywhere except the PB column, which is SI+4
SI_STOP, SI_STOP, SI_STOP, SI_STOP,
SI_STOP, SI_STOP, SI_STOP, (byte)(SI+4),
SI_STOP, SI_STOP, SI_STOP, SI_STOP,
SI_STOP, SI_STOP, SI_STOP
};
// Forward sentence-break table: kSentenceForwardData wrapped in a
// WordBreakTable, constructed with COL_COUNT (the number of entries per row
// in the data array).
private static final WordBreakTable kSentenceForward =
        new WordBreakTable(COL_COUNT, kSentenceForwardData);
private static final byte kSentenceBackwardData[] =
{
// other space terminator ambTerm
// open close CJK PB
// lower upper digit quote
// cr nsm EOS
// 0
STOP, STOP, STOP, STOP,
STOP, STOP, STOP, STOP,
STOP, STOP, STOP, STOP,
STOP, STOP, STOP,
// 1
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?