📄 breakiterator.java
字号:
/* * @(#)BreakIterator.java 1.32 03/01/23 * * Copyright 2003 Sun Microsystems, Inc. All rights reserved. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms. *//* * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved * (C) Copyright IBM Corp. 1996 - 1998 - All Rights Reserved * * The original version of this source code and documentation * is copyrighted and owned by Taligent, Inc., a wholly-owned * subsidiary of IBM. These materials are provided under terms * of a License Agreement between Taligent and Sun. This technology * is protected by multiple US and International patents. * * This notice and attribution to Taligent may not be removed. * Taligent is a registered trademark of Taligent, Inc. * */package java.text;import java.util.Vector;import java.util.Locale;import java.util.ResourceBundle;import java.util.MissingResourceException;import sun.text.resources.LocaleData;import java.text.CharacterIterator;import java.text.StringCharacterIterator;import java.net.URL;import java.io.InputStream;import java.io.IOException;import java.lang.ref.SoftReference;import java.security.AccessController;import java.security.PrivilegedAction;/** * The <code>BreakIterator</code> class implements methods for finding * the location of boundaries in text. Instances of <code>BreakIterator</code> * maintain a current position and scan over text * returning the index of characters where boundaries occur. * Internally, <code>BreakIterator</code> scans text using a * <code>CharacterIterator</code>, and is thus able to scan text held * by any object implementing that protocol. A <code>StringCharacterIterator</code> * is used to scan <code>String</code> objects passed to <code>setText</code>. * * <p> * You use the factory methods provided by this class to create * instances of various types of break iterators. 
In particular, * use <code>getWordInstance</code>, <code>getLineInstance</code>, * <code>getSentenceInstance</code>, and <code>getCharacterInstance</code> * to create <code>BreakIterator</code>s that perform * word, line, sentence, and character boundary analysis respectively. * A single <code>BreakIterator</code> can work only on one unit * (word, line, sentence, and so on). You must use a different iterator * for each unit boundary analysis you wish to perform. * * <p> * Line boundary analysis determines where a text string can be * broken when line-wrapping. The mechanism correctly handles * punctuation and hyphenated words. * * <p> * Sentence boundary analysis allows selection with correct interpretation * of periods within numbers and abbreviations, and trailing punctuation * marks such as quotation marks and parentheses. * * <p> * Word boundary analysis is used by search and replace functions, as * well as within text editing applications that allow the user to * select words with a double click. Word selection provides correct * interpretation of punctuation marks within and following * words. Characters that are not part of a word, such as symbols * or punctuation marks, have word-breaks on both sides. * * <p> * Character boundary analysis allows users to interact with characters * as they expect to, for example, when moving the cursor through a text * string. Character boundary analysis provides correct navigation * through character strings, regardless of how the character is stored. * For example, an accented character might be stored as a base character * and a diacritical mark. What users consider to be a character can * differ between languages. * * <p> * <code>BreakIterator</code> is intended for use with natural * languages only. Do not use this class to tokenize a programming language. 
* * <P> * <strong>Examples</strong>:<P> * Creating and using text boundaries * <blockquote> * <pre> * public static void main(String args[]) { * if (args.length == 1) { * String stringToExamine = args[0]; * //print each word in order * BreakIterator boundary = BreakIterator.getWordInstance(); * boundary.setText(stringToExamine); * printEachForward(boundary, stringToExamine); * //print each sentence in reverse order * boundary = BreakIterator.getSentenceInstance(Locale.US); * boundary.setText(stringToExamine); * printEachBackward(boundary, stringToExamine); * printFirst(boundary, stringToExamine); * printLast(boundary, stringToExamine); * } * } * </pre> * </blockquote> * * Print each element in order * <blockquote> * <pre> * public static void printEachForward(BreakIterator boundary, String source) { * int start = boundary.first(); * for (int end = boundary.next(); * end != BreakIterator.DONE; * start = end, end = boundary.next()) { * System.out.println(source.substring(start,end)); * } * } * </pre> * </blockquote> * * Print each element in reverse order * <blockquote> * <pre> * public static void printEachBackward(BreakIterator boundary, String source) { * int end = boundary.last(); * for (int start = boundary.previous(); * start != BreakIterator.DONE; * end = start, start = boundary.previous()) { * System.out.println(source.substring(start,end)); * } * } * </pre> * </blockquote> * * Print first element * <blockquote> * <pre> * public static void printFirst(BreakIterator boundary, String source) { * int start = boundary.first(); * int end = boundary.next(); * System.out.println(source.substring(start,end)); * } * </pre> * </blockquote> * * Print last element * <blockquote> * <pre> * public static void printLast(BreakIterator boundary, String source) { * int end = boundary.last(); * int start = boundary.previous(); * System.out.println(source.substring(start,end)); * } * </pre> * </blockquote> * * Print the element at a specified position * <blockquote> * <pre> * 
public static void printAt(BreakIterator boundary, int pos, String source) { * int end = boundary.following(pos); * int start = boundary.previous(); * System.out.println(source.substring(start,end)); * } * </pre> * </blockquote> * * Find the next word * <blockquote> * <pre> * public static int nextWordStartAfter(int pos, String text) { * BreakIterator wb = BreakIterator.getWordInstance(); * wb.setText(text); * int last = wb.following(pos); * int current = wb.next(); * while (current != BreakIterator.DONE) { * for (int p = last; p < current; p++) { * if (Character.isLetter(text.charAt(p))) * return last; * } * last = current; * current = wb.next(); * } * return BreakIterator.DONE; * } * </pre> * (The iterator returned by BreakIterator.getWordInstance() is unique in that * the break positions it returns don't represent both the start and end of the * thing being iterated over. That is, a sentence-break iterator returns breaks * that each represent the end of one sentence and the beginning of the next. * With the word-break iterator, the characters between two boundaries might be a * word, or they might be the punctuation or whitespace between two words. The * above code uses a simple heuristic to determine which boundary is the beginning * of a word: If the characters between this boundary and the next boundary * include at least one letter (this can be an alphabetical letter, a CJK ideograph, * a Hangul syllable, a Kana character, etc.), then the text between this boundary * and the next is a word; otherwise, it's the material between words.) * </blockquote> * * @see CharacterIterator * */public abstract class BreakIterator implements Cloneable{ /** * Constructor. BreakIterator is stateless and has no default behavior. 
*/ protected BreakIterator() { } /** * Create a copy of this iterator * @return A copy of this */ public Object clone() { try { return super.clone(); } catch (CloneNotSupportedException e) { throw new InternalError(); } } /** * DONE is returned by previous() and next() after all valid * boundaries have been returned. */ public static final int DONE = -1; /** * Return the first boundary. The iterator's current position is set * to the first boundary. * @return The character index of the first text boundary. */ public abstract int first(); /** * Return the last boundary. The iterator's current position is set * to the last boundary. * @return The character index of the last text boundary. */ public abstract int last(); /** * Return the nth boundary from the current boundary * @param n which boundary to return. A value of 0 * does nothing. Negative values move to previous boundaries * and positive values move to later boundaries. * @return The index of the nth boundary from the current position. */ public abstract int next(int n); /** * Return the boundary following the current boundary. * @return The character index of the next text boundary or DONE if all * boundaries have been returned. Equivalent to next(1). */ public abstract int next(); /** * Return the boundary preceding the current boundary. * @return The character index of the previous text boundary or DONE if all * boundaries have been returned. */ public abstract int previous(); /** * Return the first boundary following the specified offset. * The value returned is always greater than the offset or * the value BreakIterator.DONE * @param offset the offset to begin scanning. Valid values * are determined by the CharacterIterator passed to * setText(). Invalid values cause * an IllegalArgumentException to be thrown.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -