📄 breakiterator.java

📁 java源代码请看看啊提点宝贵的意见
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* * @(#)BreakIterator.java	1.32 03/01/23 * * Copyright 2003 Sun Microsystems, Inc. All rights reserved. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms. *//* * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved * (C) Copyright IBM Corp. 1996 - 1998 - All Rights Reserved * * The original version of this source code and documentation * is copyrighted and owned by Taligent, Inc., a wholly-owned * subsidiary of IBM. These materials are provided under terms * of a License Agreement between Taligent and Sun. This technology * is protected by multiple US and International patents. * * This notice and attribution to Taligent may not be removed. * Taligent is a registered trademark of Taligent, Inc. * */package java.text;import java.util.Vector;import java.util.Locale;import java.util.ResourceBundle;import java.util.MissingResourceException;import sun.text.resources.LocaleData;import java.text.CharacterIterator;import java.text.StringCharacterIterator;import java.net.URL;import java.io.InputStream;import java.io.IOException;import java.lang.ref.SoftReference;import java.security.AccessController;import java.security.PrivilegedAction;/** * The <code>BreakIterator</code> class implements methods for finding * the location of boundaries in text. Instances of <code>BreakIterator</code> * maintain a current position and scan over text * returning the index of characters where boundaries occur. * Internally, <code>BreakIterator</code> scans text using a * <code>CharacterIterator</code>, and is thus able to scan text held * by any object implementing that protocol. A <code>StringCharacterIterator</code> * is used to scan <code>String</code> objects passed to <code>setText</code>. * * <p> * You use the factory methods provided by this class to create * instances of various types of break iterators. 
In particular, * use <code>getWordInstance</code>, <code>getLineInstance</code>, * <code>getSentenceInstance</code>, and <code>getCharacterInstance</code> * to create <code>BreakIterator</code>s that perform * word, line, sentence, and character boundary analysis respectively. * A single <code>BreakIterator</code> can work only on one unit * (word, line, sentence, and so on). You must use a different iterator * for each unit boundary analysis you wish to perform. * * <p> * Line boundary analysis determines where a text string can be * broken when line-wrapping. The mechanism correctly handles * punctuation and hyphenated words. * * <p> * Sentence boundary analysis allows selection with correct interpretation * of periods within numbers and abbreviations, and trailing punctuation * marks such as quotation marks and parentheses. * * <p> * Word boundary analysis is used by search and replace functions, as * well as within text editing applications that allow the user to * select words with a double click. Word selection provides correct * interpretation of punctuation marks within and following * words. Characters that are not part of a word, such as symbols * or punctuation marks, have word-breaks on both sides. * * <p> * Character boundary analysis allows users to interact with characters * as they expect to, for example, when moving the cursor through a text * string. Character boundary analysis provides correct navigation * through character strings, regardless of how the character is stored. * For example, an accented character might be stored as a base character * and a diacritical mark. What users consider to be a character can * differ between languages. * * <p> * <code>BreakIterator</code> is intended for use with natural * languages only. Do not use this class to tokenize a programming language. 
* * <P> * <strong>Examples</strong>:<P> * Creating and using text boundaries * <blockquote> * <pre> * public static void main(String args[]) { *      if (args.length == 1) { *          String stringToExamine = args[0]; *          //print each word in order *          BreakIterator boundary = BreakIterator.getWordInstance(); *          boundary.setText(stringToExamine); *          printEachForward(boundary, stringToExamine); *          //print each sentence in reverse order *          boundary = BreakIterator.getSentenceInstance(Locale.US); *          boundary.setText(stringToExamine); *          printEachBackward(boundary, stringToExamine); *          printFirst(boundary, stringToExamine); *          printLast(boundary, stringToExamine); *      } * } * </pre> * </blockquote> * * Print each element in order * <blockquote> * <pre> * public static void printEachForward(BreakIterator boundary, String source) { *     int start = boundary.first(); *     for (int end = boundary.next(); *          end != BreakIterator.DONE; *          start = end, end = boundary.next()) { *          System.out.println(source.substring(start,end)); *     } * } * </pre> * </blockquote> * * Print each element in reverse order * <blockquote> * <pre> * public static void printEachBackward(BreakIterator boundary, String source) { *     int end = boundary.last(); *     for (int start = boundary.previous(); *          start != BreakIterator.DONE; *          end = start, start = boundary.previous()) { *         System.out.println(source.substring(start,end)); *     } * } * </pre> * </blockquote> * * Print first element * <blockquote> * <pre> * public static void printFirst(BreakIterator boundary, String source) { *     int start = boundary.first(); *     int end = boundary.next(); *     System.out.println(source.substring(start,end)); * } * </pre> * </blockquote> * * Print last element * <blockquote> * <pre> * public static void printLast(BreakIterator boundary, String source) { *     int end = 
boundary.last(); *     int start = boundary.previous(); *     System.out.println(source.substring(start,end)); * } * </pre> * </blockquote> * * Print the element at a specified position * <blockquote> * <pre> * public static void printAt(BreakIterator boundary, int pos, String source) { *     int end = boundary.following(pos); *     int start = boundary.previous(); *     System.out.println(source.substring(start,end)); * } * </pre> * </blockquote> * * Find the next word * <blockquote> * <pre> * public static int nextWordStartAfter(int pos, String text) { *     BreakIterator wb = BreakIterator.getWordInstance(); *     wb.setText(text); *     int last = wb.following(pos); *     int current = wb.next(); *     while (current != BreakIterator.DONE) { *         for (int p = last; p < current; p++) { *             if (Character.isLetter(text.charAt(p))) *                 return last; *         } *         last = current; *         current = wb.next(); *     } *     return BreakIterator.DONE; * } * </pre> * (The iterator returned by BreakIterator.getWordInstance() is unique in that * the break positions it returns don't represent both the start and end of the * thing being iterated over.  That is, a sentence-break iterator returns breaks * that each represent the end of one sentence and the beginning of the next. * With the word-break iterator, the characters between two boundaries might be a * word, or they might be the punctuation or whitespace between two words.  The * above code uses a simple heuristic to determine which boundary is the beginning * of a word: If the characters between this boundary and the next boundary * include at least one letter (this can be an alphabetical letter, a CJK ideograph, * a Hangul syllable, a Kana character, etc.), then the text between this boundary * and the next is a word; otherwise, it's the material between words.) 
* </blockquote> * * @see CharacterIterator * */public abstract class BreakIterator implements Cloneable{    /**     * Constructor. BreakIterator is stateless and has no default behavior.     */    protected BreakIterator()    {    }    /**     * Create a copy of this iterator     * @return A copy of this     */    public Object clone()    {        try {            return super.clone();        }        catch (CloneNotSupportedException e) {            throw new InternalError();        }    }    /**     * DONE is returned by previous() and next() after all valid     * boundaries have been returned.     */    public static final int DONE = -1;    /**     * Return the first boundary. The iterator's current position is set     * to the first boundary.     * @return The character index of the first text boundary.     */    public abstract int first();    /**     * Return the last boundary. The iterator's current position is set     * to the last boundary.     * @return The character index of the last text boundary.     */    public abstract int last();    /**     * Return the nth boundary from the current boundary     * @param n which boundary to return.  A value of 0     * does nothing.  Negative values move to previous boundaries     * and positive values move to later boundaries.     * @return The index of the nth boundary from the current position.     */    public abstract int next(int n);    /**     * Return the boundary following the current boundary.     * @return The character index of the next text boundary or DONE if all     * boundaries have been returned.  Equivalent to next(1).     */    public abstract int next();    /**     * Return the boundary preceding the current boundary.     * @return The character index of the previous text boundary or DONE if all     * boundaries have been returned.     */    public abstract int previous();    /**     * Return the first boundary following the specified offset.     
* The value returned is always greater than the offset or     * the value BreakIterator.DONE     * @param offset the offset to begin scanning. Valid values     * are determined by the CharacterIterator passed to     * setText().  Invalid values cause     * an IllegalArgumentException to be thrown.
12 下一页
💿 文件大小 245 K
👤 上传用户 liu2000dz
📂 所属分类 Java编程
🏷️ 相关标签

#java #源代码
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -