📄 wordbreakiterator.java

📁 this gcc-g++-3.3.1.tar.gz is a source file of gcc, you can learn more about gcc through this codes f
💻 JAVA
字号:
/* WordBreakIterator.java - Default word BreakIterator.   Copyright (C) 1999, 2001 Free Software Foundation, Inc.This file is part of GNU Classpath.GNU Classpath is free software; you can redistribute it and/or modifyit under the terms of the GNU General Public License as published bythe Free Software Foundation; either version 2, or (at your option)any later version. GNU Classpath is distributed in the hope that it will be useful, butWITHOUT ANY WARRANTY; without even the implied warranty ofMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNUGeneral Public License for more details.You should have received a copy of the GNU General Public Licensealong with GNU Classpath; see the file COPYING.  If not, write to theFree Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA02111-1307 USA.Linking this library statically or dynamically with other modules ismaking a combined work based on this library.  Thus, the terms andconditions of the GNU General Public License cover the wholecombination.As a special exception, the copyright holders of this library give youpermission to link this library with independent modules to produce anexecutable, regardless of the license terms of these independentmodules, and to copy and distribute the resulting executable underterms of your choice, provided that you also meet, for each linkedindependent module, the terms and conditions of the license of thatmodule.  An independent module is a module which is not derived fromor based on this library.  If you modify this library, you may extendthis exception to your version of the library, but you are notobligated to do so.  If you do not wish to do so, delete thisexception statement from your version. */package gnu.java.text;import java.text.BreakIterator;import java.text.CharacterIterator;/** * @author Tom Tromey <tromey@cygnus.com> * @date March 22, 1999 * Written using The Unicode Standard, Version 2.0. */public class WordBreakIterator extends BaseBreakIterator{  public Object clone ()  {    return new WordBreakIterator (this);  }  public WordBreakIterator ()  {    iter = null;  }  private WordBreakIterator (WordBreakIterator other)  {    iter = (CharacterIterator) other.iter.clone();  }  // Some methods to tell us different properties of characters.  private final boolean isHira (char c)  {    return c >= 0x3040 && c <= 0x309f;  }  private final boolean isKata (char c)  {    return c >= 0x30a0 && c <= 0x30ff;  }  private final boolean isHan (char c)  {    return c >= 0x4e00 && c <= 0x9fff;  }  public int next ()  {    int end = iter.getEndIndex();    if (iter.getIndex() == end)      return DONE;    while (iter.getIndex() < end)      {	char c = iter.current();	if (c == CharacterIterator.DONE)	  break;	int type = Character.getType(c);	char n = iter.next();	if (n == CharacterIterator.DONE)	  break;	// Break after paragraph separators.	if (type == Character.PARAGRAPH_SEPARATOR	    || type == Character.LINE_SEPARATOR)	  break;	// Break between letters and non-letters.	// FIXME: we treat apostrophe as part of a word.  This	// is an English-ism.	boolean is_letter = Character.isLetter(c);	if (c != '\'' && ! is_letter && type != Character.NON_SPACING_MARK	    && Character.isLetter(n))	  break;	// Always break after certain symbols, such as punctuation.	// This heuristic is derived from hints in the JCL book and is	// not part of Unicode.  It seems to be right, however.	// FIXME: we treat apostrophe as part of a word.  This	// is an English-ism.	if (c != '\''	    && (type == Character.DASH_PUNCTUATION		|| type == Character.START_PUNCTUATION		|| type == Character.END_PUNCTUATION		|| type == Character.CONNECTOR_PUNCTUATION		|| type == Character.OTHER_PUNCTUATION		|| type == Character.MATH_SYMBOL		|| type == Character.CURRENCY_SYMBOL		|| type == Character.MODIFIER_SYMBOL		|| type == Character.OTHER_SYMBOL		|| type == Character.FORMAT		|| type == Character.CONTROL))	  break;	boolean is_hira = isHira (c);	boolean is_kata = isKata (c);	boolean is_han = isHan (c);	// Special case Japanese.	if (! is_hira && ! is_kata && ! is_han	    && type != Character.NON_SPACING_MARK	    && (isHira (n) || isKata (n) || isHan (n)))	  break;	if (is_hira || is_kata || is_han || is_letter)	  {	    // Now we need to do some lookahead.  We might need to do	    // quite a bit of lookahead, so we save our position and	    // restore it later.	    int save = iter.getIndex();	    // Skip string of non spacing marks.	    while (n != CharacterIterator.DONE		   && Character.getType(n) == Character.NON_SPACING_MARK)	      n = iter.next();	    if (n == CharacterIterator.DONE)	      break;	    if ((is_hira && ! isHira (n))		|| (is_kata && ! isHira (n) && ! isKata (n))		|| (is_han && ! isHira (n) && ! isHan (n))		// FIXME: we treat apostrophe as part of a word.  This		// is an English-ism.		|| (is_letter && ! Character.isLetter(n) && n != '\''))	      break;	    iter.setIndex(save);	  }      }    return iter.getIndex();  }  public int previous ()  {    int start = iter.getBeginIndex();    if (iter.getIndex() == start)      return DONE;    while (iter.getIndex() >= start)      {	char c = iter.previous();	if (c == CharacterIterator.DONE)	  break;	boolean is_hira = isHira (c);	boolean is_kata = isKata (c);	boolean is_han = isHan (c);	boolean is_letter = Character.isLetter(c);	char n = iter.previous();	if (n == CharacterIterator.DONE)	  break;	iter.next();	int type = Character.getType(n);	// Break after paragraph separators.	if (type == Character.PARAGRAPH_SEPARATOR	    || type == Character.LINE_SEPARATOR)	  break;	// Break between letters and non-letters.	// FIXME: we treat apostrophe as part of a word.  This	// is an English-ism.	if (n != '\'' && ! Character.isLetter(n)	    && type != Character.NON_SPACING_MARK	    && is_letter)	  break;	// Always break after certain symbols, such as punctuation.	// This heuristic is derived from hints in the JCL book and is	// not part of Unicode.  It seems to be right, however.	// FIXME: we treat apostrophe as part of a word.  This	// is an English-ism.	if (n != '\''	    && (type == Character.DASH_PUNCTUATION		|| type == Character.START_PUNCTUATION		|| type == Character.END_PUNCTUATION		|| type == Character.CONNECTOR_PUNCTUATION		|| type == Character.OTHER_PUNCTUATION		|| type == Character.MATH_SYMBOL		|| type == Character.CURRENCY_SYMBOL		|| type == Character.MODIFIER_SYMBOL		|| type == Character.OTHER_SYMBOL		|| type == Character.FORMAT		|| type == Character.CONTROL))	  break;	// Special case Japanese.	if ((is_hira || is_kata || is_han)	    && ! isHira (n) && ! isKata (n) && ! isHan (n)	    && type != Character.NON_SPACING_MARK)	  break;	// We might have to skip over non spacing marks to see what's	// on the other side.	if (! is_hira || (! is_letter && c != '\''))	  {	    int save = iter.getIndex();	    while (n != CharacterIterator.DONE		   && Character.getType(n) == Character.NON_SPACING_MARK)	      n = iter.previous();	    iter.setIndex(save);	    // This is a strange case: a bunch of non-spacing marks at	    // the beginning.  We treat the current location as a word	    // break.	    if (n == CharacterIterator.DONE)	      break;	    if ((isHira (n) && ! is_hira)		|| (isKata (n) && ! is_hira && ! is_kata)		|| (isHan (n) && ! is_hira && ! is_han)		// FIXME: we treat apostrophe as part of a word.  This		// is an English-ism.		|| (! is_letter && c != '\'' && Character.isLetter(n)))	      break;	  }      }    return iter.getIndex();  }}
💿 文件大小 4511 K
👤 上传用户 Jane
📂 所属分类 Linux/Unix编程
🏷️ 相关标签

#this #gcc #through #source
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -