📄 spellchecker.java

📁 自动拼写检查的实现
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
12 下一页
/*
Jazzy - a Java library for Spell Checking
Copyright (C) 2001 Mindaugas Idzelis
Full text of license can be found in LICENSE.txt

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
*/
package com.swabunga.spell.event;

import com.swabunga.spell.engine.Configuration;
import com.swabunga.spell.engine.SpellDictionary;
import com.swabunga.spell.engine.SpellDictionaryHashMap;
import com.swabunga.spell.engine.Word;
import com.swabunga.util.VectorUtility;

import java.io.IOException;
import java.util.*;


/**
 * This is the main class for spell checking (using the new event based spell
 * checking). 
 * <p/>
 * By default, the class makes a user dictionary to accumulate added words.
 * Since this user directory has no file assign to persist added words, they
 * will be retained for the duration of the spell checker instance.
 * If you set a user dictionary like 
 * {@link com.swabunga.spell.engine.SpellDictionaryHashMap SpellDictionaryHashMap}
 * to persist the added word, the user dictionary will have the possibility to
 * grow and be available across differents invocations of the spell checker.
 *
 * @author     Jason Height (jheight@chariot.net.au)
 * 19 June 2002
 */
public class SpellChecker {
  /** Flag indicating that the Spell Check completed without any errors present*/
  public static final int SPELLCHECK_OK = -1;
  /** Flag indicating that the Spell Check completed due to user cancellation*/
  public static final int SPELLCHECK_CANCEL = -2;

  private Vector eventListeners = new Vector();
  private Vector dictionaries = new Vector();
  private SpellDictionary userdictionary;

  private Configuration config = Configuration.getConfiguration();

  /**This variable holds all of the words that are to be always ignored */
  private Vector ignoredWords = new Vector();
  private Hashtable autoReplaceWords = new Hashtable();
  
  // added caching - bd
  // For cached operation a separate user dictionary is required
  private Map cache;
  private int threshold = 0;
  private int cacheSize = 0;
  

  /**
   * Constructs the SpellChecker.
   */
  public SpellChecker() {
    try {
      userdictionary = new SpellDictionaryHashMap();
    } catch (IOException e) {
      throw new RuntimeException("this exception should never happen because we are using null phonetic file");
    }
  }

  /**
   * Constructs the SpellChecker. The default threshold is used
   *
   * @param  dictionary  The dictionary used for looking up words.
   */
  public SpellChecker(SpellDictionary dictionary) {
    this();
    addDictionary(dictionary);
  }


  /**
   * Constructs the SpellChecker with a threshold
   *
   * @param  dictionary  the dictionary used for looking up words.
   * @param  threshold   the cost value above which any suggestions are 
   *                     thrown away
   */
  public SpellChecker(SpellDictionary dictionary, int threshold) {
    this(dictionary);
    config.setInteger(Configuration.SPELL_THRESHOLD, threshold);
  }

  /**
   * Accumulates a dictionary at the end of the dictionaries list used
   * for looking up words. Adding a dictionary give the flexibility to
   * assign the base language dictionary, then a more technical, then...
   *
   * @param dictionary the dictionary to add at the end of the dictionary list.
   */
  public void addDictionary(SpellDictionary dictionary) {
    if (dictionary == null) {
      throw new IllegalArgumentException("dictionary must be non-null");
    }
    this.dictionaries.addElement(dictionary);
  }

  /**
   * Registers the user dictionary to which words are added.
   *
   * @param dictionary the dictionary to use when the user specify a new word
   * to add.
   */
  public void setUserDictionary(SpellDictionary dictionary) {
    userdictionary = dictionary;
  }

  /**
   * Supply the instance of the configuration holding the spell checking engine
   * parameters.
   *
   * @return Current Configuration
   */
  public Configuration getConfiguration() {
    return config;
  }

  /**
   * Adds a SpellCheckListener to the listeners list.
   *
   * @param  listener  The feature to be added to the SpellCheckListener attribute
   */
  public void addSpellCheckListener(SpellCheckListener listener) {
    eventListeners.addElement(listener);
  }


  /**
   * Removes a SpellCheckListener from the listeners list.
   *
   * @param  listener  The listener to be removed from the listeners list.
   */
  public void removeSpellCheckListener(SpellCheckListener listener) {
    eventListeners.removeElement(listener);
  }


  /**
   * Fires off a spell check event to the listeners.
   *
   * @param  event  The event that need to be processed by the spell checking
   * system.
   */
  protected void fireSpellCheckEvent(SpellCheckEvent event) {
    for (int i = eventListeners.size() - 1; i >= 0; i--) {
      ((SpellCheckListener) eventListeners.elementAt(i)).spellingError(event);
    }
  }


  /**
   * This method clears the words that are currently being remembered as
   *  <code>Ignore All</code> words and <code>Replace All</code> words.
   */
  public void reset() {
    ignoredWords = new Vector();
    autoReplaceWords = new Hashtable();
  }


  /**
   * Checks the text string.
   *  <p>
   *  Returns the corrected string.
   *
   * @param  text   The text that need to be spelled checked
   * @return        The text after spell checking
   * @deprecated    use checkSpelling(WordTokenizer)
   */
  public String checkString(String text) {
    StringWordTokenizer tokens = new StringWordTokenizer(text);
    checkSpelling(tokens);
    return tokens.getContext();
  }


  /**
   * Verifies if the word that is being spell checked contains at least a
   * digit.
   * Returns true if this word contains a digit.
   *
   * @param  word  The word to analyze for digit.
   * @return       true if the word contains at least a digit.
   */
  private final static boolean isDigitWord(String word) {
    for (int i = word.length() - 1; i >= 0; i--) {
      if (Character.isDigit(word.charAt(i))) {
        return true;
      }
    }
    return false;
  }


  /**
   * Verifies if the word that is being spell checked contains an Internet 
   * address. The method look for typical protocol or the habitual string 
   * in the word:
   * <ul>
   * <li>http://</li>
   * <li>ftp://</li>
   * <li>https://</li>
   * <li>ftps://</li>
   * <li>www.</li>
   * </ul>
   *
   * One limitation is that this method cannot currently recognize email
   * addresses. Since the 'word' that is passed in, may in fact contain
   * the rest of the document to be checked, it is not (yet!) a good
   * idea to scan for the @ character.
   *
   * @param  word  The word to analyze for an Internet address.
   * @return       true if this word looks like an Internet address.
   */
    public final static boolean isINETWord(String word) {
        String lowerCaseWord = word.toLowerCase();
        return lowerCaseWord.startsWith("http://") ||
              lowerCaseWord.startsWith("www.") ||
              lowerCaseWord.startsWith("ftp://") ||
              lowerCaseWord.startsWith("https://") ||
              lowerCaseWord.startsWith("ftps://");
  }


  /**
   * Verifies if the word that is being spell checked contains all
   * uppercases characters.
   *
   * @param  word  The word to analyze for uppercases characters
   * @return       true if this word contains all upper case characters
   */
  private final static boolean isUpperCaseWord(String word) {
    for (int i = word.length() - 1; i >= 0; i--) {
      if (Character.isLowerCase(word.charAt(i))) {
        return false;
      }
    }
    return true;
  }


  /**
   * Verifies if the word that is being spell checked contains lower and
   * upper cased characters. Note that a phrase beginning with an upper cased
   * character is not considered a mixed case word.
   *
   * @param  word  The word to analyze for mixed cases characters
   * @param startsSentence True if this word is at the start of a sentence
   * @return       true if this word contains mixed case characters
   */
  private final static boolean isMixedCaseWord(String word, boolean startsSentence) {
    int strLen = word.length();
    boolean isUpper = Character.isUpperCase(word.charAt(0));
    //Ignore the first character if this word starts the sentence and the first
    //character was upper cased, since this is normal behaviour
    if ((startsSentence) && isUpper && (strLen > 1))
      isUpper = Character.isUpperCase(word.charAt(1));
    if (isUpper) {
12 下一页
💿 文件大小 231 K
👤 上传用户 haoding306
📂 所属分类编辑器/阅读器
🏷️ 相关标签

#自动
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -