📄 spellchecker.java
字号:
/*
Jazzy - a Java library for Spell Checking
Copyright (C) 2001 Mindaugas Idzelis
Full text of license can be found in LICENSE.txt
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
package com.swabunga.spell.event;
import com.swabunga.spell.engine.Configuration;
import com.swabunga.spell.engine.SpellDictionary;
import com.swabunga.spell.engine.SpellDictionaryHashMap;
import com.swabunga.spell.engine.Word;
import com.swabunga.util.VectorUtility;
import java.io.IOException;
import java.util.*;
/**
* This is the main class for spell checking (using the new event based spell
* checking).
* <p/>
* By default, the class creates a user dictionary to accumulate added words.
* Since this user dictionary has no file assigned to persist added words, they
* are retained only for the lifetime of the spell checker instance.
* If you set a user dictionary such as
* {@link com.swabunga.spell.engine.SpellDictionaryHashMap SpellDictionaryHashMap}
* that persists the added words, the user dictionary can
* grow and be available across different invocations of the spell checker.
*
* @author Jason Height (jheight@chariot.net.au)
* 19 June 2002
*/
public class SpellChecker {
/** Flag indicating that the spell check completed without any errors present. */
public static final int SPELLCHECK_OK = -1;
/** Flag indicating that the spell check completed due to user cancellation. */
public static final int SPELLCHECK_CANCEL = -2;
/** Registered listeners; each element is cast to SpellCheckListener when an event is fired. */
private Vector eventListeners = new Vector();
/** Dictionaries used for looking up words, in the order they were added. */
private Vector dictionaries = new Vector();
/** Dictionary for user-added words; created in the no-arg constructor, replaceable via setUserDictionary. */
private SpellDictionary userdictionary;
/** Engine parameters, obtained from Configuration.getConfiguration(). */
private Configuration config = Configuration.getConfiguration();
/** This variable holds all of the words that are to be always ignored. */
private Vector ignoredWords = new Vector();
/**
 * "Replace All" words remembered until reset() is called.
 * NOTE(review): presumably maps a misspelled word to its replacement -
 * confirm against the code that populates it.
 */
private Hashtable autoReplaceWords = new Hashtable();
// added caching - bd
// For cached operation a separate user dictionary is required
// NOTE(review): cache, threshold and cacheSize are not read or written in
// the part of the file visible here; the (dictionary, threshold) constructor
// stores its threshold in the configuration, not in this field. Confirm
// their use against the caching code further down.
private Map cache;
private int threshold = 0;
private int cacheSize = 0;
/**
 * Constructs the SpellChecker with a default
 * {@link com.swabunga.spell.engine.SpellDictionaryHashMap SpellDictionaryHashMap}
 * as the user dictionary. No lookup dictionary is registered; add one with
 * {@link #addDictionary(SpellDictionary)}.
 *
 * @throws RuntimeException if the default user dictionary cannot be created;
 *         the originating IOException is attached as the cause
 */
public SpellChecker() {
    try {
        userdictionary = new SpellDictionaryHashMap();
    } catch (IOException e) {
        // Not expected to happen, but keep the cause so the failure is
        // diagnosable if it ever does.
        throw new RuntimeException("this exception should never happen because we are using null phonetic file", e);
    }
}
/**
 * Constructs the SpellChecker with a single lookup dictionary. The
 * threshold already present in the configuration is left untouched.
 *
 * @param dictionary the dictionary used for looking up words; must be
 *        non-null
 */
public SpellChecker(SpellDictionary dictionary) {
    this();
    // Registration (and the non-null check) is delegated to addDictionary.
    this.addDictionary(dictionary);
}
/**
 * Constructs the SpellChecker with a lookup dictionary and an explicit
 * threshold.
 *
 * @param dictionary the dictionary used for looking up words.
 * @param threshold the cost value above which any suggestions are
 *        thrown away; stored in the configuration under
 *        {@code Configuration.SPELL_THRESHOLD}
 */
public SpellChecker(SpellDictionary dictionary, int threshold) {
    this(dictionary);
    this.config.setInteger(Configuration.SPELL_THRESHOLD, threshold);
}
/**
 * Appends a dictionary to the end of the list of dictionaries used for
 * looking up words. This allows a base language dictionary to be
 * registered first, followed by more specialised ones.
 *
 * @param dictionary the dictionary to append to the dictionary list.
 * @throws IllegalArgumentException if {@code dictionary} is null
 */
public void addDictionary(SpellDictionary dictionary) {
    if (null == dictionary) {
        throw new IllegalArgumentException("dictionary must be non-null");
    }
    dictionaries.add(dictionary);
}
/**
 * Replaces the user dictionary, i.e. the dictionary to which words are
 * added. The argument is stored as given, without validation.
 *
 * @param dictionary the dictionary to use when the user specifies a new
 *        word to add.
 */
public void setUserDictionary(SpellDictionary dictionary) {
    this.userdictionary = dictionary;
}
/**
 * Returns the configuration instance that holds the spell checking engine
 * parameters used by this checker.
 *
 * @return the current Configuration
 */
public Configuration getConfiguration() {
    return this.config;
}
/**
 * Registers a SpellCheckListener by appending it to the listeners list.
 *
 * @param listener the listener to register
 */
public void addSpellCheckListener(SpellCheckListener listener) {
    eventListeners.add(listener);
}
/**
 * Unregisters a SpellCheckListener. Only the first occurrence of the
 * listener is removed from the listeners list.
 *
 * @param listener the listener to be removed from the listeners list.
 */
public void removeSpellCheckListener(SpellCheckListener listener) {
    eventListeners.remove(listener);
}
/**
 * Delivers a spell check event to every registered listener by calling
 * its {@code spellingError} method. Listeners are notified from the most
 * recently added back to the first one.
 *
 * @param event the event that needs to be processed by the spell checking
 *        system.
 */
protected void fireSpellCheckEvent(SpellCheckEvent event) {
    int index = eventListeners.size();
    while (--index >= 0) {
        SpellCheckListener listener = (SpellCheckListener) eventListeners.elementAt(index);
        listener.spellingError(event);
    }
}
/**
 * Forgets the words currently remembered as <code>Ignore All</code> and
 * <code>Replace All</code> words by replacing both collections with new,
 * empty ones.
 */
public void reset() {
    this.autoReplaceWords = new Hashtable();
    this.ignoredWords = new Vector();
}
/**
 * Spell checks a text string.
 * <p>
 * The text is wrapped in a StringWordTokenizer, checked, and the
 * tokenizer's resulting context is returned.
 *
 * @param text The text that needs to be spell checked
 * @return The text after spell checking
 * @deprecated use checkSpelling(WordTokenizer)
 */
public String checkString(String text) {
    StringWordTokenizer tokenizer = new StringWordTokenizer(text);
    checkSpelling(tokenizer);
    String corrected = tokenizer.getContext();
    return corrected;
}
/**
 * Tells whether the word being spell checked contains at least one digit,
 * as decided by {@link Character#isDigit(char)}.
 *
 * @param word The word to analyze for digits.
 * @return true if the word contains at least one digit; false otherwise,
 *         including for an empty word.
 */
private final static boolean isDigitWord(String word) {
    int length = word.length();
    int index = 0;
    while (index < length) {
        if (Character.isDigit(word.charAt(index))) {
            return true;
        }
        index++;
    }
    return false;
}
/**
 * Tells whether the word being spell checked looks like an Internet
 * address. The word is considered an address when it starts, ignoring
 * case, with one of:
 * <ul>
 * <li>http://</li>
 * <li>ftp://</li>
 * <li>https://</li>
 * <li>ftps://</li>
 * <li>www.</li>
 * </ul>
 *
 * One limitation is that this method cannot currently recognize email
 * addresses. Since the 'word' that is passed in may in fact contain
 * the rest of the document to be checked, it is not (yet!) a good
 * idea to scan for the @ character.
 *
 * @param word The word to analyze for an Internet address; must be
 *        non-null.
 * @return true if this word looks like an Internet address.
 */
public final static boolean isINETWord(String word) {
    // Only the leading characters can decide the result, and the argument
    // may carry far more text than a single word, so lower-case just a
    // window as long as the longest prefix instead of the whole string.
    int window = Math.min(word.length(), "https://".length());
    // Fixed locale: the outcome must not depend on the JVM default locale.
    String head = word.substring(0, window).toLowerCase(Locale.ENGLISH);
    return head.startsWith("http://") ||
        head.startsWith("www.") ||
        head.startsWith("ftp://") ||
        head.startsWith("https://") ||
        head.startsWith("ftps://");
}
/**
 * Tells whether the word being spell checked is entirely upper case, i.e.
 * contains no character for which {@link Character#isLowerCase(char)} is
 * true. Digits, punctuation and the empty word therefore count as upper
 * case.
 *
 * @param word The word to analyze for upper case characters
 * @return true if this word contains no lower case character
 */
private final static boolean isUpperCaseWord(String word) {
    int length = word.length();
    int index = 0;
    while (index < length) {
        if (Character.isLowerCase(word.charAt(index))) {
            return false;
        }
        index++;
    }
    return true;
}
/**
* Verifies if the word that is being spell checked contains lower and
* upper cased characters. Note that a phrase beginning with an upper cased
* character is not considered a mixed case word.
*
* @param word The word to analyze for mixed cases characters
* @param startsSentence True if this word is at the start of a sentence
* @return true if this word contains mixed case characters
*/
private final static boolean isMixedCaseWord(String word, boolean startsSentence) {
int strLen = word.length();
boolean isUpper = Character.isUpperCase(word.charAt(0));
//Ignore the first character if this word starts the sentence and the first
//character was upper cased, since this is normal behaviour
if ((startsSentence) && isUpper && (strLen > 1))
isUpper = Character.isUpperCase(word.charAt(1));
if (isUpper) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -