📄 spellchecker.java

📁 自动拼写检查的实现
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
      for (int i = word.length() - 1; i > 0; i--) {
        if (Character.isLowerCase(word.charAt(i))) {
          return true;
        }
      }
    } else {
      for (int i = word.length() - 1; i > 0; i--) {
        if (Character.isUpperCase(word.charAt(i))) {
          return true;
        }
      }
    }
    return false;
  }


  /**
   * This method will fire the spell check event and then handle the event
   *  action that has been selected by the user.
   *
   * @param  tokenizer        Description of the Parameter
   * @param  event            The event to handle
   * @return                  Returns true if the event action is to cancel the current spell checking, false if the spell checking should continue
   */
  protected boolean fireAndHandleEvent(WordTokenizer tokenizer, SpellCheckEvent event) {
    fireSpellCheckEvent(event);
    String word = event.getInvalidWord();
    //Work out what to do in response to the event.
    switch (event.getAction()) {
      case SpellCheckEvent.INITIAL:
        break;
      case SpellCheckEvent.IGNORE:
        break;
      case SpellCheckEvent.IGNOREALL:
        ignoreAll(word);
        break;
      case SpellCheckEvent.REPLACE:
        tokenizer.replaceWord(event.getReplaceWord());
        break;
      case SpellCheckEvent.REPLACEALL:
        String replaceAllWord = event.getReplaceWord();
        if (!autoReplaceWords.containsKey(word)) {
          autoReplaceWords.put(word, replaceAllWord);
        }
        tokenizer.replaceWord(replaceAllWord);
        break;
      case SpellCheckEvent.ADDTODICT:
        String addWord = event.getReplaceWord();
        if (!addWord.equals(word))
          tokenizer.replaceWord(addWord);
        userdictionary.addWord(addWord);
        break;
      case SpellCheckEvent.CANCEL:
        return true;
      default:
        throw new IllegalArgumentException("Unhandled case.");
    }
    return false;
  }

  /**
   * Adds a word to the list of ignored words
   * @param word The text of the word to ignore
   */
  public void ignoreAll(String word) {
    if (!ignoredWords.contains(word)) {
      ignoredWords.addElement(word);
    }
  }
  
  /**
   * Adds a word to the user dictionary
   * @param word The text of the word to add
   */
  public void addToDictionary(String word) {
    if (!userdictionary.isCorrect(word))
      userdictionary.addWord(word);
  }
  
  /**
   * Indicates if a word is in the list of ignored words
   * @param word The text of the word check
   */
  public boolean isIgnored(String word){
  	return ignoredWords.contains(word);
  }
  
  /**
   * Verifies if the word to analyze is contained in dictionaries. The order 
   * of dictionary lookup is:
   * <ul>
   * <li>The default user dictionary or the one set through 
   * {@link SpellChecker#setUserDictionary}</li>
   * <li>The dictionary specified at construction time, if any.</li>
   * <li>Any dictionary in the order they were added through 
   * {@link SpellChecker#addDictionary}</li>
   * </ul>
   *
   * @param word The word to verify that it's spelling is known.
   * @return true if the word is in a dictionary.
   */
  public boolean isCorrect(String word) {
    if (userdictionary.isCorrect(word)) return true;
    for (Enumeration e = dictionaries.elements(); e.hasMoreElements();) {
      SpellDictionary dictionary = (SpellDictionary) e.nextElement();
      if (dictionary.isCorrect(word)) return true;
    }
    return false;
  }

  /**
   * Produces a list of suggested word after looking for suggestions in various
   * dictionaries. The order of dictionary lookup is:
   * <ul>
   * <li>The default user dictionary or the one set through 
   * {@link SpellChecker#setUserDictionary}</li>
   * <li>The dictionary specified at construction time, if any.</li>
   * <li>Any dictionary in the order they were added through 
   * {@link SpellChecker#addDictionary}</li>
   * </ul>
   *
   * @param word The word for which we want to gather suggestions
   * @param threshold the cost value above which any suggestions are 
   *                  thrown away
   * @return the list of words suggested
   */
  public List getSuggestions(String word, int threshold) {
    if (this.threshold != threshold && cache != null) {
       this.threshold = threshold;
       cache.clear();
    }
    
    ArrayList suggestions = null;
    
    if (cache != null)
       suggestions = (ArrayList) cache.get(word);

    if (suggestions == null) {
       suggestions = new ArrayList(50);
    
       for (Enumeration e = dictionaries.elements(); e.hasMoreElements();) {
           SpellDictionary dictionary = (SpellDictionary) e.nextElement();
           
           if (dictionary != userdictionary)
              VectorUtility.addAll(suggestions, dictionary.getSuggestions(word, threshold), false);
       }

       if (cache != null && cache.size() < cacheSize)
         cache.put(word, suggestions);
    }
    
    VectorUtility.addAll(suggestions, userdictionary.getSuggestions(word, threshold), false);
    suggestions.trimToSize();
    
    return suggestions;
  }

  /**
  * Activates a cache with the maximum number of entries set to 300
  */
  public void setCache() {
    setCache(300);
  }

  /**
  * Activates a cache with specified size
  * @param size - max. number of cache entries (0 to disable chache)
  */
  public void setCache(int size) {
    cacheSize = size;
    if (size == 0)
      cache = null;
   else
     cache = new HashMap((size + 2) / 3 * 4);
  }

  /**
   * This method is called to check the spelling of the words that are returned
   * by the WordTokenizer.
   * <p/>
   * For each invalid word the action listeners will be informed with a new 
   * SpellCheckEvent.<p>
   *
   * @param  tokenizer  The media containing the text to analyze.
   * @return Either SPELLCHECK_OK, SPELLCHECK_CANCEL or the number of errors found. The number of errors are those that
   * are found BEFORE any corrections are made.
   */
  public final int checkSpelling(WordTokenizer tokenizer) {
    int errors = 0;
    boolean terminated = false;
    //Keep track of the previous word
//    String previousWord = null;
    while (tokenizer.hasMoreWords() && !terminated) {
      String word = tokenizer.nextWord();
      //Check the spelling of the word
      if (!isCorrect(word)) {
          if ((config.getBoolean(Configuration.SPELL_IGNOREMIXEDCASE) && isMixedCaseWord(word, tokenizer.isNewSentence())) ||
            (config.getBoolean(Configuration.SPELL_IGNOREUPPERCASE) && isUpperCaseWord(word)) ||
            (config.getBoolean(Configuration.SPELL_IGNOREDIGITWORDS) && isDigitWord(word)) ||
            (config.getBoolean(Configuration.SPELL_IGNOREINTERNETADDRESSES) && isINETWord(word))) {
          //Null event. Since we are ignoring this word due
          //to one of the above cases.
        } else {
          //We cant ignore this misspelt word
          //For this invalid word are we ignoring the misspelling?
          if (!isIgnored(word)) {
            errors++;
            //Is this word being automagically replaced
            if (autoReplaceWords.containsKey(word)) {
              tokenizer.replaceWord((String) autoReplaceWords.get(word));
            } else {
              //JMH Need to somehow capitalise the suggestions if
              //ignoreSentenceCapitalisation is not set to true
              //Fire the event.
              List suggestions = getSuggestions(word, config.getInteger(Configuration.SPELL_THRESHOLD));
              if (capitalizeSuggestions(word, tokenizer))
                suggestions = makeSuggestionsCapitalized(suggestions);
              SpellCheckEvent event = new BasicSpellCheckEvent(word, suggestions, tokenizer);
              terminated = fireAndHandleEvent(tokenizer, event);
            }
          }
        }
      } else {
        //This is a correctly spelt word. However perform some extra checks
        /*
         *  JMH TBD          //Check for multiple words
         *  if (!ignoreMultipleWords &&) {
         *  }
         */
        //Check for capitalisation
        if (isSupposedToBeCapitalized(word, tokenizer)) {
          errors++;
          StringBuffer buf = new StringBuffer(word);
          buf.setCharAt(0, Character.toUpperCase(word.charAt(0)));
          Vector suggestion = new Vector();
          suggestion.addElement(new Word(buf.toString(), 0));
          SpellCheckEvent event = new BasicSpellCheckEvent(word, suggestion, tokenizer);
          terminated = fireAndHandleEvent(tokenizer, event);
        }
      }
    }
    if (terminated)
      return SPELLCHECK_CANCEL;
    else if (errors == 0)
      return SPELLCHECK_OK;
    else
      return errors;
  }
  
  
  private List makeSuggestionsCapitalized(List suggestions) {
    Iterator iterator = suggestions.iterator();
    while(iterator.hasNext()) {
      Word word = (Word)iterator.next();
      String suggestion = word.getWord();
      StringBuffer stringBuffer = new StringBuffer(suggestion);
      stringBuffer.setCharAt(0, Character.toUpperCase(suggestion.charAt(0)));
      word.setWord(stringBuffer.toString());
    }
    return suggestions;
  }

    
   private boolean isSupposedToBeCapitalized(String word, WordTokenizer wordTokenizer) {
     boolean configCapitalize = !config.getBoolean(Configuration.SPELL_IGNORESENTENCECAPITALIZATION);
     return configCapitalize && wordTokenizer.isNewSentence() && Character.isLowerCase(word.charAt(0));
  } 

   private boolean capitalizeSuggestions(String word, WordTokenizer wordTokenizer) {
   // if SPELL_IGNORESENTENCECAPITALIZATION and the initial word is capitalized, suggestions should also be capitalized
   // if !SPELL_IGNORESENTENCECAPITALIZATION, capitalize suggestions only for the first word in a sentence
     boolean configCapitalize = !config.getBoolean(Configuration.SPELL_IGNORESENTENCECAPITALIZATION);
     boolean uppercase = Character.isUpperCase(word.charAt(0));
     return (configCapitalize && wordTokenizer.isNewSentence()) || (!configCapitalize && uppercase);
   }
}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -