⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 lovinsstemmer.java

📁 代码是一个分类器的实现，其中使用了部分 Weka 的源代码。可以将项目导入 Eclipse 运行。
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * LovinsStemmer.java
 * Copyright (C) 2001 Eibe Frank
 *
 */

package weka.core.stemmers;

import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformationHandler;

import java.util.HashMap;

/**
 <!-- globalinfo-start -->
 * A stemmer based on the Lovins stemmer, described here:<br/>
 * <br/>
 * Julie Beth Lovins (1968). Development of a stemming algorithm. Mechanical Translation and Computational Linguistics. 11:22-31.
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- technical-bibtex-start -->
 * BibTeX:
 * <pre>
 * &#64;article{Lovins1968,
 *    author = {Julie Beth Lovins},
 *    journal = {Mechanical Translation and Computational Linguistics},
 *    pages = {22-31},
 *    title = {Development of a stemming algorithm},
 *    volume = {11},
 *    year = {1968}
 * }
 * </pre>
 * <p/>
 <!-- technical-bibtex-end -->
 *
 * @author  Eibe Frank (eibe at cs dot waikato dot ac dot nz)
 * @version $Revision: 1.5 $
 */
public class LovinsStemmer
  implements Stemmer, TechnicalInformationHandler {

  /** for serialization */
  static final long serialVersionUID = -6113024782588197L;

  /** Enters C version compatibility mode if set to true (emulates
    features of the original C implementation that are inconsistent
    with the algorithm as described in Lovins's paper) */
  private static boolean m_CompMode = false;

  /**
   * The hash tables containing the list of endings. There is one table
   * per ending length: m_lN maps an ending of exactly N characters to a
   * one-letter condition code that removeEnding() inspects before
   * stripping that ending.
   */
  private static HashMap m_l11 = null;
  private static HashMap m_l10 = null;
  private static HashMap m_l9 = null;
  private static HashMap m_l8 = null;
  private static HashMap m_l7 = null;
  private static HashMap m_l6 = null;
  private static HashMap m_l5 = null;
  private static HashMap m_l4 = null;
  private static HashMap m_l3 = null;
  private static HashMap m_l2 = null;
  private static HashMap m_l1 = null;

  /*
   * Fills the ending tables once, when the class is initialized.
   *
   * m_CompMode is declared above this block with the initializer false, so
   * the "!m_CompMode" branches below are the ones taken here.
   * NOTE(review): the tables are not rebuilt in this block if m_CompMode is
   * changed afterwards -- confirm how the rest of the file deals with that.
   */
  static {
    // Endings of length 11.
    m_l11 = new HashMap();
    m_l11.put("alistically", "B");
    m_l11.put("arizability", "A");
    m_l11.put("izationally", "B");

    // Endings of length 10.
    m_l10 = new HashMap();
    m_l10.put("antialness", "A");
    m_l10.put("arisations", "A");
    m_l10.put("arizations", "A");
    m_l10.put("entialness", "A");

    // Endings of length 9.
    m_l9 = new HashMap();
    m_l9.put("allically", "C");
    m_l9.put("antaneous", "A");
    m_l9.put("antiality", "A");
    m_l9.put("arisation", "A");
    m_l9.put("arization", "A");
    m_l9.put("ationally", "B");
    m_l9.put("ativeness", "A");
    m_l9.put("eableness", "E");
    m_l9.put("entations", "A");
    m_l9.put("entiality", "A");
    m_l9.put("entialize", "A");
    m_l9.put("entiation", "A");
    m_l9.put("ionalness", "A");
    m_l9.put("istically", "A");
    m_l9.put("itousness", "A");
    m_l9.put("izability", "A");
    m_l9.put("izational", "A");

    // Endings of length 8.
    m_l8 = new HashMap();
    m_l8.put("ableness", "A");
    m_l8.put("arizable", "A");
    m_l8.put("entation", "A");
    m_l8.put("entially", "A");
    m_l8.put("eousness", "A");
    m_l8.put("ibleness", "A");
    m_l8.put("icalness", "A");
    m_l8.put("ionalism", "A");
    m_l8.put("ionality", "A");
    m_l8.put("ionalize", "A");
    m_l8.put("iousness", "A");
    m_l8.put("izations", "A");
    m_l8.put("lessness", "A");

    // Endings of length 7.
    m_l7 = new HashMap();
    m_l7.put("ability", "A");
    m_l7.put("aically", "A");
    m_l7.put("alistic", "B");
    m_l7.put("alities", "A");
    m_l7.put("ariness", "E");
    m_l7.put("aristic", "A");
    m_l7.put("arizing", "A");
    m_l7.put("ateness", "A");
    m_l7.put("atingly", "A");
    m_l7.put("ational", "B");
    m_l7.put("atively", "A");
    m_l7.put("ativism", "A");
    m_l7.put("elihood", "E");
    m_l7.put("encible", "A");
    m_l7.put("entally", "A");
    m_l7.put("entials", "A");
    m_l7.put("entiate", "A");
    m_l7.put("entness", "A");
    m_l7.put("fulness", "A");
    m_l7.put("ibility", "A");
    m_l7.put("icalism", "A");
    m_l7.put("icalist", "A");
    m_l7.put("icality", "A");
    m_l7.put("icalize", "A");
    m_l7.put("ication", "G");
    m_l7.put("icianry", "A");
    m_l7.put("ination", "A");
    m_l7.put("ingness", "A");
    m_l7.put("ionally", "A");
    m_l7.put("isation", "A");
    m_l7.put("ishness", "A");
    m_l7.put("istical", "A");
    m_l7.put("iteness", "A");
    m_l7.put("iveness", "A");
    m_l7.put("ivistic", "A");
    m_l7.put("ivities", "A");
    m_l7.put("ization", "F");
    m_l7.put("izement", "A");
    m_l7.put("oidally", "A");
    m_l7.put("ousness", "A");

    // Endings of length 6.
    m_l6 = new HashMap();
    m_l6.put("aceous", "A");
    m_l6.put("acious", "B");
    m_l6.put("action", "G");
    m_l6.put("alness", "A");
    m_l6.put("ancial", "A");
    m_l6.put("ancies", "A");
    m_l6.put("ancing", "B");
    m_l6.put("ariser", "A");
    m_l6.put("arized", "A");
    m_l6.put("arizer", "A");
    m_l6.put("atable", "A");
    m_l6.put("ations", "B");
    m_l6.put("atives", "A");
    m_l6.put("eature", "Z");
    m_l6.put("efully", "A");
    m_l6.put("encies", "A");
    m_l6.put("encing", "A");
    m_l6.put("ential", "A");
    m_l6.put("enting", "C");
    m_l6.put("entist", "A");
    m_l6.put("eously", "A");
    m_l6.put("ialist", "A");
    m_l6.put("iality", "A");
    m_l6.put("ialize", "A");
    m_l6.put("ically", "A");
    m_l6.put("icance", "A");
    m_l6.put("icians", "A");
    m_l6.put("icists", "A");
    m_l6.put("ifully", "A");
    m_l6.put("ionals", "A");
    m_l6.put("ionate", "D");
    m_l6.put("ioning", "A");
    m_l6.put("ionist", "A");
    m_l6.put("iously", "A");
    m_l6.put("istics", "A");
    m_l6.put("izable", "E");
    m_l6.put("lessly", "A");
    m_l6.put("nesses", "A");
    m_l6.put("oidism", "A");

    // Endings of length 5.
    m_l5 = new HashMap();
    m_l5.put("acies", "A");
    m_l5.put("acity", "A");
    m_l5.put("aging", "B");
    m_l5.put("aical", "A");
    // "alist" is only registered outside C compatibility mode.
    if (!m_CompMode) {
      m_l5.put("alist", "A");
    }
    m_l5.put("alism", "B");
    m_l5.put("ality", "A");
    m_l5.put("alize", "A");
    m_l5.put("allic", "b");
    m_l5.put("anced", "B");
    m_l5.put("ances", "B");
    m_l5.put("antic", "C");
    m_l5.put("arial", "A");
    m_l5.put("aries", "A");
    m_l5.put("arily", "A");
    m_l5.put("arity", "B");
    m_l5.put("arize", "A");
    m_l5.put("aroid", "A");
    m_l5.put("ately", "A");
    m_l5.put("ating", "I");
    m_l5.put("ation", "B");
    m_l5.put("ative", "A");
    m_l5.put("ators", "A");
    m_l5.put("atory", "A");
    m_l5.put("ature", "E");
    m_l5.put("early", "Y");
    m_l5.put("ehood", "A");
    m_l5.put("eless", "A");
    // The two modes register different spellings here: "elily" outside
    // C compatibility mode, "elity" inside it.
    if (!m_CompMode) {
      m_l5.put("elily", "A");
    } else {
      m_l5.put("elity", "A");
    }
    m_l5.put("ement", "A");
    m_l5.put("enced", "A");
    m_l5.put("ences", "A");
    m_l5.put("eness", "E");
    m_l5.put("ening", "E");
    m_l5.put("ental", "A");
    m_l5.put("ented", "C");
    m_l5.put("ently", "A");
    m_l5.put("fully", "A");
    m_l5.put("ially", "A");
    m_l5.put("icant", "A");
    m_l5.put("ician", "A");
    m_l5.put("icide", "A");
    m_l5.put("icism", "A");
    m_l5.put("icist", "A");
    m_l5.put("icity", "A");
    m_l5.put("idine", "I");
    m_l5.put("iedly", "A");
    m_l5.put("ihood", "A");
    m_l5.put("inate", "A");
    m_l5.put("iness", "A");
    m_l5.put("ingly", "B");
    m_l5.put("inism", "J");
    m_l5.put("inity", "c");
    m_l5.put("ional", "A");
    m_l5.put("ioned", "A");
    m_l5.put("ished", "A");
    m_l5.put("istic", "A");
    m_l5.put("ities", "A");
    m_l5.put("itous", "A");
    m_l5.put("ively", "A");
    m_l5.put("ivity", "A");
    m_l5.put("izers", "F");
    m_l5.put("izing", "F");
    m_l5.put("oidal", "A");
    m_l5.put("oides", "A");
    m_l5.put("otide", "A");
    m_l5.put("ously", "A");

    // Endings of length 4.
    m_l4 = new HashMap();
    m_l4.put("able", "A");
    m_l4.put("ably", "A");
    m_l4.put("ages", "B");
    m_l4.put("ally", "B");
    m_l4.put("ance", "B");
    m_l4.put("ancy", "B");
    m_l4.put("ants", "B");
    m_l4.put("aric", "A");
    m_l4.put("arly", "K");
    m_l4.put("ated", "I");
    m_l4.put("ates", "A");
    m_l4.put("atic", "B");
    m_l4.put("ator", "A");
    m_l4.put("ealy", "Y");
    m_l4.put("edly", "E");
    m_l4.put("eful", "A");
    m_l4.put("eity", "A");
    m_l4.put("ence", "A");
    m_l4.put("ency", "A");
    m_l4.put("ened", "E");
    m_l4.put("enly", "E");
    m_l4.put("eous", "A");
    m_l4.put("hood", "A");
    m_l4.put("ials", "A");
    m_l4.put("ians", "A");
    m_l4.put("ible", "A");
    m_l4.put("ibly", "A");
    m_l4.put("ical", "A");
    m_l4.put("ides", "L");
    m_l4.put("iers", "A");
    m_l4.put("iful", "A");
    m_l4.put("ines", "M");
    m_l4.put("ings", "N");
    m_l4.put("ions", "B");
    m_l4.put("ious", "A");
    m_l4.put("isms", "B");
    m_l4.put("ists", "A");
    m_l4.put("itic", "H");
    m_l4.put("ized", "F");
    m_l4.put("izer", "F");
    m_l4.put("less", "A");
    m_l4.put("lily", "A");
    m_l4.put("ness", "A");
    m_l4.put("ogen", "A");
    m_l4.put("ward", "A");
    m_l4.put("wise", "A");
    m_l4.put("ying", "B");
    m_l4.put("yish", "A");

    // Endings of length 3.
    m_l3 = new HashMap();
    m_l3.put("acy", "A");
    m_l3.put("age", "B");
    m_l3.put("aic", "A");
    m_l3.put("als", "b");
    m_l3.put("ant", "B");
    m_l3.put("ars", "O");
    m_l3.put("ary", "F");
    m_l3.put("ata", "A");
    m_l3.put("ate", "A");
    m_l3.put("eal", "Y");
    m_l3.put("ear", "Y");
    m_l3.put("ely", "E");
    m_l3.put("ene", "E");
    m_l3.put("ent", "C");
    m_l3.put("ery", "E");
    m_l3.put("ese", "A");
    m_l3.put("ful", "A");
    m_l3.put("ial", "A");
    m_l3.put("ian", "A");
    m_l3.put("ics", "A");
    m_l3.put("ide", "L");
    m_l3.put("ied", "A");
    m_l3.put("ier", "A");
    m_l3.put("ies", "P");
    m_l3.put("ily", "A");
    m_l3.put("ine", "M");
    m_l3.put("ing", "N");
    m_l3.put("ion", "Q");
    m_l3.put("ish", "C");
    m_l3.put("ism", "B");
    m_l3.put("ist", "A");
    m_l3.put("ite", "a");
    m_l3.put("ity", "A");
    m_l3.put("ium", "A");
    m_l3.put("ive", "A");
    m_l3.put("ize", "F");
    m_l3.put("oid", "A");
    m_l3.put("one", "R");
    m_l3.put("ous", "A");

    // Endings of length 2 (including the possessive forms s' and 's).
    m_l2 = new HashMap();
    m_l2.put("ae", "A");
    m_l2.put("al", "b");
    m_l2.put("ar", "X");
    m_l2.put("as", "B");
    m_l2.put("ed", "E");
    m_l2.put("en", "F");
    m_l2.put("es", "E");
    m_l2.put("ia", "A");
    m_l2.put("ic", "A");
    m_l2.put("is", "A");
    m_l2.put("ly", "B");
    m_l2.put("on", "S");
    m_l2.put("or", "T");
    m_l2.put("um", "U");
    m_l2.put("us", "V");
    m_l2.put("yl", "R");
    m_l2.put("s\'", "A");
    m_l2.put("\'s", "A");

    // Endings of length 1.
    m_l1 = new HashMap();
    m_l1.put("a", "A");
    m_l1.put("e", "A");
    m_l1.put("i", "A");
    m_l1.put("o", "A");
    m_l1.put("s", "W");
    m_l1.put("y", "B");
  }

  /**
   * Returns a string describing the stemmer. The text is a fixed
   * introductory sentence followed by the string form of
   * {@link #getTechnicalInformation()}.
   *
   * @return a description suitable for
   *         displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return
        "A stemmer based on the Lovins stemmer, described here:\n\n"
      + getTechnicalInformation().toString();
  }

  /**
   * Returns an instance of a TechnicalInformation object, containing
   * detailed information about the technical background of this class,
   * e.g., paper reference or book this class is based on.
   * <p/>
   * A new ARTICLE-type object is built on every call and filled with the
   * author, year, title, journal, volume and pages of Lovins's 1968 paper.
   *
   * @return the technical information about this class
   */
  public TechnicalInformation getTechnicalInformation() {
    TechnicalInformation result;

    result = new TechnicalInformation(Type.ARTICLE);
    result.setValue(Field.AUTHOR, "Julie Beth Lovins");
    result.setValue(Field.YEAR, "1968");
    result.setValue(Field.TITLE, "Development of a stemming algorithm");
    result.setValue(Field.JOURNAL, "Mechanical Translation and Computational Linguistics");
    result.setValue(Field.VOLUME, "11");
    result.setValue(Field.PAGES, "22-31");

    return result;
  }

  /**
   * Finds and removes ending from given word.
   * <p/>
   * For a candidate ending length el (starting at 11) the last el
   * characters of the word are looked up in the table for that length.
   * A lookup is only attempted when more than one character would remain
   * in front of the ending. If the ending is listed, the first character
   * of its condition code decides whether the ending may be stripped.
   * <p/>
   * NOTE(review): this listing ends part-way through the condition-code
   * switch; the remaining codes, the step that changes el between
   * iterations and the final return are not visible here.
   *
   * @param word	the word to work on
   * @return 		the processed word
   */
  private String removeEnding(String word) {

    int length = word.length();
    int el = 11;

    while (el > 0) {
      // Only consider this ending length if at least two characters
      // would be left over as the stem.
      if (length - el > 1) {
        String ending = word.substring(length - el);
        String conditionCode = null;
        // Pick the table that holds endings of exactly el characters.
        switch (el) {
          case 11: conditionCode = (String)m_l11.get(ending);
                   break;
          case 10: conditionCode = (String)m_l10.get(ending);
                   break;
          case 9: conditionCode = (String)m_l9.get(ending);
                  break;
          case 8: conditionCode = (String)m_l8.get(ending);
                  break;
          case 7: conditionCode = (String)m_l7.get(ending);
                  break;
          case 6: conditionCode = (String)m_l6.get(ending);
                  break;
          case 5: conditionCode = (String)m_l5.get(ending);
                  break;
          case 4: conditionCode = (String)m_l4.get(ending);
                  break;
          case 3: conditionCode = (String)m_l3.get(ending);
                  break;
          case 2: conditionCode = (String)m_l2.get(ending);
                  break;
          case 1: conditionCode = (String)m_l1.get(ending);
                  break;
          default:
        }
        // A null code means the ending is not listed for this length.
        if (conditionCode != null) {
          // In the cases below, "length - el" is the length of the stem
          // that would remain and "word.charAt(length - el - 1)" is the
          // stem's last character.
          switch (conditionCode.charAt(0)) {
            case 'A':
              // No further condition: always strip.
              return word.substring(0, length - el);
            case 'B':
              // Strip only if the stem has more than 2 characters.
              if (length - el > 2) {
                return word.substring(0, length - el);
              }
              break;
            case 'C':
              // Strip only if the stem has more than 3 characters.
              if (length - el > 3) {
                return word.substring(0, length - el);
              }
              break;
            case 'D':
              // Strip only if the stem has more than 4 characters.
              if (length - el > 4) {
                return word.substring(0, length - el);
              }
              break;
            case 'E':
              // Strip only if the stem does not end in 'e'.
              if (word.charAt(length - el - 1) != 'e') {
                return word.substring(0, length - el);
              }
              break;
            case 'F':
              // Strip only if the stem has more than 2 characters and
              // does not end in 'e'.
              if ((length - el > 2) &&
                  (word.charAt(length - el - 1) != 'e')) {
                return word.substring(0, length - el);
                  }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -