⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 snowballstemmer.java

📁 代码是一个分类器的实现,其中使用了部分weka的源代码。可以将项目导入eclipse运行
💻 JAVA
字号:
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * SnowballStemmer.java * Copyright (C) 2005 University of Waikato, Hamilton, New Zealand * */package weka.core.stemmers;import weka.core.Option;import weka.core.OptionHandler;import weka.core.ClassDiscovery;import weka.core.Utils;import java.lang.reflect.Method;import java.util.Enumeration;import java.util.Vector;/** <!-- globalinfo-start --> * A wrapper class for the Snowball stemmers. Only available if the Snowball classes are in the classpath.<br/> * For more information visit these web sites:<br/> *   http://www.cs.waikato.ac.nz/~ml/weka/stemmers/<br/> *   http://snowball.tartarus.org/<br/> * <p/> <!-- globalinfo-end --> *  <!-- options-start --> * Valid options are: <p/> *  * <pre> -S &lt;name&gt; *  The name of the snowball stemmer (default 'porter'). *  available stemmers: *     danish, dutch, english, finnish, french, german, italian,  *     norwegian, porter, portuguese, russian, spanish, swedish * </pre> *  <!-- options-end --> * * @author    FracPete (fracpete at waikato dot ac dot nz) * @version   $Revision: 1.6 $ */public class SnowballStemmer   implements Stemmer, OptionHandler {    /** for serialization */  static final long serialVersionUID = -6111170431963015178L;    /** the package name for snowball */  public final static String PACKAGE = "org.tartarus.snowball";    /** the package name where the stemmers are located */  public final static String PACKAGE_EXT = PACKAGE + ".ext";  /** the snowball program, all stemmers are derived from */  protected final static String SNOWBALL_PROGRAM = PACKAGE + ".SnowballProgram";    /** whether the snowball stemmers are in the Classpath */  protected static boolean m_Present = false;  /** contains the all the found stemmers (language names) */  protected static Vector m_Stemmers;  /** the current stemmer */  protected Object m_Stemmer;  /** the stem method */  protected transient Method m_StemMethod;  /** the setCurrent method */  protected transient Method m_SetCurrentMethod;  /** the getCurrent method */  protected transient Method m_GetCurrentMethod;     /** check for Snowball statically (needs only to be done once) */  static {    checkForSnowball();    loadStemmers();  }  /**   * initializes the stemmer ("porter")   */  public SnowballStemmer() {    this("porter");  }  /**   * initializes the stemmer with the given stemmer   *   * @param name        the name of the stemmer   */  public SnowballStemmer(String name) {    super();          setStemmer(name);  }  /**   * checks whether Snowball is present in the classpath   */  private static void checkForSnowball() {    try {      Class.forName(SNOWBALL_PROGRAM);      m_Present = true;    }    catch (Exception e) {      m_Present = false;    }  }  /**   * Returns a string describing the stemmer   * @return a description suitable for   *         displaying in the explorer/experimenter gui   */  public String globalInfo() {    return         "A wrapper class for the Snowball stemmers. Only available if the "      + "Snowball classes are in the classpath.\n"      + "For more information visit these web sites:\n"      + "  http://www.cs.waikato.ac.nz/~ml/weka/stemmers/\n"      + "  http://snowball.tartarus.org/\n";  }    /**   * Returns an enumeration describing the available options.   *   * @return an enumeration of all the available options.   */  public Enumeration listOptions() {    Vector        result;        result = new Vector();        result.addElement(new Option(        "\tThe name of the snowball stemmer (default 'porter').\n"        + "\tavailable stemmers:\n"         + getStemmerList(65, "\t   "),        "S", 1, "-S <name>"));        return result.elements();  }    /**   * Parses the options. <p/>   *   <!-- options-start -->   * Valid options are: <p/>   *    * <pre> -S &lt;name&gt;   *  The name of the snowball stemmer (default 'porter').   *  available stemmers:   *     danish, dutch, english, finnish, french, german, italian,    *     norwegian, porter, portuguese, russian, spanish, swedish   * </pre>   *    <!-- options-end -->   *   * @param options	the options to parse   * @throws Exception 	if parsing fails   */  public void setOptions(String[] options) throws Exception {    String      tmpStr;        tmpStr = Utils.getOption('S', options);    if (tmpStr.length() != 0)      setStemmer(tmpStr);    else      setStemmer("porter");  }    /**   * Gets the current settings of the classifier.   *   * @return an array of strings suitable for passing to setOptions   */  public String[] getOptions() {    Vector        result;        result  = new Vector();        if (getStemmer() != null) {      result.add("-S");      result.add("" + getStemmer());    }        return (String[]) result.toArray(new String[result.size()]);  }  /**   * extracts the stemmer name form the classname   *    * @param classname     the full classname of the stemmer   * @return              the name of the stemmer   */  private static String getStemmerName(String classname) {    return classname.replaceAll(".*\\.", "").replaceAll("Stemmer$", "");  }  /**   * returns the full classname of the stemmer   *   * @param name          the name of the stemmer   * @return              the full classname of the stemmer   * @see                 #PACKAGE_EXT   */  private static String getStemmerClassname(String name) {    return PACKAGE_EXT + "." + name + "Stemmer";  }  /**   * retrieves the language names of the availabel stemmers   */  private static void loadStemmers() {    Vector        classnames;    int           i;        m_Stemmers = new Vector();        if (!m_Present)      return;    classnames = ClassDiscovery.find(SNOWBALL_PROGRAM, PACKAGE_EXT);    for (i = 0; i < classnames.size(); i++)      m_Stemmers.add(getStemmerName(classnames.get(i).toString()));  }  /**   * returns whether Snowball is present or not, i.e. whether the classes are   * in the classpath or not   *   * @return whether Snowball is available   */  public static boolean isPresent() {    return m_Present;  }  /**   * returns an enumeration over all currently stored stemmer names   *    * @return all available stemmers   */  public static Enumeration listStemmers() {    return m_Stemmers.elements();  }  /**   * generates a comma list of the available stemmers   *    * @param lineLength    the max line length, before a linefeed is inserted   *                      (0 is unlimited)   * @param indention     the indention of a line   * @return              the generated list   */  private static String getStemmerList(int lineLength, String indention) {    String        result;    Enumeration   enm;    String        name;    String        line;        result = "";    line   = "";    enm    = listStemmers();    while (enm.hasMoreElements()) {      name = enm.nextElement().toString();      if (line.length() > 0)        line += ", ";      if ( (lineLength > 0) && (line.length() + name.length() > lineLength) ) {        result += indention + line + "\n";        line    = "";      }      line += name;    }    if (line.length() > 0)      result += indention + line + "\n";        return result;  }  /**   * returns the name of the current stemmer, null if none is set   *    * @return the name of the stemmer   */  public String getStemmer() {    if (m_Stemmer == null)      return null;    else      return getStemmerName(m_Stemmer.getClass().getName());  }  /**   * sets the stemmer with the given name, e.g., "porter"   *   * @param name        the name of the stemmer, e.g., "porter"   */  public void setStemmer(String name) {    Class       snowballClass;    Class[]     argClasses;        if (m_Stemmers.contains(name)) {      try {        snowballClass = Class.forName(getStemmerClassname(name));        m_Stemmer     = snowballClass.newInstance();        // methods        argClasses         = new Class[0];        m_StemMethod       = snowballClass.getMethod("stem", argClasses);                argClasses         = new Class[1];        argClasses[0]      = String.class;        m_SetCurrentMethod = snowballClass.getMethod("setCurrent", argClasses);                argClasses         = new Class[0];        m_GetCurrentMethod = snowballClass.getMethod("getCurrent", argClasses);      }      catch (Exception e) {        System.out.println(              "Error initializing stemmer '" + name + "'!"            + e.getMessage());        m_Stemmer = null;      }    }    else {      System.err.println("Stemmer '" + name + "' unknown!");      m_Stemmer = null;    }  }  /**   * Returns the tip text for this property.   *   * @return tip text for this property suitable for   * displaying in the explorer/experimenter gui   */  public String stemmerTipText() {    return "The Snowball stemmer to use, available: " + getStemmerList(0, "");  }  /**   * Returns the word in its stemmed form.   *   * @param word      the unstemmed word   * @return          the stemmed word   */  public String stem(String word) {    String      result;    Object[]    args;        if (m_Stemmer == null) {      result = new String(word);    }    else {      try {        // set word        args    = new Object[1];        args[0] = word;        m_SetCurrentMethod.invoke(m_Stemmer, args);        // stem word        args = new Object[0];        m_StemMethod.invoke(m_Stemmer, args);        // get word        args   = new Object[0];        result = (String) m_GetCurrentMethod.invoke(m_Stemmer, args);      }      catch (Exception e) {        e.printStackTrace();        result = word;      }    }          return result;  }  /**   * returns a string representation of the stemmer   *    * @return a string representation of the stemmer   */  public String toString() {    String      result;    result  = getClass().getName();    result += " " + Utils.joinOptions(getOptions());    return result.trim();  }  /**   * Runs the stemmer with the given options   *   * @param args      the options   */  public static void main(String[] args) {    try {      Stemming.useStemmer(new SnowballStemmer(), args);    }    catch (Exception e) {      e.printStackTrace();    }  }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -