📄 snowballstemmer.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * SnowballStemmer.java * Copyright (C) 2005 University of Waikato, Hamilton, New Zealand * */package weka.core.stemmers;import weka.core.Option;import weka.core.OptionHandler;import weka.core.ClassDiscovery;import weka.core.Utils;import java.lang.reflect.Method;import java.util.Enumeration;import java.util.Vector;/** <!-- globalinfo-start --> * A wrapper class for the Snowball stemmers. Only available if the Snowball classes are in the classpath.<br/> * For more information visit these web sites:<br/> * http://www.cs.waikato.ac.nz/~ml/weka/stemmers/<br/> * http://snowball.tartarus.org/<br/> * <p/> <!-- globalinfo-end --> * <!-- options-start --> * Valid options are: <p/> * * <pre> -S <name> * The name of the snowball stemmer (default 'porter'). * available stemmers: * danish, dutch, english, finnish, french, german, italian, * norwegian, porter, portuguese, russian, spanish, swedish * </pre> * <!-- options-end --> * * @author FracPete (fracpete at waikato dot ac dot nz) * @version $Revision: 1.6 $ */public class SnowballStemmer implements Stemmer, OptionHandler { /** for serialization */ static final long serialVersionUID = -6111170431963015178L; /** the package name for snowball */ public final static String PACKAGE = "org.tartarus.snowball"; /** the package name where the stemmers are located */ public final static String PACKAGE_EXT = PACKAGE + ".ext"; /** the snowball program, all stemmers are derived from */ protected final static String SNOWBALL_PROGRAM = PACKAGE + ".SnowballProgram"; /** whether the snowball stemmers are in the Classpath */ protected static boolean m_Present = false; /** contains the all the found stemmers (language names) */ protected static Vector m_Stemmers; /** the current stemmer */ protected Object m_Stemmer; /** the stem method */ protected transient Method m_StemMethod; /** the setCurrent method */ protected transient Method m_SetCurrentMethod; /** the getCurrent method */ protected transient Method m_GetCurrentMethod; /** check for Snowball statically (needs only to be done once) */ static { checkForSnowball(); loadStemmers(); } /** * initializes the stemmer ("porter") */ public SnowballStemmer() { this("porter"); } /** * initializes the stemmer with the given stemmer * * @param name the name of the stemmer */ public SnowballStemmer(String name) { super(); setStemmer(name); } /** * checks whether Snowball is present in the classpath */ private static void checkForSnowball() { try { Class.forName(SNOWBALL_PROGRAM); m_Present = true; } catch (Exception e) { m_Present = false; } } /** * Returns a string describing the stemmer * @return a description suitable for * displaying in the explorer/experimenter gui */ public String globalInfo() { return "A wrapper class for the Snowball stemmers. Only available if the " + "Snowball classes are in the classpath.\n" + "For more information visit these web sites:\n" + " http://www.cs.waikato.ac.nz/~ml/weka/stemmers/\n" + " http://snowball.tartarus.org/\n"; } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. */ public Enumeration listOptions() { Vector result; result = new Vector(); result.addElement(new Option( "\tThe name of the snowball stemmer (default 'porter').\n" + "\tavailable stemmers:\n" + getStemmerList(65, "\t "), "S", 1, "-S <name>")); return result.elements(); } /** * Parses the options. <p/> * <!-- options-start --> * Valid options are: <p/> * * <pre> -S <name> * The name of the snowball stemmer (default 'porter'). * available stemmers: * danish, dutch, english, finnish, french, german, italian, * norwegian, porter, portuguese, russian, spanish, swedish * </pre> * <!-- options-end --> * * @param options the options to parse * @throws Exception if parsing fails */ public void setOptions(String[] options) throws Exception { String tmpStr; tmpStr = Utils.getOption('S', options); if (tmpStr.length() != 0) setStemmer(tmpStr); else setStemmer("porter"); } /** * Gets the current settings of the classifier. * * @return an array of strings suitable for passing to setOptions */ public String[] getOptions() { Vector result; result = new Vector(); if (getStemmer() != null) { result.add("-S"); result.add("" + getStemmer()); } return (String[]) result.toArray(new String[result.size()]); } /** * extracts the stemmer name form the classname * * @param classname the full classname of the stemmer * @return the name of the stemmer */ private static String getStemmerName(String classname) { return classname.replaceAll(".*\\.", "").replaceAll("Stemmer$", ""); } /** * returns the full classname of the stemmer * * @param name the name of the stemmer * @return the full classname of the stemmer * @see #PACKAGE_EXT */ private static String getStemmerClassname(String name) { return PACKAGE_EXT + "." + name + "Stemmer"; } /** * retrieves the language names of the availabel stemmers */ private static void loadStemmers() { Vector classnames; int i; m_Stemmers = new Vector(); if (!m_Present) return; classnames = ClassDiscovery.find(SNOWBALL_PROGRAM, PACKAGE_EXT); for (i = 0; i < classnames.size(); i++) m_Stemmers.add(getStemmerName(classnames.get(i).toString())); } /** * returns whether Snowball is present or not, i.e. whether the classes are * in the classpath or not * * @return whether Snowball is available */ public static boolean isPresent() { return m_Present; } /** * returns an enumeration over all currently stored stemmer names * * @return all available stemmers */ public static Enumeration listStemmers() { return m_Stemmers.elements(); } /** * generates a comma list of the available stemmers * * @param lineLength the max line length, before a linefeed is inserted * (0 is unlimited) * @param indention the indention of a line * @return the generated list */ private static String getStemmerList(int lineLength, String indention) { String result; Enumeration enm; String name; String line; result = ""; line = ""; enm = listStemmers(); while (enm.hasMoreElements()) { name = enm.nextElement().toString(); if (line.length() > 0) line += ", "; if ( (lineLength > 0) && (line.length() + name.length() > lineLength) ) { result += indention + line + "\n"; line = ""; } line += name; } if (line.length() > 0) result += indention + line + "\n"; return result; } /** * returns the name of the current stemmer, null if none is set * * @return the name of the stemmer */ public String getStemmer() { if (m_Stemmer == null) return null; else return getStemmerName(m_Stemmer.getClass().getName()); } /** * sets the stemmer with the given name, e.g., "porter" * * @param name the name of the stemmer, e.g., "porter" */ public void setStemmer(String name) { Class snowballClass; Class[] argClasses; if (m_Stemmers.contains(name)) { try { snowballClass = Class.forName(getStemmerClassname(name)); m_Stemmer = snowballClass.newInstance(); // methods argClasses = new Class[0]; m_StemMethod = snowballClass.getMethod("stem", argClasses); argClasses = new Class[1]; argClasses[0] = String.class; m_SetCurrentMethod = snowballClass.getMethod("setCurrent", argClasses); argClasses = new Class[0]; m_GetCurrentMethod = snowballClass.getMethod("getCurrent", argClasses); } catch (Exception e) { System.out.println( "Error initializing stemmer '" + name + "'!" + e.getMessage()); m_Stemmer = null; } } else { System.err.println("Stemmer '" + name + "' unknown!"); m_Stemmer = null; } } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String stemmerTipText() { return "The Snowball stemmer to use, available: " + getStemmerList(0, ""); } /** * Returns the word in its stemmed form. * * @param word the unstemmed word * @return the stemmed word */ public String stem(String word) { String result; Object[] args; if (m_Stemmer == null) { result = new String(word); } else { try { // set word args = new Object[1]; args[0] = word; m_SetCurrentMethod.invoke(m_Stemmer, args); // stem word args = new Object[0]; m_StemMethod.invoke(m_Stemmer, args); // get word args = new Object[0]; result = (String) m_GetCurrentMethod.invoke(m_Stemmer, args); } catch (Exception e) { e.printStackTrace(); result = word; } } return result; } /** * returns a string representation of the stemmer * * @return a string representation of the stemmer */ public String toString() { String result; result = getClass().getName(); result += " " + Utils.joinOptions(getOptions()); return result.trim(); } /** * Runs the stemmer with the given options * * @param args the options */ public static void main(String[] args) { try { Stemming.useStemmer(new SnowballStemmer(), args); } catch (Exception e) { e.printStackTrace(); } }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -