📄 stopwords.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * Stopwords.java * Copyright (C) 2001 Eibe Frank */package weka.core;import java.util.*;/** * Class that can test whether a given string is a stop word. * Lowercases all words before the test. * * @author Eibe Frank (eibe@cs.waikato.ac.nz) * @author Ashraf M. Kibriya (amk14@cs.waikato.ac.nz) * @version $Revision: 1.2 $ */public class Stopwords { /** The hashtable containing the list of stopwords */ private static Hashtable m_Stopwords = null; static { if (m_Stopwords == null) { m_Stopwords = new Hashtable(); Double dummy = new Double(0); //Stopwords list from Rainbow m_Stopwords.put("a", dummy); m_Stopwords.put("able", dummy); m_Stopwords.put("about", dummy); m_Stopwords.put("above", dummy); m_Stopwords.put("according", dummy); m_Stopwords.put("accordingly", dummy); m_Stopwords.put("across", dummy); m_Stopwords.put("actually", dummy); m_Stopwords.put("after", dummy); m_Stopwords.put("afterwards", dummy); m_Stopwords.put("again", dummy); m_Stopwords.put("against", dummy); m_Stopwords.put("all", dummy); m_Stopwords.put("allow", dummy); m_Stopwords.put("allows", dummy); m_Stopwords.put("almost", dummy); m_Stopwords.put("alone", dummy); m_Stopwords.put("along", dummy); m_Stopwords.put("already", dummy); m_Stopwords.put("also", dummy); m_Stopwords.put("although", dummy); m_Stopwords.put("always", dummy); m_Stopwords.put("am", dummy); m_Stopwords.put("among", dummy); m_Stopwords.put("amongst", dummy); m_Stopwords.put("an", dummy); m_Stopwords.put("and", dummy); m_Stopwords.put("another", dummy); m_Stopwords.put("any", dummy); m_Stopwords.put("anybody", dummy); m_Stopwords.put("anyhow", dummy); m_Stopwords.put("anyone", dummy); m_Stopwords.put("anything", dummy); m_Stopwords.put("anyway", dummy); m_Stopwords.put("anyways", dummy); m_Stopwords.put("anywhere", dummy); m_Stopwords.put("apart", dummy); m_Stopwords.put("appear", dummy); m_Stopwords.put("appreciate", dummy); m_Stopwords.put("appropriate", dummy); m_Stopwords.put("are", dummy); m_Stopwords.put("around", dummy); m_Stopwords.put("as", dummy); m_Stopwords.put("aside", dummy); m_Stopwords.put("ask", dummy); m_Stopwords.put("asking", dummy); m_Stopwords.put("associated", dummy); m_Stopwords.put("at", dummy); m_Stopwords.put("available", dummy); m_Stopwords.put("away", dummy); m_Stopwords.put("awfully", dummy); m_Stopwords.put("b", dummy); m_Stopwords.put("be", dummy); m_Stopwords.put("became", dummy); m_Stopwords.put("because", dummy); m_Stopwords.put("become", dummy); m_Stopwords.put("becomes", dummy); m_Stopwords.put("becoming", dummy); m_Stopwords.put("been", dummy); m_Stopwords.put("before", dummy); m_Stopwords.put("beforehand", dummy); m_Stopwords.put("behind", dummy); m_Stopwords.put("being", dummy); m_Stopwords.put("believe", dummy); m_Stopwords.put("below", dummy); m_Stopwords.put("beside", dummy); m_Stopwords.put("besides", dummy); m_Stopwords.put("best", dummy); m_Stopwords.put("better", dummy); m_Stopwords.put("between", dummy); m_Stopwords.put("beyond", dummy); m_Stopwords.put("both", dummy); m_Stopwords.put("brief", dummy); m_Stopwords.put("but", dummy); m_Stopwords.put("by", dummy); m_Stopwords.put("c", dummy); m_Stopwords.put("came", dummy); m_Stopwords.put("can", dummy); m_Stopwords.put("cannot", dummy); m_Stopwords.put("cant", dummy); m_Stopwords.put("cause", dummy); m_Stopwords.put("causes", dummy); m_Stopwords.put("certain", dummy); m_Stopwords.put("certainly", dummy); m_Stopwords.put("changes", dummy); m_Stopwords.put("clearly", dummy); m_Stopwords.put("co", dummy); m_Stopwords.put("com", dummy); m_Stopwords.put("come", dummy); m_Stopwords.put("comes", dummy); m_Stopwords.put("concerning", dummy); m_Stopwords.put("consequently", dummy); m_Stopwords.put("consider", dummy); m_Stopwords.put("considering", dummy); m_Stopwords.put("contain", dummy); m_Stopwords.put("containing", dummy); m_Stopwords.put("contains", dummy); m_Stopwords.put("corresponding", dummy); m_Stopwords.put("could", dummy); m_Stopwords.put("course", dummy); m_Stopwords.put("currently", dummy); m_Stopwords.put("d", dummy); m_Stopwords.put("definitely", dummy); m_Stopwords.put("described", dummy); m_Stopwords.put("despite", dummy); m_Stopwords.put("did", dummy); m_Stopwords.put("different", dummy); m_Stopwords.put("do", dummy); m_Stopwords.put("does", dummy); m_Stopwords.put("doing", dummy); m_Stopwords.put("done", dummy); m_Stopwords.put("down", dummy); m_Stopwords.put("downwards", dummy); m_Stopwords.put("during", dummy); m_Stopwords.put("e", dummy); m_Stopwords.put("each", dummy); m_Stopwords.put("edu", dummy); m_Stopwords.put("eg", dummy); m_Stopwords.put("eight", dummy); m_Stopwords.put("either", dummy); m_Stopwords.put("else", dummy); m_Stopwords.put("elsewhere", dummy); m_Stopwords.put("enough", dummy); m_Stopwords.put("entirely", dummy); m_Stopwords.put("especially", dummy); m_Stopwords.put("et", dummy); m_Stopwords.put("etc", dummy); m_Stopwords.put("even", dummy); m_Stopwords.put("ever", dummy); m_Stopwords.put("every", dummy); m_Stopwords.put("everybody", dummy); m_Stopwords.put("everyone", dummy); m_Stopwords.put("everything", dummy); m_Stopwords.put("everywhere", dummy); m_Stopwords.put("ex", dummy); m_Stopwords.put("exactly", dummy); m_Stopwords.put("example", dummy); m_Stopwords.put("except", dummy); m_Stopwords.put("f", dummy); m_Stopwords.put("far", dummy); m_Stopwords.put("few", dummy); m_Stopwords.put("fifth", dummy); m_Stopwords.put("first", dummy); m_Stopwords.put("five", dummy); m_Stopwords.put("followed", dummy); m_Stopwords.put("following", dummy); m_Stopwords.put("follows", dummy); m_Stopwords.put("for", dummy); m_Stopwords.put("former", dummy); m_Stopwords.put("formerly", dummy); m_Stopwords.put("forth", dummy); m_Stopwords.put("four", dummy); m_Stopwords.put("from", dummy); m_Stopwords.put("further", dummy); m_Stopwords.put("furthermore", dummy); m_Stopwords.put("g", dummy); m_Stopwords.put("get", dummy); m_Stopwords.put("gets", dummy); m_Stopwords.put("getting", dummy); m_Stopwords.put("given", dummy); m_Stopwords.put("gives", dummy); m_Stopwords.put("go", dummy); m_Stopwords.put("goes", dummy); m_Stopwords.put("going", dummy); m_Stopwords.put("gone", dummy); m_Stopwords.put("got", dummy); m_Stopwords.put("gotten", dummy); m_Stopwords.put("greetings", dummy); m_Stopwords.put("h", dummy); m_Stopwords.put("had", dummy); m_Stopwords.put("happens", dummy); m_Stopwords.put("hardly", dummy); m_Stopwords.put("has", dummy); m_Stopwords.put("have", dummy); m_Stopwords.put("having", dummy); m_Stopwords.put("he", dummy); m_Stopwords.put("hello", dummy); m_Stopwords.put("help", dummy); m_Stopwords.put("hence", dummy); m_Stopwords.put("her", dummy); m_Stopwords.put("here", dummy); m_Stopwords.put("hereafter", dummy); m_Stopwords.put("hereby", dummy); m_Stopwords.put("herein", dummy); m_Stopwords.put("hereupon", dummy); m_Stopwords.put("hers", dummy); m_Stopwords.put("herself", dummy); m_Stopwords.put("hi", dummy); m_Stopwords.put("him", dummy); m_Stopwords.put("himself", dummy); m_Stopwords.put("his", dummy); m_Stopwords.put("hither", dummy); m_Stopwords.put("hopefully", dummy); m_Stopwords.put("how", dummy); m_Stopwords.put("howbeit", dummy); m_Stopwords.put("however", dummy); m_Stopwords.put("i", dummy); m_Stopwords.put("ie", dummy); m_Stopwords.put("if", dummy); m_Stopwords.put("ignored", dummy); m_Stopwords.put("immediate", dummy); m_Stopwords.put("in", dummy); m_Stopwords.put("inasmuch", dummy); m_Stopwords.put("inc", dummy); m_Stopwords.put("indeed", dummy); m_Stopwords.put("indicate", dummy); m_Stopwords.put("indicated", dummy); m_Stopwords.put("indicates", dummy); m_Stopwords.put("inner", dummy); m_Stopwords.put("insofar", dummy); m_Stopwords.put("instead", dummy); m_Stopwords.put("into", dummy); m_Stopwords.put("inward", dummy); m_Stopwords.put("is", dummy); m_Stopwords.put("it", dummy); m_Stopwords.put("its", dummy); m_Stopwords.put("itself", dummy); m_Stopwords.put("j", dummy); m_Stopwords.put("just", dummy); m_Stopwords.put("k", dummy); m_Stopwords.put("keep", dummy); m_Stopwords.put("keeps", dummy); m_Stopwords.put("kept", dummy); m_Stopwords.put("know", dummy); m_Stopwords.put("knows", dummy); m_Stopwords.put("known", dummy); m_Stopwords.put("l", dummy); m_Stopwords.put("last", dummy); m_Stopwords.put("lately", dummy); m_Stopwords.put("later", dummy); m_Stopwords.put("latter", dummy); m_Stopwords.put("latterly", dummy); m_Stopwords.put("least", dummy); m_Stopwords.put("less", dummy); m_Stopwords.put("lest", dummy); m_Stopwords.put("let", dummy); m_Stopwords.put("like", dummy); m_Stopwords.put("liked", dummy); m_Stopwords.put("likely", dummy); m_Stopwords.put("little", dummy); m_Stopwords.put("ll", dummy); //added to avoid words like you'll,I'll etc. m_Stopwords.put("look", dummy); m_Stopwords.put("looking", dummy); m_Stopwords.put("looks", dummy); m_Stopwords.put("ltd", dummy); m_Stopwords.put("m", dummy); m_Stopwords.put("mainly", dummy);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -