📄 stopwords.java
字号:
add("relatively"); add("respectively"); add("right"); add("s"); add("said"); add("same"); add("saw"); add("say"); add("saying"); add("says"); add("second"); add("secondly"); add("see"); add("seeing"); add("seem"); add("seemed"); add("seeming"); add("seems"); add("seen"); add("self"); add("selves"); add("sensible"); add("sent"); add("serious"); add("seriously"); add("seven"); add("several"); add("shall"); add("she"); add("should"); add("since"); add("six"); add("so"); add("some"); add("somebody"); add("somehow"); add("someone"); add("something"); add("sometime"); add("sometimes"); add("somewhat"); add("somewhere"); add("soon"); add("sorry"); add("specified"); add("specify"); add("specifying"); add("still"); add("sub"); add("such"); add("sup"); add("sure"); add("t"); add("take"); add("taken"); add("tell"); add("tends"); add("th"); add("than"); add("thank"); add("thanks"); add("thanx"); add("that"); add("thats"); add("the"); add("their"); add("theirs"); add("them"); add("themselves"); add("then"); add("thence"); add("there"); add("thereafter"); add("thereby"); add("therefore"); add("therein"); add("theres"); add("thereupon"); add("these"); add("they"); add("think"); add("third"); add("this"); add("thorough"); add("thoroughly"); add("those"); add("though"); add("three"); add("through"); add("throughout"); add("thru"); add("thus"); add("to"); add("together"); add("too"); add("took"); add("toward"); add("towards"); add("tried"); add("tries"); add("truly"); add("try"); add("trying"); add("twice"); add("two"); add("u"); add("un"); add("under"); add("unfortunately"); add("unless"); add("unlikely"); add("until"); add("unto"); add("up"); add("upon"); add("us"); add("use"); add("used"); add("useful"); add("uses"); add("using"); add("usually"); add("uucp"); add("v"); add("value"); add("various"); add("ve"); //added to avoid words like I've,you've etc. 
add("very"); add("via"); add("viz"); add("vs"); add("w"); add("want"); add("wants"); add("was"); add("way"); add("we"); add("welcome"); add("well"); add("went"); add("were"); add("what"); add("whatever"); add("when"); add("whence"); add("whenever"); add("where"); add("whereafter"); add("whereas"); add("whereby"); add("wherein"); add("whereupon"); add("wherever"); add("whether"); add("which"); add("while"); add("whither"); add("who"); add("whoever"); add("whole"); add("whom"); add("whose"); add("why"); add("will"); add("willing"); add("wish"); add("with"); add("within"); add("without"); add("wonder"); add("would"); add("would"); add("x"); add("y"); add("yes"); add("yet"); add("you"); add("your"); add("yours"); add("yourself"); add("yourselves"); add("z"); add("zero"); }

  /**
   * Removes all stopwords from the list.
   */
  public void clear() {
    m_Words.clear();
  }

  /**
   * Adds the given word to the stopword list. The word is trimmed and
   * converted to lower case before it is stored; a word that is empty after
   * trimming is ignored.
   *
   * @param word the word to add
   */
  public void add(String word) {
    String trimmed = word.trim();
    if (trimmed.length() > 0) {
      m_Words.add(trimmed.toLowerCase());
    }
  }

  /**
   * Removes the word from the stopword list. The word is trimmed and
   * converted to lower case first, i.e. it is normalized exactly like in
   * {@link #add(String)}, so that a word can be removed using the same
   * spelling it was added with.
   *
   * @param word the word to remove
   * @return true if the word was found in the list and then removed
   */
  public boolean remove(String word) {
    // entries are stored trimmed + lower-cased, so look them up the same way
    return m_Words.remove(word.trim().toLowerCase());
  }

  /**
   * Returns true if the given string is a stop word.
* * @param word the word to test * @return true if the word is a stopword */ public boolean is(String word) { return m_Words.contains(word.toLowerCase()); } /** * Returns a sorted enumeration over all stored stopwords * * @return the enumeration over all stopwords */ public Enumeration elements() { Iterator iter; Vector list; iter = m_Words.iterator(); list = new Vector(); while (iter.hasNext()) list.add(iter.next()); // sort list Collections.sort(list); return list.elements(); } /** * Generates a new Stopwords object from the given file * * @param filename the file to read the stopwords from * @throws Exception if reading fails */ public void read(String filename) throws Exception { read(new File(filename)); } /** * Generates a new Stopwords object from the given file * * @param file the file to read the stopwords from * @throws Exception if reading fails */ public void read(File file) throws Exception { read(new BufferedReader(new FileReader(file))); } /** * Generates a new Stopwords object from the reader. The reader is * closed automatically. * * @param reader the reader to get the stopwords from * @throws Exception if reading fails */ public void read(BufferedReader reader) throws Exception { String line; clear(); while ((line = reader.readLine()) != null) { line = line.trim(); // comment? if (line.startsWith("#")) continue; add(line); } reader.close(); } /** * Writes the current stopwords to the given file * * @param filename the file to write the stopwords to * @throws Exception if writing fails */ public void write(String filename) throws Exception { write(new File(filename)); } /** * Writes the current stopwords to the given file * * @param file the file to write the stopwords to * @throws Exception if writing fails */ public void write(File file) throws Exception { write(new BufferedWriter(new FileWriter(file))); } /** * Writes the current stopwords to the given writer. The writer is closed * automatically. 
*
   * Output is a "# generated &lt;date&gt;" header line followed by one
   * stopword per line, in sorted order. The writer is closed even if
   * writing fails.
   *
   * @param writer the writer to write the stopwords to
   * @throws Exception if writing fails
   */
  public void write(BufferedWriter writer) throws Exception {
    try {
      // header
      writer.write("# generated " + new Date());
      writer.newLine();

      Enumeration enm = elements();
      while (enm.hasMoreElements()) {
        writer.write(enm.nextElement().toString());
        writer.newLine();
      }

      writer.flush();
    } finally {
      // always release the underlying stream, also when a write fails
      writer.close();
    }
  }

  /**
   * Returns the current stopwords as a single comma-separated string, in
   * sorted order.
   *
   * @return the current stopwords
   */
  public String toString() {
    StringBuffer result = new StringBuffer();

    Enumeration enm = elements();
    while (enm.hasMoreElements()) {
      result.append(enm.nextElement().toString());
      // separator only between elements, not after the last one
      if (enm.hasMoreElements()) {
        result.append(",");
      }
    }

    return result.toString();
  }

  /**
   * Returns true if the given string is a stop word. The test is delegated
   * to the shared <code>m_Stopwords</code> instance, using the lower-cased
   * form of the string.
   *
   * @param str the word to test
   * @return true if the word is a stopword
   */
  public static boolean isStopword(String str) {
    return m_Stopwords.is(str.toLowerCase());
  }

  /**
   * Accepts the following parameter: <p/>
   *
   * -i file <br/>
   * loads the stopwords from the given file <p/>
   *
   * -o file <br/>
   * saves the stopwords to the given file <p/>
   *
   * -p <br/>
   * outputs the current stopwords on stdout <p/>
   *
   * Any additional parameters are interpreted as words to test as stopwords.
   *
   * @param args commandline parameters
   * @throws Exception if something goes wrong
   */
  public static void main(String[] args) throws Exception {
    String input = Utils.getOption('i', args);
    String output = Utils.getOption('o', args);
    boolean print = Utils.getFlag('p', args);

    // words to process?
    // NOTE(review): presumably the option parsing above blanks out the
    // entries it consumed, so only the remaining non-empty arguments are
    // collected here -- verify against Utils.
    Vector words = new Vector();
    for (int i = 0; i < args.length; i++) {
      String arg = args[i].trim();
      if (arg.length() > 0) {
        words.add(arg);
      }
    }

    Stopwords stopwords = new Stopwords();

    // load from file?
    if (input.length() != 0) {
      stopwords.read(input);
    }

    // write to file?
    if (output.length() != 0) {
      stopwords.write(output);
    }

    // output to stdout?
    if (print) {
      System.out.println("\nStopwords:");
      Enumeration enm = stopwords.elements();
      int i = 0;
      while (enm.hasMoreElements()) {
        System.out.println((i+1) + ". " + enm.nextElement());
        i++;
      }
    }

    // check words for being a stopword
    if (words.size() > 0) {
      System.out.println("\nChecking for stopwords:");
      for (int i = 0; i < words.size(); i++) {
        System.out.println(
            (i+1) + ". " + words.get(i) + ": "
            + stopwords.is(words.get(i).toString()));
      }
    }
  }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -