📄 mmchineseanalyzer.java
字号:
/*
* MMChineseAnalyzer.java
*
* Created on January 4, 2007, 1:49 AM
*
* To change this template, choose Tools | Template Manager
* and open the template in the editor.
*/
package util.word;
import java.io.Reader;
import java.util.Set;
/*
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
*/
/**
 * Pairs an {@code MMChineseTokenizer} with a {@code StopFilter} whose stop set
 * is built from {@link #STOP_WORDS}.
 *
 * <p>A single tokenizer instance is created in the constructor and reused:
 * every call to {@link #tokenStream(Reader)} points that same tokenizer at the
 * supplied reader before wrapping it in a new filter.
 *
 * <p>NOTE(review): the Lucene imports in this file are commented out, so
 * {@code StopFilter} and {@code TokenStream} presumably resolve to classes
 * visible from this package -- confirm.
 *
 * @author JinfengLee
 */
public class MMChineseAnalyzer {

    /**
     * Entries handed to {@code StopFilter.makeStopSet}: single-character
     * Chinese words, a space, punctuation/symbol characters and the digits
     * 0-9.
     */
    public final static String[] STOP_WORDS = {
        "我","个","的","一","不","在","有","是","为","以","于","而","之","来","及","了","因","可","多","您",
        "到","由","这","与","也","此","但","并","其","已","无","们","起","最","再","去","只","又","或","很","亦","某","把","那","吗","吧","给","少",
        "你","乃","它","成","和","等","却","对","被","都","还","呢","叫","使","每", " ","<",">","©","|","[","]","(",")",":","-",",",".","/","+","=",
        "\"","?","!","《","》","0","1","2","3","4","5","6","7","8","9"
    };

    /** Stop set produced once from {@link #STOP_WORDS} and shared by every filter. */
    private Set stopTable;

    /** Tokenizer created in the constructor and reused for each token stream. */
    protected MMChineseTokenizer mmtk;

    /**
     * Builds the stop set and creates the tokenizer.
     *
     * @param dictionaryPath value passed unchanged to the
     *        {@code MMChineseTokenizer} constructor
     */
    public MMChineseAnalyzer(String dictionaryPath) {
        this.stopTable = StopFilter.makeStopSet(STOP_WORDS);
        this.mmtk = new MMChineseTokenizer(dictionaryPath);
    }

    /**
     * Points the shared tokenizer at {@code reader} and wraps it in a
     * {@code StopFilter} that uses this analyzer's stop set.
     *
     * @param reader input handed to the tokenizer via {@code setReader}
     * @return a new {@code StopFilter} over the shared tokenizer
     */
    public TokenStream tokenStream(Reader reader) {
        // The tokenizer is re-targeted first; the filter only wraps it afterwards.
        this.mmtk.setReader(reader);
        final TokenStream filtered = new StopFilter(this.mmtk, this.stopTable);
        return filtered;
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -