📄 matchchineseanalyzer.java
字号:
/*
* @author: Hades, created: 2007-1-25
*
* Shantou University, Computer Science undergraduate (2003 cohort)
*
*/
package edu.stu.cn.lucene.analysis;
import java.io.Reader;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import edu.stu.cn.segment.matching.processor.SegmentProcessorImpl;
/**
* Lucene analyzer based on forward maximum matching.
*
* @author Hades Guan
*/
public class MatchChineseAnalyzer extends Analyzer
{
    /**
     * Words that are rarely useful as search terms.
     * <p>
     * This array is not referenced by the code in this class; the
     * single-argument constructor obtains its stop set from
     * {@code StopWordMaker.retreive()} instead.
     */
    public static final String[] STOP_WORDS =
    {
        "a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in",
        "into", "is", "it", "no", "not", "of", "on", "or", "s", "such",
        "t", "that", "the", "their", "then", "there", "these", "they",
        "this", "to", "was", "will", "with", "", "www"
    };

    /**
     * Segmentation component handed to every tokenizer created by
     * {@link #tokenStream(String, Reader)}.
     */
    private SegmentProcessorImpl processor;

    /**
     * Stop set passed to the {@link StopFilter} that wraps each tokenizer.
     */
    private Set stopTable;

    /**
     * Creates an analyzer whose stop set comes from
     * {@code StopWordMaker.retreive()}.
     *
     * @param processor
     *            the segmentation component to use
     */
    public MatchChineseAnalyzer(SegmentProcessorImpl processor)
    {
        this.processor = processor;
        this.stopTable = StopWordMaker.retreive();
    }

    /**
     * Creates an analyzer whose stop set is built from the given words via
     * {@link StopFilter#makeStopSet(String[])}.
     *
     * @param processor
     *            the segmentation component to use
     * @param stopWords
     *            the words to filter out of the token stream
     */
    public MatchChineseAnalyzer(SegmentProcessorImpl processor,
            String[] stopWords)
    {
        this.processor = processor;
        this.stopTable = StopFilter.makeStopSet(stopWords);
    }

    /**
     * Returns the segmentation component currently in use.
     *
     * @return the current processor
     */
    public SegmentProcessorImpl getProcessor()
    {
        return this.processor;
    }

    /**
     * Replaces the segmentation component. Token streams created by later
     * calls to {@link #tokenStream(String, Reader)} use the new processor.
     *
     * @param processor
     *            the processor to use from now on
     */
    public void setProcessor(SegmentProcessorImpl processor)
    {
        this.processor = processor;
    }

    /**
     * Builds the token stream for a field: a {@code MatchChineseTokenizer}
     * over the reader, wrapped in a {@link StopFilter} using this analyzer's
     * stop set. The field name is not used.
     *
     * @param fieldName
     *            name of the field being analyzed (ignored here)
     * @param reader
     *            source of the text to tokenize
     * @return the stop-filtered token stream
     * @see org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String,
     *      java.io.Reader)
     */
    @Override
    public TokenStream tokenStream(String fieldName, Reader reader)
    {
        MatchChineseTokenizer tokenizer =
                new MatchChineseTokenizer(this.processor, reader);
        return new StopFilter(tokenizer, this.stopTable);
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -