📄 cwordtokenizer.java
字号:
package org.apache.lucene.analysis.cw;import java.io.Reader;import org.apache.lucene.analysis.*;import org.apache.lucene.analysis.Token;import org.apache.lucene.analysis.standard.ParseException;import org.apache.lucene.analysis.standard.StandardTokenizer;import java.lang.*;import java.io.*;import java.util.*;import java.util.logging.*;/** * Title: CWordTokenizer * Description: * Company: * * @author * @version 1.0 * */public final class CWordTokenizer extends Tokenizer { public static segmenter mainsegmenter; private static Logger logger = Logger.getLogger(CWordTokenizer.class.getName()); public TokenStream tokenizer; private Reader input; public CWordTokenizer(Reader in) { try{ logger.info("Initializing sementer ... "); mainsegmenter = SegmenterUtils.getSegmenter("zword.obj"); logger.info("sementer initialized. "); }catch(Exception e){ logger.warning("Failed loading segmenter!"); } if (mainsegmenter != null){ BufferedReader bin = new BufferedReader(in); StringBuffer buffer = new StringBuffer(); String dataline; String processed; logger.info("reading src inputstream "); try{ while ((dataline = bin.readLine()) != null){ processed = mainsegmenter.segmentLine(dataline, " "); logger.info("process line: " + dataline); logger.info("processed line: " + processed); buffer.append(processed + "\n"); } }catch(IOException ioe){ // ignored }finally{ try{ bin.close(); in.close(); }catch(Exception e){} } logger.info("create StandardAnalyzer and pass the job to it "); input = new StringReader(buffer.toString()); }else{ logger.warning("work w/o segmenter "); input = in; } tokenizer = new CStandardTokenizer(input); } public final void close() throws IOException{ tokenizer.close(); } public final Token next() throws IOException { return tokenizer.next(); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -