// File: cwordtokenizer.java
// (code-viewer page residue: font-size control)
package org.apache.lucene.analysis.cw;
import java.io.Reader;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.standard.ParseException;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import java.lang.*;
import java.io.*;
import java.util.*;
import java.util.logging.*;
/**
 * Tokenizer that pre-processes its input with a {@code segmenter} and then
 * delegates the actual tokenization to a {@code CStandardTokenizer}.
 *
 * <p>On construction the whole input is read line by line, each line is passed
 * through {@code segmenter.segmentLine(line, " ")}, and the results (one per
 * line, each followed by a newline) are handed to the delegate. If no segmenter
 * could be loaded, the original reader is handed to the delegate unchanged.
 *
 * <p>NOTE(review): the segmenter presumably inserts the given separator between
 * the words of each line; its implementation is not visible here, so confirm
 * against the {@code segmenter} class.
 *
 * @version 1.0
 */
public final class CWordTokenizer extends Tokenizer {

    /**
     * Segmenter shared by all instances. Stays {@code null} until a load from
     * {@code "zword.obj"} succeeds; once set it is reused instead of reloaded.
     */
    public static segmenter mainsegmenter;

    private static final Logger logger = Logger.getLogger(CWordTokenizer.class.getName());

    /** Delegate that produces the tokens returned by {@link #next()}. */
    public TokenStream tokenizer;

    /**
     * Builds a tokenizer over {@code in}.
     *
     * <p>When a segmenter is available, {@code in} is fully consumed and closed
     * here and the delegate reads the segmented copy. Otherwise {@code in} is
     * passed to the delegate as-is and is closed by {@link #close()}.
     *
     * @param in source text to tokenize
     */
    public CWordTokenizer(Reader in) {
        segmenter active = loadSegmenter();
        Reader source;
        if (active != null) {
            String segmented = segmentAll(active, in);
            logger.info("create StandardAnalyzer and pass the job to it ");
            source = new StringReader(segmented);
        } else {
            logger.warning("work w/o segmenter ");
            source = in;
        }
        tokenizer = new CStandardTokenizer(source);
    }

    /**
     * Returns the shared segmenter, loading it on first use.
     *
     * <p>A failed load is logged (with its cause) and leaves the field
     * {@code null}, so the next construction retries. Synchronized so that
     * concurrent constructions do not load the segmenter more than once.
     *
     * @return the shared segmenter, or {@code null} if it could not be loaded
     */
    private static synchronized segmenter loadSegmenter() {
        if (mainsegmenter == null) {
            try {
                logger.info("Initializing sementer ... ");
                mainsegmenter = SegmenterUtils.getSegmenter("zword.obj");
                logger.info("sementer initialized. ");
            } catch (Exception e) {
                // Best effort: the caller falls back to unsegmented input.
                logger.log(Level.WARNING, "Failed loading segmenter!", e);
            }
        }
        return mainsegmenter;
    }

    /**
     * Reads {@code in} to the end, segments every line and returns the result,
     * one segmented line per input line, each terminated by {@code '\n'}.
     *
     * <p>A read failure is logged and the text gathered so far is returned, so
     * construction never fails because of an {@code IOException}. The reader is
     * always closed before returning.
     *
     * @param seg segmenter to apply to each line
     * @param in  source text; consumed and closed by this method
     * @return the segmented text
     */
    private static String segmentAll(segmenter seg, Reader in) {
        BufferedReader lines = new BufferedReader(in);
        StringBuffer segmented = new StringBuffer();
        logger.info("reading src inputstream ");
        try {
            // Per-line tracing is debug output; skip building the messages unless enabled.
            boolean trace = logger.isLoggable(Level.FINE);
            String line;
            while ((line = lines.readLine()) != null) {
                String processed = seg.segmentLine(line, " ");
                if (trace) {
                    logger.fine("process line: " + line);
                    logger.fine("processed line: " + processed);
                }
                segmented.append(processed).append('\n');
            }
        } catch (IOException ioe) {
            logger.log(Level.WARNING,
                    "Failed reading input; tokenizing only the text read so far", ioe);
        } finally {
            try {
                // Closing the BufferedReader also closes the wrapped reader.
                lines.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if close fails; the text is already buffered.
                logger.log(Level.FINE, "Failed closing input reader", ignored);
            }
        }
        return segmented.toString();
    }

    /**
     * Closes the delegate tokenizer.
     *
     * @throws IOException if the delegate fails to close
     */
    public final void close() throws IOException {
        tokenizer.close();
    }

    /**
     * Returns whatever the delegate tokenizer's {@code next()} returns.
     *
     * @return the delegate's next token
     * @throws IOException if the delegate fails to read
     */
    public final Token next() throws IOException {
        return tokenizer.next();
    }
}
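/*
 * Code-viewer page residue (keyboard shortcut help), not part of the source:
 *   Copy code            Ctrl + C
 *   Search code          Ctrl + F
 *   Full-screen mode     F11
 *   Toggle theme         Ctrl + Shift + D
 *   Show shortcuts       ?
 *   Increase font size   Ctrl + =
 *   Decrease font size   Ctrl + -
 */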