⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cwordtokenizer.java

📁 一个jsp写的bbs
💻 JAVA
字号:
package org.apache.lucene.analysis.cw;

import java.io.Reader;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.standard.ParseException;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import java.lang.*;
import java.io.*;
import java.util.*;
import java.util.logging.*;

/**
 * Tokenizer that pre-segments its input with a shared {@code segmenter} and
 * then delegates the actual tokenization to a {@code CStandardTokenizer}.
 *
 * <p>When a segmenter is available, the constructor consumes the whole reader
 * line by line, passes every line through
 * {@code segmenter.segmentLine(line, " ")} and joins the results with
 * {@code "\n"}. The delegate tokenizer therefore reads the rebuilt text, not
 * the original stream. When no segmenter can be obtained, the original reader
 * is handed to the delegate untouched.</p>
 *
 * @version 1.0
 */

public final class CWordTokenizer extends Tokenizer {
    /**
     * Segmenter shared by all instances. It is loaded from "zword.obj" the
     * first time a tokenizer is constructed and reused afterwards. It stays
     * {@code null} while loading fails, in which case every construction
     * retries the load.
     */
    public static segmenter mainsegmenter;
    private static final Logger logger = Logger.getLogger(CWordTokenizer.class.getName());

    /** Delegate stream that produces the tokens returned by {@link #next()}. */
    public TokenStream tokenizer;

    /**
     * Creates a tokenizer over the given reader.
     *
     * <p>If a segmenter is available the reader is read to the end and closed
     * here; otherwise it is passed on to the delegate tokenizer as is.</p>
     *
     * @param in the character stream to tokenize
     */
    public CWordTokenizer(Reader in) {
        segmenter active = loadSegmenter();

        Reader source;
        if (active != null) {
            String segmented = segmentAll(active, in);
            logger.info("create StandardAnalyzer and pass the job to it ");
            source = new StringReader(segmented);
        } else {
            logger.warning("work w/o segmenter ");
            source = in;
        }
        tokenizer = new CStandardTokenizer(source);
    }

    /**
     * Returns the shared segmenter, loading it on first use.
     *
     * <p>Synchronized so that concurrent constructions do not load the
     * segmenter several times or race on the static field.</p>
     *
     * @return the shared segmenter, or {@code null} if it could not be loaded
     */
    private static synchronized segmenter loadSegmenter() {
        if (mainsegmenter == null) {
            try {
                logger.info("Initializing sementer ... ");
                mainsegmenter = SegmenterUtils.getSegmenter("zword.obj");
                logger.info("sementer initialized. ");
            } catch (Exception e) {
                // Best effort: keep working without a segmenter, but keep the cause.
                logger.log(Level.WARNING, "Failed loading segmenter!", e);
            }
        }
        return mainsegmenter;
    }

    /**
     * Reads every line from {@code in}, segments it and returns the segmented
     * lines joined with {@code "\n"}. The reader is always closed.
     *
     * <p>An I/O error stops the reading; the text gathered up to that point is
     * still returned, and the error is logged.</p>
     *
     * @param seg the segmenter applied to each line
     * @param in  the reader to consume
     * @return the segmented text, one {@code "\n"}-terminated line per input line
     */
    private static String segmentAll(segmenter seg, Reader in) {
        BufferedReader lines = new BufferedReader(in);
        StringBuffer buffer = new StringBuffer();
        logger.info("reading src inputstream ");
        try {
            String dataline;
            while ((dataline = lines.readLine()) != null) {
                String processed = seg.segmentLine(dataline, " ");
                // Skip building the messages when INFO output is disabled.
                if (logger.isLoggable(Level.INFO)) {
                    logger.info("process line: " + dataline);
                    logger.info("processed line: " + processed);
                }
                buffer.append(processed).append('\n');
            }
        } catch (IOException ioe) {
            logger.log(Level.WARNING,
                    "I/O error while reading input; tokenizing the text read so far", ioe);
        } finally {
            try {
                // Closing the buffered wrapper also closes the wrapped reader.
                lines.close();
            } catch (IOException ioe) {
                logger.log(Level.WARNING, "Failed closing input reader", ioe);
            }
        }
        return buffer.toString();
    }

    /**
     * Closes the delegate tokenizer.
     *
     * @throws IOException if the delegate fails to close
     */
    public final void close() throws IOException{
        tokenizer.close();
    }

    /**
     * Returns the next token produced by the delegate tokenizer.
     *
     * @return whatever the delegate's {@code next()} returns
     * @throws IOException if the delegate fails to read
     */
    public final Token next() throws IOException {
        return tokenizer.next();
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -