⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 trecfilereader.java

📁 dragontoolkit用于机器学习
💻 JAVA
字号:
package dragon.onlinedb.trec;

import dragon.onlinedb.*;
import dragon.util.*;
import java.io.*;
/**
 * <p>TREC data file reader</p>
 * <p> </p>
 * <p>Copyright: Copyright (c) 2005</p>
 * <p>Company: IST, Drexel University</p>
 * @author Davis Zhou
 * @version 1.0
 */

public class TrecFileReader implements CollectionReader{
    /** Tag that opens a document inside the collection file. */
    private static final String DOC_START = "<DOC>";
    /** Tag that closes a document inside the collection file. */
    private static final String DOC_END = "</DOC>";
    /** Size (in chars) of the read buffer and initial capacity of the text buffer. */
    private static final int BUFFER_SIZE = 10240;

    private ArticleParser parser;
    private BufferedReader reader;
    /** Text read from the file that has not been consumed by a returned article yet. */
    private StringBuffer sb;
    private char[] buf;
    private String filename;
    private File colFile;
    /**
     * curArticleOffset: position of the last returned article, counted in characters from the
     * start of the text read so far. deletedBytes: number of characters already removed from
     * the front of {@link #sb} (despite its name it counts chars, not bytes).
     */
    private long curArticleOffset, deletedBytes;
    private int curArticleLength;
    /** True once the underlying reader has reported end of stream (or no file is loaded). */
    private boolean done;

    /**
     * Creates a reader without a collection file; call {@link #loadFile(String)} or
     * {@link #loadCollection(File)} before reading articles.
     *
     * @param parser the parser applied to the raw text of every document
     */
    public TrecFileReader(ArticleParser parser){
        this(null,parser);
    }

    /**
     * Creates a reader and immediately tries to load the given collection file.
     *
     * @param colFile the collection file, may be null
     * @param parser the parser applied to the raw text of every document
     */
    public TrecFileReader(File colFile, ArticleParser parser) {
        this.parser =parser;
        buf = new char[BUFFER_SIZE];
        loadCollection(colFile);
    }

    /**
     * Loads the collection file with the given path.
     *
     * @param colFile path of the collection file
     * @return true if the file was accepted and opened
     */
    public boolean loadFile(String colFile){
        return loadCollection(new File(colFile));
    }

    /**
     * Resets the reading state and opens the given collection file. Any reader opened by a
     * previous load is closed first so that repeated loads/restarts do not leak file handles.
     *
     * @param colFile the collection file, may be null
     * @return true if the file passed {@link #testCollectionFile(File)} and was opened;
     *         false if it is null, rejected, or opening it failed
     */
    public boolean loadCollection(File colFile){
        // Release the handle of the previously loaded collection before replacing it.
        closeReader();
        reader=null;

        try{
            deletedBytes=0;
            curArticleOffset=-1;
            curArticleLength=0;
            this.colFile =colFile;

            if(colFile==null || !testCollectionFile(colFile)){
                done=true;
                sb=null;
                filename=null;
                return false;
            }
            else{
                filename=colFile.getName();
                reader = FileUtil.getTextReader(colFile);
                done = false;
                sb = new StringBuffer(BUFFER_SIZE);
                return true;
            }
        }
        catch (Exception e) {
            e.printStackTrace();
            reader = null;
            done = true;
            return false;
        }
    }

    /** @return the parser applied to each document */
    public ArticleParser getArticleParser(){
        return parser;
    }

    /** @param parser the parser to apply to each document from now on */
    public void setArticleParser(ArticleParser parser){
        this.parser =parser;
    }

    /**
     * Key based retrieval is not supported by this reader.
     *
     * @param key ignored
     * @return always null
     */
    public Article getArticleByKey(String key){
        return null;
    }

    /**
     * Reads forward until a complete document (text between the start and end tags,
     * tags included) is buffered, parses it and removes it from the buffer.
     *
     * @return the parsed article, or null if no collection is loaded, no further complete
     *         document exists, an end tag has no preceding start tag, or an error occurred
     */
    public Article getNextArticle(){
        Article article;
        int len, start, end, tagPos, from;

        try {
            if(reader==null || sb==null)
                return null;

            tagPos=sb.indexOf(DOC_END);
            while(tagPos<0 && !done){
                len = reader.read(buf);
                if(len<0){
                    // Only a negative count means end of stream; a short read does not.
                    done=true;
                    break;
                }
                // Re-scan the last few chars already buffered so that an end tag split
                // across two reads is still detected.
                from=Math.max(0,sb.length()-(DOC_END.length()-1));
                sb.append(buf,0,len);
                tagPos=sb.indexOf(DOC_END,from);
            }
            if(tagPos<0) return null;

            end=tagPos+DOC_END.length();
            // Search backwards from the end tag itself, so a start tag that directly
            // follows the end tag (next document) can never be picked up.
            start=sb.lastIndexOf(DOC_START,tagPos);
            if(start<0) return null;

            curArticleOffset=deletedBytes+start;
            curArticleLength=end-start;
            article=parser.parse(sb.substring(start,end));
            sb.delete(0,end);
            deletedBytes+=end;
            return article;
        }
        catch (Exception e)
        {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * @return offset of the most recently returned article, in characters of the text read
     *         from the file; -1 if no article has been returned since the last load
     */
    public long getArticleOffset(){
        return curArticleOffset;
    }

    /** @return length in characters of the most recently returned article, tags included */
    public int getArticleLength(){
        return curArticleLength;
    }

    /** @return name (without path) of the loaded collection file, or null if none is loaded */
    public String getArticleFilename(){
        return filename;
    }

    /** Closes the underlying reader, if any. I/O errors are printed and otherwise ignored. */
    public void close(){
        closeReader();
    }

    /** Closes the current reader if one is open; failures are printed, never thrown. */
    private void closeReader(){
        try{
            if (reader != null)
                reader.close();
        }
        catch(Exception e){
            e.printStackTrace();
        }
    }

    /**
     * Checks that the file exists, is not a directory and that its first line, trimmed and
     * compared case-insensitively, is the document start tag. The probe reader is always
     * closed, including for empty files and on errors.
     *
     * @param file the candidate collection file
     * @return true if the file looks like a collection file, false otherwise or on any error
     */
    private boolean testCollectionFile(File file){
        BufferedReader br;
        String firstLine;

        br=null;
        try{
            if(!file.exists() || file.isDirectory())
                return false;
            br=FileUtil.getTextReader(file);
            firstLine=br.readLine();
            // readLine() yields null for an empty file.
            return firstLine!=null && firstLine.trim().equalsIgnoreCase(DOC_START);
        }
        catch(Exception e){
            return false;
        }
        finally{
            if(br!=null){
                try{
                    br.close();
                }
                catch(IOException ignored){
                    // Nothing useful can be done if closing the probe reader fails.
                }
            }
        }
    }

    /** @return always false; see {@link #getArticleByKey(String)} */
    public boolean supportArticleKeyRetrieval(){
        return false;
    }

    /** Re-opens the current collection file and starts reading from its beginning again. */
    public void restart(){
        loadCollection(colFile);
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -