citeuliketagquery.java

来自「dragontoolkit用于机器学习」· Java 代码 · 共 134 行

JAVA
134
字号
package dragon.onlinedb.citeulike;

import dragon.onlinedb.*;
import dragon.util.HttpUtil;
/**
 * <p>CiteULike Tag Query</p>
 * <p>Queries www.citeulike.org for the articles carrying a given tag. Each result
 * page is fetched over HTTP, the article keys found on it are stored in
 * {@link #arrPaper}, and individual articles are downloaded and parsed on demand
 * through a {@link CiteULikeArticleParser}.</p>
 * <p>Copyright: Copyright (c) 2003</p>
 * <p>Company: IST, Drexel University</p>
 * @author Davis Zhou
 * @version 1.0
 */

public class CiteULikeTagQuery extends AbstractQuery{
    /** Marker that precedes every article entry on a result page. */
    private static final String TITLE_MARKER="class=\"title\"";
    /** Text that precedes the article key inside an entry's link. */
    private static final String KEY_MARKER="article";
    /** Length of {@link #KEY_MARKER} plus the one separator character that follows it. */
    private static final int KEY_OFFSET=8;
    /** Text of the link that leads to the following result page. */
    private static final String NEXT_MARKER=">Next<";

    /** HTTP helper bound to the CiteULike host. */
    protected HttpUtil http;
    /** Tag to search for; may be null until {@link #setSearchTerm(String)} is called. */
    protected String term;
    /** Article keys extracted from the current result page (first curPageWidth slots are valid). */
    protected String[] arrPaper;
    private ArticleParser parser;

    /**
     * Demo entry point: prints key and title of up to ten articles tagged "network".
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        CiteULikeTagQuery query;
        Article article;
        int i,top;

        query=new CiteULikeTagQuery("network");
        query.initQuery();

        top=10;
        for(i=0;i<top && query.moveToNextArticle();i++){
            article=query.getArticle();
            // an article that failed to download or parse is reported as null; skip it
            if(article==null)
                continue;
            System.out.println(article.getKey()+" "+article.getTitle());
        }
    }

    /**
     * Creates a query without a search term; call {@link #setSearchTerm(String)}
     * before paging through results.
     */
    public CiteULikeTagQuery(){
        this(null);
    }

    /**
     * Creates a query for the given tag.
     *
     * @param term the tag to search for; may be null and set later
     */
    public CiteULikeTagQuery(String term){
        super(50);
        parser=new CiteULikeArticleParser();
        arrPaper=new String[pageWidth];
        this.term=term;
        http=new HttpUtil("www.citeulike.org");
    }

    /**
     * @return always true; articles can be fetched directly via {@link #getArticleByKey(String)}
     */
    public boolean supportArticleKeyRetrieval(){
        return true;
    }

    /**
     * Replaces the tag to search for. Call {@link #initQuery()} afterwards to
     * reset the paging state.
     *
     * @param term the new tag
     */
    public void setSearchTerm(String term){
        this.term=term;
    }

    /**
     * Resets the paging state so that the next page request starts from the first
     * result page. No network access happens here.
     *
     * @return always true
     */
    public  boolean initQuery(){
        curPageNo=-1;
        curArticle=null;
        curPageWidth=0;
        pageNum=1;
        return true;
    }

    /**
     * Fetches and processes the given result page.
     *
     * @param pageNo zero-based page number
     * @return true if the page is (or already was) the current page and contains
     *         at least one article key; false if the page is out of range, no
     *         search term is set, the download failed or no key was found
     */
    public boolean moveToPage(int pageNo){
        String curUrl;
        String content;

        if (pageNo >= pageNum || pageNum==0)
            return false;
        if(pageNo==curPageNo) return true;
        // without a term the request would literally search for "null"
        if(term==null || term.trim().length()==0)
            return false;

        // NOTE(review): the term is appended verbatim; terms containing spaces or
        // reserved characters may need URL encoding - confirm what HttpUtil.get expects.
        curUrl ="/search/all?f=tag&q="+term;
        if(pageNo>0)
            curUrl=curUrl+"&page="+(pageNo+1); // the site's page parameter is one-based here
        content=http.get(curUrl);
        if (content == null)
            return false;
        return processPage(pageNo,content);
    }

    /**
     * Extracts the article keys from a downloaded result page, makes the page the
     * current one, loads its first article and updates the known number of pages.
     *
     * @param pageNo zero-based number of the page that was downloaded
     * @param content raw page text
     * @return true if at least one article key was found on the page
     */
    private boolean processPage(int pageNo, String content){
        int start, end, count;

        count = 0;
        start = content.indexOf(TITLE_MARKER);
        // stop once the key buffer is full instead of overrunning it
        while (start >= 0 && count < arrPaper.length) {
            start=content.indexOf(KEY_MARKER,start);
            if(start<0)
                break;
            start=start+KEY_OFFSET;
            end=content.indexOf('\"',start);
            // truncated page: no closing quote after the key
            if(end<0)
                break;
            arrPaper[count] = content.substring(start,end);
            count = count + 1;
            start = content.indexOf(TITLE_MARKER,end);
        }
        curPageNo=pageNo;
        curPageWidth = count;
        curArticleNo = 0;
        if(curPageWidth==0)
            return false;
        curArticle=getArticleByKey(arrPaper[curArticleNo]);

        //adjust page number
        end=content.indexOf(NEXT_MARKER);
        if(end<0)
            pageNum=pageNo+1; // no "Next" link: this is the last page
        else
            pageNum=parsePageCount(content,end,pageNo+2);
        return true;
    }

    /**
     * Reads the number shown in the last anchor before the "Next" link.
     *
     * @param content raw page text
     * @param nextPos position of the "Next" link marker within content
     * @param fallback value to return when no number can be read there
     * @return the parsed number, or fallback if the text is missing or not numeric
     */
    private static int parsePageCount(String content, int nextPos, int fallback){
        int start, end;

        end=content.lastIndexOf("</a>",nextPos);
        if(end<0)
            return fallback;
        // lastIndexOf yields -1 when no '>' precedes, which makes the text start at 0
        start=content.lastIndexOf('>',end);
        try{
            return Integer.parseInt(content.substring(start+1,end).trim());
        }
        catch(NumberFormatException e){
            return fallback;
        }
    }

    /**
     * Downloads and parses the article with the given key.
     *
     * @param id the article key
     * @return the parsed article, or null if the key is null, the download failed
     *         or parsing threw an exception
     */
    public Article getArticleByKey(String id){
        String curUrl;
        String content;

        if(id==null)
            return null;
        try{
            curUrl = "/article/" + id;
            content = http.get(curUrl);
            if (content == null)
                return null;
            return parser.parse(content);
        }
        catch(Exception e){
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Loads the article at the given position of the current result page.
     *
     * @param articleNo zero-based position within the current page
     * @return the article, or null if the position is outside the keys collected
     *         for the current page or the article could not be loaded
     */
    protected Article getArticle(int articleNo){
        // slots at or beyond curPageWidth hold stale keys from earlier pages (or null)
        if(articleNo<0 || articleNo>=curPageWidth)
            return null;
        return getArticleByKey(arrPaper[articleNo]);
    }
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?