⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 catchpage.java

📁 用java实现的一个bbs的portal
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
        } catch (IOException e) {
        e.printStackTrace();  //To change body of catch statement use Options | File Templates.
        }*/
        String listPage = "";
        try {
            listPage = BotTool.doGet(EntryURL);
        } catch (IOException e) {
            //To change body of catch statement use Options | File Templates.
            System.out.println("exception: " + e.toString());
            return false;
        }
        if (listPage.equals("")) {
            System.out.println("listPage is error");
            return false;
        }

        do {
            if ((listPage != null) && (!listPage.equals(""))) {
                System.out.println("ready to get Items on page"+nowPageUrl);
                catchItemOnPage(listPage);

                if (PageOk) {
                    String tempPage = listPage;
                    listPage = this.getNextPage(listPage);
                    if (tempPage.equalsIgnoreCase(listPage)) {
                        PageOk = false;
                    }
                }
            } else {
                if(nowPageUrlTimes>10){
                    System.err.println("listPage is still empty,over time to finish");
                    return false;
                }
                System.err.println("listPage is empty,Try to go to page:"+nowPageUrl+"\nfor the "+Integer.toString(
                        ++nowPageUrlTimes)+" time");
                try {
                    listPage = BotTool.doGet(nowPageUrl);
                } catch (IOException e) {
                    //To change body of catch statement use Options | File Templates.
                    System.out.println("exception: " + e.toString());
                }
            }
            if (firstArticleDate!=null){
                System.out.println("firstdate = " + BotTool.Date2String(firstArticleDate,"yyyy-MM-dd HH:mm:ss"));
            }
        } while (PageOk);
        return true;
    }

    /**
     * Locates the "next page" link inside a list page and downloads the page it points to.
     *
     * <p>The link is found by searching for {@code pageMask}, walking backwards from there to
     * the nearest {@code pageHerfMask}, and taking the text between the following
     * {@code pageHerfBeginMask} and {@code pageHerfEndMask}. That text is appended to
     * {@code URLbase}. Once a link has been extracted, {@code nowPageUrl} is set to the
     * resulting URL and {@code nowPageUrlTimes} is reset to 0 before the download is attempted.
     *
     * @param listPage HTML of the current list page
     * @return whatever {@code BotTool.doGet} returns for the next page, or {@code null} when
     *         no next-page link can be extracted or the download fails
     */
    public String getNextPage(String listPage) {
        int maskIdx = listPage.indexOf(pageMask, 0);
        if (maskIdx == -1) {
            return null;
        }

        // Each lookup depends on the previous one having succeeded. A -1 start index would
        // silently restart the search at the top of the page and pick up unrelated text, and a
        // missing end marker would make substring() throw.
        int hrefIdx = listPage.lastIndexOf(pageHerfMask, maskIdx);
        int beginIdx = (hrefIdx == -1) ? -1 : listPage.indexOf(pageHerfBeginMask, hrefIdx);
        int endIdx = (beginIdx == -1) ? -1 : listPage.indexOf(pageHerfEndMask, beginIdx + 1);
        if (endIdx == -1) {
            System.out.println("Next page link could not be located on the list page");
            return null;
        }

        // NOTE(review): the fixed offset of 1 presumably assumes pageHerfBeginMask is a single
        // character; catchItemOnPage skips the full mask length instead - confirm.
        String itemPageUrl = URLbase + listPage.substring(beginIdx + 1, endIdx);

        String itemPage = null;
        try {
            System.out.println("Go to Page:"+itemPageUrl);
            nowPageUrl=itemPageUrl;
            nowPageUrlTimes=0;
            itemPage = BotTool.doGet(itemPageUrl);
        } catch (Exception e) {
            // Best effort: the caller receives null; the cause is logged for diagnosis.
            System.out.println("此页面没有读出来");
            System.out.println("exception: " + e.toString());
        }
        return itemPage;
    }




    /**
     * Collects article URLs from a list page into {@code urlVector}.
     *
     * <p>The page is scanned from its end towards its beginning for occurrences of
     * {@code articleMask}. For every occurrence the link text between
     * {@code articleHerfBeginMask} and {@code articleHerfEndMask} (searched from the next
     * {@code articleHerfMask}) is extracted and prefixed with {@code URLbase}. When
     * {@code articleHerfTitleBeginMask} is not empty, the part of the link between the title
     * begin/end masks is URL-encoded with {@code encoderStr} first.
     *
     * <p>Filtering:
     * <ul>
     *   <li>if {@code articleTitleMask} is empty, the URL is kept unless it contains a
     *       non-empty {@code articleHerfNoneMask};</li>
     *   <li>otherwise the title between {@code articleTitleBeginMask} and
     *       {@code articleTitleEndMask} must contain {@code articleTitleMask}, must not contain
     *       a non-empty {@code articleTitleNoneMask}, and the URL must not contain a non-empty
     *       {@code articleHerfNoneMask}.</li>
     * </ul>
     *
     * <p>Any exception raised while handling one occurrence is logged and the scan continues.
     * {@code Num} is incremented once per occurrence, whether or not its URL was kept.
     *
     * @param listPage HTML of the list page to scan
     */
    public void catchItemOnPage(String listPage) {
        if (articleMask.length() == 0) {
            // lastIndexOf("") always matches at the search position and the backward step is
            // the mask length, so an empty mask would never advance and the scan would not end.
            System.out.println("articleMask is empty, no item can be located on the page");
            return;
        }

        // Kept outside the loop: the error message of a failing item reports the last URL that
        // was successfully built, which may belong to the item handled before it.
        String itemPageUrl = "";

        int i = listPage.lastIndexOf(articleMask, listPage.length());
        while (i != -1) {
            try {
                int hrefIdx = listPage.indexOf(articleHerfMask, i);
                int beginIdx = listPage.indexOf(articleHerfBeginMask, hrefIdx);
                int endIdx = listPage.indexOf(articleHerfEndMask, beginIdx + 1);
                String tempString = listPage.substring(beginIdx + articleHerfBeginMask.length(), endIdx);

                if (!articleHerfTitleBeginMask.equals("")) {
                    // Part of the link is raw title text; replace it with its encoded form.
                    int hrefBeginIdx = tempString.indexOf(articleHerfTitleBeginMask);
                    int hrefEndIdx = tempString.indexOf(articleHerfTitleEndMask, hrefBeginIdx + 1);
                    String title = tempString.substring(hrefBeginIdx + articleHerfTitleBeginMask.length(), hrefEndIdx);
                    try {
                        String encodeTitle = URLEncoder.encode(title, encoderStr);
                        tempString = BotTool.replaceString(tempString, title, encodeTitle);
                    } catch (UnsupportedEncodingException e) {
                        // The link is used unencoded when encoderStr names an unknown charset.
                        e.printStackTrace();
                    }
                }

                itemPageUrl = URLbase + tempString;

                if (articleTitleMask.equals("")) {
                    // No title filter configured: only the URL exclusion mask applies.
                    if (articleHerfNoneMask.equals("") || itemPageUrl.indexOf(articleHerfNoneMask) == -1) {
                        System.out.println("URL:"+itemPageUrl+" Has been put into urlVector");
                        urlVector.add(itemPageUrl);
                    } else {
                        System.out.println("Article's URL:"+itemPageUrl+" is not put into urlVector because its URL isn't needed");
                    }
                } else {
                    int titleBeginIdx = listPage.indexOf(articleTitleBeginMask, hrefIdx);
                    int titleEndIdx = listPage.indexOf(articleTitleEndMask, titleBeginIdx + 1);
                    String titleString = listPage.substring(titleBeginIdx + articleTitleBeginMask.length(), titleEndIdx);

                    // Titles that do not contain articleTitleMask are skipped without a message.
                    if (titleString.indexOf(articleTitleMask) != -1) {
                        if (!articleTitleNoneMask.equals("") && titleString.indexOf(articleTitleNoneMask) != -1) {
                            System.out.println("Article:"+titleString+" is not put into urlVector because its TITLE isn't needed");
                        } else if (!articleHerfNoneMask.equals("") && itemPageUrl.indexOf(articleHerfNoneMask) != -1) {
                            System.out.println("Article:"+titleString+" is not put into urlVector because its URL isn't needed");
                        } else {
                            System.out.println("Article:"+titleString+" Has been put into urlVector");
                            urlVector.add(itemPageUrl);
                        }
                    }
                }
            } catch (Exception exp) {
                // A malformed item must not stop the scan of the remaining items.
                System.out.println("Exception happened when PageUrl near "+itemPageUrl+"\n"+exp.toString());
            }

            Num++;
            // Step back past this occurrence; lastIndexOf returns -1 for a negative start
            // index, which ends the loop once the beginning of the page has been passed.
            i = listPage.lastIndexOf(articleMask, i - articleMask.length());
        }
    }
    /**
     * Removes the element at index 0 of {@code urlVector} and returns it.
     *
     * <p>No emptiness check is made here; {@link #noURL()} reports whether the vector is empty.
     *
     * @return the removed element, cast to {@code String}
     */
    public synchronized String getUrlFromVector() {
        Object head = urlVector.remove(0);
        return (String) head;
    }

    /**
     * Stores {@code date} in {@code firstArticleDate} when no date has been stored yet or when
     * {@code date} is after the stored one; otherwise does nothing. The accepted date is
     * printed to standard output between two marker lines.
     *
     * @param date candidate date
     */
    public synchronized void updateFirstArticleDate(Date date) {
        if (firstArticleDate != null && !date.after(firstArticleDate)) {
            // A date is already stored and the candidate is not later than it.
            return;
        }
        System.out.println("===Date===========");
        System.out.println(date);
        System.out.println("===Date===========");
        firstArticleDate = date;
    }

    /**
     * Reports whether {@code urlVector} currently holds no elements.
     *
     * @return {@code true} if {@code urlVector} is empty, {@code false} otherwise
     */
    public boolean noURL() {
        boolean nothingQueued = urlVector.isEmpty();
        return nothingQueued;
    }

    /**
     * Assigns the {@code PageOk} flag.
     *
     * @param pageOk new value of the flag
     */
    public void setPageOk(boolean pageOk) {
        this.PageOk = pageOk;
    }

    /**
     * Returns the current value of {@code dirPath}.
     *
     * @return the value of {@code dirPath}
     */
    public String getDirPath() {
        return this.dirPath;
    }


    /**
     * Manual entry point: builds a {@code CatchPage} from a fixed date and fixed arguments and
     * calls {@code catchItemPage()} on it.
     *
     * @param args command-line arguments; not read
     */
    public static void main(String[] args) {
        Date startDate = BotTool.String2Date("2003-09-15 07:00:00", "yyyy-MM-dd HH:mm:ss");

        CatchPage catPage = new CatchPage(startDate, "SMTH", "SecondHand", 2);
        catPage.catchItemPage();

        // Disabled experiments, kept for reference. The non-ASCII part of the sample records
        // is written with Unicode escapes.
        //
        // Fetching a single article through a CatchThread:
        //CatchThread catThread=new CatchThread(catPage,"0");
        //catThread.catchItem("http://bbs.pku.edu.cn/cgi-bin/bbstcon?board=SecondHand&to=%5B%D7%AA%C8%C3%5D%CE%DE%CF%DF%CD%F8%BF%A8&num=3666");
        //
        // Posting a batch of sample records:
//        Vector artVec=new Vector();
//        artVec.add("pap13#162.105.203.40#\u5b9e\u9a8c\u5546\u54c1#sale");
//        artVec.add("pap14#162.105.203.40#\u5b9e\u9a8c\u5546\u54c1#sale");
//        artVec.add("pap15#162.105.203.40#\u5b9e\u9a8c\u5546\u54c1#sale");
//        artVec.add("pap16#162.105.203.40#\u5b9e\u9a8c\u5546\u54c1#sale");
//        artVec.add("pap17#162.105.203.40#\u5b9e\u9a8c\u5546\u54c1#sale");
//        artVec.add("pap18#162.105.203.40#\u5b9e\u9a8c\u5546\u54c1#sale");
//        artVec.add("pap19#162.105.203.40#\u5b9e\u9a8c\u5546\u54c1#sale");
//        artVec.add("pap20#162.105.203.40#\u5b9e\u9a8c\u5546\u54c1#sale");
//        artVec.add("pap21#162.105.203.40#\u5b9e\u9a8c\u5546\u54c1#sale");
//        artVec.add("pap22#162.105.203.40#\u5b9e\u9a8c\u5546\u54c1#sale");
//        artVec.add("pap23#162.105.203.40#\u5b9e\u9a8c\u5546\u54c1#sale");
//        artVec.add("pap24#162.105.203.40#\u5b9e\u9a8c\u5546\u54c1#sale");
//   catPage.postPage("PAP","papstart","1",artVec);
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -