⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 search.java

📁 一个Web爬虫(机器人)
💻 JAVA
字号:
/* * WebSPHINX web crawling toolkit * Copyright (C) 1998,1999 Carnegie Mellon University  *  * This library is free software; you can redistribute it * and/or modify it under the terms of the GNU Library * General Public License as published by the Free Software  * Foundation, version 2. * * WebSPHINX homepage: http://www.cs.cmu.edu/~rcm/websphinx/ */package websphinx.searchengine;import websphinx.*;import java.util.Vector;import java.util.Enumeration;import java.util.NoSuchElementException;public class Search extends Crawler implements Enumeration {    int maxResults;    int walkedResults; // approximate number of results walked to    Vector results = new Vector();  // vector of SearchEngineResults    int nextResult = 0; // next result to be returned by the enumeration    int approxCount = -1; // (approximate) total number of results    boolean crawling = false;    public Search () {        this (Integer.MAX_VALUE);    }    public Search (int maxResults) {        this.maxResults = maxResults;        setDepthFirst (false);        setMaxDepth (Integer.MAX_VALUE);        EventLog.monitor (this); // FIX: debugging only    }    public Search (SearchEngine engine, String keywords, int maxResults) {        this (maxResults);        addQuery (engine, keywords);        search ();    }    public Search (SearchEngine engine, String keywords) {        this (engine, keywords, Integer.MAX_VALUE);    }    public void addQuery (SearchEngine engine, String keywords) {        addRoot (new Link (engine.makeQuery (keywords)));        addClassifier (engine);        walkedResults += engine.getResultsPerPage ();    }    public void search () {        crawling = true;        Thread thread = new Thread (this, "Search");        thread.setDaemon (true);        thread.start ();    }    public int count () {        synchronized (results) {            // block until count is ready            try {                while (approxCount == -1 && crawling)                    results.wait ();          
  } catch (InterruptedException e) {}            return approxCount;        }    }        public boolean hasMoreElements () {        synchronized (results) {            try {                while (nextResult >= results.size() && crawling)                    results.wait ();            } catch (InterruptedException e) {}            return nextResult < results.size ();        }    }    public Object nextElement () {        return nextResult ();    }    public SearchEngineResult nextResult () {        if (!hasMoreElements ())            throw new NoSuchElementException ();        synchronized (results) {            SearchEngineResult result = (SearchEngineResult)results.elementAt (nextResult++);            if (result.rank == 0)               result.rank = nextResult;            return result;        }    }    public void run () {        super.run ();        synchronized (results) {            if (approxCount == -1)                approxCount = 0;            crawling = false;            results.notify ();        }    }    public void visit (Page page) {        synchronized (results) {            if (approxCount == -1)                approxCount = page.getNumericLabel ("searchengine.count", new Integer(0)).intValue();                        Region[] ser = page.getFields ("searchengine.results");            for (int i=0; i<ser.length; ++i) {                if (results.size() == maxResults) {                    stop ();                    return;                }                results.addElement (ser[i]);            }            results.notify ();        }    }        public boolean shouldVisit (Link link) {        if (walkedResults >= maxResults            || !link.hasLabel ("searchengine.more-results"))            return false;        SearchEngine engine = (SearchEngine)link.getSource().getObjectLabel("searchengine.source");        walkedResults += engine.getResultsPerPage ();        return true;    }        public static void main (String[] args) throws Exception {    
    if (args.length == 0) {            System.err.println ("Search <search engine classname> [-max n]  <keywords>*");            return;        }        SearchEngine engine = (SearchEngine) Class.forName (args[0]).newInstance ();        int max = Integer.MAX_VALUE;        int firstKeyword = 1;        if (args[1].equals ("-max")) {            max = Integer.parseInt (args[2]);            firstKeyword = 3;        }        Search ms = new Search (max);        ms.addQuery (engine, concat (args, firstKeyword));        ms.search ();        while (ms.hasMoreElements ())            System.out.println (ms.nextResult ());    }    static String concat (String[] args, int start) {        StringBuffer buf = new StringBuffer ();        for (int i=start; i<args.length; ++i) {            if (buf.length() > 0)                buf.append (' ');            buf.append (args[i]);        }        return buf.toString ();    }    }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -