⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 webgraph.java

📁 一个用java语言编写的网络爬虫程序
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
/*
 * WebSPHINX web crawling toolkit
 * Copyright (C) 1998,1999 Carnegie Mellon University
 *
 * This library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Library
 * General Public License as published by the Free Software
 * Foundation, version 2.
 *
 * WebSPHINX homepage: http://www.cs.cmu.edu/~rcm/websphinx/
 */

package websphinx.workbench;

import websphinx.*;
import java.awt.*;
import java.util.Hashtable;
import java.util.Vector;
import java.applet.Applet;
import java.applet.AppletContext;
import java.net.URL;
import java.net.MalformedURLException;
import websphinx.util.Colors;
import websphinx.util.GraphLayout;
import websphinx.util.ClosableFrame;
import java.awt.image.MemoryImageSource;
import graph.Graph;
import websphinx.util.Constrain;
import websphinx.util.PopupDialog;

// FIX: connect ALREADY_VISITED links to page

/**
 * Graph view of a crawl in progress.  Listens to crawler and link events
 * and mirrors them as nodes and edges in the inherited GraphLayout display.
 */
public class WebGraph extends GraphLayout implements CrawlListener, LinkListener {

    // maps Link -> WebNode (for root links) or WebEdge (for internal links)
    Hashtable links = new Hashtable ();

    /**
     * Make a WebGraph.
*/
    public WebGraph () {
        // Install the default icons for the four node states.
        setPageIcon (defaultPageIcon);
        setLinkIcon (defaultLinkIcon);
        setRetrievingIcon (defaultRetrievingIcon);
        setErrorIcon (defaultErrorIcon);
    }

    // Filtering of a node's outgoing links.
    // Each level is strictly less restrictive than the one before it;
    // shouldDisplay() relies on this ordering (filter > LEVEL comparisons).
    static final int NO_LINKS = 0;          // Show no outgoing links
    static final int RETRIEVED_LINKS = 1;   // Show only links that crawler started to retrieve
    static final int WALKED_LINKS = 2;      // Show RETRIEVED_LINKS, plus links queued for retrieval
    static final int TREE_LINKS = 3;        // Show WALKED_LINKS, plus links skipped by walk()
    static final int ALL_LINKS = 4;         // Show TREE_LINKS, plus links to already-visited pages

    // Filter assigned to newly created nodes (see add()).
    int defaultFilter = RETRIEVED_LINKS;

    // Change the filter of a node.
    // Deletes (or re-adds) the edges out of this node so the display
    // matches the new filter level.
    synchronized void setLinkFilter (WebNode node, int filter) {
        if (filter == node.filter)
            return; // no change
        Page page = node.link.getPage ();
        if (page != null) {
            Link[] linkarray = page.getLinks ();
            if (filter < node.filter) {
                // new mode is more restrictive; delete undesired edges
                for (int j=0; j<linkarray.length; ++j) {
                    if (!shouldDisplay (filter, linkarray[j].getStatus())) {
                        WebEdge edge = (WebEdge)links.get (linkarray[j]);
                        if (edge != null) {
                            removeNode ((WebNode)edge.to);
                            removeEdge (edge);
                            links.remove (linkarray[j]);
                        }
                    }
                }
            }
            else if (filter > node.filter) {
                // new mode is less restrictive; add edges
                for (int j=0; j<linkarray.length; ++j) {
                    update (linkarray[j]); // update() will check shouldDisplay()
                }
            }
        }
        node.filter = filter;
    }

    // Change the filter of ALL nodes.
    // NOTE(review): setLinkFilter(node, filter) may add or remove nodes while
    // this loop indexes graph.nodes / graph.sizeNodes -- confirm that
    // GraphLayout's Graph tolerates mutation during this iteration.
    synchronized void setLinkFilter (int filter) {
        defaultFilter = filter;
        Graph graph = getGraph ();
        for (int i=0; i<graph.sizeNodes; ++i) {
            WebNode n = (WebNode)graph.nodes[i];
            setLinkFilter (n, filter);
        }
    }

    // Node rendering styles.
    static final int ICON = 0;          // Show an icon
    static final int TITLE = 1;         // Show page title (or URL if not downloaded)
    static final int ABSOLUTE_URL = 2;  // Show absolute URL
    static final int RELATIVE_URL = 3;  // Show URL relative to parent

    // Rendering assigned to newly created nodes (see add()).
    int defaultRendering = ICON;

    // Change the rendering of a node.
    void setNodeRendering (WebNode n, int r) {
        n.rendering = r;
        update(n);
        repaint ();
    }

    // Change the rendering of ALL nodes.
    synchronized void setNodeRendering (int r) {
        defaultRendering = r;
        Graph graph = getGraph ();
        for (int i=0; i<graph.sizeNodes; ++i) {
            WebNode n = (WebNode)graph.nodes[i];
            n.rendering = r;
            update (n);
        }
        changedGraph (); // tell the layout the whole graph changed
    }

    /**
     * Show control panel for changing graph layout parameters.
     */
    public void showControlPanel () {
        new WorkbenchControlPanel (this, null).show ();
    }

    /**
     * Clear the graph display.
     */
    public synchronized void clear () {
        links.clear ();
        super.clear ();
    }

    /**
     * Notify that the crawler started.  (No-op for this view.)
     */
    public void started (CrawlEvent event) {
    }

    /**
     * Notify that the crawler has stopped.  (No-op for this view.)
     */
    public void stopped (CrawlEvent event) {
    }

    /**
     * Notify that the crawler's state was cleared; resets the display.
     */
    public void cleared (CrawlEvent event) {
        clear ();
    }

    /**
     * Notify that the crawler has timed out.  (No-op for this view.)
     */
    public void timedOut (CrawlEvent event) {
    }

    /**
     * Notify that the crawler is paused.  (No-op for this view.)
     */
    public void paused (CrawlEvent event) {
    }

    /**
     * Notify that a crawling event has occurred.
*/
    public void crawled (LinkEvent event) {
        update (event.getLink ());
    }

    // Check whether we want to display a link with this status under the
    // given filter level (one of NO_LINKS..ALL_LINKS).
    boolean shouldDisplay (int filter, int status) {
        switch (status) {
          case LinkEvent.QUEUED:
          case LinkEvent.TOO_DEEP:
            return (filter > RETRIEVED_LINKS);
          case LinkEvent.SKIPPED:
            return (filter > WALKED_LINKS);
          case LinkEvent.ALREADY_VISITED:
            return (filter > TREE_LINKS);
          case LinkEvent.RETRIEVING:
          case LinkEvent.DOWNLOADED:
          case LinkEvent.VISITED:
          case LinkEvent.ERROR:
            // links the crawler actually touched are always shown
            return true;
          default:
            return false;
        }
    }

    /**
     * Update all the links that the crawler reached from this link.
     * Any reachable links not present in the graph are added.
     * <p>
     * NOTE(review): the parameter shadows the {@code links} field, and the
     * recursion through each downloaded page's links has no visited-set --
     * presumably the crawler's link structure is acyclic; verify, since a
     * cycle of VISITED pages would recurse without bound.
     */
    public void updateClosure (Link[] links) {
        if (links == null)
            return;
        for (int i=0; i < links.length; ++i) {
            Link link = links[i];
            int status = link.getStatus();
            if (status == LinkEvent.NONE)
                continue; // crawler never touched this link
            update (link);
            if (status == LinkEvent.DOWNLOADED || status == LinkEvent.VISITED) {
                // page content is available; recurse into its out-links
                Page page = link.getPage();
                if (page != null)
                    updateClosure (page.getLinks ());
            }
        }
    }

    /**
     * Update the edge and node associated with a link.
     * If the link is not present in the graph, it is added.
*/
    public synchronized void update (Link link) {
        Object obj = links.get (link);
        if (obj == null) {
            // link not yet in the graph
            add (link);
        }
        else if (obj instanceof WebEdge) {
            // internal link: refresh both the edge and its target node
            WebEdge e = (WebEdge) obj;
            update (e);
            update ((WebNode)e.to);
        }
        else {
            // obj instanceof WebNode
            update ((WebNode)obj);
        }
        repaint ();
    }

    // Add a new node (and, for non-root links, an edge) for this link.
    // NOTE: this method is truncated here -- the listing continues on a
    // later page of the original source.
    synchronized void add (Link link) {
        WebNode n = new WebNode (link, defaultFilter, defaultRendering);
        WebNode parent = findParent (link);
        if (parent == null) {
            links.put (link, n);
            update (n);
            addNode (n);
            if (getGraph().sizeNodes == 1) {
                // root node of first tree -- put it at the origin and fix it

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -