⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 weboutline.java

📁 一个Web爬虫(机器人)
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/* * WebSPHINX web crawling toolkit * Copyright (C) 1998,1999 Carnegie Mellon University  *  * This library is free software; you can redistribute it * and/or modify it under the terms of the GNU Library * General Public License as published by the Free Software  * Foundation, version 2. * * WebSPHINX homepage: http://www.cs.cmu.edu/~rcm/websphinx/ */package websphinx.workbench;import websphinx.*;import java.awt.*;import java.util.Hashtable;import java.util.Vector;import java.net.URL;import java.net.MalformedURLException;import websphinx.util.ClosableFrame;import symantec.itools.awt.TreeView2;import symantec.itools.awt.TreeNode2;import java.awt.image.MemoryImageSource;import websphinx.util.Constrain;import websphinx.util.PopupDialog;import websphinx.util.Colors;public class WebOutline extends TreeView2 implements CrawlListener, LinkListener {    Hashtable links = new Hashtable ();       // maps Link -> TreeNode2           /**     * Make a WebOutline.     */    public WebOutline () {        setPageIcon (defaultPageIcon);        setLinkIcon (defaultRetrievingIcon);        setRetrievingIcon (defaultRetrievingIcon);        setErrorIcon (defaultErrorIcon);    }    /**     * Show control panel for changing layout parameters.     */    public void showControlPanel () {        new WorkbenchControlPanel (null, this).show ();    }    /**     * Clear the outline.     */    public synchronized void clear () {        super.clear ();        links.clear ();    }    /**     * Notify that the crawler started.     */    public void started (CrawlEvent event) {    }    /**     * Notify that the crawler has stopped.     */    public void stopped (CrawlEvent event) {    }    /**     * Notify that the crawler's state was cleared.     
*/    public void cleared (CrawlEvent event) {        clear ();    }    /**     * Notify that the crawler has timed out     */    public void timedOut (CrawlEvent event) {    }    /**     * Notify that the crawler is paused     */    public void paused (CrawlEvent event) {    }    /**     * Notify that a crawling event has occured.     */    public void crawled (LinkEvent event) {        update (event.getLink ());    }    // Page filter        static final int NO_LINKS = 0;                 // Show no outgoing links    static final int RETRIEVED_LINKS = 1;        // Show only links that crawler started to retrieve    static final int WALKED_LINKS = 2;        // Show RETRIEVED_LINKS, plus links queued for retrieval    static final int TREE_LINKS = 3;        // Show WALKED_LINKS, plus links skipped by walk()    static final int ALL_LINKS = 4;        // Show TREE_LINKS, plus links to already-visited pages    int defaultFilter = RETRIEVED_LINKS;    // Change the filter of ALL nodes    synchronized void setLinkFilter (int filter) {        if (filter == defaultFilter)            return;                   int old = defaultFilter;        defaultFilter = filter;                reFilter (getRootNode (), old > filter);        triggerRedraw ();    }            void reFilter (TreeNode2 n, boolean restrict) {        for (; n != null; n = n.getSibling ()) {            Link link = (Link)n.getDataObject();            Page page = link.getPage ();            if (page != null) {                Link[] linkarray = page.getLinks ();                if (restrict) {                    // new mode is more restrictive; delete undesired children                    for (int j=0; j<linkarray.length; ++j) {                        if (!shouldDisplay (linkarray[j].getStatus())) {                            TreeNode2 child = findNode (linkarray[j]);                            if (child != null)                                remove (child);                        }                    }                
}                else {                    // new mode is less restrictive; add children                    for (int j=0; j<linkarray.length; ++j) {                        update (linkarray[j]); // update() will check shouldDisplay()                    }                }            }                        TreeNode2 c = n.getChild();            if (c != null)                reFilter (c, restrict);        }    }        // check whether we want to display a link with this status    boolean shouldDisplay (int status) {        switch (status) {           case LinkEvent.QUEUED:           case LinkEvent.TOO_DEEP:             return (defaultFilter > RETRIEVED_LINKS);           case LinkEvent.SKIPPED:             return (defaultFilter > WALKED_LINKS);           case LinkEvent.ALREADY_VISITED:             return false;          case LinkEvent.RETRIEVING:          case LinkEvent.DOWNLOADED:          case LinkEvent.VISITED:          case LinkEvent.ERROR:            return true;          default:            return false;        }    }    // Node rendering    static final int TITLE = 0;        // Show page title (or URL if not downloaded)    static final int ABSOLUTE_URL = 1;        // Show absolute URL    static final int RELATIVE_URL = 2;        // Show URL relative to parent    int defaultRendering = TITLE;    // Change the rendering of ALL nodes    synchronized void setNodeRendering (int r) {        defaultRendering = r;        reRender (getRootNode ());        triggerRedraw ();    }    void reRender (TreeNode2 n) {        for (; n != null; n = n.getSibling ()) {            update (n);                        TreeNode2 c = n.getChild();            if (c != null)                reRender (c);        }    }        /**     * Update all the links that the crawler reached from this link.     * Any reachable links not present in the graph are added.     
*/    public void updateClosure (Link[] links) {        if (links == null)            return;        for (int i=0; i < links.length; ++i) {            Link link = links[i];            int status = link.getStatus();            if (status == LinkEvent.NONE)                continue;            update (link);            if (status == LinkEvent.DOWNLOADED || status == LinkEvent.VISITED) {                Page page = link.getPage();                if (page != null)                    updateClosure (page.getLinks ());            }        }    }    /**     * Update the edge and node associated with a link.     * If the link is not present in the graph, it is added.     */    public synchronized void update (Link link) {        if (!shouldDisplay (link.getStatus ()))            return;        TreeNode2 n = findNode (link);        if (n == null)            add (link);        else            update (n);        redraw ();    }    synchronized void add (Link link) {        TreeNode2 n = new TreeNode2 ("");        n.setDataObject (link);        Page source = link.getSource ();        Link origin = source.getOrigin ();        TreeNode2 parent = findNode (origin);        if (parent == null) {            update (n);            append (n);        }        else {            update (n);            insert (n, parent, CHILD);            parent.expand ();        }        links.put (link, n);    }    void update (TreeNode2 n) {        Link link = (Link)n.getDataObject ();        Page page = link.getPage ();        int status = link.getStatus ();        Image icon = getIcon (LinkEvent.eventName[status]);        n.setExpandedImage (icon);        n.setCollapsedImage (icon);        if (page == null) {            // not downloaded yet            String name = "";            switch (defaultRendering) {                case TITLE:                case ABSOLUTE_URL:                    name = link.getURL().toString();                    break;                case RELATIVE_URL: {                    
Link origin = link.getSource().getOrigin();                    if (origin != null)                        name = Link.relativeTo (origin.getURL(), link.getURL());                    else                        name = link.getURL().toString();                    break;                }            }            n.setText (name);            n.setColor (Colors.parseColor (link.getLabel ("Workbench.color")));        }        else {            String name = "";            switch (defaultRendering) {                case TITLE: {                    name = page.getTitle ();                    if (name == null)                        name = link.getURL().toString();                    break;                }                case ABSOLUTE_URL:                    name = link.getURL().toString();                    break;                case RELATIVE_URL: {                    Link origin = link.getSource().getOrigin();                    if (origin != null)                        name = Link.relativeTo (origin.getURL(), link.getURL());                    else                        name = link.getURL().toString();                    break;                }            }            n.setText (name);            n.setColor (Colors.parseColor (page.getLabel ("Workbench.color")));        }    }    TreeNode2 findNode (Link l) {        if (l == null)            return null;        else            return (TreeNode2)links.get (l);    }    /*     * LinkView listeners     */    private Vector listeners = new Vector ();    /**     * Add a listener for LinkViewEvents.  A LinkViewEvent is sent every time a     * node or edge in the graph is double-clicked.     * @param listener Object that wants to receive LinkViewEvents      */    public void addLinkViewListener (LinkViewListener listener) {        if (!listeners.contains (listener))

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -