WebGraph.java
/*
 * WebSPHINX web crawling toolkit
 * Copyright (C) 1998,1999 Carnegie Mellon University
 *
 * This library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Library
 * General Public License as published by the Free Software
 * Foundation, version 2.
 *
 * WebSPHINX homepage: http://www.cs.cmu.edu/~rcm/websphinx/
 */

package websphinx.workbench;

import websphinx.*;
import java.awt.*;
import java.util.Hashtable;
import java.util.Vector;
import java.applet.Applet;
import java.applet.AppletContext;
import java.net.URL;
import java.net.MalformedURLException;
import websphinx.util.Colors;
import websphinx.util.GraphLayout;
import websphinx.util.ClosableFrame;
import java.awt.image.MemoryImageSource;
import graph.Graph;
import websphinx.util.Constrain;
import websphinx.util.PopupDialog;

// FIX: connect ALREADY_VISITED links to page
public class WebGraph extends GraphLayout implements CrawlListener, LinkListener {

    Hashtable links = new Hashtable ();
        // maps Link -> WebNode (for root links) or WebEdge (for internal links)

    /**
     * Make a WebGraph.
     */
    public WebGraph () {
        setPageIcon (defaultPageIcon);
        setLinkIcon (defaultLinkIcon);
        setRetrievingIcon (defaultRetrievingIcon);
        setErrorIcon (defaultErrorIcon);
    }

    // Filtering of a node's outgoing links
    static final int NO_LINKS = 0;         // Show no outgoing links
    static final int RETRIEVED_LINKS = 1;  // Show only links that crawler started to retrieve
    static final int WALKED_LINKS = 2;     // Show RETRIEVED_LINKS, plus links queued for retrieval
    static final int TREE_LINKS = 3;       // Show WALKED_LINKS, plus links skipped by walk()
    static final int ALL_LINKS = 4;        // Show TREE_LINKS, plus links to already-visited pages

    int defaultFilter = RETRIEVED_LINKS;

    // Change the filter of a node
    synchronized void setLinkFilter (WebNode node, int filter) {
        if (filter == node.filter)
            return;

        Page page = node.link.getPage ();
        if (page != null) {
            Link[] linkarray = page.getLinks ();

            if (filter < node.filter) {
                // new mode is more restrictive; delete undesired edges
                for (int j=0; j<linkarray.length; ++j) {
                    if (!shouldDisplay (filter, linkarray[j].getStatus ())) {
                        WebEdge edge = (WebEdge)links.get (linkarray[j]);
                        if (edge != null) {
                            removeNode ((WebNode)edge.to);
                            removeEdge (edge);
                            links.remove (linkarray[j]);
                        }
                    }
                }
            }
            else if (filter > node.filter) {
                // new mode is less restrictive; add edges
                for (int j=0; j<linkarray.length; ++j)
                    update (linkarray[j]);  // update() will check shouldDisplay()
            }
        }

        node.filter = filter;
    }

    // Change the filter of ALL nodes
    synchronized void setLinkFilter (int filter) {
        defaultFilter = filter;

        Graph graph = getGraph ();
        for (int i=0; i<graph.sizeNodes; ++i) {
            WebNode n = (WebNode)graph.nodes[i];
            setLinkFilter (n, filter);
        }
    }

    // Node rendering
    static final int ICON = 0;          // Show an icon
    static final int TITLE = 1;         // Show page title (or URL if not downloaded)
    static final int ABSOLUTE_URL = 2;  // Show absolute URL
    static final int RELATIVE_URL = 3;  // Show URL relative to parent

    int defaultRendering = ICON;

    // Change the rendering of a node
    void setNodeRendering (WebNode n, int r) {
        n.rendering = r;
        update (n);
        repaint ();
    }

    // Change the rendering of ALL nodes
    synchronized void setNodeRendering (int r) {
        defaultRendering = r;

        Graph graph = getGraph ();
        for (int i=0; i<graph.sizeNodes; ++i) {
            WebNode n = (WebNode)graph.nodes[i];
            n.rendering = r;
            update (n);
        }
        changedGraph ();
    }

    /**
     * Show control panel for changing graph layout parameters.
     */
    public void showControlPanel () {
        new WorkbenchControlPanel (this, null).show ();
    }

    /**
     * Clear the graph display.
     */
    public synchronized void clear () {
        links.clear ();
        super.clear ();
    }

    /**
     * Notify that the crawler started.
     */
    public void started (CrawlEvent event) {
    }

    /**
     * Notify that the crawler has stopped.
     */
    public void stopped (CrawlEvent event) {
    }

    /**
     * Notify that the crawler's state was cleared.
     */
    public void cleared (CrawlEvent event) {
        clear ();
    }

    /**
     * Notify that the crawler has timed out.
     */
    public void timedOut (CrawlEvent event) {
    }

    /**
     * Notify that the crawler is paused.
     */
    public void paused (CrawlEvent event) {
    }

    /**
     * Notify that a crawling event has occurred.
     */
    public void crawled (LinkEvent event) {
        update (event.getLink ());
    }

    // check whether we want to display a link with this status
    boolean shouldDisplay (int filter, int status) {
        switch (status) {
            case LinkEvent.QUEUED:
            case LinkEvent.TOO_DEEP:
                return (filter > RETRIEVED_LINKS);
            case LinkEvent.SKIPPED:
                return (filter > WALKED_LINKS);
            case LinkEvent.ALREADY_VISITED:
                return (filter > TREE_LINKS);
            case LinkEvent.RETRIEVING:
            case LinkEvent.DOWNLOADED:
            case LinkEvent.VISITED:
            case LinkEvent.ERROR:
                return true;
            default:
                return false;
        }
    }

    /**
     * Update all the links that the crawler reached from this link.
     * Any reachable links not present in the graph are added.
     */
    public void updateClosure (Link[] links) {
        if (links == null)
            return;
        for (int i=0; i<links.length; ++i) {
            Link link = links[i];
            int status = link.getStatus ();
            if (status == LinkEvent.NONE)
                continue;
            update (link);
            if (status == LinkEvent.DOWNLOADED || status == LinkEvent.VISITED) {
                Page page = link.getPage ();
                if (page != null)
                    updateClosure (page.getLinks ());
            }
        }
    }

    /**
     * Update the edge and node associated with a link.
     * If the link is not present in the graph, it is added.
     */
    public synchronized void update (Link link) {
        Object obj = links.get (link);
        if (obj == null) {
            add (link);
        }
        else if (obj instanceof WebEdge) {
            WebEdge e = (WebEdge) obj;
            update (e);
            update ((WebNode)e.to);
        }
        else {
            // obj instanceof WebNode
            update ((WebNode)obj);
        }
        repaint ();
    }

    synchronized void add (Link link) {
        WebNode n = new WebNode (link, defaultFilter, defaultRendering);
        WebNode parent = findParent (link);
        if (parent == null) {
            links.put (link, n);
            update (n);
            addNode (n);
            if (getGraph ().sizeNodes == 1) {
                // root node of first tree -- put it at the origin and fix it
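
The listing stops partway through add(); the rest of the class is not shown here. For orientation, here is a minimal sketch of how a WebGraph is typically attached to a crawl, assuming the standard websphinx.Crawler API (setRoot, setMaxDepth, addCrawlListener, addLinkListener, run); the seed URL, depth limit, and the WebGraphDemo class are placeholders for illustration, not part of this file.

// Minimal usage sketch (not part of WebGraph.java). Assumes the standard
// websphinx.Crawler API: setRoot(Link), setMaxDepth(int), addCrawlListener,
// addLinkListener, and run(). Seed URL and depth limit are placeholders.

import java.awt.Frame;
import java.net.MalformedURLException;
import websphinx.Crawler;
import websphinx.Link;
import websphinx.workbench.WebGraph;

public class WebGraphDemo {
    public static void main (String[] args) throws MalformedURLException {
        Crawler crawler = new Crawler ();
        crawler.setRoot (new Link ("http://www.cs.cmu.edu/~rcm/websphinx/"));
        crawler.setMaxDepth (2);  // placeholder depth limit

        // WebGraph listens to both crawler-level and link-level events:
        // crawled(LinkEvent) calls update(Link), which adds nodes and edges.
        WebGraph graph = new WebGraph ();
        crawler.addCrawlListener (graph);
        crawler.addLinkListener (graph);

        Frame frame = new Frame ("WebGraph demo");
        frame.add (graph);
        frame.setSize (600, 400);
        frame.show ();  // AWT 1.1-era call, matching the library's vintage

        crawler.run ();  // crawl synchronously; the graph updates as links arrive
    }
}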