⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 main.java

📁 java的经典例子
💻 JAVA
字号:
import java.net.*;
import java.io.*;
import java.util.*;

// Command-line driver: starts a Spider at the given URL and prints
// every link it reports, indented by the depth at which it was found.
class Main implements Observer {
    // Creates a spider for 'u' (after the directory heuristic of
    // Spider.adjustIfDir), registers this object as its observer and
    // blocks until the spider thread has finished.
    //
    // NOTE: the Spider constructor starts its thread before returning,
    // so a link discovered before addObserver() runs is not reported
    // to this observer.
    Main(String u, int depth) {
        try {
            URL url = new URL(Spider.adjustIfDir(u));
            Spider spider = new Spider(url, depth);

            spider.addObserver(this);
            spider.thread.join(); // Wait for spider to finish.
        } catch (MalformedURLException e) {
            // Tell the user why nothing was crawled instead of
            // exiting silently.
            System.err.println("Invalid URL: " + u
                + " (" + e.getMessage() + ")");
        } catch (InterruptedException e) {
            // Stop waiting, but keep the interrupt visible to callers.
            Thread.currentThread().interrupt();
        }
    }

    // This method is called immediately whenever the spider
    // discovers a new URL.  It should return as quickly as
    // possible since it is holding up the spider.
    // 'arg' is the SpiderArgs describing the discovered link.
    public void update(Observable o, Object arg) {
        SpiderArgs warg = (SpiderArgs)arg;
        for (int i=0; i<warg.depth; i++) {
            System.out.print("   ");
        }
        System.out.println(warg.dst);
    }

    // Entry point.  Expects exactly two arguments: the start URL and
    // the maximum depth as a decimal integer.
    public static void main(String[] args) {
        if (args.length != 2) {
            System.err.println("Usage: java Main <url> <depth>");
            return;
        }

        int depth;
        try {
            depth = Integer.parseInt(args[1]);
        } catch (NumberFormatException e) {
            // Report a bad depth like any other usage error rather
            // than dying with a stack trace.
            System.err.println("Invalid depth: " + args[1]);
            System.err.println("Usage: java Main <url> <depth>");
            return;
        }
        new Main(args[0], depth);
    }
}

// Crawls pages starting from a home URL on its own thread.  Only http
// links on the home URL's host and port are followed, and observers
// are notified (with a SpiderArgs) of every link seen for the first
// time.
class Spider extends Observable implements Runnable {
    // Every URL already reported to observers; key and value are both
    // the URL itself.
    // NOTE(review): java.net.URL equals/hashCode may resolve host
    // names, so lookups here can block on DNS -- confirm acceptable.
    Hashtable walked = new Hashtable();
    int maxDepth;   // Links found on pages deeper than this are not followed.
    URL homeURL;    // Page the crawl starts from.
    String host;    // Host of homeURL; links to other hosts are dropped.
    int port;       // Port of homeURL (80 if unspecified).
    Thread thread;  // Thread running the crawl; join() it to wait.

    // Records the crawl parameters and immediately starts the crawl
    // thread.  Because the thread is already running when the
    // constructor returns, observers added afterwards can miss the
    // first notifications.
    Spider(URL url, int depth) {
        homeURL = url;
        maxDepth = depth;
        host = url.getHost();
        port = getPort(url);

        // Start spider thread.
        thread = new Thread(this);
        thread.start();
    }

    // Reports every new same-site link found on 'url' and, while
    // curDepth < maxDepth, recursively walks each of those links.
    // Throws IOException if 'url' itself cannot be read; failures on
    // linked pages are printed and skipped.
    void walk(URL url, int curDepth) throws IOException {
        Vector v = findLinks(url);

        // Remove duplicates, off-site links and unparsable links.
        // Iterating backwards keeps the index valid after a removal.
        for (int i=v.size()-1; i>=0; i--) {
            try {
                URL ur = new URL(url, (String)v.elementAt(i));
                if (walked.get(ur) != null
                        || !ur.getProtocol().equals("http")
                        || !(getPort(ur) == port)
                        || !ur.getHost().equals(host)) {
                    v.removeElementAt(i);
                } else {
                    walked.put(ur, ur);
                    setChanged();
                    notifyObservers(new SpiderArgs(url, ur, curDepth));
                }
            } catch (MalformedURLException e) {
                // An unparsable link can never be walked, so drop it
                // here instead of parsing it again below.
                v.removeElementAt(i);
            }
        }

        // Now walk each of the links in url.
        if (curDepth < maxDepth) {
            for (int i=0; i<v.size(); i++) {
                URL ur = null;
                try {
                    ur = new URL(url, (String)v.elementAt(i));
                    walk(ur, curDepth + 1);
                } catch (MalformedURLException e) {
                    // Cannot happen for entries kept by the loop
                    // above; nothing to walk if it does.
                } catch (IOException e) {
                    System.out.println("*** " + url + " -> " + ur);
                }
            }
        }
    }

    // Finds all the links in 'url' and returns them in a vector of
    // Strings (each already passed through adjustIfDir).  The page is
    // scanned line by line for "<a " tags in any letter case; a tag
    // that spans several lines is joined before it is examined.
    // The reader is closed on every exit path.
    Vector findLinks(URL url) throws IOException {
        Vector v = new Vector();
        BufferedReader in = new BufferedReader(
            new InputStreamReader(url.openStream()));

        try {
            String line;
            while ((line = in.readLine()) != null) {
                while (true) {
                    // Earliest anchor tag, whatever its letter case.
                    int p = indexOfIgnoreCase(line, "<a ", 0);
                    if (p < 0) {
                        break;
                    }

                    // Make sure the > is on the same line.
                    int q;
                    while ((q = line.indexOf(">", p)) < 0) {
                        String l = in.readLine();
                        if (l == null) { // EOF reached.
                            return v;
                        }
                        // A line break inside a tag is whitespace;
                        // keep it so attributes do not run together.
                        line += " " + l;
                    }
                    String u = getLink(in, line, p);

                    if (u != null && u.length() > 0) {
                        v.addElement(adjustIfDir(u));
                    }
                    // Continue looking for links on the line.
                    line = line.substring(q+1);
                }
            }
        } finally {
            in.close();
        }
        return v;
    }

    // Returns the port number of 'url'.  If the port number is
    // not defined, returns the default HTTP port number.
    int getPort(URL url) {
        int p = url.getPort();
        if (p == -1) {
            p = 80;
        }
        return p;
    }

    // This method implements a heuristic for URLs that are probably
    // directories.  If the last component of the URL does not contain
    // a dot and does not end with a "/", then it is explicitly
    // converted to a directory by appending a "/".  A URL carrying a
    // query string is returned unchanged, because the "/" would
    // otherwise be appended to the query value.
    static String adjustIfDir(String s) {
        if (s.indexOf('?') >= 0) {
            return s;
        }

        int p = s.lastIndexOf("/") + 1;

        if (!s.endsWith("/") && s.indexOf(".", p) < 0) {
            s += "/";
        }
        return s;
    }

    // Returns the href value of the tag that starts at index 'p' of
    // 's', without any "#fragment" part.  Returns null if the tag has
    // no href, if the value is not terminated, or if the link is only
    // a fragment.  The value may be double-quoted, single-quoted or
    // unquoted (ended by a space or '>').  'in' is not used.
    String getLink(BufferedReader in, String s, int p)
            throws IOException {
        if (p < 0) {
            p = 0;
        }

        // The tag ends at the first '>' after p; an href beyond that
        // point belongs to some other tag.
        int tagEnd = s.indexOf('>', p);
        if (tagEnd < 0) {
            tagEnd = s.length();
        }

        // Find the href attribute inside this tag.
        int h = indexOfIgnoreCase(s, "href=", p);
        if (h < 0 || h >= tagEnd) {
            // No href so skip the tag.
            return null;
        }

        // Skip the "href="
        int start = h + 5;
        if (start >= s.length()) {
            return null;
        }

        int end;
        char quote = s.charAt(start);
        if (quote == '"' || quote == '\'') {
            start++;
            end = s.indexOf(quote, start);
        } else {
            // Unquoted: ends at whichever of ' ' and '>' comes first;
            // if only one of them is present, use that one.
            int space = s.indexOf(' ', start);
            int close = s.indexOf('>', start);
            if (space < 0) {
                end = close;
            } else if (close < 0) {
                end = space;
            } else {
                end = Math.min(space, close);
            }
        }

        // Could not complete the href tag for some reason
        // so skip the tag.
        if (end < 0) {
            return null;
        }
        String link = s.substring(start, end);

        // Remove the reference, if any.
        int hash = link.indexOf('#');
        if (hash == 0) {
            return null;
        } else if (hash > 0) {
            link = link.substring(0, hash);
        }
        return link;
    }

    // Returns the index of the first occurrence of 'needle' in 's' at
    // or after 'from', ignoring letter case, or -1 if there is none.
    private static int indexOfIgnoreCase(String s, String needle,
            int from) {
        int last = s.length() - needle.length();
        for (int i = Math.max(from, 0); i <= last; i++) {
            if (s.regionMatches(true, i, needle, 0, needle.length())) {
                return i;
            }
        }
        return -1;
    }

    // Thread body: walks the home URL at depth 0 and prints a marker
    // line if the home page itself cannot be read.
    public void run() {
        try {
            walk(homeURL, 0);
        } catch (IOException e) {
            System.out.println("*** " + homeURL);
        }
    }
}

// Plain value holder describing one link: where it came from, where
// it points, and the depth value supplied with it.
class SpiderArgs {
    // Source URL of the link.
    URL src;

    // Destination URL of the link.
    URL dst;

    // Depth value supplied by the creator of this object.
    int depth;

    // Stores the three values exactly as given; no copying or
    // validation is performed.
    SpiderArgs(URL src, URL dst, int depth) {
        this.depth = depth;
        this.dst = dst;
        this.src = src;
    }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -