⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 getsite.java

📁 <<网络机器人java编程指南>>的配套源程序
💻 JAVA
字号:
import java.awt.*;import java.util.*;import javax.swing.*;import java.io.*;import com.heaton.bot.*;import java.net.*;/** * Example program from Chapter 8 * Programming Spiders, Bots and Aggregators in Java * Copyright 2001 by Jeff Heaton * * * This example program will download all of the HTML files * of a website to a local drive. This shows how a spider can * be used to map/download a site. * * @author Jeff Heaton * @version 1.0 */public class GetSite extends javax.swing.JFrame implements ISpiderReportable {  /**   * The underlying spider object.   */  Spider _spider = null;  /**   * The current page count.   */  int _pagesCount;  /**   * The constructor. Set up the visual Swing   * components that make up the user interface   * for this program.   */  public GetSite()  {    //{{INIT_CONTROLS    setTitle("Download Site");    getContentPane().setLayout(null);    setSize(405,268);    setVisible(false);    D.setHorizontalTextPosition(                               javax.swing.SwingConstants.LEFT);    D.setVerticalTextPosition(                             javax.swing.SwingConstants.TOP);    D.setVerticalAlignment(                          javax.swing.SwingConstants.TOP);    D.setText("Download pages of:");    getContentPane().add(D);    D.setBounds(12,12,384,24);    JLabel2.setText("URL:");    getContentPane().add(JLabel2);    JLabel2.setBounds(12,36,36,24);    getContentPane().add(_url);    _url.setBounds(48,36,348,24);    JLabel3.setText("Select local path to download files");    getContentPane().add(JLabel3);    JLabel3.setBounds(12,72,384,24);    getContentPane().add(_save);    _save.setBounds(12,96,384,24);    _go.setText("GO!");    getContentPane().add(_go);    _go.setBounds(96,228,216,24);    getContentPane().add(_current);    _current.setBounds(12,204,384,12);    JLabel4.setText("Number of pages:");    getContentPane().add(JLabel4);    JLabel4.setBounds(12,180,120,12);    _pages.setText("0");    getContentPane().add(_pages);    
_pages.setBounds(120,180,108,12);    JLabel6.setText(     "Select local path(and filename) to write log to(optional):");    getContentPane().add(JLabel6);    JLabel6.setBounds(12,120,384,24);    _logPath.setText("./spider.log");    getContentPane().add(_logPath);    _logPath.setBounds(12,144,384,24);    _go.setActionCommand("jbutton");    //}}    //{{INIT_MENUS    //}}    //{{REGISTER_LISTENERS    SymAction lSymAction = new SymAction();    _go.addActionListener(lSymAction);    SymWindow aSymWindow = new SymWindow();    this.addWindowListener(aSymWindow);    //}}    setLocation(32,32);  }  /**   * Added by Visual Cafe.   *   * @param b   */  public void setVisible(boolean b)  {    if ( b )      setLocation(50, 50);    super.setVisible(b);  }  /**   * Program entry point, causes the main   * window to be displayed.   *   * @param args Command line arguments are not used.   */  static public void main(String args[])  {    (new GetSite()).setVisible(true);  }  /**   * Added by Visual Cafe.   */  public void addNotify()  {    // Record the size of the window prior    // to calling parents addNotify.    Dimension size = getSize();    super.addNotify();    if ( frameSizeAdjusted )      return;    frameSizeAdjusted = true;    // Adjust size of frame according to the insets and menu bar    Insets insets = getInsets();    javax.swing.JMenuBar menuBar = getRootPane().getJMenuBar();    int menuBarHeight = 0;    if ( menuBar != null )      menuBarHeight = menuBar.getPreferredSize().height;    setSize(insets.left +            insets.right +            size.width,            insets.top +            insets.bottom +            size.height + menuBarHeight);  }  // Used by addNotify  boolean frameSizeAdjusted = false;  //{{DECLARE_CONTROLS  javax.swing.JLabel D = new javax.swing.JLabel();  javax.swing.JLabel JLabel2 = new javax.swing.JLabel();  /**   * The URL to spider.   
*/  javax.swing.JTextField _url = new javax.swing.JTextField();  javax.swing.JLabel JLabel3 = new javax.swing.JLabel();  /**   * The directory to save the files to.   */  javax.swing.JTextField _save = new javax.swing.JTextField();  /**   * The go button.   */  javax.swing.JButton _go = new javax.swing.JButton();  /**   * Displays the current page.   */  javax.swing.JLabel _current = new javax.swing.JLabel();  javax.swing.JLabel JLabel4 = new javax.swing.JLabel();  /**   * A count of how many pages have been   * downloaded.   */  javax.swing.JLabel _pages = new javax.swing.JLabel();  javax.swing.JLabel JLabel6 = new javax.swing.JLabel();  /**   * Used to specify the path to store the   * log to.   */  javax.swing.JTextField _logPath = new javax.swing.JTextField();  //}}  //{{DECLARE_MENUS  //}}  /**   * An event handler class, generated by Visual Cafe.   *   * @author Visual Cafe   */  class SymAction implements java.awt.event.ActionListener {    public void actionPerformed(java.awt.event.ActionEvent event)    {      Object object = event.getSource();      if ( object == _go )        Go_actionPerformed(event);    }  }  /**   * As the files of the website are located,   * this method is called to save them to disk.   *   * @param file The HTTP object corrisponding to the page   * just visited.   */  protected void processFile(HTTP file)  {    try {      if ( _save.getText().length()>0 ) {        URL url = new URL(file.getURL());        String targetPath = url.getPath();        targetPath = URLUtility.convertFilename(_save.getText(),targetPath);        FileOutputStream fso =              new FileOutputStream(              new File(targetPath) );        fso.write( file.getBodyBytes() );        fso.close();       }    } catch ( Exception e ) {      Log.logException("Can't save output file: ",e);    }  }  /**   * This is where most of the action takes place. This   * method is called when the GO! button is pressed.   
*   * @param event The event   */  void Go_actionPerformed(java.awt.event.ActionEvent event)  {    IWorkloadStorable wl = new SpiderInternalWorkload();    if ( _spider!=null ) {      Runnable doLater = new Runnable()      {        public void run()        {          _go.setText("Canceling...");        }      };      SwingUtilities.invokeLater(doLater);      _spider.halt();      return;    }    try {      if ( _url.getText().length()>0 ) {        HTTPSocket http = new HTTPSocket();        http.send(_url.getText(),null);      } else {        _current.setText("<<distributed mode>>");      }    } catch ( Exception e ) {      JOptionPane.showMessageDialog(this,              e,              "Error",              JOptionPane.OK_CANCEL_OPTION,              null );      return;    }    Runnable doLater = new Runnable()    {      public void run()      {        _go.setText("Cancel");        _current.setText("Loading....");      }    };    SwingUtilities.invokeLater(doLater);    // Prepare to start the spider    _pagesCount = 0;    if ( _logPath.getText().length()>0 ) {      File file = new File(_logPath.getText());      file.delete();      Log.setLevel(Log.LOG_LEVEL_NORMAL);      Log.setFile(true);      Log.setConsole(false);      Log.setPath(_logPath.getText());    }// NOTE: To use SQL based workload management,// uncomment the following lines and include a// valid data source./*    try    {      wl = new SpiderSQLWorkload(        "sun.jdbc.odbc.JdbcOdbcDriver",        "jdbc:odbc:WORKLOAD");    }    catch(Exception e)    {      JOptionPane.showMessageDialog(this,        e,        "Error",        JOptionPane.OK_CANCEL_OPTION,        null );    }*/    _spider    = new Spider( this,                  _url.getText(),                  new HTTPSocket(),                  100,                  wl);    _spider.setMaxBody(200);    _spider.start();  }  /**   * This method is called by the spider when an   * internal link is found.   *   * @param url The URL of the link that was found. 
This   * link is passed in fully resolved.   * @return True if the spider should add this link to   * its visitation list.   */  public boolean foundInternalLink(String url)  {    return true;  }  /**   * This method is called by the spider when an   * external link is found. An external link is   * one that points to a different host.   *   * @param url The URL of the link that was found. This   * link is passed in fully resolved.   * @return True if the spider should add this link to   * its visitation list.   */  public boolean foundExternalLink(String url)  {    return false;  }  /**   * This method is called by the spider when an   * other type link is found. Links such as email   * addresses are sent to this method.   *   * @param url The URL of the link that was found. This   * link is passed in fully resolved.   * @return True if the spider should add this link to   * its visitation list.   */  public boolean foundOtherLink(String url)  {    return false;  }  /**   * A simple class used to update the current   * URL target. This is necessary, because Swing   * only allows GUI compoents to be updated by the   * main thread.   *   * @author Jeff Heaton   * @version 1.0   */  class UpdateTarget implements Runnable {    public String _t;    public void run()    {      _current.setText(_t);      _pages.setText( "" + _pagesCount );    }  }  /**   * Called by the spider when a page has been   * loaded, and should be processed. For the   * example, this method will save this file   * to disk.   *   * @param page The HTTP object that corrispondeds to the   * page just visited.   */  public void processPage(HTTP page)  {    _pagesCount++;    UpdateTarget ut = new UpdateTarget();    ut._t = page.getURL();    SwingUtilities.invokeLater(ut);    processFile(page);  }  /**   * Not used. This must be implemented because   * of the interface. Called when a page completes.   *   * @param page The page that just completed.   
* @param error True if the completion of this page   * resulted in an error.   */  public void completePage(HTTP page,boolean error)  {  }  /**   * This method is called to determine if   * query strings should be stripped.   *   * @return Returns true if query strings(the part of   * the URL after the ?) should be stripped.   */  public boolean getRemoveQuery()  {    return true;  }  /**   * This method is called once the spider   * has no more work to do.   */  public void spiderComplete()  {    if ( _spider.isHalted() ) {      JOptionPane.showMessageDialog(this,        "Download of site has been canceled. " +        "Check log file for any errors.",        "Done",        JOptionPane.OK_CANCEL_OPTION,        null );    } else {      JOptionPane.showMessageDialog(this,        "Download of site is complete. " +        "Check log file for any errors.",        "Done",        JOptionPane.OK_CANCEL_OPTION,        null );    }    _spider=null;    Runnable doLater = new Runnable()    {      public void run()      {        _go.setText("GO!!");      }    };    SwingUtilities.invokeLater(doLater);  }  /**   * An event handler class, generated by Visual Cafe.   *   * @author Visual Cafe   */  class SymWindow extends java.awt.event.WindowAdapter {    public void windowClosed(java.awt.event.WindowEvent event)    {      Object object = event.getSource();      if ( object == GetSite.this )        GetSite_windowClosed(event);    }  }  /**   * Called to close the window.   *   * @param event The event.   */  void GetSite_windowClosed(java.awt.event.WindowEvent event)  {    System.exit(0);  }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -