⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 webspider.java~

📁 不错的网络蜘蛛
💻 JAVA~
字号:
import javax.swing.*;
import java.net.*;
import java.io.*;
import javax.swing.text.html.*;
import java.util.*;

/*
 * WebSpider.java
 *
 * Created on March 31, 2004, 9:01 AM
 */

/**
 * Small Swing demo of a web spider.
 *
 * <p>The user enters a start URL (or picks a local file) and a keyword. The
 * spider fetches the page, splits it into tag / text tokens, records the page
 * if any token contains the keyword, and recursively follows every
 * {@code <a href=...>} link it finds, up to a fixed number of sites and a
 * fixed depth. Progress and results are written to the text area.
 *
 * @author  mpenderg
 */
public class WebSpider extends javax.swing.JFrame {

    /**
     * Crawl stops descending once this many distinct URLs have been recorded
     * in {@link #visited} (the URL that reaches the limit is recorded but not
     * fetched).
     */
    private static final int MAX_VISITED = 30;

    /** Links are followed only while the indent string is shorter than this. */
    private static final int MAX_DEPTH = 10;

    /** Normalized URLs already seen during the current search. */
    Vector visited = new Vector(300, 10);

    /** URLs whose content contained the keyword during the current search. */
    Vector matchList = new Vector(300, 10);

    /** Creates new form WebSpider */
    public WebSpider() {
        initComponents();
        setTitle("Demo Spider");
        setSize(800, 600);
    }

    /** This method is called from within the constructor to
     * initialize the form.
     * WARNING: Do NOT modify this code. The content of this method is
     * always regenerated by the Form Editor.
     */
    private void initComponents() {//GEN-BEGIN:initComponents
        centerPanel = new javax.swing.JScrollPane();
        resultArea = new javax.swing.JTextArea();
        northPanel = new javax.swing.JPanel();
        urlLabel = new javax.swing.JLabel();
        urlField = new javax.swing.JTextField();
        browseButton = new javax.swing.JButton();
        keywordLabel = new javax.swing.JLabel();
        keywordField = new javax.swing.JTextField();
        searchButton = new javax.swing.JButton();
        errorLabel = new javax.swing.JLabel();

        addWindowListener(new java.awt.event.WindowAdapter() {
            public void windowClosing(java.awt.event.WindowEvent evt) {
                exitForm(evt);
            }
        });

        centerPanel.setViewportView(resultArea);

        getContentPane().add(centerPanel, java.awt.BorderLayout.CENTER);

        northPanel.setLayout(new java.awt.FlowLayout(java.awt.FlowLayout.LEFT));

        urlLabel.setFont(new java.awt.Font("Dialog", 1, 10));
        urlLabel.setText("URL: ");
        northPanel.add(urlLabel);

        urlField.setColumns(20);
        urlField.setFont(new java.awt.Font("Dialog", 1, 10));
        northPanel.add(urlField);

        browseButton.setFont(new java.awt.Font("Dialog", 1, 10));
        browseButton.setText("Browse");
        browseButton.setMargin(new java.awt.Insets(2, 2, 2, 2));
        browseButton.addActionListener(new java.awt.event.ActionListener() {
            public void actionPerformed(java.awt.event.ActionEvent evt) {
                browseButtonActionPerformed(evt);
            }
        });

        northPanel.add(browseButton);

        keywordLabel.setFont(new java.awt.Font("Dialog", 1, 10));
        keywordLabel.setText("Keyword: ");
        northPanel.add(keywordLabel);

        keywordField.setColumns(15);
        keywordField.setFont(new java.awt.Font("Dialog", 1, 10));
        northPanel.add(keywordField);

        searchButton.setFont(new java.awt.Font("Dialog", 1, 10));
        searchButton.setText("Search");
        searchButton.setMargin(new java.awt.Insets(2, 2, 2, 2));
        searchButton.addActionListener(new java.awt.event.ActionListener() {
            public void actionPerformed(java.awt.event.ActionEvent evt) {
                searchButtonActionPerformed(evt);
            }
        });

        northPanel.add(searchButton);

        getContentPane().add(northPanel, java.awt.BorderLayout.NORTH);

        errorLabel.setHorizontalAlignment(javax.swing.SwingConstants.LEFT);
        errorLabel.setText(" ");
        getContentPane().add(errorLabel, java.awt.BorderLayout.SOUTH);

        pack();
    }//GEN-END:initComponents

    /**
     * Tells whether a tag body (the text between {@code <} and {@code >})
     * looks like an anchor carrying an {@code href} attribute.
     *
     * @param str tag text without the angle brackets; may be {@code null}
     * @return {@code true} if the text starts with "a" (case-insensitive),
     *         contains "href", and has an '=' somewhere after that "href"
     */
    public static boolean isHref(String str) {
        if (str == null) {
            return false;
        }
        // Locale.ENGLISH keeps the ASCII comparison independent of the
        // platform default locale.
        String lower = str.toLowerCase(Locale.ENGLISH);
        int hrefIndex = lower.indexOf("href");
        if (!lower.startsWith("a") || hrefIndex < 0) {
            return false; // not an A HREF tag
        }
        return str.indexOf('=', hrefIndex + 1) >= 0; // needs an equal sign
    }

    /**
     * Extracts the value of the {@code href} attribute from a tag body.
     *
     * <p>The value is taken from the first '=' that follows "href" (or, when
     * the text contains no "href", from the first '=' at all). If the first
     * non-space character after the '=' is a single or double quote, the
     * value runs to the matching quote; otherwise it runs to the next space.
     * A missing terminator means the value runs to the end of the string.
     *
     * @param str tag text without the angle brackets; may be {@code null}
     * @return the trimmed attribute value, or {@code null} if there is no
     *         '=' or nothing but spaces follows it
     */
    public static String getHrefLocation(String str) {
        if (str == null) {
            return null;
        }
        // Start looking for '=' after "href" so that earlier attributes
        // (class="...", id="...") are not mistaken for the link target.
        int hrefIndex = str.toLowerCase(Locale.ENGLISH).indexOf("href");
        int searchFrom = hrefIndex < 0 ? 0 : hrefIndex + 1;
        int equalIndex = str.indexOf('=', searchFrom);
        if (equalIndex < 0) {
            return null;
        }

        // skip spaces between '=' and the value
        int start = equalIndex + 1;
        while (start < str.length() && str.charAt(start) == ' ') {
            start++;
        }
        if (start >= str.length()) {
            return null;
        }

        int end;
        char first = str.charAt(start);
        if (first == '"' || first == '\'') {
            end = str.indexOf(first, start + 1); // find the closing quote
            start++;                             // skip the opening quote
        } else {
            end = str.indexOf(' ', start + 1);   // unquoted: ends at next space
        }
        if (end < 0) {
            end = str.length(); // no terminator: take the rest of the string
        }
        return str.substring(start, end).trim();
    }

    /**
     * Turns a link target found on a page into an absolute URL string.
     *
     * <ul>
     *   <li>Targets starting with {@code file:}, {@code http:}, {@code https:}
     *       or {@code ftp:} are kept as they are.</li>
     *   <li>Targets starting with {@code www.} get {@code http://} prepended.</li>
     *   <li>Targets whose second character is ':' (e.g. {@code C:\...}) get
     *       {@code file:///} prepended.</li>
     *   <li>Targets starting with '/' or '\' are resolved against the host
     *       (and port) of {@code url}.</li>
     *   <li>Everything else is resolved against the directory of
     *       {@code url}'s path.</li>
     * </ul>
     * Any {@code #anchor} suffix is removed from the result.
     *
     * @param url      URL of the page the link was found on
     * @param location link target as written in the page
     * @return absolute URL string without anchor
     */
    public static String resolveHTTPReference(URL url, String location) {
        String lower = location.toLowerCase(Locale.ENGLISH);

        // Require the ':' (or '.' for www) so relative names such as
        // "files/a.html" or "http-notes.html" are not taken for absolute URLs.
        boolean hasProtocol = lower.startsWith("file:")
                || lower.startsWith("http:")
                || lower.startsWith("https:")
                || lower.startsWith("ftp:");

        if (lower.startsWith("www.")) {
            location = "http://" + location;
        } else if (!hasProtocol) {
            if (lower.length() > 1 && lower.charAt(1) == ':') {
                // drive letter such as C: -> local file
                location = "file:///" + location;
            } else {
                String base = url.getProtocol() + "://" + hostWithPort(url);
                if (location.startsWith("\\") || location.startsWith("/")) {
                    location = base + location; // relative to host
                } else {
                    location = base + parentPath(url.getPath()) + location; // relative to page
                }
            }
        }

        int poundIndex = location.indexOf('#');
        if (poundIndex >= 0) {
            location = location.substring(0, poundIndex); // strip off anchors
        }
        return location;
    }

    /** Returns the host of {@code url}, followed by ":port" when a port is set. */
    private static String hostWithPort(URL url) {
        int port = url.getPort(); // -1 when the URL does not name a port
        return port >= 0 ? url.getHost() + ":" + port : url.getHost();
    }

    /**
     * Returns {@code path} with the trailing file name removed, always ending
     * in a separator; an empty path becomes "/".
     */
    private static String parentPath(String path) {
        if (path.length() == 0) {
            return "/"; // e.g. http://example.com with no path at all
        }
        if (path.endsWith("\\") || path.endsWith("/")) {
            return path;
        }
        int lastSeparator = Math.max(path.lastIndexOf('/'), path.lastIndexOf('\\'));
        return path.substring(0, lastSeparator + 1);
    }

    /**
     * Checks whether a URL was already seen during the current search and
     * records it if it was not.
     *
     * <p>URLs are compared after trimming, lower-casing, converting
     * backslashes to slashes and appending a trailing slash.
     *
     * @param urlstring URL to check
     * @return {@code true} if the URL was already recorded, {@code false} if
     *         it has just been added to {@link #visited}
     */
    public boolean urlHasBeenVisited(String urlstring) {
        String key = urlstring.trim().toLowerCase().replace('\\', '/');
        if (!key.endsWith("/")) {
            key = key + "/";
        }
        if (visited.contains(key)) {
            return true;
        }
        visited.add(key);
        return false;
    }

    /**
     * Recursive routine to search the web.
     *
     * <p>Fetches {@code urlstr}, appends it to the result area, adds it to
     * {@link #matchList} if any token of the page contains {@code keyword}
     * (case-insensitive), and calls itself for every anchor link found.
     * Bad URLs and I/O errors are reported in the result area and end the
     * processing of that page only.
     *
     * <p>NOTE(review): this method does blocking network I/O on the calling
     * thread; {@code searchButtonActionPerformed} calls it straight from the
     * button's action listener, so the UI does not repaint until the crawl
     * has finished.
     *
     * @param urlstr  absolute URL of the page to read
     * @param indent  prefix for output lines; its length is the current depth
     * @param keyword text to look for
     */
    private void searchWeb(String urlstr, String indent, String keyword) {
        if (urlHasBeenVisited(urlstr)) {
            return; // been here already
        }
        if (visited.size() >= MAX_VISITED) {
            return; // site budget used up
        }

        resultArea.append(indent + urlstr + "\n");

        Reader reader = null;
        try {
            boolean matchFound = false;
            keyword = keyword.toLowerCase();
            URL url = new URL(urlstr);
            System.out.println(urlstr);

            // NOTE(review): decodes with the platform default charset.
            reader = new InputStreamReader(url.openStream());

            // Treat '<' and '>' as the only separators, so each token is
            // either a complete tag body or a run of text between tags.
            StreamTokenizer stok = new StreamTokenizer(reader);
            stok.resetSyntax();
            stok.wordChars(' ', '~');
            stok.whitespaceChars('<', '<');
            stok.whitespaceChars('>', '>');

            while (stok.nextToken() != StreamTokenizer.TT_EOF) {
                if (stok.ttype != StreamTokenizer.TT_WORD) {
                    continue; // skip stream token if not a word token
                }

                if (stok.sval.toLowerCase().indexOf(keyword) >= 0) {
                    matchFound = true;
                }

                String tag = stok.sval.trim();
                if (!isHref(tag)) {
                    continue; // not a link
                }

                String location = getHrefLocation(tag);
                if (location == null) {
                    continue;
                }

                System.out.println(tag + " [[" + location + "]]"); // output for demo purposes
                location = resolveHTTPReference(url, location);
                System.out.println("    resolved url = " + location);

                if (indent.length() < MAX_DEPTH) {
                    searchWeb(location, indent + "~", keyword);
                }
            }

            // record url in matchlist if it had the keyword
            if (matchFound) {
                matchList.add(urlstr);
            }
        } catch (MalformedURLException ex) {
            resultArea.append(indent + "Bad URL : " + urlstr + "\n");
        } catch (IOException e) {
            resultArea.append(indent + "IOException : " + e.getMessage() + "\n");
        } finally {
            // Close on every path so a failed read does not leak the connection.
            closeQuietly(reader);
        }
    }

    /** Closes {@code reader} if it is not {@code null}, ignoring close failures. */
    private static void closeQuietly(Reader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (IOException ignored) {
            // nothing useful can be done if closing fails
        }
    }

    /**
     * Converts the text typed into the URL field into the URL string the
     * crawl starts from: {@code www.} input gets {@code http://}, input that
     * already names http, https, ftp or file is kept, and anything else is
     * treated as a local path.
     */
    private static String toStartUrl(String input) {
        String lower = input.toLowerCase(Locale.ENGLISH);
        if (lower.startsWith("www.")) {
            return "http://" + input; // tack on http://
        }
        if (lower.startsWith("http://") || lower.startsWith("https://")
                || lower.startsWith("ftp://") || lower.startsWith("file:")) {
            return input;
        }
        return "file:///" + input; // note you must have 3 slashes !
    }

    /**
     * Handler of the Search button: clears the previous results, crawls from
     * the URL in the URL field and lists the pages that contained the keyword.
     * Does nothing when the URL field is empty.
     */
    private void searchButtonActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_searchButtonActionPerformed
        String urlstr = urlField.getText().trim();
        if (urlstr.length() == 0) {
            return;
        }
        urlstr = toStartUrl(urlstr);

        visited.removeAllElements();
        matchList.removeAllElements();
        resultArea.setText("Searching " + urlstr + " for " + keywordField.getText() + "\n");

        searchWeb(urlstr, "", keywordField.getText());

        resultArea.append("\n\nMatches found @ \n");
        for (int i = 0; i < matchList.size(); i++) {
            resultArea.append(matchList.get(i).toString() + "\n");
        }
    }//GEN-LAST:event_searchButtonActionPerformed

    /**
     * Handler of the Browse button: lets the user pick a file and copies its
     * path into the URL field.
     */
    private void browseButtonActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_browseButtonActionPerformed
        JFileChooser jc = new JFileChooser();
        int result = jc.showOpenDialog(this);
        if (result == JFileChooser.APPROVE_OPTION) {
            File f = jc.getSelectedFile();
            String path = f.getPath();
            urlField.setText(path);
        }
    }//GEN-LAST:event_browseButtonActionPerformed

    /** Exit the Application */
    private void exitForm(java.awt.event.WindowEvent evt) {//GEN-FIRST:event_exitForm
        System.exit(0);
    }//GEN-LAST:event_exitForm

    /**
     * Opens the spider window.
     *
     * @param args the command line arguments (unused)
     */
    public static void main(String args[]) {
        // Build and show the frame on the event dispatch thread.
        SwingUtilities.invokeLater(new Runnable() {
            public void run() {
                new WebSpider().setVisible(true);
            }
        });
    }

    // Variables declaration - do not modify//GEN-BEGIN:variables
    private javax.swing.JPanel northPanel;
    private javax.swing.JButton searchButton;
    private javax.swing.JScrollPane centerPanel;
    private javax.swing.JButton browseButton;
    private javax.swing.JTextField keywordField;
    private javax.swing.JLabel errorLabel;
    private javax.swing.JTextField urlField;
    private javax.swing.JLabel keywordLabel;
    private javax.swing.JTextArea resultArea;
    private javax.swing.JLabel urlLabel;
    // End of variables declaration//GEN-END:variables

}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -