// File: searchcrawler.java
// (Code-viewer residue removed from this header: a "Font size:" control label.)
import java.awt.*;
import java.awt.event.*;
import java.io.*;
import java.net.*;
import java.util.*;
import java.util.regex.*;
import javax.swing.*;
import javax.swing.table.*;
// The Search Web Crawler
public class SearchCrawler extends JFrame
{
// Preset choices offered in the "Max URLs to Crawl" drop-down
// (the combo box is editable, so other values can be typed in).
private static final String[] MAX_URLS =
    {"50", "100", "500", "1000"};
// Cache of robot disallow lists.
// NOTE(review): key/value types are not visible in this part of the
// file -- presumably host name -> list of disallowed paths; confirm.
private HashMap disallowListCache = new HashMap();

// Search GUI controls (created in the constructor).
private JTextField startTextField;   // start URL
private JComboBox maxComboBox;       // max URLs to crawl
private JCheckBox limitCheckBox;     // "Limit crawling to Start URL site"
private JTextField logTextField;     // matches log file path
private JTextField searchTextField;  // search string
private JCheckBox caseCheckBox;      // "Case Sensitive"
private JButton searchButton;        // labelled "Search", or "Stop" while crawling

// Search stats GUI controls (the value half of each caption/value pair).
private JLabel crawlingLabel2;
private JLabel crawledLabel2;
private JLabel toCrawlLabel2;
private JProgressBar progressBar;
private JLabel matchesLabel2;

// Table listing search matches.
private JTable table;

// Flag for whether or not crawling is underway.
// Declared volatile because it is shared between threads: actionSearch()
// clears it from the Swing event thread when "Stop" is clicked, while the
// crawl itself runs on the separate thread started by search(). Without
// volatile the crawl thread is not guaranteed to ever see the update.
private volatile boolean crawling;

// Matches log file print writer; opened by search() for each new crawl.
private PrintWriter logFileWriter;
/**
 * Constructor for Search Web Crawler.
 *
 * Builds the complete window: the File menu, the search form with its
 * statistics rows (top), and the matches table (center). Nothing is
 * shown or crawled here; crawling starts from the Search button via
 * actionSearch().
 */
public SearchCrawler()
{
    // Set application title.
    setTitle("Search Crawler");
    // Set window size.
    setSize(600, 600);
    // Closing the window takes the same exit path as File > Exit.
    addWindowListener(new WindowAdapter() {
        public void windowClosing(WindowEvent e) {
            actionExit();
        }
    });

    setJMenuBar(createMenuBar());

    JPanel searchPanel = createSearchPanel();
    JPanel matchesPanel = createMatchesPanel();

    // Add panels to display.
    getContentPane().setLayout(new BorderLayout());
    getContentPane().add(searchPanel, BorderLayout.NORTH);
    getContentPane().add(matchesPanel, BorderLayout.CENTER);
}

// Builds the menu bar holding the File menu and its single Exit item.
private JMenuBar createMenuBar() {
    JMenuItem fileExitMenuItem = new JMenuItem("Exit", KeyEvent.VK_X);
    fileExitMenuItem.addActionListener(new ActionListener() {
        public void actionPerformed(ActionEvent e) {
            actionExit();
        }
    });

    JMenu fileMenu = new JMenu("File");
    fileMenu.setMnemonic(KeyEvent.VK_F);
    fileMenu.add(fileExitMenuItem);

    JMenuBar menuBar = new JMenuBar();
    menuBar.add(fileMenu);
    return menuBar;
}

// Builds the search form and the crawl statistics rows, and assigns the
// control fields that the rest of the class reads and updates.
private JPanel createSearchPanel() {
    JPanel searchPanel = new JPanel();
    searchPanel.setLayout(new GridBagLayout());
    GridBagConstraints constraints;

    // Row: start URL.
    addCaption(searchPanel, "Start URL:", 0);
    startTextField = new JTextField();
    addRowEnd(searchPanel, startTextField, 0);

    // Row: max URLs, site-limit check box, and a blank filler that
    // consumes the rest of the row.
    addCaption(searchPanel, "Max URLs to Crawl:", 0);
    maxComboBox = new JComboBox(MAX_URLS);
    maxComboBox.setEditable(true);
    constraints = new GridBagConstraints();
    constraints.insets = new Insets(5, 5, 0, 0);
    searchPanel.add(maxComboBox, constraints);

    limitCheckBox =
        new JCheckBox("Limit crawling to Start URL site");
    constraints = new GridBagConstraints();
    constraints.anchor = GridBagConstraints.WEST;
    constraints.insets = new Insets(0, 10, 0, 0);
    searchPanel.add(limitCheckBox, constraints);

    constraints = new GridBagConstraints();
    constraints.gridwidth = GridBagConstraints.REMAINDER;
    searchPanel.add(new JLabel(), constraints);

    // Row: matches log file, defaulting to crawler.log in the working
    // directory.
    addCaption(searchPanel, "Matches Log File:", 0);
    String file =
        System.getProperty("user.dir") +
        System.getProperty("file.separator") +
        "crawler.log";
    logTextField = new JTextField(file);
    addRowEnd(searchPanel, logTextField, 0);

    // Row: search string (takes all spare width) and case check box.
    addCaption(searchPanel, "Search String:", 0);
    searchTextField = new JTextField();
    constraints = new GridBagConstraints();
    constraints.fill = GridBagConstraints.HORIZONTAL;
    constraints.insets = new Insets(5, 5, 0, 0);
    constraints.gridwidth = 2;
    constraints.weightx = 1.0d;
    searchPanel.add(searchTextField, constraints);

    caseCheckBox = new JCheckBox("Case Sensitive");
    constraints = new GridBagConstraints();
    constraints.insets = new Insets(5, 5, 0, 5);
    constraints.gridwidth = GridBagConstraints.REMAINDER;
    searchPanel.add(caseCheckBox, constraints);

    // Row: search/stop button, centered on its own row.
    searchButton = new JButton("Search");
    searchButton.addActionListener(new ActionListener() {
        public void actionPerformed(ActionEvent e) {
            actionSearch();
        }
    });
    constraints = new GridBagConstraints();
    constraints.gridwidth = GridBagConstraints.REMAINDER;
    constraints.insets = new Insets(5, 5, 5, 5);
    searchPanel.add(searchButton, constraints);

    // Separator between the form and the statistics.
    addRowEnd(searchPanel, new JSeparator(), 5);

    // Statistics rows: bold caption on the left, plain value on the right.
    addCaption(searchPanel, "Crawling:", 0);
    crawlingLabel2 = createValueLabel();
    addRowEnd(searchPanel, crawlingLabel2, 0);

    addCaption(searchPanel, "Crawled URLs:", 0);
    crawledLabel2 = createValueLabel();
    addRowEnd(searchPanel, crawledLabel2, 0);

    addCaption(searchPanel, "URLs to Crawl:", 0);
    toCrawlLabel2 = createValueLabel();
    addRowEnd(searchPanel, toCrawlLabel2, 0);

    addCaption(searchPanel, "Crawling Progress:", 0);
    progressBar = new JProgressBar();
    progressBar.setMinimum(0);
    progressBar.setStringPainted(true);
    addRowEnd(searchPanel, progressBar, 0);

    // Last row gets a larger bottom inset to separate it from the table.
    addCaption(searchPanel, "Search Matches:", 10);
    matchesLabel2 = createValueLabel();
    addRowEnd(searchPanel, matchesLabel2, 10);

    return searchPanel;
}

// Builds the titled "Matches" panel wrapping the read-only results table.
private JPanel createMatchesPanel() {
    // Set up matches table: one "URL" column, no rows, cells not editable.
    table =
        new JTable(new DefaultTableModel(new Object[][]{},
            new String[]{"URL"}) {
            public boolean isCellEditable(int row, int column)
            {
                return false;
            }
        });

    // Set up matches panel.
    JPanel matchesPanel = new JPanel();
    matchesPanel.setBorder(
        BorderFactory.createTitledBorder("Matches"));
    matchesPanel.setLayout(new BorderLayout());
    matchesPanel.add(new JScrollPane(table),
        BorderLayout.CENTER);
    return matchesPanel;
}

// Adds a right-aligned caption label that opens a form row.
private static void addCaption(JPanel panel, String text,
                               int bottomInset) {
    GridBagConstraints constraints = new GridBagConstraints();
    constraints.anchor = GridBagConstraints.EAST;
    constraints.insets = new Insets(5, 5, bottomInset, 0);
    // Adding with constraints registers them with the panel's
    // GridBagLayout, same as setConstraints() followed by add().
    panel.add(new JLabel(text), constraints);
}

// Adds a horizontally stretched component that finishes the current row.
private static void addRowEnd(JPanel panel, Component component,
                              int bottomInset) {
    GridBagConstraints constraints = new GridBagConstraints();
    constraints.fill = GridBagConstraints.HORIZONTAL;
    constraints.gridwidth = GridBagConstraints.REMAINDER;
    constraints.insets = new Insets(5, 5, bottomInset, 5);
    panel.add(component, constraints);
}

// Creates an empty label rendered in the plain style of its default font.
private static JLabel createValueLabel() {
    JLabel label = new JLabel();
    label.setFont(label.getFont().deriveFont(Font.PLAIN));
    return label;
}
/**
 * Exit this program.
 *
 * The matches log writer is closed first: it is a buffered PrintWriter
 * without auto-flush, so exiting while a crawl is still running would
 * otherwise drop whatever matches are still sitting in its buffer.
 */
private void actionExit() {
    // Copy to a local so the null check and the close() act on the
    // same object even if another thread reassigns the field.
    PrintWriter writer = logFileWriter;
    if (writer != null) {
        // close() flushes pending output; closing an already-closed
        // writer has no effect.
        writer.close();
    }
    System.exit(0);
}
/**
 * Handle search/stop button being clicked.
 *
 * While a crawl is underway the button acts as "Stop": the crawling
 * flag is cleared and nothing else happens. Otherwise the form is
 * validated; all problems found are reported together in one error
 * dialog, and if there are none the crawl is started via search().
 */
private void actionSearch() {
    // If stop button clicked, turn crawling flag off.
    if (crawling) {
        crawling = false;
        return;
    }

    ArrayList errorList = new ArrayList();

    // Validate that start URL has been entered and is well formed.
    String startUrl = startTextField.getText().trim();
    if (startUrl.length() < 1) {
        errorList.add("Missing Start URL.");
    } else if (verifyUrl(startUrl) == null) {
        errorList.add("Invalid Start URL.");
    }

    // Validate that max URLs is either empty or is a positive number.
    // An empty value leaves maxUrls at 0 (presumably "no limit" --
    // interpreted by the crawl code, which is not shown here).
    int maxUrls = 0;
    // getSelectedItem() returns null when the combo box has no
    // selection; treat that like an empty entry instead of failing.
    Object selectedMax = maxComboBox.getSelectedItem();
    String max =
        (selectedMax == null) ? "" : selectedMax.toString().trim();
    if (max.length() > 0) {
        try {
            maxUrls = Integer.parseInt(max);
        } catch (NumberFormatException e) {
            // Not a number, or outside the int range: keep 0 so the
            // range check below reports it as invalid.
            maxUrls = 0;
        }
        if (maxUrls < 1) {
            errorList.add("Invalid Max URLs value.");
        }
    }

    // Validate that matches log file has been entered.
    String logFile = logTextField.getText().trim();
    if (logFile.length() < 1) {
        errorList.add("Missing Matches Log File.");
    }

    // Validate that search string has been entered.
    String searchString = searchTextField.getText().trim();
    if (searchString.length() < 1) {
        errorList.add("Missing Search String.");
    }

    // Show errors, if any, and return.
    if (errorList.size() > 0) {
        StringBuffer message = new StringBuffer();
        // Concatenate errors into single message, one per line.
        for (int i = 0; i < errorList.size(); i++) {
            if (i > 0) {
                message.append("\n");
            }
            message.append(errorList.get(i));
        }
        showError(message.toString());
        return;
    }

    // Remove "www" from start URL if present.
    startUrl = removeWwwFromUrl(startUrl);

    // Start the search crawler.
    search(logFile, startUrl, maxUrls, searchString);
}
private void search(final String logFile, final String startUrl,
final int maxUrls, final String searchString)
{
// Start the search in a new thread.
Thread thread = new Thread(new Runnable() {
public void run() {
// Show hour glass cursor while crawling is under way.
setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR));
// Disable search controls.
startTextField.setEnabled(false);
maxComboBox.setEnabled(false);
limitCheckBox.setEnabled(false);
logTextField.setEnabled(false);
searchTextField.setEnabled(false);
caseCheckBox.setEnabled(false);
// Switch search button to "Stop."
searchButton.setText("Stop");
// Reset stats.
table.setModel(new DefaultTableModel(new Object[][]{},
new String[]{"URL"}) {
public boolean isCellEditable(int row, int column)
{
return false;
}
});
updateStats(startUrl, 0, 0, maxUrls);
// Open matches log file.
try {
logFileWriter = new PrintWriter(new FileWriter(logFile));
} catch (Exception e) {
showError("Unable to open matches log file.");
return;
}
// Turn crawling flag on.
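// NOTE: the source listing is truncated at this point. The lines that
// followed were code-viewer residue (keyboard-shortcut help), kept here
// as a comment so they are not parsed as Java:
//   Keyboard shortcuts
//   Copy code:          Ctrl + C
//   Search code:        Ctrl + F
//   Full-screen mode:   F11
//   Toggle theme:       Ctrl + Shift + D
//   Show shortcuts:     ?
//   Increase font size: Ctrl + =
//   Decrease font size: Ctrl + -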