⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 test.java

📁 实现从某个网站的全部遍历
💻 JAVA
字号:
package com.yinyueku;

import java.sql.*;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;

/**
 * Crawls the singer categories of a music site and stores every singer's name
 * and link in the {@code SINGERS001} table.
 *
 * <p>The start page is scanned for nodes whose {@code class} attribute is
 * {@code white_14_b}; each such node is handed to {@link Music} to obtain the
 * category name and category URL. Every category page is then scanned for nodes
 * with {@code class="listsinger"}, and the link found as the second child of
 * each of those nodes is taken as one singer entry.
 *
 * <p>Database access is best-effort: if the connection cannot be opened, or an
 * individual insert fails, the stack trace is printed and crawling continues.
 */
public class Test {

	/** Start page of the crawl. */
	private static final String SITE_URL = "http://www.yinyueku.com";

	/** Character encoding forced on every fetched page. */
	private static final String PAGE_ENCODING = "gb2312";

	private static final String JDBC_DRIVER = "com.microsoft.jdbc.sqlserver.SQLServerDriver";

	private static final String JDBC_URL =
			"jdbc:microsoft:sqlserver://localhost:1433;DatabaseName=chenlei";

	// NOTE(review): credentials are hard-coded in source; consider moving them to configuration.
	private static final String JDBC_USER = "sa";

	private static final String JDBC_PASSWORD = "12345";

	/** Parameterized insert: scraped text is bound as data, never spliced into the SQL. */
	private static final String INSERT_SQL = "Insert into SINGERS001 values(?,?)";

	/**
	 * Entry point: opens one database connection for the whole run, crawls the
	 * site, and releases the JDBC resources when done.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		Connection con = null;
		PreparedStatement insert = null;
		try {
			try {
				con = openConnection();
				insert = con.prepareStatement(INSERT_SQL);
			} catch (ClassNotFoundException e) {
				// Driver missing: keep crawling, inserts are skipped.
				e.printStackTrace();
			} catch (SQLException e) {
				// Database unreachable: keep crawling, inserts are skipped.
				e.printStackTrace();
			}
			crawl(insert);
		} catch (ParserException e) {
			e.printStackTrace();
		} finally {
			closeQuietly(insert);
			closeQuietly(con);
		}
	}

	/** Loads the JDBC driver and opens the connection used for all inserts. */
	private static Connection openConnection() throws ClassNotFoundException, SQLException {
		Class.forName(JDBC_DRIVER);
		return DriverManager.getConnection(JDBC_URL, JDBC_USER, JDBC_PASSWORD);
	}

	/**
	 * Walks every singer category on the start page and every singer inside it.
	 *
	 * @param insert prepared insert statement, or {@code null} when the database
	 *               is unavailable (singers are then only printed)
	 * @throws ParserException if a page cannot be fetched or parsed
	 */
	private static void crawl(PreparedStatement insert) throws ParserException {
		Parser parser = new Parser(SITE_URL);
		parser.setEncoding(PAGE_ENCODING);
		NodeList categories = parser.parse(classFilter("white_14_b"));
		System.out.println(categories.size());

		for (int i = 0; i < categories.size(); i++) {
			System.out.println();

			Music category = new Music();
			category.setParent(categories.elementAt(i));
			category.update();

			String categoryName = category.getstrSingerClassName();
			System.out.println(categoryName);
			String categoryUrl = category.getstrSingerClassUrl();
			System.out.println(categoryUrl);
			System.out.println(categoryName + "内容如下");

			Parser categoryParser = new Parser(categoryUrl);
			categoryParser.setEncoding(PAGE_ENCODING);
			NodeList singers = categoryParser.extractAllNodesThatMatch(classFilter("listsinger"));
			for (int j = 0; j < singers.size(); j++) {
				processSinger(singers.elementAt(j), insert);
			}
		}
	}

	/** Builds a filter matching nodes whose {@code class} attribute equals the given value. */
	private static NodeFilter classFilter(String cssClass) {
		HasAttributeFilter filter = new HasAttributeFilter();
		filter.setAttributeName("class");
		filter.setAttributeValue(cssClass);
		return filter;
	}

	/**
	 * Extracts the singer link from one {@code listsinger} node, prints it and
	 * stores it. Nodes that do not have a link as their second child are skipped
	 * instead of aborting the crawl.
	 */
	private static void processSinger(Node item, PreparedStatement insert) {
		Node first = item.getFirstChild();
		Node candidate = (first == null) ? null : first.getNextSibling();
		if (!(candidate instanceof LinkTag)) {
			// Unexpected markup (no children, or second child is not a link).
			return;
		}
		LinkTag link = (LinkTag) candidate;

		String singerName = link.toPlainTextString();
		System.out.println(singerName);
		String singerUrl = link.getLink();
		System.out.println(singerUrl);

		saveSinger(insert, singerName, singerUrl);
	}

	/** Inserts one singer row; a failure is reported and does not stop the crawl. */
	private static void saveSinger(PreparedStatement insert, String name, String url) {
		if (insert == null) {
			return;
		}
		try {
			insert.setString(1, name);
			insert.setString(2, url);
			insert.executeUpdate();
			System.out.println("Insert into table ok!");
		} catch (SQLException e) {
			e.printStackTrace();
		}
	}

	/** Closes a statement, reporting but not propagating any failure. */
	private static void closeQuietly(Statement stmt) {
		if (stmt != null) {
			try {
				stmt.close();
			} catch (SQLException e) {
				e.printStackTrace();
			}
		}
	}

	/** Closes a connection, reporting but not propagating any failure. */
	private static void closeQuietly(Connection con) {
		if (con != null) {
			try {
				con.close();
			} catch (SQLException e) {
				e.printStackTrace();
			}
		}
	}

}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -