📄 searcher.java
字号:
package com.yinyueku;
import java.sql.*;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
/**
 * Walks a song-list page of www.yinyueku.com and prints what it finds.
 *
 * <p>Constructing a {@code Searcher} does all the work: the page at the given
 * URL is parsed, the song name, listen URL, album name, lyrics id and
 * download-page URL of each matched node are printed to standard output, and
 * then the pager is inspected. If the pager indicates a further page, a new
 * {@code Searcher} is constructed for that page, so a single call follows the
 * whole chain of pages.
 *
 * <p>The fields hold the values of the most recently processed song of the
 * page this instance was constructed for.
 */
public class Searcher {

    /** Character encoding applied to every page that is parsed. */
    private static final String PAGE_ENCODING = "gb2312";

    /** Prefix printed in front of the lyrics id. */
    private static final String LYRICS_URL_PREFIX =
            "http://www.yinyueku.com/user/showgeci.asp?id=";

    /** Prefix prepended to the download id to form the download-page URL. */
    private static final String DOWNLOAD_URL_PREFIX =
            "http://www.yinyueku.com/user/qq570_down.asp?id=";

    /** Text the pager HTML ends with when no further page is followed. */
    private static final String LAST_PAGE_MARKER = "[后一页] [尾 页]";

    /** Plain text of the song link. */
    String strSongName;

    /** Target of the song link (printed as the listen URL). */
    String strListenUrl;

    /** Plain text of the album node. */
    String strZhuanji;

    /** Lyrics id extracted from the lyrics link, or the raw link if it has no "(...)" part. */
    String strGeciUrl;

    /** Full download-page URL built from the download link. */
    String strDownload;

    /** Direct download URL; never assigned by the active code of this class. */
    String strDownloadUrl;

    /**
     * Parses the page at {@code url}, prints the song information found on it
     * and then follows the link to the next page, if there is one.
     *
     * <p>A {@link ParserException} raised while reading the songs is printed
     * and does not prevent the pager from being inspected. Any exception
     * raised while inspecting the pager or processing a following page is
     * printed as well.
     *
     * @param url address (or local file path) of the song-list page
     */
    public Searcher(String url) {
        try {
            printSongs(url);
        } catch (ParserException e) {
            e.printStackTrace();
        }
        try {
            followNextPage(url);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints the details of every song node on the page and stores the values
     * of the song being processed in the instance fields.
     *
     * @param url address of the page to parse
     * @throws ParserException if the page cannot be fetched or parsed
     */
    private void printSongs(String url) throws ParserException {
        Parser parser = new Parser(url);
        parser.setEncoding(PAGE_ENCODING);

        // Every node carrying align="left" is a candidate song node.
        NodeList songNodes = parser.parse(attributeFilter("align", "left"));
        System.out.println(songNodes.size());

        // The first match is deliberately skipped, exactly as before.
        // NOTE(review): presumably it is a header cell - confirm against the page markup.
        for (int i = 1; i < songNodes.size(); i++) {
            System.out.println();
            Node songNode = songNodes.elementAt(i);

            // Song name and listen URL both come from the same link node.
            Node songLink = songNode.getFirstChild().getFirstChild();
            strSongName = songLink.toPlainTextString();
            System.out.println(strSongName);
            strListenUrl = ((LinkTag) songLink).getLink();
            System.out.println("试听地址:" + strListenUrl);

            // Album name: four siblings after the song node.
            Node albumNode = nthNextSibling(songNode, 4);
            System.out.println(albumNode.getText());
            strZhuanji = albumNode.getFirstChild().toPlainTextString();
            System.out.println("专辑名:" + strZhuanji);

            // Lyrics link: two siblings after the album node. The link is not a
            // complete address; only the part between "(" and ")" is kept.
            Node lyricsNode = nthNextSibling(albumNode, 2);
            strGeciUrl = ((LinkTag) lyricsNode.getFirstChild()).getLink();
            System.out.println(strGeciUrl);
            strGeciUrl = extractParenthesized(strGeciUrl, 0);
            System.out.println("歌词地址:" + LYRICS_URL_PREFIX + strGeciUrl);

            // Download-page link: four siblings after the lyrics node, reduced
            // to its parenthesised part in the same way.
            Node downloadNode = nthNextSibling(lyricsNode, 4);
            strDownload = ((LinkTag) downloadNode.getFirstChild()).getLink();
            System.out.println(strDownload);
            strDownload = DOWNLOAD_URL_PREFIX + extractParenthesized(strDownload, 5);
            System.out.println("下载页面:" + strDownload);
            System.out.println("本页歌曲相关信息搜索完毕");
        }
    }

    /**
     * Prints the pager nodes of the page and, unless the pager HTML ends with
     * {@link #LAST_PAGE_MARKER}, constructs a new {@code Searcher} for the
     * link that follows it.
     *
     * @param url address of the page whose pager is inspected
     * @throws ParserException if the page cannot be fetched or parsed
     */
    private void followNextPage(String url) throws ParserException {
        Parser parser = new Parser(url);
        parser.setEncoding(PAGE_ENCODING);

        NodeList matches = parser.parse(attributeFilter("color", "#000000"));
        System.out.println(matches.size());

        // Print the first match and its next three siblings; the third sibling
        // is the one whose HTML tells whether another page follows.
        Node pagerNode = matches.elementAt(0);
        System.out.println(pagerNode.toHtml());
        for (int step = 0; step < 3; step++) {
            pagerNode = pagerNode.getNextSibling();
            System.out.println(pagerNode.toHtml());
        }

        boolean lastPage = pagerNode.toHtml().endsWith(LAST_PAGE_MARKER);
        System.out.println(lastPage);
        if (!lastPage) {
            // The sibling after the pager text is treated as the next-page link.
            Node nextNode = pagerNode.getNextSibling();
            System.out.println(nextNode.toHtml());
            LinkTag nextLink = (LinkTag) nextNode;
            System.out.println(nextLink.getLink());
            // Constructing a Searcher processes that page (and the ones after it).
            new Searcher(nextLink.getLink());
            System.out.println("c");
        }
        System.out.println("本歌手所有歌曲搜索完毕");
    }

    /**
     * Builds a filter matching nodes whose attribute {@code name} equals {@code value}.
     *
     * @param name attribute name to match
     * @param value attribute value to match
     * @return the configured filter
     */
    private static HasAttributeFilter attributeFilter(String name, String value) {
        HasAttributeFilter filter = new HasAttributeFilter();
        filter.setAttributeName(name);
        filter.setAttributeValue(value);
        return filter;
    }

    /**
     * Returns the node reached by calling {@code getNextSibling()} {@code steps} times.
     *
     * @param node starting node
     * @param steps number of siblings to advance
     * @return the node {@code steps} siblings after {@code node}
     */
    private static Node nthNextSibling(Node node, int steps) {
        Node current = node;
        for (int i = 0; i < steps; i++) {
            current = current.getNextSibling();
        }
        return current;
    }

    /**
     * Returns the text between the first "(" of {@code link} and the first
     * ")" after it.
     *
     * <p>The link is returned unchanged when it is not longer than
     * {@code minLength} or when it contains no such parenthesised part.
     * Previously a link without "(...)" caused a
     * {@link StringIndexOutOfBoundsException} that aborted the whole run.
     *
     * @param link raw link text
     * @param minLength links of this length or shorter are returned as they are
     * @return the parenthesised part, or {@code link} itself
     */
    private static String extractParenthesized(String link, int minLength) {
        if (link.length() <= minLength) {
            return link;
        }
        int open = link.indexOf('(');
        if (open < 0) {
            return link;
        }
        int close = link.indexOf(')', open + 1);
        if (close < 0) {
            return link;
        }
        return link.substring(open + 1, close);
    }

    /**
     * Starts the crawl at the first song-list page.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String searchUrl = "http://www.yinyueku.com/gequ/1.htm";
        new Searcher(searchUrl);
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -