// File: testhtmlparserlink.java
// (Code-viewer residue: "Font size:" control label.)
package org.htmlparser.tests.mytest;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.StringFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
import org.htmlparser.visitors.HtmlPage;
/**
 * Small demo that downloads one web page and prints the {@code href} attribute of
 * anchor tags found in its body, using the HTMLParser library.
 *
 * <p>{@link #main(String[])} prints only the links whose href starts with
 * {@code Apply_com_view.asp}; {@link #getLink()} prints every anchor.
 */
public class TestHTMLParserLink {

    /** Address of the page that is downloaded and scanned. */
    private static final String PAGE_URL = "http://www.chinahr66.com";

    /** Character set used both to decode the HTTP response and to configure the parser. */
    private static final String PAGE_CHARSET = "GBK";

    /** Beginning of the href values that {@link #main(String[])} reports. */
    private static final String APPLY_LINK_PREFIX = "Apply_com_view.asp";

    /**
     * Prints the href of every anchor whose target starts with {@code Apply_com_view.asp}.
     * Failures are reported on standard error via {@code printStackTrace()}.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        // Anchors are selected by tag name and then narrowed by href. A StringFilter
        // only accepts text nodes, which can never be cast to LinkTag.
        printLinks(APPLY_LINK_PREFIX);
    }

    /**
     * Prints the href of every anchor tag found in the page body.
     * Failures are reported on standard error via {@code printStackTrace()}.
     */
    public void getLink() {
        printLinks(null);
    }

    /**
     * Downloads the page, collects its anchor tags and prints their href values,
     * each followed by an empty line.
     *
     * @param hrefPrefix when non-null, only hrefs starting with this text (compared
     *                   ignoring case) are printed; when null, every anchor is printed,
     *                   including those without an href (printed as {@code null})
     */
    private static void printLinks(String hrefPrefix) {
        try {
            String html = fetchPage(PAGE_URL, PAGE_CHARSET);
            NodeList anchors = findAnchors(html, PAGE_CHARSET);
            for (int i = 0; i < anchors.size(); i++) {
                Object candidate = anchors.elementAt(i);
                if (!(candidate instanceof LinkTag)) {
                    // Only possible if the parser was configured without LinkTag support.
                    continue;
                }
                String href = ((LinkTag) candidate).getAttribute("href");
                if (hrefPrefix != null && !startsWithIgnoreCase(href, hrefPrefix)) {
                    continue;
                }
                System.out.println(href + "\n");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Tells whether {@code text} begins with {@code prefix}, ignoring case.
     *
     * @param text   value to inspect; may be null
     * @param prefix expected beginning; must not be null
     * @return {@code true} if text is non-null and starts with prefix
     */
    private static boolean startsWithIgnoreCase(String text, String prefix) {
        return text != null && text.regionMatches(true, 0, prefix, 0, prefix.length());
    }

    /**
     * Downloads a page over HTTP and returns its content with every line terminated by CRLF.
     *
     * @param address URL of the page
     * @param charset name of the character set used to decode the response bytes
     * @return the decoded page content
     * @throws java.io.IOException if the URL is malformed, the charset is unsupported,
     *                             or the transfer fails
     */
    private static String fetchPage(String address, String charset) throws java.io.IOException {
        java.net.URL url = new java.net.URL(address);
        java.net.HttpURLConnection connection = (java.net.HttpURLConnection) url.openConnection();
        java.io.BufferedReader reader = null;
        try {
            connection.connect();
            // Decode with the same charset the parser is told about, not the platform default.
            reader = new java.io.BufferedReader(
                    new java.io.InputStreamReader(connection.getInputStream(), charset));
            StringBuilder html = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                html.append(line).append("\r\n");
            }
            return html.toString();
        } finally {
            // Release the stream and the connection on both success and failure.
            try {
                if (reader != null) {
                    reader.close();
                }
            } finally {
                connection.disconnect();
            }
        }
    }

    /**
     * Parses the HTML and returns every {@code A} tag found anywhere below the page body.
     *
     * <p>A parse failure is reported and the search continues over whatever the
     * page visitor holds at that point, so the result may be incomplete or empty.
     *
     * @param html    page content
     * @param charset character set name handed to the parser
     * @return the matching anchor nodes
     */
    private static NodeList findAnchors(String html, String charset) {
        Parser parser = Parser.createParser(html, charset);
        HtmlPage page = new HtmlPage(parser);
        try {
            parser.visitAllNodesWith(page);
        } catch (ParserException e) {
            // Best effort: report the problem instead of hiding it, then carry on.
            e.printStackTrace();
        }
        NodeFilter anchorFilter = new TagNameFilter("A");
        return page.getBody().extractAllNodesThatMatch(anchorFilter, true);
    }
}
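// Keyboard shortcuts of the code viewer this listing was copied from (not part of the program):
//   Copy code:          Ctrl + C
//   Search code:        Ctrl + F
//   Full-screen mode:   F11
//   Toggle theme:       Ctrl + Shift + D
//   Show shortcuts:     ?
//   Increase font size: Ctrl + =
//   Decrease font size: Ctrl + -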