📄 htmlparsertest.java
字号:
package demo;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.AndFilter;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.HasChildFilter;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.filters.OrFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.nodes.TextNode;
import org.htmlparser.tags.CompositeTag;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.tags.TableTag;
import org.htmlparser.tags.TitleTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
import org.htmlparser.visitors.HtmlPage;
import org.htmlparser.visitors.ObjectFindingVisitor;

/**
 * Scratch driver that loads a saved HTML report from disk and runs
 * org.htmlparser filters over it, printing what it finds to standard output.
 *
 * NOTE(review): the listing this class was recovered from is truncated; the
 * helper methods named in the disabled calls inside {@link #main} are not
 * visible here.
 */
public class HTMLParserTest {

    /** Number of {@code <td>} cells a row must have for {@link #test7} to read indexes 1..4. */
    private static final int MIN_CELLS_PER_ROW = 5;

    /**
     * Reads the hard-coded report file into memory and passes its text to
     * {@link #test7(String)}.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the report file cannot be read
     */
    public static void main(String args[]) throws Exception {
        String path = "D:\\ticket\\demo_ liyidan_report.htm";
        String result = readFile(path);

        // Alternative experiments that were disabled in the original; the
        // methods they call are not part of the visible source.
        //readAll(result);
        //readTextAndLink(result);
        //readByHtml(result);
        //readTextAndTitle(result);
        //readTable(result,"sessionline");
        test7(result);
    }

    /**
     * Reads a text file line by line and returns its content with every line
     * terminated by CRLF.
     *
     * The file is decoded with the platform default charset, as in the
     * original code. NOTE(review): the parsers below are told the content is
     * GBK, so this presumably only works where the platform default matches
     * the file's encoding; confirm before relying on it elsewhere.
     *
     * @param path location of the file to read
     * @return the file content, each line followed by {@code "\r\n"}
     * @throws IOException if the file cannot be opened or read
     */
    private static String readFile(String path) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(new File(path)));
        try {
            StringBuffer content = new StringBuffer();
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line);
                content.append("\r\n");
            }
            return content.toString();
        } finally {
            // Close even when readLine() fails so the file handle is not leaked.
            reader.close();
        }
    }

    /**
     * Prints the HTML of every {@code table} tag in the document and, when the
     * markup contains both {@code href=} and {@code target}, the text found
     * between them.
     *
     * @param resource the text handed to the {@link Parser} constructor
     * @throws Exception if parsing fails
     */
    private static void test6(String resource) throws Exception {
        Parser myParser = new Parser(resource);
        myParser.setEncoding("GBK");

        NodeFilter tableFilter = new TagNameFilter("table");
        NodeList tables = myParser.extractAllNodesThatMatch(tableFilter);

        for (int i = 0; i < tables.size(); i++) {
            TableTag tableTag = (TableTag) tables.elementAt(i);
            String html = tableTag.toHtml();
            String link = extractHrefValue(html);

            System.out.println(html);
            System.out.println("==============");
            if (link != null) {
                System.out.println(link);
                System.out.println("==============");
            }
        }
    }

    /**
     * Cuts out the text between the first {@code href=} and the first
     * {@code target} in a piece of markup.
     *
     * The offsets (+6 after the start of {@code href=}, -2 before
     * {@code target}) are kept from the original code; they skip
     * {@code href=} plus one character and drop the two characters before
     * {@code target}, which presumably are the quotes and separating space.
     *
     * @param html markup to search
     * @return the extracted text, or {@code null} when either marker is
     *         missing or they are too close together or out of order
     */
    private static String extractHrefValue(String html) {
        int hrefAt = html.indexOf("href=");
        int targetAt = html.indexOf("target");
        if (hrefAt < 0 || targetAt < 0) {
            return null;
        }
        int from = hrefAt + 6;
        int to = targetAt - 2;
        if (from > to) {
            return null;
        }
        return html.substring(from, to);
    }

    /**
     * Finds every {@code tr} tag whose {@code class} attribute is
     * {@code sessionline} and prints the plain text of its cells at indexes
     * 1 to 4, separated by single spaces.
     *
     * Rows with fewer than {@value #MIN_CELLS_PER_ROW} {@code td} cells are
     * skipped instead of failing. A {@link ParserException} is caught and its
     * stack trace printed, as in the original.
     *
     * @param result the text handed to the {@link Parser} constructor
     */
    private static void test7(String result) {
        try {
            Parser parser = new Parser(result);
            parser.setEncoding("gbk");

            TagNameFilter rowFilter = new TagNameFilter("tr");
            HasAttributeFilter classFilter = new HasAttributeFilter("class", "sessionline");
            // A third predicate was disabled in the original:
            // HasAttributeFilter tableAttribute3 = new HasAttributeFilter("bgcolor", "#FFFFFF");

            AndFilter rowWithClass = new AndFilter();
            rowWithClass.setPredicates(new NodeFilter[] { rowFilter, classFilter });

            NodeList rows = parser.extractAllNodesThatMatch(rowWithClass);
            for (int i = 0; i < rows.size(); i++) {
                NodeList cells = new NodeList();
                rows.elementAt(i).collectInto(cells, new TagNameFilter("td"));

                // Indexes 1..4 are read below; a shorter row cannot supply them.
                if (cells.size() < MIN_CELLS_PER_ROW) {
                    continue;
                }

                String cell1 = cells.elementAt(1).toPlainTextString();
                String cell2 = cells.elementAt(2).toPlainTextString();
                String cell3 = cells.elementAt(3).toPlainTextString();
                String cell4 = cells.elementAt(4).toPlainTextString();

                // Disabled in the original: mapping the cells onto a Fund
                // entity and persisting it (Fund and fundDao are not visible here).
                // Fund fund = new Fund();
                //fund.setName(nl2.elementAt(1).toPlainTextString());
                //fund.setCode(nl2.elementAt(2).toPlainTextString());
                //fund.setCompany(nl2.elementAt(5).toPlainTextString());
                //fund.setManager(nl2.elementAt(6).toPlainTextString());
                //fund.setScale(new BigDecimal(nl2.elementAt(7).toPlainTextString()));
                // fundDao.persistFund(fund);

                System.out.println(cell1 + " " + cell2 + " " + cell3 + " " + cell4);
            }
        } catch (ParserException e) {
            e.printStackTrace();
        }
    }

    // NOTE(review): the source listing ends here in the middle of a comment
    // whose text is mis-encoded (it presumably introduced a page-based
    // processing method). The remaining methods of this class are missing.
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -