// TableContentByHtmlParse.java
// Listing origin: "ajax lucene partial source code, HTMLParser.java Muil" - Java code.
// (The code-hosting page reported the original listing as 64 lines long.)
package test;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.List;
import net.htmlparser.jericho.Element;
import net.htmlparser.jericho.HTMLElementName;
import net.htmlparser.jericho.Source;
/**
 * Locates individual table cells in an HTML document using the Jericho HTML parser
 * ({@code net.htmlparser.jericho}).
 *
 * <p>A cell is addressed by a zero-based row index and a zero-based column index within a
 * given element (normally a {@code <table>}). All tables of a page can be processed by
 * iterating over every {@code TABLE} element, as {@link #test(String)} does.
 *
 * <p>Nested tables are not handled specially; the original author notes that further changes
 * may be needed for them. Only {@code TD} cells are counted as columns.
 *
 * @author Administrator
 */
public class TableContentByHtmlParse {

    /** Zero-based row index looked up by {@link #test(String)}. */
    private static final int SAMPLE_ROW = 2;

    /** Zero-based column index looked up by {@link #test(String)}. */
    private static final int SAMPLE_COL = 2;

    /**
     * Returns the {@code TD} element found at the given row and column of {@code element}.
     *
     * <p>This method has no side effects; callers decide what to do with the returned cell.
     *
     * @param element the element (typically a {@code TABLE}) to search; must not be {@code null}
     * @param rows zero-based index into the {@code TR} elements found in {@code element}
     * @param cols zero-based index into the {@code TD} elements found in the selected row
     * @return the matching cell, or {@code null} when either index is negative or lies past the
     *     end of the available rows / cells
     */
    public Element getTableContent(Element element, int rows, int cols) {
        // Negative indexes can never match; treat them like any other out-of-range request
        // instead of letting List.get throw IndexOutOfBoundsException.
        if (rows < 0 || cols < 0) {
            return null;
        }
        List<Element> trList = element.findAllElements(HTMLElementName.TR);
        if (rows >= trList.size()) {
            return null;
        }
        List<Element> tdList = trList.get(rows).findAllElements(HTMLElementName.TD);
        if (cols >= tdList.size()) {
            return null;
        }
        return tdList.get(cols);
    }

    /**
     * Loads the page at {@code url} and, for every {@code TABLE} element in it, prints the
     * content of the cell at row {@value #SAMPLE_ROW}, column {@value #SAMPLE_COL} (both
     * zero-based) to standard output. Tables without such a cell are skipped.
     *
     * <p>If the URL is malformed or the page cannot be read, the failure is reported on
     * standard error and the method returns without processing anything.
     *
     * @param url address of the HTML page to parse
     */
    public static void test(String url) {
        Source source;
        try {
            source = new Source(new URL(url));
        } catch (MalformedURLException e) {
            System.err.println("Invalid URL: " + url);
            e.printStackTrace();
            return; // nothing to parse; continuing would dereference a null Source
        } catch (IOException e) {
            System.err.println("Failed to load: " + url);
            e.printStackTrace();
            return; // nothing to parse; continuing would dereference a null Source
        }

        TableContentByHtmlParse parser = new TableContentByHtmlParse();
        for (Element table : source.findAllElements(HTMLElementName.TABLE)) {
            Element cell = parser.getTableContent(table, SAMPLE_ROW, SAMPLE_COL);
            if (cell != null) {
                System.out.println(cell.getContent().toString());
            }
        }
    }

    /**
     * Runs {@link #test(String)} against a fixed sample page.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        test("http://www.akae.cn/study/resourceShareC.html");
    }
}
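// Keyboard shortcut help left over from the code-viewer web page (not part of the program):
//   Copy code: Ctrl + C
//   Search code: Ctrl + F
//   Full-screen mode: F11
//   Increase font size: Ctrl + =
//   Decrease font size: Ctrl + -
//   Show shortcuts: ?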