// File: tablecontentbyhtmlparse.java
// (code-viewer page residue) Font size:
package test;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.List;
import net.htmlparser.jericho.Element;
import net.htmlparser.jericho.HTMLElementName;
import net.htmlparser.jericho.Source;
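/**
 * 1. Implemented with the htmlparser.jericho library.
 * 2. Locates a table cell by row and column and returns that cell element.
 * 3. Can retrieve all tables.
 * 4. Nested tables may still require further changes.
 *
 * @author Administrator
 *
 */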
public class TableContentByHtmlParse {

    /**
     * Returns the cell at the given zero-based row and column of a table.
     *
     * <p>Rows are the {@code TR} elements found under {@code element}; columns
     * are the {@code TD} elements found under the selected row. Only
     * {@code TD} elements are counted as cells.
     *
     * <p>This method no longer prints the cell content itself; callers that
     * want output should print the returned element (as {@link #test(String)}
     * does). Previously each found cell was printed twice.
     *
     * @param element the element to search, typically a {@code TABLE}; may be
     *                {@code null}
     * @param rows    zero-based row index
     * @param cols    zero-based column index within the row
     * @return the matching cell element, or {@code null} if {@code element}
     *         is {@code null} or either index is negative or out of range
     */
    public Element getTableContent(Element element, int rows, int cols) {
        // Guard against null input and negative indices, which would
        // otherwise surface as NullPointerException / IndexOutOfBoundsException.
        if (element == null || rows < 0 || cols < 0) {
            return null;
        }

        // NOTE(review): rows belonging to nested tables are presumably
        // included in this list as well — confirm against the Jericho docs.
        List<Element> trList = element.findAllElements(HTMLElementName.TR);
        if (rows >= trList.size()) {
            return null;
        }

        List<Element> tdList = trList.get(rows).findAllElements(HTMLElementName.TD);
        if (cols >= tdList.size()) {
            return null;
        }
        return tdList.get(cols);
    }

    /**
     * Loads the document at {@code url} and, for every {@code TABLE} element
     * in it, prints the content of the cell at row index 2, column index 2
     * (when such a cell exists) to standard output.
     *
     * <p>If the URL is malformed or the document cannot be read, an error is
     * reported on standard error and the method returns without processing.
     *
     * @param url address of the HTML document to inspect
     */
    public static void test(String url) {
        final Source source;
        try {
            source = new Source(new URL(url));
        } catch (IOException e) {
            // MalformedURLException is a subclass of IOException, so this
            // single handler covers both failure modes. Bail out here:
            // continuing would dereference an unassigned source.
            System.err.println("Failed to load HTML from " + url + ": " + e);
            return;
        }

        // One helper instance is enough; it holds no state.
        TableContentByHtmlParse extractor = new TableContentByHtmlParse();
        List<Element> elementList = source.findAllElements(HTMLElementName.TABLE);
        for (Element element : elementList) {
            Element currentElement = extractor.getTableContent(element, 2, 2);
            if (currentElement != null) {
                System.out.println(currentElement.getContent().toString());
            }
        }
    }

    /**
     * Runs {@link #test(String)} against a fixed sample page.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        test("http://www.akae.cn/study/resourceShareC.html");
    }
}
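/*
 * Keyboard shortcuts (residue from the code-viewer page this file was copied from):
 *   Copy code:            Ctrl + C
 *   Search code:          Ctrl + F
 *   Full-screen mode:     F11
 *   Toggle theme:         Ctrl + Shift + D
 *   Show shortcuts:       ?
 *   Increase font size:   Ctrl + =
 *   Decrease font size:   Ctrl + -
 */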