e1017. getting the links in an html document.txt
来自「这里面包含了一百多个JAVA源文件」· 文本 代码 · 共 41 行
TXT
41 行
// This method takes a URI which can be either a filename (e.g. file://c:/dir/file.html)
// or a URL (e.g. http://host.com/page.html) and returns all HREF links in the document.
public static String[] getLinks(String uriStr) {
List result = new ArrayList();
try {
// Create a reader on the HTML content
URL url = new URI(uriStr).toURL();
URLConnection conn = url.openConnection();
Reader rd = new InputStreamReader(conn.getInputStream());
// Parse the HTML
EditorKit kit = new HTMLEditorKit();
HTMLDocument doc = (HTMLDocument)kit.createDefaultDocument();
kit.read(rd, doc, 0);
// Find all the A elements in the HTML document
HTMLDocument.Iterator it = doc.getIterator(HTML.Tag.A);
while (it.isValid()) {
SimpleAttributeSet s = (SimpleAttributeSet)it.getAttributes();
String link = (String)s.getAttribute(HTML.Attribute.HREF);
if (link != null) {
// Add the link to the result list
result.add(link);
}
it.next();
}
} catch (MalformedURLException e) {
} catch (URISyntaxException e) {
} catch (BadLocationException e) {
} catch (IOException e) {
}
// Return all found links
return (String[])result.toArray(new String[result.size()]);
}
Related Examples
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?