📄 parsedhtml.java
字号:
private WebTable toWebTable(Element element) { return new WebTable(_response, _frame, element, _baseURL, _baseTarget, _characterSet); } private TextBlock toTextBlock(Element element) { return new TextBlock(_response, _frame, _baseURL, _baseTarget, element, _characterSet); } private TextBlock newTextBlock(Node textNode) { return new TextBlock(_response, _frame, _baseURL, _baseTarget, textNode, _characterSet); } private WebList toOrderedList(Element element) { return new WebList(_response, _frame, _baseURL, _baseTarget, element, _characterSet); } private void addToMaps(Node node, HTMLElement htmlElement) { _elements.put(node, htmlElement); if (htmlElement.getID() != null) { _elementsByID.put(htmlElement.getID(), htmlElement); } if (htmlElement.getName() != null) { addNamedElement(htmlElement.getName(), htmlElement); } } private void addNamedElement(String name, HTMLElement htmlElement) { List list = (List)_elementsByName.get(name); if (list == null) { _elementsByName.put(name, list = new ArrayList()); } list.add(htmlElement); } private void addToList(HTMLElement htmlElement) { ArrayList list = getListForElement(htmlElement); if (list != null) { list.add(htmlElement); } } private ArrayList getListForElement(HTMLElement element) { if (element instanceof WebLink) { return _linkList; } if (element instanceof WebForm) { return _formsList; } if (element instanceof WebImage) { return _imagesList; } if (element instanceof WebApplet) { return _appletList; } if (element instanceof WebTable) { return _tableList; } if (element instanceof WebFrame) { return _frameList; } if (element instanceof BlockElement) { return _blocksList; } return null; } /** * Returns the first link which contains the specified text. * * @param text * @return Link */ public WebLink getLinkWith(String text) { return getFirstMatchingLink(WebLink.MATCH_CONTAINED_TEXT, text); } /** * Returns the link which contains the first image with the specified text as its 'alt' * attribute. * * @param text * @return Link */ public WebLink getLinkWithImageText(String text) { WebImage image = getImageWithAltText(text); return image == null ? null : image.getLink(); } /** * Returns the link found in the page with the specified name. * * @param name * @return Link */ public WebLink getLinkWithName(String name) { return getFirstMatchingLink(WebLink.MATCH_NAME, name); } /** * Returns the first link found in the page matching the specified criteria. * * @param predicate * @param criteria * @return Link */ public WebLink getFirstMatchingLink(HTMLElementPredicate predicate, Object criteria) { WebLink[] links = getLinks(); for (int i = 0; i < links.length; i++) { if (predicate.matchesCriteria(links[i], criteria)) { return links[i]; } } return null; } /** * Returns all links found in the page matching the specified criteria. * * @param predicate * @param criteria * @return Links */ public WebLink[] getMatchingLinks(HTMLElementPredicate predicate, Object criteria) { ArrayList matches = new ArrayList(); WebLink[] links = getLinks(); for (int i = 0; i < links.length; i++) { if (predicate.matchesCriteria(links[i], criteria)) { matches.add(links[i]); } } return (WebLink[])matches.toArray(new WebLink[matches.size()]); } /** * Returns the image found in the page with the specified name. * * @param name * @return Image */ public WebImage getImageWithName(String name) { WebImage[] images = getImages(); for (int i = 0; i < images.length; i++) { if (HttpUnitUtils.matches(name, images[i].getName())) { return images[i]; } } return null; } /** * Returns the first image found in the page with the specified src attribute. * * @param source * @return Image */ public WebImage getImageWithSource(String source) { WebImage[] images = getImages(); for (int i = 0; i < images.length; i++) { if (HttpUnitUtils.matches(source, images[i].getSource())) { return images[i]; } } return null; } /** * Returns the first image found in the page with the specified alt attribute. * * @param altText * @return Image */ public WebImage getImageWithAltText(String altText) { WebImage[] images = getImages(); for (int i = 0; i < images.length; i++) { if (HttpUnitUtils.matches(altText, images[i].getAltText())) { return images[i]; } } return null; } /** * Returns the first table in the response which matches the specified predicate and value. Will * recurse into any nested tables, as needed. * * @param predicate * @param criteria * @return the selected table, or null if none is found */ public WebTable getFirstMatchingTable(HTMLElementPredicate predicate, Object criteria) { return getTableSatisfyingPredicate(getTables(), predicate, criteria); } /** * Returns the tables in the response which match the specified predicate and value. Will * recurse into any nested tables, as needed. * * @param predicate * @param criteria * @return the selected tables, or null if none are found */ public WebTable[] getMatchingTables(HTMLElementPredicate predicate, Object criteria) { return getTablesSatisfyingPredicate(getTables(), predicate, criteria); } /** * Returns the first table in the response which has the specified text as the full text of its * first non-blank row and non-blank column. Will recurse into any nested tables, as needed. * * @param text * @return the selected table, or null if none is found */ public WebTable getTableStartingWith(String text) { return getFirstMatchingTable(WebTable.MATCH_FIRST_NONBLANK_CELL, text); } /** * Returns the first table in the response which has the specified text as a prefix of the text * in its first non-blank row and non-blank column. Will recurse into any nested tables, as * needed. * * @param text * @return the selected table, or null if none is found */ public WebTable getTableStartingWithPrefix(String text) { return getFirstMatchingTable(WebTable.MATCH_FIRST_NONBLANK_CELL_PREFIX, text); } /** * Returns the first table in the response which has the specified text as its summary * attribute. Will recurse into any nested tables, as needed. * * @param summary * @return the selected table, or null if none is found */ public WebTable getTableWithSummary(String summary) { return getFirstMatchingTable(WebTable.MATCH_SUMMARY, summary); } /** * Returns the first table in the response which has the specified text as its ID attribute. * Will recurse into any nested tables, as needed. * * @param ID * @return the selected table, or null if none is found */ public WebTable getTableWithID(String ID) { return getFirstMatchingTable(WebTable.MATCH_ID, ID); } /** * Returns a copy of the domain object model associated with this page. * * @return Node */ public Node getDOM() { // JDo: see README // Better would be some read-only attribute // return getRootNode().cloneNode( /* deep */ true ); return getRootNode(); } // ---------------------------------- Object methods // -------------------------------- /** * @see java.lang.Object#toString() */ public String toString() { return _baseURL.toExternalForm() + System.getProperty("line.separator") + _rootNode; } // ---------------------------------- package members // -------------------------------- /** * Specifies the root node for this HTML fragment. */ void setRootNode(Node rootNode) { if (_rootNode != null && rootNode != _rootNode) { throw new IllegalStateException("The root node has already been defined as " + _rootNode + " and cannot be redefined as " + rootNode); } _rootNode = rootNode; _links = null; _forms = null; _images = null; _applets = null; _tables = null; _frames = null; _blocks = null; _updateElements = true; } /** * Returns the base URL for this HTML segment. */ URL getBaseURL() { return _baseURL; } WebResponse getResponse() { return _response; } /** * Returns the domain object model associated with this page, to be used internally. */ Node getOriginalDOM() { return getRootNode(); } /** * Returns the frames found in the page in the order in which they appear. * * @return Frames */ public WebFrame[] getFrames() { if (_frames == null) { loadElements(); _frames = (WebFrame[])_frameList.toArray(new WebFrame[_frameList.size()]); } return _frames; } // ---------------------------------- private members // -------------------------------- Node getRootNode() { if (_rootNode == null) { throw new IllegalStateException("The root node has not been specified"); } return _rootNode; } /** * Returns the table with the specified text in its summary attribute. */ private WebTable getTableSatisfyingPredicate(WebTable[] tables, HTMLElementPredicate predicate, Object value) { for (int i = 0; i < tables.length; i++) { if (predicate.matchesCriteria(tables[i], value)) { return tables[i]; } else { for (int j = 0; j < tables[i].getRowCount(); j++) { for (int k = 0; k < tables[i].getColumnCount(); k++) { TableCell cell = tables[i].getTableCell(j, k); if (cell != null) { WebTable[] innerTables = cell.getTables(); if (innerTables.length != 0) { WebTable result = getTableSatisfyingPredicate(innerTables, predicate, value); if (result != null) { return result; } } } } } } } return null; } /** * Returns the tables which match the specified criteria. */ private WebTable[] getTablesSatisfyingPredicate(WebTable[] tables, HTMLElementPredicate predicate, Object value) { ArrayList matches = new ArrayList(); for (int i = 0; i < tables.length; i++) { if (predicate.matchesCriteria(tables[i], value)) { matches.add(tables[i]); } for (int j = 0; j < tables[i].getRowCount(); j++) { for (int k = 0; k < tables[i].getColumnCount(); k++) { TableCell cell = tables[i].getTableCell(j, k); if (cell != null) { WebTable[] innerTables = cell.getTables(); if (innerTables.length != 0) { WebTable[] result = getTablesSatisfyingPredicate(innerTables, predicate, value); if (result != null && result.length > 0) { for (int l = 0; l < result.length; l++) { matches.add(result[l]); } } } } } } } if (matches.size() > 0) { return (WebTable[])matches.toArray(new WebTable[matches.size()]); } else { return null; } } class WebIFrame extends WebFrame implements ContentConcealer { /** * Constructor * * @param baseURL * @param frameNode * @param parentFrame */ public WebIFrame(URL baseURL, Node frameNode, FrameSelector parentFrame) { super(_response, baseURL, frameNode, parentFrame); } } class NoScriptElement extends HTMLElementBase implements ContentConcealer { /** * Constructor * * @param node */ public NoScriptElement(Node node) { super(node); } protected ScriptableDelegate newScriptable() { return null; } protected ScriptableDelegate getParentDelegate() { return null; } }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -