📄 parsedhtml.java
字号:
/**
 * Reports whether the specified node carries an 'href' attribute.
 **/
private boolean isWebLink( Node node ) {
    Node href = node.getAttributes().getNamedItem( "href" );
    return href != null;
}


/**
 * Creates a WebImage for the specified element, handing it this object along with the
 * response, base URL, frame, base target and character set.
 **/
private WebImage toWebImage( Element child ) {
    WebImage image = new WebImage( _response, this, _baseURL, child, _frame, _baseTarget, _characterSet );
    return image;
}


/**
 * Creates a WebApplet for the specified element, using the response and base target.
 **/
private WebApplet toWebApplet( Element element ) {
    WebApplet applet = new WebApplet( _response, element, _baseTarget );
    return applet;
}


/**
 * Creates a WebTable for the specified element.
 **/
private WebTable toWebTable( Element element ) {
    WebTable table = new WebTable( _response, _frame, element, _baseURL, _baseTarget, _characterSet );
    return table;
}


/**
 * Creates a TextBlock for the specified element.
 **/
private TextBlock toTextBlock( Element element ) {
    TextBlock block = new TextBlock( _response, _frame, _baseURL, _baseTarget, element, _characterSet );
    return block;
}


/**
 * Creates a TextBlock for the specified text node.
 **/
private TextBlock newTextBlock( Node textNode ) {
    TextBlock block = new TextBlock( _response, _frame, _baseURL, _baseTarget, textNode, _characterSet );
    return block;
}


/**
 * Creates a WebList for the specified element.
 **/
private WebList toOrderedList( Element element ) {
    WebList webList = new WebList( _response, _frame, _baseURL, _baseTarget, element, _characterSet );
    return webList;
}


/**
 * Registers the HTML element under its DOM node and, when they are defined, under its ID
 * and its name as well.
 **/
private void addToMaps( Node node, HTMLElement htmlElement ) {
    _elements.put( node, htmlElement );

    if (htmlElement.getID() != null) {
        _elementsByID.put( htmlElement.getID(), htmlElement );
    }

    if (htmlElement.getName() != null) {
        addNamedElement( htmlElement.getName(), htmlElement );
    }
}


/**
 * Appends the element to the list of elements which share the specified name, creating
 * and registering that list on first use.
 **/
private void addNamedElement( String name, HTMLElement htmlElement ) {
    List sameNameElements = (List) _elementsByName.get( name );
    if (sameNameElements == null) {
        sameNameElements = new ArrayList();
        _elementsByName.put( name, sameNameElements );
    }
    sameNameElements.add( htmlElement );
}


/**
 * Adds the element to the list which collects elements of its kind. Elements for which
 * no such list exists are ignored.
 **/
private void addToList( HTMLElement htmlElement ) {
    ArrayList target = getListForElement( htmlElement );
    if (target == null) return;
    target.add( htmlElement );
}


/**
 * Selects the list which collects elements of the same kind as the one specified.
 * The checks are made in a fixed order and the first matching type wins.
 * @return the matching list, or null if the element is of none of the collected kinds
 **/
private ArrayList getListForElement( HTMLElement element ) {
    ArrayList selected = null;
    if (element instanceof WebLink) {
        selected = _linkList;
    } else if (element instanceof WebForm) {
        selected = _formsList;
    } else if (element instanceof WebImage) {
        selected = _imagesList;
    } else if (element instanceof WebApplet) {
        selected = _appletList;
    } else if (element instanceof WebTable) {
        selected = _tableList;
    } else if (element instanceof WebFrame) {
        selected = _frameList;
    } else if (element instanceof BlockElement) {
        selected = _blocksList;
    }
    return selected;
}


/**
 * Returns the first link which contains the specified text, or null if there is none.
 **/
public WebLink getLinkWith( String text ) {
    WebLink firstMatch = getFirstMatchingLink( WebLink.MATCH_CONTAINED_TEXT, text );
    return firstMatch;
}


/**
 * Returns the link reported by the first image which has the specified text as its
 * 'alt' attribute, or null if no such image is found.
 **/
public WebLink getLinkWithImageText( String text ) {
    WebImage image = getImageWithAltText( text );
    if (image == null) return null;
    return image.getLink();
}


/**
 * Returns the first link in the page with the specified name, or null if there is none.
 **/
public WebLink getLinkWithName( String name ) {
    WebLink firstMatch = getFirstMatchingLink( WebLink.MATCH_NAME, name );
    return firstMatch;
}


/**
 * Returns the first link in the page for which the predicate accepts the specified criteria.
 * Links are examined in the order returned by getLinks().
 * @return the first accepted link, or null if no link is accepted
 **/
public WebLink getFirstMatchingLink( HTMLElementPredicate predicate, Object criteria ) {
    WebLink[] candidates = getLinks();
    for (int index = 0; index < candidates.length; index++) {
        WebLink candidate = candidates[ index ];
        if (predicate.matchesCriteria( candidate, criteria )) return candidate;
    }
    return null;
}


/**
 * Returns every link in the page for which the predicate accepts the specified criteria.
 * @return the accepted links in page order; an empty array if no link is accepted
 **/
public WebLink[] getMatchingLinks( HTMLElementPredicate predicate, Object criteria ) {
    WebLink[] candidates = getLinks();
    ArrayList accepted = new ArrayList();
    for (int index = 0; index < candidates.length; index++) {
        WebLink candidate = candidates[ index ];
        if (predicate.matchesCriteria( candidate, criteria )) {
            accepted.add( candidate );
        }
    }
    return (WebLink[]) accepted.toArray( new WebLink[ accepted.size() ] );
}


/**
 * Returns the first image in the page whose name matches the one specified,
 * or null if there is none.
 **/
public WebImage getImageWithName( String name ) {
    WebImage[] candidates = getImages();
    for (int index = 0; index < candidates.length; index++) {
        WebImage candidate = candidates[ index ];
        if (HttpUnitUtils.matches( name, candidate.getName() )) return candidate;
    }
    return null;
}


/**
 * Returns the first image found in the page with the specified src attribute.
**/
public WebImage getImageWithSource( String source ) {
    // images are examined in page order; null signals that none matched
    WebImage[] candidates = getImages();
    for (int index = 0; index < candidates.length; index++) {
        WebImage candidate = candidates[ index ];
        if (HttpUnitUtils.matches( source, candidate.getSource() )) return candidate;
    }
    return null;
}


/**
 * Returns the first image in the page whose alt text matches the text specified,
 * or null if there is none.
 **/
public WebImage getImageWithAltText( String altText ) {
    WebImage[] candidates = getImages();
    for (int index = 0; index < candidates.length; index++) {
        WebImage candidate = candidates[ index ];
        if (HttpUnitUtils.matches( altText, candidate.getAltText() )) return candidate;
    }
    return null;
}


/**
 * Returns the first table in the response for which the predicate accepts the specified value.
 * The search starts from the tables returned by getTables() and descends into nested tables.
 * @return the selected table, or null if none is found
 **/
public WebTable getFirstMatchingTable( HTMLElementPredicate predicate, Object criteria ) {
    WebTable[] topLevelTables = getTables();
    return getTableSatisfyingPredicate( topLevelTables, predicate, criteria );
}


/**
 * Returns all tables in the response for which the predicate accepts the specified value.
 * The search starts from the tables returned by getTables() and descends into nested tables.
 * @return the selected tables, or null if none are found
 **/
public WebTable[] getMatchingTables( HTMLElementPredicate predicate, Object criteria ) {
    WebTable[] topLevelTables = getTables();
    return getTablesSatisfyingPredicate( topLevelTables, predicate, criteria );
}


/**
 * Returns the first table in the response whose first non-blank row and non-blank column
 * holds exactly the specified text. Nested tables are searched as needed.
 * @return the selected table, or null if none is found
 **/
public WebTable getTableStartingWith( String text ) {
    WebTable firstMatch = getFirstMatchingTable( WebTable.MATCH_FIRST_NONBLANK_CELL, text );
    return firstMatch;
}


/**
 * Returns the first table in the response which has the specified text as a prefix of the text
 * in its first non-blank row and non-blank column. Will recurse into any nested tables, as needed.
* @return the selected table, or null if none is found **/ public WebTable getTableStartingWithPrefix( String text ) { return getFirstMatchingTable( WebTable.MATCH_FIRST_NONBLANK_CELL_PREFIX, text ); } /** * Returns the first table in the response which has the specified text as its summary attribute. * Will recurse into any nested tables, as needed. * @return the selected table, or null if none is found **/ public WebTable getTableWithSummary( String summary ) { return getFirstMatchingTable( WebTable.MATCH_SUMMARY, summary ); } /** * Returns the first table in the response which has the specified text as its ID attribute. * Will recurse into any nested tables, as needed. * @return the selected table, or null if none is found **/ public WebTable getTableWithID( String ID ) { return getFirstMatchingTable( WebTable.MATCH_ID, ID ); } /** * Returns a copy of the domain object model associated with this page. **/ public Node getDOM() { return getRootNode().cloneNode( /* deep */ true ); }//---------------------------------- Object methods -------------------------------- public String toString() { return _baseURL.toExternalForm() + System.getProperty( "line.separator" ) + _rootNode; }//---------------------------------- package members -------------------------------- /** * Specifies the root node for this HTML fragment. */ void setRootNode( Node rootNode ) { if (_rootNode != null && rootNode != _rootNode ) throw new IllegalStateException( "The root node has already been defined as " + _rootNode + " and cannot be redefined as " + rootNode ); _rootNode = rootNode; _links = null; _forms = null; _images = null; _applets = null; _tables = null; _frames = null; _blocks = null; _updateElements = true; } /** * Returns the base URL for this HTML segment. **/ URL getBaseURL() { return _baseURL; } WebResponse getResponse() { return _response; } /** * Returns the domain object model associated with this page, to be used internally. 
**/ Node getOriginalDOM() { return getRootNode(); } /** * Returns the frames found in the page in the order in which they appear. **/ public WebFrame[] getFrames() { if (_frames == null) { loadElements(); _frames = (WebFrame[]) _frameList.toArray( new WebFrame[ _frameList.size() ] ); } return _frames; }//---------------------------------- private members -------------------------------- Node getRootNode() { if (_rootNode == null) throw new IllegalStateException( "The root node has not been specified" ); return _rootNode; } /** * Returns the table with the specified text in its summary attribute. **/ private WebTable getTableSatisfyingPredicate( WebTable[] tables, HTMLElementPredicate predicate, Object value ) { for (int i = 0; i < tables.length; i++) { if (predicate.matchesCriteria( tables[i], value )) { return tables[i]; } else { for (int j = 0; j < tables[i].getRowCount(); j++) { for (int k = 0; k < tables[i].getColumnCount(); k++) { TableCell cell = tables[i].getTableCell(j,k); if (cell != null) { WebTable[] innerTables = cell.getTables(); if (innerTables.length != 0) { WebTable result = getTableSatisfyingPredicate( innerTables, predicate, value ); if (result != null) return result; } } } } } } return null; } /** * Returns the tables which match the specified criteria. 
**/ private WebTable[] getTablesSatisfyingPredicate(WebTable[] tables, HTMLElementPredicate predicate, Object value) { ArrayList matches = new ArrayList(); for (int i = 0; i < tables.length; i++) { if (predicate.matchesCriteria(tables[i], value)) { matches.add(tables[i]); } for (int j = 0; j < tables[i].getRowCount(); j++) { for (int k = 0; k < tables[i].getColumnCount(); k++) { TableCell cell = tables[i].getTableCell(j, k); if (cell != null) { WebTable[] innerTables = cell.getTables(); if (innerTables.length != 0) { WebTable[] result = getTablesSatisfyingPredicate(innerTables, predicate, value); if (result != null && result.length > 0) { for (int l = 0; l < result.length; l++) { matches.add(result[l]); } } } } } } } if(matches.size() > 0) { return (WebTable[]) matches.toArray( new WebTable[ matches.size() ] ); } else { return null; } } class WebIFrame extends WebFrame implements ContentConcealer { public WebIFrame( URL baseURL, Node frameNode, FrameSelector parentFrame ) { super( _response, baseURL, frameNode, parentFrame ); } } class NoScriptElement extends HTMLElementBase implements ContentConcealer { public NoScriptElement( Node node ) { super( node ); } protected ScriptableDelegate newScriptable() { return null; } protected ScriptableDelegate getParentDelegate() { return null; } }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -