📄 parsedhtml.java
字号:
package com.meterware.httpunit;/********************************************************************************************************************* $Id: ParsedHTML.java,v 1.63 2006/03/09 01:52:28 russgold Exp $** Copyright (c) 2000-2004, Russell Gold** Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated* documentation files (the "Software"), to deal in the Software without restriction, including without limitation* the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and* to permit persons to whom the Software is furnished to do so, subject to the following conditions:** The above copyright notice and this permission notice shall be included in all copies or substantial portions* of the Software.** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO* THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF* CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER* DEALINGS IN THE SOFTWARE.********************************************************************************************************************/import org.w3c.dom.Element;import org.w3c.dom.Node;import org.w3c.dom.NodeList;import org.w3c.dom.Document;import java.net.URL;import java.util.*;import java.io.IOException;import com.meterware.httpunit.scripting.ScriptableDelegate;/** * @author <a href="mailto:russgold@httpunit.org">Russell Gold</a> * @author <a href="mailto:bx@bigfoot.com">Benoit Xhenseval</a> **/class ParsedHTML { final static private HTMLElement[] NO_ELEMENTS = new HTMLElement[0]; final static private String[] TEXT_ELEMENTS = { "p", "h1", "h2", "h3", "h4", "h5", "h6" }; private Node _rootNode; private URL _baseURL; private FrameSelector _frame; private String _baseTarget; private String _characterSet; private WebResponse _response; private boolean _updateElements = true; private boolean _enableNoScriptNodes; /** map of element IDs to elements. **/ private HashMap _elementsByID = new HashMap(); /** map of element names to lists of elements. 
**/ private HashMap _elementsByName = new HashMap(); /** map of DOM elements to HTML elements **/ private HashMap _elements = new HashMap(); private ArrayList _formsList = new ArrayList(); private WebForm[] _forms; private WebForm _activeForm; private ArrayList _imagesList = new ArrayList(); private WebImage[] _images; private ArrayList _linkList = new ArrayList(); private WebLink[] _links; private ArrayList _blocksList = new ArrayList(); private TextBlock[] _blocks; private ArrayList _appletList = new ArrayList(); private WebApplet[] _applets; private ArrayList _tableList = new ArrayList(); private WebTable[] _tables; private ArrayList _frameList = new ArrayList(); private WebFrame[] _frames; ParsedHTML( WebResponse response, FrameSelector frame, URL baseURL, String baseTarget, Node rootNode, String characterSet ) { _response = response; _frame = frame; _baseURL = baseURL; _baseTarget = baseTarget; _rootNode = rootNode; _characterSet = characterSet; } /** * Returns the forms found in the page in the order in which they appear. **/ public WebForm[] getForms() { if (_forms == null) { loadElements(); _forms = (WebForm[]) _formsList.toArray( new WebForm[ _formsList.size() ] ); } return _forms; } /** * Returns the links found in the page in the order in which they appear. **/ public WebLink[] getLinks() { if (_links == null) { loadElements(); _links = (WebLink[]) _linkList.toArray( new WebLink[ _linkList.size() ] ); } return _links; } /** * Returns a proxy for each applet found embedded in this page. */ public WebApplet[] getApplets() { if (_applets == null) { loadElements(); _applets = (WebApplet[]) _appletList.toArray( new WebApplet[ _appletList.size() ] ); } return _applets; } /** * Returns the images found in the page in the order in which they appear. 
**/
    public WebImage[] getImages() {
        if (_images != null) return _images;

        loadElements();
        WebImage[] target = new WebImage[ _imagesList.size() ];
        _images = (WebImage[]) _imagesList.toArray( target );
        return _images;
    }


    /**
     * Returns the text blocks of this page, in the order of the internal block list.
     * The array is built on the first call (after loadElements()) and cached.
     */
    public TextBlock[] getTextBlocks() {
        if (_blocks != null) return _blocks;

        loadElements();
        TextBlock[] target = new TextBlock[ _blocksList.size() ];
        _blocks = (TextBlock[]) _blocksList.toArray( target );
        return _blocks;
    }


    /**
     * Returns the first text block, in getTextBlocks() order, which the predicate accepts for the
     * given criteria, or null if none does.
     */
    public TextBlock getFirstMatchingTextBlock( HTMLElementPredicate predicate, Object criteria ) {
        TextBlock[] candidates = getTextBlocks();
        int position = 0;
        while (position < candidates.length) {
            TextBlock candidate = candidates[ position ];
            if (predicate.matchesCriteria( candidate, criteria )) return candidate;
            position++;
        }
        return null;
    }


    /**
     * Returns the block that follows the given one in the internal block list, or null if the given
     * block is not in that list or is its last entry. This method reads the list directly and does
     * not call loadElements() itself.
     */
    public TextBlock getNextTextBlock( TextBlock block ) {
        int successor = _blocksList.indexOf( block ) + 1;
        // successor == 0 means "not found"; successor == size means "block was the last entry"
        if (successor <= 0 || successor >= _blocksList.size()) return null;
        return (TextBlock) _blocksList.get( successor );
    }


    /**
     * Returns the tables of this page, in the order of the internal table list.
     * The array is built on the first call (after loadElements()) and cached.
     **/
    public WebTable[] getTables() {
        if (_tables != null) return _tables;

        loadElements();
        WebTable[] target = new WebTable[ _tableList.size() ];
        _tables = (WebTable[]) _tableList.toArray( target );
        return _tables;
    }


    /**
     * Returns the HTMLElement registered under the specified ID, or null if there is none.
     */
    public HTMLElement getElementWithID( String id ) {
        Object found = getElementWithID( id, HTMLElement.class );
        return (HTMLElement) found;
    }


    /**
     * Returns the HTML elements registered under the specified name; an empty array if the name is unknown.
     */
    public HTMLElement[] getElementsWithName( String name ) {
        loadElements();
        ArrayList named = (ArrayList) _elementsByName.get( name );
        if (named == null) {
            return NO_ELEMENTS;
        } else {
            return (HTMLElement[]) named.toArray( new HTMLElement[ named.size() ] );
        }
    }


    /**
     * Returns the HTML elements with an attribute with the specified name and value.
*/ public HTMLElement[] getElementsWithAttribute( String name, String value ) { loadElements(); ArrayList elements = new ArrayList(); for (Iterator i = _elements.values().iterator(); i.hasNext();) { HTMLElement element = (HTMLElement) i.next(); if (value.equals( element.getAttribute( name ))) elements.add( element ); } return (HTMLElement[]) elements.toArray( new HTMLElement[ elements.size() ] ); } /** * Returns a list of HTML element names contained in this HTML section. */ public String[] getElementNames() { loadElements(); return (String[]) _elementsByName.keySet().toArray( new String[ _elementsByName.size() ] ); } HTMLElement[] getElementsByTagName( Node dom, String name ) { loadElements(); if (dom instanceof Element) { return getElementsFromList( ((Element) dom).getElementsByTagName( name ) ); } else { return getElementsFromList( ((Document) dom).getElementsByTagName( name ) ); } } private HTMLElement[] getElementsFromList( NodeList nl ) { HTMLElement[] elements = new HTMLElement[ nl.getLength() ]; for (int i = 0; i < elements.length; i++) { Node node = nl.item(i); elements[i] = (HTMLElement) _elements.get( node ); if (elements[i] == null) { elements[i] = toDefaultElement( (Element) node ); _elements.put( node, elements[i] ); } } return elements; } /** * Returns the form found in the page with the specified ID. **/ public WebForm getFormWithID( String id ) { return (WebForm) getElementWithID( id, WebForm.class ); } /** * Returns the link found in the page with the specified ID. **/ public WebLink getLinkWithID( String id ) { return (WebLink) getElementWithID( id, WebLink.class ); } private Object getElementWithID( String id, final Class klass ) { loadElements(); return whenCast( _elementsByID.get( id ), klass ); } private Object whenCast( Object o, Class klass ) { return klass.isInstance( o ) ? o : null; } /** * Returns the first link found in the page matching the specified criteria. 
**/
    public WebForm getFirstMatchingForm( HTMLElementPredicate predicate, Object criteria ) {
        WebForm[] forms = getForms();
        for (int i = 0; i < forms.length; i++) {
            if (predicate.matchesCriteria( forms[i], criteria )) return forms[i];
        }
        return null;
    }


    /**
     * Returns all forms found in the page matching the specified criteria, in getForms() order.
     * The result is an empty array if no form matches.
     **/
    public WebForm[] getMatchingForms( HTMLElementPredicate predicate, Object criteria ) {
        ArrayList matches = new ArrayList();
        WebForm[] forms = getForms();
        for (int i = 0; i < forms.length; i++) {
            if (predicate.matchesCriteria( forms[i], criteria )) matches.add( forms[i] );
        }
        return (WebForm[]) matches.toArray( new WebForm[ matches.size() ] );
    }


    /**
     * Returns the form found in the page with the specified name, or null if there is none.
     **/
    public WebForm getFormWithName( String name ) {
        return getFirstMatchingForm( WebForm.MATCH_NAME, name );
    }


    /**
     * Runs the script belonging to the given element, if getScript() yields one.
     * While the script runs, _updateElements is false. Afterwards, even if the script throws,
     * setRootNode() is called with the current root node (presumably to re-enable and refresh
     * element tracking; that method is not in view here).
     */
    private void interpretScriptElement( Element element ) {
        String script = getScript( element );
        if (script != null) {
            try {
                _updateElements = false;
                String language = NodeUtils.getNodeAttribute( element, "language", null );
                // An unsupported language switches on noscript content; the script is still handed to runScript().
                if (!getResponse().getScriptableObject().supportsScript( language )) _enableNoScriptNodes = true;
                getResponse().getScriptableObject().runScript( language, script );
            } finally {
                setRootNode( _rootNode );
            }
        }
    }


    /**
     * Returns the script text for a script node: the text of its child nodes when it has no
     * "src" attribute, otherwise the contents of the included script that the attribute names.
     *
     * @throws RuntimeException if the included script cannot be loaded; the underlying
     *         IOException is attached as the cause
     */
    private String getScript( Node scriptNode ) {
        String scriptLocation = NodeUtils.getNodeAttribute( scriptNode, "src", null );
        if (scriptLocation == null) {
            return NodeUtils.asText( scriptNode.getChildNodes() );
        } else {
            try {
                return getIncludedScript( scriptLocation );
            } catch (IOException e) {
                // Keep the original exception as the cause so its stack trace is not lost.
                throw new RuntimeException( "Error loading included script: " + e, e );
            }
        }
    }


    /**
     * Returns the contents of an included script, given its src attribute.
     * @param srcAttribute the value of the script's src attribute, used together with the base URL to build the request
     * @return the contents of the script.
* @throws java.io.IOException if there is a problem retrieving the script
     * @throws IllegalStateException if the response has no window to fetch the script through
     */
    String getIncludedScript( String srcAttribute ) throws IOException {
        // The request is built from the base URL and the src attribute before the window is checked.
        WebRequest req = new GetMethodWebRequest( getBaseURL(), srcAttribute );
        WebWindow window = getResponse().getWindow();
        if (window == null) throw new IllegalStateException( "Unable to retrieve script included by this response, since it was loaded by getResource(). Use getResponse() instead.");
        return window.getResource( req ).getText();
    }


    /**
     * If noscript node content is enabled, returns null - otherwise returns a concealing element
     * (a NoScriptElement wrapping the given DOM element).
     */
    private HTMLElement toNoscriptElement( Element element ) {
        return _enableNoScriptNodes ? null : new NoScriptElement( element );
    }


    /**
     * Helper that records HTML elements met during a pre-order traversal.
     * NOTE(review): only the start of this class is visible here; the description is limited to that part.
     */
    static class HtmlElementRecorder {

        /**
         * Records a non-null element by passing it to addToMaps() and then addToLists().
         * A null element is ignored.
         */
        protected void recordHtmlElement( NodeUtils.PreOrderTraversal pot, Node node, HTMLElement htmlElement ) {
            if (htmlElement != null) {
                addToMaps( pot, node, htmlElement );
                addToLists( pot, htmlElement );
            }
        }

        /**
         * Walks the contexts supplied by the traversal. The loop body is not in view here.
         */
        protected void addToLists( NodeUtils.PreOrderTraversal pot, HTMLElement htmlElement ) {
            for (Iterator i = pot.getContexts(); i.hasNext();) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -