📄 parsedhtml.java
字号:
package com.meterware.httpunit;/******************************************************************************************************************** * $Id: ParsedHTML.java 584966 2007-10-15 23:10:50Z gseitz $ * * Copyright (c) 2000-2004, Russell Gold * * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated * documentation files (the "Software"), to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and * to permit persons to whom the Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all copies or substantial portions * of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO * THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. 
* *******************************************************************************************************************/import java.io.IOException;import java.net.URL;import java.util.ArrayList;import java.util.Arrays;import java.util.HashMap;import java.util.Iterator;import java.util.List;import org.w3c.dom.Document;import org.w3c.dom.Element;import org.w3c.dom.Node;import org.w3c.dom.NodeList;import com.meterware.httpunit.scripting.ScriptableDelegate;/** * @author <a href="mailto:russgold@httpunit.org">Russell Gold</a> * @author <a href="mailto:bx@bigfoot.com">Benoit Xhenseval</a> */class ParsedHTML{ final static private HTMLElement[] NO_ELEMENTS = new HTMLElement[0]; final static private String[] TEXT_ELEMENTS = { "p", "h1", "h2", "h3", "h4", "h5", "h6" }; private Node _rootNode; private URL _baseURL; private FrameSelector _frame; private String _baseTarget; private String _characterSet; private WebResponse _response; private boolean _updateElements = true; private boolean _enableNoScriptNodes; /** map of element IDs to elements. * */ private HashMap _elementsByID = new HashMap(); /** map of element names to lists of elements. 
* */ private HashMap _elementsByName = new HashMap(); /** map of DOM elements to HTML elements * */ private HashMap _elements = new HashMap(); private ArrayList _formsList = new ArrayList(); private WebForm[] _forms; private WebForm _activeForm; private ArrayList _imagesList = new ArrayList(); private WebImage[] _images; private ArrayList _linkList = new ArrayList(); private WebLink[] _links; private ArrayList _blocksList = new ArrayList(); private TextBlock[] _blocks; private ArrayList _appletList = new ArrayList(); private WebApplet[] _applets; private ArrayList _tableList = new ArrayList(); private WebTable[] _tables; private ArrayList _frameList = new ArrayList(); private WebFrame[] _frames; ParsedHTML(WebResponse response, FrameSelector frame, URL baseURL, String baseTarget, Node rootNode, String characterSet) { _response = response; _frame = frame; _baseURL = baseURL; _baseTarget = baseTarget; _rootNode = rootNode; _characterSet = characterSet; } /** * Returns the forms found in the page in the order in which they appear. * * @return Forms */ public WebForm[] getForms() { if (_forms == null) { loadElements(); _forms = (WebForm[])_formsList.toArray(new WebForm[_formsList.size()]); } return _forms; } /** * Returns the links found in the page in the order in which they appear. * * @return Links */ public WebLink[] getLinks() { if (_links == null) { loadElements(); _links = (WebLink[])_linkList.toArray(new WebLink[_linkList.size()]); } return _links; } /** * Returns a proxy for each applet found embedded in this page. * * @return Applets */ public WebApplet[] getApplets() { if (_applets == null) { loadElements(); _applets = (WebApplet[])_appletList.toArray(new WebApplet[_appletList.size()]); } return _applets; } /** * Returns the images found in the page in the order in which they appear. 
* * @return Images */ public WebImage[] getImages() { if (_images == null) { loadElements(); _images = (WebImage[])_imagesList.toArray(new WebImage[_imagesList.size()]); } return _images; } /** * Returns the top-level block elements found in the page in the order in which they appear. * * @return Text blocks */ public TextBlock[] getTextBlocks() { if (_blocks == null) { loadElements(); _blocks = (TextBlock[])_blocksList.toArray(new TextBlock[_blocksList.size()]); } return _blocks; } /** * Returns the first text block found in the page which matches the specified predicate and * value. * * @param predicate * @param criteria * @return Text block */ public TextBlock getFirstMatchingTextBlock(HTMLElementPredicate predicate, Object criteria) { TextBlock[] blocks = getTextBlocks(); for (int i = 0; i < blocks.length; i++) { if (predicate.matchesCriteria(blocks[i], criteria)) { return blocks[i]; } } return null; } /** * @param block * @return Text block */ public TextBlock getNextTextBlock(TextBlock block) { int index = _blocksList.indexOf(block); if (index < 0 || index == _blocksList.size() - 1) { return null; } return (TextBlock)_blocksList.get(index + 1); } /** * Returns the top-level tables found in the page in the order in which they appear. * * @return Tables */ public WebTable[] getTables() { if (_tables == null) { loadElements(); _tables = (WebTable[])_tableList.toArray(new WebTable[_tableList.size()]); } return _tables; } /** * Returns the HTMLElement with the specified ID. * * @param id * @return Element */ public HTMLElement getElementWithID(String id) { return (HTMLElement)getElementWithID(id, HTMLElement.class); } /** * Returns the HTML elements with the specified name. * * @param name * @return Element */ public HTMLElement[] getElementsWithName(String name) { loadElements(); ArrayList elements = (ArrayList)_elementsByName.get(name); return elements == null ? 
NO_ELEMENTS : (HTMLElement[])elements .toArray(new HTMLElement[elements.size()]); } /** * Returns the HTML elements with an attribute with the specified name and value. * * @param name * @param value * @return Elements */ public HTMLElement[] getElementsWithAttribute(String name, String value) { loadElements(); ArrayList elements = new ArrayList(); for (Iterator i = _elements.values().iterator(); i.hasNext();) { HTMLElement element = (HTMLElement)i.next(); if (value.equals(element.getAttribute(name))) { elements.add(element); } } return (HTMLElement[])elements.toArray(new HTMLElement[elements.size()]); } /** * Returns a list of HTML element names contained in this HTML section. * * @return Names */ public String[] getElementNames() { loadElements(); return (String[])_elementsByName.keySet().toArray(new String[_elementsByName.size()]); } HTMLElement[] getElementsByTagName(Node dom, String name) { loadElements(); if (dom instanceof Element) { return getElementsFromList(((Element)dom).getElementsByTagName(name)); } else { return getElementsFromList(((Document)dom).getElementsByTagName(name)); } } private HTMLElement[] getElementsFromList(NodeList nl) { HTMLElement[] elements = new HTMLElement[nl.getLength()]; for (int i = 0; i < elements.length; i++) { Node node = nl.item(i); elements[i] = (HTMLElement)_elements.get(node); if (elements[i] == null) { elements[i] = toDefaultElement((Element)node); _elements.put(node, elements[i]); } } return elements; } /** * Returns the form found in the page with the specified ID. * * @param id * @return Form */ public WebForm getFormWithID(String id) { return (WebForm)getElementWithID(id, WebForm.class); } /** * Returns the link found in the page with the specified ID. 
* * @param id * @return Link */ public WebLink getLinkWithID(String id) { return (WebLink)getElementWithID(id, WebLink.class); } private Object getElementWithID(String id, final Class klass) { loadElements(); return whenCast(_elementsByID.get(id), klass); } private Object whenCast(Object o, Class klass) { return klass.isInstance(o) ? o : null; } /** * Returns the first link found in the page matching the specified criteria. * * @param predicate * @param criteria * @return Form */ public WebForm getFirstMatchingForm(HTMLElementPredicate predicate, Object criteria) { WebForm[] forms = getForms(); for (int i = 0; i < forms.length; i++) { if (predicate.matchesCriteria(forms[i], criteria)) { return forms[i]; } } return null; } /** * Returns all links found in the page matching the specified criteria. * * @param predicate * @param criteria * @return Forms */ public WebForm[] getMatchingForms(HTMLElementPredicate predicate, Object criteria) { ArrayList matches = new ArrayList(); WebForm[] forms = getForms(); for (int i = 0; i < forms.length; i++) { if (predicate.matchesCriteria(forms[i], criteria)) { matches.add(forms[i]); } } return (WebForm[])matches.toArray(new WebForm[matches.size()]); } /** * Returns the form found in the page with the specified name. 
*
     * @param name the form name to match
     * @return the first form matching that name, or null if none matches
     */
    public WebForm getFormWithName(String name) {
        return getFirstMatchingForm(WebForm.MATCH_NAME, name);
    }

    /**
     * Runs the script carried by the given element, if it has one.
     *
     * The root node is re-set in the finally clause whether or not the script succeeds.
     * NOTE(review): presumably this rebuilds the element caches after the script may have
     * changed the document - confirm against setRootNode.
     */
    private void interpretScriptElement(Element element) {
        String script = getScript(element);
        if (script != null) {
            try {
                _updateElements = false;
                String language = NodeUtils.getNodeAttribute(element, "language", null);
                // An unsupported script language switches noscript content on.
                if (!getResponse().getScriptableObject().supportsScript(language)) {
                    _enableNoScriptNodes = true;
                }
                getResponse().getScriptableObject().runScript(language, script);
            } finally {
                setRootNode(_rootNode);
            }
        }
    }

    /**
     * Returns the script text for a script node: the node's own text when it has no "src"
     * attribute, otherwise the contents of the included script.
     *
     * @throws RuntimeException if the included script cannot be loaded; the IOException is
     *         attached as the cause
     */
    private String getScript(Node scriptNode) {
        String scriptLocation = NodeUtils.getNodeAttribute(scriptNode, "src", null);
        if (scriptLocation == null) {
            return NodeUtils.asText(scriptNode.getChildNodes());
        } else {
            try {
                return getIncludedScript(scriptLocation);
            } catch (IOException e) {
                // Message is unchanged; the cause is chained so the original stack trace survives.
                throw new RuntimeException("Error loading included script: " + e, e);
            }
        }
    }

    /**
     * Returns the contents of an included script, given its src attribute.
     *
     * @param srcAttribute the value of the script's src attribute, resolved against the base URL
     * @return the contents of the script.
     * @throws java.io.IOException
     *             if there is a problem retrieving the script
     * @throws IllegalStateException if the response has no window
     */
    String getIncludedScript(String srcAttribute) throws IOException {
        WebRequest req = new GetMethodWebRequest(getBaseURL(), srcAttribute);
        WebWindow window = getResponse().getWindow();
        if (window == null) {
            throw new IllegalStateException(
                    "Unable to retrieve script included by this response, since it was loaded by getResource(). Use getResponse() instead.");
        }
        return window.getResource(req).getText();
    }

    /**
     * If noscript node content is enabled, returns null - otherwise returns a concealing element.
     */
    private HTMLElement toNoscriptElement(Element element) {
        return _enableNoScriptNodes ? null : new NoScriptElement(element);
    }

    /**
     * Records an HTML element with every ParsedHTML instance found among the traversal contexts.
     */
    static class HtmlElementRecorder {

        /** Adds a non-null element to the maps and then to the lists; a null element is ignored. */
        protected void recordHtmlElement(NodeUtils.PreOrderTraversal pot, Node node, HTMLElement htmlElement) {
            if (htmlElement != null) {
                addToMaps(pot, node, htmlElement);
                addToLists(pot, htmlElement);
            }
        }

        /** Calls addToList on each traversal context that is a ParsedHTML. */
        protected void addToLists(NodeUtils.PreOrderTraversal pot, HTMLElement htmlElement) {
            for (Iterator i = pot.getContexts(); i.hasNext();) {
                Object o = i.next();
                if (o instanceof ParsedHTML) {
                    ((ParsedHTML) o).addToList(htmlElement);
                }
            }
        }

        /** Calls addToMaps on each traversal context that is a ParsedHTML. */
        protected void addToMaps(NodeUtils.PreOrderTraversal pot, Node node, HTMLElement htmlElement) {
            for (Iterator i = pot.getContexts(); i.hasNext();) {
                Object o = i.next();
                if (o instanceof ParsedHTML) {
                    ((ParsedHTML) o).addToMaps(node, htmlElement);
                }
            }
        }
    }

    /**
     * Recorder that first converts a DOM element to an HTML element.
     */
    abstract static class HTMLElementFactory extends HtmlElementRecorder {

        abstract HTMLElement toHTMLElement(NodeUtils.PreOrderTraversal pot, ParsedHTML parsedHTML, Element element);

        /** Converts the DOM element with toHTMLElement and records the result. */
        void recordElement(NodeUtils.PreOrderTraversal pot, Element element, ParsedHTML parsedHTML) {
            HTMLElement htmlElement = toHTMLElement(pot, parsedHTML, element);
            recordHtmlElement(pot, element, htmlElement);
        }

        protected boolean isRecognized(ClientProperties properties)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -