⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 htmlparsingutils.java

📁 测试工具
💻 JAVA
字号:
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * 
 */

package org.apache.jmeter.protocol.http.parser;

import java.io.ByteArrayInputStream;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLDecoder;
import java.util.LinkedList;
import java.util.List;

import org.apache.jmeter.config.Argument;
import org.apache.jmeter.config.Arguments;
import org.apache.jmeter.protocol.http.sampler.HTTPSamplerBase;
import org.apache.jmeter.protocol.http.sampler.HTTPSamplerFactory;
import org.apache.jmeter.testelement.property.PropertyIterator;
import org.apache.jmeter.util.JMeterUtils;
import org.apache.jorphan.logging.LoggingManager;
import org.apache.log.Logger;
import org.apache.oro.text.PatternCacheLRU;
import org.apache.oro.text.regex.MatchResult;
import org.apache.oro.text.regex.Pattern;
import org.apache.oro.text.regex.PatternMatcherInput;
import org.apache.oro.text.regex.Perl5Compiler;
import org.apache.oro.text.regex.Perl5Matcher;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.tidy.Tidy;

// For Junit tests @see TestHtmlParsingUtils

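/**
 * Static helper methods for matching links against sampler patterns, parsing
 * HTML text into a DOM with Tidy, and building samplers from anchors and forms.
 *
 * @author Michael Stover Created June 14, 2001
 */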
public final class HtmlParsingUtils {
	private static final Logger log = LoggingManager.getLoggerForClass();

	/**
	 * Private constructor to prevent instantiation.
	 */
	private HtmlParsingUtils() {
	}

	/**
	 * Check if anchor matches by checking against:
	 * - protocol
	 * - domain (only if the pattern specifies a non-empty domain)
	 * - path (literal equality, or regex match of the pattern path with any
	 *   number of leading slashes allowed)
	 * - parameter names (each pattern argument name must occur in the decoded
	 *   query either literally as <code>name=</code> or as a regex hit)
	 * 
	 * If the query string of the link is missing or cannot be URL-decoded, the
	 * link can only match a pattern that has no arguments.
	 * 
	 * @param newLink target to match
	 * @param config pattern to match against
	 * 
	 * @return true if target URL matches pattern URL
	 */
	public static boolean isAnchorMatched(HTTPSamplerBase newLink, HTTPSamplerBase config)
	{
		String query = null;
		final String rawQuery = newLink.getQueryString();
		if (rawQuery != null) {
			try {
				query = URLDecoder.decode(rawQuery, "UTF-8"); // $NON-NLS-1$
			} catch (UnsupportedEncodingException e) {
				// UTF-8 unsupported? You must be joking!
				log.error("UTF-8 encoding not supported!");
				throw new Error("Should not happen: " + e.toString(), e);
			} catch (IllegalArgumentException e) {
				// Malformed %-escape in the link: leave query as null so that the
				// "failed to convert" check below decides the outcome.
				log.warn("Cannot decode query string: " + rawQuery, e);
			}
		}

		final Arguments arguments = config.getArguments();
		if (query == null && arguments.getArgumentCount() > 0) {
			return false;// failed to convert query, so assume no match
		}

		final Perl5Matcher matcher = JMeterUtils.getMatcher();
		final PatternCacheLRU patternCache = JMeterUtils.getPatternCache();

		if (!isEqualOrMatches(newLink.getProtocol(), config.getProtocol(), matcher, patternCache)){
			return false;
		}

		// An empty pattern domain means "any domain"
		final String domain = config.getDomain();
		if (domain != null && domain.length() > 0
				&& !isEqualOrMatches(newLink.getDomain(), domain, matcher, patternCache)) {
			return false;
		}

		final String path = config.getPath();
		if (!newLink.getPath().equals(path)
				&& !matcher.matches(newLink.getPath(), patternCache.getPattern("[/]*" + path, // $NON-NLS-1$
						Perl5Compiler.READ_ONLY_MASK))) {
			return false;
		}

		// query can only be null here if there are no arguments to check
		PropertyIterator iter = arguments.iterator();
		while (iter.hasNext()) {
			Argument item = (Argument) iter.next().getObjectValue();
			final String name = item.getName();
			// Cheap literal test first; fall back to treating the name as a regex
			if (query.indexOf(name + "=") == -1 // $NON-NLS-1$
					&& !matcher.contains(query, patternCache.getPattern(name, Perl5Compiler.READ_ONLY_MASK))) {
				return false;
			}
		}

		return true;
	}

	/**
	 * Arguments match if the input name matches the corresponding pattern name
	 * and the input value matches the pattern value, where the matching is done
	 * first using String equals, and then Regular Expression matching if the equals test fails.
	 * 
	 * @param arg - input Argument
	 * @param patternArg - pattern to match against
	 * @return true if both name and value match
	 */
	public static boolean isArgumentMatched(Argument arg, Argument patternArg) {
		final Perl5Matcher matcher = JMeterUtils.getMatcher();
		final PatternCacheLRU patternCache = JMeterUtils.getPatternCache();
		return 
		    isEqualOrMatches(arg.getName(), patternArg.getName(), matcher, patternCache)
		&& 
		    isEqualOrMatches(arg.getValue(), patternArg.getValue(), matcher, patternCache);
	}

	/**
	 * Match the input argument against the pattern using String.equals() or,
	 * if that fails, case-sensitive regular expression matching of the whole input.
	 * 
	 * @param arg input string; must not be null
	 * @param pat pattern string, used both as a literal and as a regular expression
	 * @param matcher Perl5Matcher
	 * @param cache PatternCache used to compile/look up the pattern
	 * 
	 * @return true if input matches the pattern
	 */
	public static boolean isEqualOrMatches(String arg, String pat, Perl5Matcher matcher, PatternCacheLRU cache){
		return 
		    arg.equals(pat) 
		    || 
		    matcher.matches(arg,cache.getPattern(pat,Perl5Compiler.READ_ONLY_MASK));
	}

	/**
	 * Match the input argument against the pattern using String.equalsIgnoreCase() or,
	 * if that fails, case-insensitive regular expression matching of the whole input.
	 * 
	 * @param arg input string; must not be null
	 * @param pat pattern string, used both as a literal and as a regular expression
	 * @param matcher Perl5Matcher
	 * @param cache PatternCache used to compile/look up the pattern
	 * 
	 * @return true if input matches the pattern
	 */
	public static boolean isEqualOrMatchesCaseBlind(String arg, String pat, Perl5Matcher matcher, PatternCacheLRU cache){
		return 
		    arg.equalsIgnoreCase(pat) 
		    || 
		    matcher.matches(arg,cache.getPattern(pat,Perl5Compiler.READ_ONLY_MASK | Perl5Compiler.CASE_INSENSITIVE_MASK));
	}

	/**
	 * Match the input argument against the pattern using String.equals() or,
	 * if that fails, case-sensitive regular expression matching.
	 * Uses the matcher and pattern cache supplied by JMeterUtils.
	 * 
	 * @param arg input string; must not be null
	 * @param pat pattern string
	 * 
	 * @return true if input matches the pattern
	 */
	public static boolean isEqualOrMatches(String arg, String pat){
		return isEqualOrMatches(arg, pat, JMeterUtils.getMatcher(), JMeterUtils.getPatternCache());
	}

	/**
	 * Match the input argument against the pattern using String.equalsIgnoreCase() or,
	 * if that fails, case-insensitive regular expression matching.
	 * Uses the matcher and pattern cache supplied by JMeterUtils.
	 * 
	 * @param arg input string; must not be null
	 * @param pat pattern string
	 * 
	 * @return true if input matches the pattern
	 */
	public static boolean isEqualOrMatchesCaseBlind(String arg, String pat){
		return isEqualOrMatchesCaseBlind(arg, pat, JMeterUtils.getMatcher(), JMeterUtils.getPatternCache());
	}

	/**
	 * Returns <code>tidy</code> as HTML parser.
	 * A new instance is created on every call, configured for UTF-8 input,
	 * quiet mode and no warnings.
	 * 
	 * @return a <code>tidy</code> HTML parser
	 */
	public static Tidy getParser() {
		log.debug("Start : getParser1");
		Tidy tidy = new Tidy();
		tidy.setCharEncoding(org.w3c.tidy.Configuration.UTF8);
		tidy.setQuiet(true);
		tidy.setShowWarnings(false);

		if (log.isDebugEnabled()) {
			log.debug("getParser1 : tidy parser created - " + tidy);
		}

		log.debug("End : getParser1");

		return tidy;
	}

	/**
	 * Parses the given markup with the parser from {@link #getParser()} and
	 * returns the resulting DOM. The text is handed to the parser as UTF-8 bytes.
	 * 
	 * @param text
	 *            the document text to parse; must not be null
	 * @return a node representing the whole parsed document
	 * @throws RuntimeException
	 *             if UTF-8 is not supported (should never happen); the original
	 *             exception is attached as the cause
	 */
	public static Node getDOM(String text) {
		log.debug("Start : getDOM1");

		try {
			Node node = getParser().parseDOM(new ByteArrayInputStream(text.getBytes("UTF-8")), null);// $NON-NLS-1$

			if (log.isDebugEnabled()) {
				log.debug("node : " + node);
			}

			log.debug("End : getDOM1");

			return node;
		} catch (UnsupportedEncodingException e) {
			log.error("getDOM1 : Unsupported encoding exception - " + e);
			log.debug("End : getDOM1");
			throw new RuntimeException("UTF-8 encoding failed", e);
		}
	}

	/**
	 * Creates an empty DOM document using Tidy.
	 * 
	 * @return a new, empty document
	 */
	public static Document createEmptyDoc() {
		return Tidy.createEmptyDocument();
	}

	/**
	 * Create a new Sampler based on an HREF string plus a contextual URL
	 * object. Given that an HREF string might be of three possible forms, some
	 * processing is required: the HREF is resolved against the context URL and
	 * the host, protocol, port, path and query of the result are copied into
	 * the new sampler.
	 * 
	 * @param parsedUrlString the HREF value found in the page
	 * @param context URL against which the HREF is resolved
	 * @return a new sampler describing the resolved URL
	 * @throws MalformedURLException if the HREF cannot be resolved to a URL
	 */
	public static HTTPSamplerBase createUrlFromAnchor(String parsedUrlString, URL context) throws MalformedURLException {
		if (log.isDebugEnabled()) {
			log.debug("Creating URL from Anchor: " + parsedUrlString + ", base: " + context);
		}
		URL url = new URL(context, parsedUrlString);
		HTTPSamplerBase sampler =HTTPSamplerFactory.newInstance();
		sampler.setDomain(url.getHost());
		sampler.setProtocol(url.getProtocol());
		sampler.setPort(url.getPort());
		sampler.setPath(url.getPath());
		sampler.parseArguments(url.getQuery());

		return sampler;
	}

	/**
	 * Walks the DOM below <code>doc</code> and creates one sampler per form
	 * that has a usable <code>action</code> attribute, populated with the
	 * form's input, textarea and select/option fields.
	 * 
	 * @param doc root node to search for forms
	 * @param context URL against which form actions are resolved
	 * @return list of HTTPSamplerBase objects, one per form found (possibly empty)
	 */
	public static List createURLFromForm(Node doc, URL context) {
		LinkedList urlConfigs = new LinkedList();
		// No select is open and we are not inside a form at the root
		recurseForm(doc, urlConfigs, context, null, false);
		return urlConfigs;
	}

	/**
	 * Processes one node and then recurses into its children, collecting form
	 * samplers in <code>urlConfigs</code>. Fields are always added to the most
	 * recently created sampler. Any exception raised while handling a node is
	 * logged and the traversal continues.
	 * 
	 * N.B. Since the tags are extracted from an HTML Form, any values must already have been encoded
	 * 
	 * @param tempNode node to process
	 * @param urlConfigs list of samplers created so far (appended to)
	 * @param context URL against which form actions are resolved
	 * @param selectName name of the enclosing select element, if any
	 * @param inForm whether a form sampler is currently open
	 * @return the updated in-form state, which the caller carries over to the next sibling
	 */
	private static boolean recurseForm(Node tempNode, LinkedList urlConfigs, URL context, String selectName,
			boolean inForm) {
		NamedNodeMap nodeAtts = tempNode.getAttributes();
		String tag = tempNode.getNodeName();
		try {
			if (inForm) {
				HTTPSamplerBase url = (HTTPSamplerBase) urlConfigs.getLast();
				if (tag.equalsIgnoreCase("form")) { // $NON-NLS-1$
					try {
						urlConfigs.add(createFormUrlConfig(tempNode, context));
					} catch (MalformedURLException e) {
						inForm = false;
					}
				} else if (tag.equalsIgnoreCase("input")) { // $NON-NLS-1$
					url.addEncodedArgument(getAttributeValue(nodeAtts, "name"),  // $NON-NLS-1$
							getAttributeValue(nodeAtts, "value")); // $NON-NLS-1$
				} else if (tag.equalsIgnoreCase("textarea")) { // $NON-NLS-1$
					// The value of a textarea is its text content, if it has any
					Node content = tempNode.getFirstChild();
					if (content != null) {
						url.addEncodedArgument(getAttributeValue(nodeAtts, "name"),  // $NON-NLS-1$
								content.getNodeValue());
					} else {
						url.addArgument(getAttributeValue(nodeAtts, "name"), ""); // $NON-NLS-1$
					}
				} else if (tag.equalsIgnoreCase("select")) { // $NON-NLS-1$
					// Remembered for the option children processed below
					selectName = getAttributeValue(nodeAtts, "name"); // $NON-NLS-1$
				} else if (tag.equalsIgnoreCase("option")) { // $NON-NLS-1$
					// An option without a value attribute submits its text instead
					String value = findAttributeValue(nodeAtts, "value"); // $NON-NLS-1$
					if (value == null) {
						Node content = tempNode.getFirstChild();
						value = content == null ? null : content.getNodeValue();
						if (value == null) {
							value = ""; // $NON-NLS-1$
						}
					}
					url.addEncodedArgument(selectName, value);
				}
			} else if (tag.equalsIgnoreCase("form")) { // $NON-NLS-1$
				try {
					urlConfigs.add(createFormUrlConfig(tempNode, context));
					inForm = true;
				} catch (MalformedURLException e) {
					inForm = false;
				}
			}
		} catch (Exception ex) {
			// Best effort: a broken element must not abort the whole traversal
			log.warn("Some bad HTML " + printNode(tempNode), ex);
		}
		NodeList childNodes = tempNode.getChildNodes();
		for (int x = 0; x < childNodes.getLength(); x++) {
			inForm = recurseForm(childNodes.item(x), urlConfigs, context, selectName, inForm);
		}
		return inForm;
	}

	/**
	 * Returns the value of the named attribute, or the empty string if the
	 * attribute (or the attribute map itself) is missing.
	 * 
	 * @param att attribute map of a node, may be null
	 * @param attName name of the attribute to read
	 * @return the attribute value, never null
	 */
	private static String getAttributeValue(NamedNodeMap att, String attName) {
		String value = findAttributeValue(att, attName);
		return value == null ? "" : value; // $NON-NLS-1$
	}

	/**
	 * Returns the value of the named attribute, or null if the attribute (or
	 * the attribute map itself) is missing.
	 * 
	 * @param att attribute map of a node, may be null
	 * @param attName name of the attribute to read
	 * @return the attribute value, or null if there is none
	 */
	private static String findAttributeValue(NamedNodeMap att, String attName) {
		if (att == null) {
			return null;
		}
		Node attribute = att.getNamedItem(attName);
		return attribute == null ? null : attribute.getNodeValue();
	}

	/**
	 * Renders the opening tag of a node, including its attributes, for use in
	 * log messages.
	 * 
	 * @param node node to render
	 * @return text of the form <code>&lt;name att="value" ...&gt;</code>
	 */
	private static String printNode(Node node) {
		StringBuffer buf = new StringBuffer();
		buf.append("<"); // $NON-NLS-1$
		buf.append(node.getNodeName());
		NamedNodeMap atts = node.getAttributes();
		// Only element nodes have attributes; this is called from a catch block,
		// so it must not throw for other node types.
		if (atts != null) {
			for (int x = 0; x < atts.getLength(); x++) {
				Node attribute = atts.item(x);
				buf.append(" "); // $NON-NLS-1$
				buf.append(attribute.getNodeName());
				buf.append("=\""); // $NON-NLS-1$
				buf.append(attribute.getNodeValue());
				buf.append("\""); // $NON-NLS-1$
			}
		}

		buf.append(">"); // $NON-NLS-1$

		return buf.toString();
	}

	/**
	 * Creates a sampler for a form node from its <code>action</code> attribute.
	 * 
	 * @param tempNode the form node
	 * @param context URL against which the action is resolved
	 * @return a new sampler for the form's target URL
	 * @throws MalformedURLException if the form has no action attribute or the
	 *             action cannot be resolved to a URL
	 */
	private static HTTPSamplerBase createFormUrlConfig(Node tempNode, URL context) throws MalformedURLException {
		String action = findAttributeValue(tempNode.getAttributes(), "action"); // $NON-NLS-1$
		if (action == null) {
			throw new MalformedURLException();
		}
		return createUrlFromAnchor(action, context);
	}
	
	/**
	 * Extracts every quoted <code>url(...)</code> reference from a style
	 * string and adds it to the collection, resolved against the base URL.
	 * The keyword is matched case-insensitively.
	 * 
	 * @param baseUrl URL against which the extracted references are resolved
	 * @param urls collection that receives the extracted URLs
	 * @param styleTagStr style text to scan
	 */
	public static void extractStyleURLs(final URL baseUrl, final URLCollection urls, String styleTagStr) {
		Perl5Matcher matcher = JMeterUtils.getMatcher();
		// The URL part is matched reluctantly so that a style containing several
		// url(...) entries yields one match per entry rather than a single match
		// running from the first quote to the last one.
		Pattern pattern = JMeterUtils.getPatternCache().getPattern(
				"URL\\(\\s*('|\")(.*?)('|\")\\s*\\)", // $NON-NLS-1$
				Perl5Compiler.CASE_INSENSITIVE_MASK | Perl5Compiler.SINGLELINE_MASK | Perl5Compiler.READ_ONLY_MASK);
		PatternMatcherInput input = new PatternMatcherInput(styleTagStr);
		while (matcher.contains(input, pattern)) {
		    MatchResult match = matcher.getMatch();
		    // The value is in the second group
		    String styleUrl = match.group(2);
		    urls.addURL(styleUrl, baseUrl);
		}
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -