📄 config.java

📁 HTML解析器是一个Java库
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 3.0
// Copyright (C) 2007 Martin Jericho
// http://jerichohtml.sourceforge.net/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of either one of the following licences:
//
// 1. The Eclipse Public License (EPL) version 1.0,
// included in this distribution in the file licence-epl-1.0.html
// or available at http://www.eclipse.org/legal/epl-v10.html
//
// 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
// included in this distribution in the file licence-lgpl-2.1.txt
// or available at http://www.gnu.org/licenses/lgpl.txt
//
// This library is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the individual licence texts for more details.

package net.htmlparser.jericho;

import java.util.*;

/**
 * Encapsulates global configuration properties which determine the behaviour of various functions.
 * <p>
 * All of the properties in this class are static, affecting all objects and threads.
 * Multiple concurrent configurations are not possible.
 * <p>
 * Properties that relate to <a target="_blank" href="http://www.w3.org/TR/html401/conform.html#didx-user_agent">user agent</a>
 * compatibility issues are stored in instances of the {@link Config.CompatibilityMode} class.
 * This allows all of the properties in the compatibility mode to be set as a block by setting the static
 * {@link #CurrentCompatibilityMode} property to a different instance.
 *
 * @see Config.CompatibilityMode
 */ 
public final class Config {
	/**
	 * Private constructor preventing this class from being instantiated from outside;
	 * the configuration properties declared in this class are all static.
	 */
	private Config() {}

	/**
	 * Determines the string used to separate multiple values of a single column in the output of the {@link FormFields#getColumnValues(Map)} method.
	 * <p>
	 * A single column can only have multiple values if {@link FormField#getUserValueCount()}<code>&gt;1</code>
	 * on the relevant form field, which usually indicates a poorly designed form.
	 * <p>
	 * The default value is "<code>,</code>" (a comma, not including the quotes).
	 * <p>
	 * Must not be <code>null</code>.
	 * As this is a plain public field, nothing in this declaration enforces that requirement; it is the responsibility of the code assigning the value.
	 */
	public static String ColumnMultipleValueSeparator=",";

	/**
	 * Determines the string that represents the value <code>true</code> in the output of the {@link FormFields#getColumnValues(Map)} method.
	 * <p>
	 * The default value is "<code>true</code>" (without the quotes), obtained from <code>Boolean.toString(true)</code>.
	 * <p>
	 * Must not be <code>null</code>.
	 * As this is a plain public field, nothing in this declaration enforces that requirement; it is the responsibility of the code assigning the value.
	 */
	public static String ColumnValueTrue=Boolean.toString(true);

	/**
	 * Determines the string that represents the value <code>false</code> in the output of the {@link FormFields#getColumnValues(Map)} method.
	 * <p>
	 * The default value is <code>null</code>, which represents no output at all.
	 * A <code>null</code> value is therefore explicitly permitted for this property.
	 */
	public static String ColumnValueFalse=null;

	/**
	 * Determines whether the {@link CharacterReference#decode(CharSequence)} and similar methods convert non-breaking space ({@link CharacterEntityReference#_nbsp &amp;nbsp;}) character entity references to normal spaces.
	 * <p>
	 * The default value is <code>true</code>, meaning that {@link CharacterEntityReference#_nbsp &amp;nbsp;} is converted to a normal space (U+0020).
	 * <p>
	 * When this property is set to <code>false</code>, non-breaking space ({@link CharacterEntityReference#_nbsp &amp;nbsp;})
	 * character entity references are decoded as non-breaking space characters (U+00A0) instead of being converted to normal spaces (U+0020).
	 * <p>
	 * Note that the introduction of this property in Version 2.6 changed the default behaviour of the methods listed below,
	 * which prior to Version 2.6 always decoded {@link CharacterEntityReference#_nbsp &amp;nbsp;} as non-breaking space characters (U+00A0).
	 * <p>
	 * The new default behaviour of the library reflects the fact that non-breaking space character entity references are almost always used in HTML documents
	 * as a <a target="_blank" href="http://en.wikipedia.org/wiki/Non-breaking_space#Use_as_non-collapsing_whitespace">non-collapsing white space</a> character.
	 * Converting them to the correct character code U+00A0, which is represented by a visible character in many older character sets, was confusing to most users,
	 * who expected to see only normal spaces.
	 * The most common example of this is its visualisation as the character <b>&aacute;</b> in the MS-DOS <a target="_blank" href="http://en.wikipedia.org/wiki/Code_page_437">CP437</a> character set.
	 * <p>
	 * The functionality of the following methods is affected:
	 * <ul>
	 *  <li>{@link CharacterReference#decode(CharSequence)}
	 *  <li>{@link CharacterReference#decode(CharSequence, boolean insideAttributeValue)}
	 *  <li>{@link CharacterReference#decodeCollapseWhiteSpace(CharSequence)}
	 *  <li>{@link CharacterReference#reencode(CharSequence)}
	 *  <li>{@link Attribute#getValue()}
	 *  <li>{@link Attributes#getValue(String name)}
	 *  <li>{@link Attributes#populateMap(Map, boolean convertNamesToLowerCase)}
	 *  <li>{@link StartTag#getAttributeValue(String attributeName)}
	 *  <li>{@link Element#getAttributeValue(String attributeName)}
	 *  <li>{@link FormControl#getPredefinedValues()}
	 *  <li>{@link OutputDocument#replace(Attributes, boolean convertNamesToLowerCase)}
	 *  <li>{@link Renderer#getConvertNonBreakingSpaces()}
	 *  <li>{@link TextExtractor#getConvertNonBreakingSpaces()}
	 * </ul>
	 */
	public static boolean ConvertNonBreakingSpaces=true;


	/**
	 * Determines the currently active {@linkplain Config.CompatibilityMode compatibility mode}.
	 * <p>
	 * Assigning a different {@link Config.CompatibilityMode} instance to this property switches all of the
	 * properties of the compatibility mode as a block. Because the property is static, the change applies globally.
	 * <p>
	 * The default setting is {@link Config.CompatibilityMode#IE} (MS Internet Explorer 6.0).
	 * <p>
	 * Must not be <code>null</code>.
	 * As this is a plain public field, nothing in this declaration enforces that requirement; it is the responsibility of the code assigning the value.
	 */
	public static CompatibilityMode CurrentCompatibilityMode=CompatibilityMode.IE;

	/**
	 * Determines whether apostrophes are encoded when calling the {@link CharacterReference#encode(CharSequence)} method.
	 * <p>
	 * The default value is <code>false</code>, which means {@linkplain CharacterEntityReference#_apos apostrophe}
	 * (U+0027) characters are not encoded.
	 * <p>
	 * The only time apostrophes need to be encoded is within an attribute value delimited by
	 * single quotes (apostrophes), so in most cases leaving apostrophes unencoded is perfectly safe and
	 * enhances the readability of the source document.
	 * <p>
	 * When this property is <code>true</code>, apostrophes are always encoded as a
	 * {@linkplain NumericCharacterReference numeric character reference}, never as the
	 * character entity reference {@link CharacterEntityReference#_apos &amp;apos;}.
	 */
	public static boolean IsApostropheEncoded=false;

	/**
	 * Determines the {@link LoggerProvider} that is used to create the default {@link Logger} object for each new {@link Source} object.
	 * <p>
	 * The {@link LoggerProvider} interface contains several predefined <code>LoggerProvider</code> instances which this property can be set to,
	 * mostly representing wrappers to common logging frameworks.
	 * <p>
	 * The default value is <code>null</code>, which results in the auto-detection of the most appropriate logging mechanism according to the following algorithm:
	 * <ol>
	 *  <li>If the class <code>org.slf4j.impl.StaticLoggerBinder</code> is detected:
	 *   <ul>
	 *    <li>If the class <code>org.slf4j.impl.JDK14LoggerFactory</code> is detected, use {@link LoggerProvider#JAVA}.
	 *    <li>If the class <code>org.slf4j.impl.Log4jLoggerFactory</code> is detected, use {@link LoggerProvider#LOG4J}.
	 *    <li>If the class <code>org.slf4j.impl.JCLLoggerFactory</code> is NOT detected, use {@link LoggerProvider#SLF4J}.
	 *   </ul>
	 *  <li>If the class <code>org.apache.commons.logging.Log</code> is detected:
	 *   <blockquote>
	 *    Create an instance of it using the commons-logging <code>LogFactory</code> class.
	 *     <ul>
	 *      <li>If the created <code>Log</code> is of type <code>org.apache.commons.logging.impl.Jdk14Logger</code>, use {@link LoggerProvider#JAVA}.
	 *      <li>If the created <code>Log</code> is of type <code>org.apache.commons.logging.impl.Log4JLogger</code>, use {@link LoggerProvider#LOG4J}.
	 *      <li>Otherwise, use {@link LoggerProvider#JCL}.
	 *     </ul>
	 *   </blockquote>
	 *  <li>If the class <code>org.apache.log4j.Logger</code> is detected, use {@link LoggerProvider#LOG4J}.
	 *  <li>Otherwise, use {@link LoggerProvider#JAVA}.
	 * </ol>
	 *
	 * @see Source#setLogger(Logger)
	 */
	// Note: this field has the same simple name as the LoggerProvider type. Where a simple name
	// could be read as either a variable or a type, Java gives the variable precedence (JLS 6.4.2).
	public static LoggerProvider LoggerProvider=null;

	/**
	 * Determines the string used to represent a <a target="_blank" href="http://en.wikipedia.org/wiki/Newline">newline</a> in text output throughout the library.
	 * <p>
	 * The default value is the standard new line character sequence of the host platform, determined by <code>System.getProperty("line.separator")</code>.
	 * <p>
	 * The system property is read once, when this static field is initialised; later changes to the
	 * <code>line.separator</code> system property do not alter this value unless the field is reassigned.
	 */
	public static String NewLine=System.getProperty("line.separator");

	/**
	 * Internal flag used in {@link Element#getChildElements()}.
	 * <p>
	 * Declared as a package-private <code>static final</code> constant with the value <code>false</code>,
	 * so unlike the public properties of this class it cannot be changed at run time.
	 * <p>
	 * Server elements containing markup should be included in the hierarchy, so consider making this option public in future.
	 */
	static final boolean IncludeServerTagsInElementHierarchy=false;

	/**
	 * Represents a set of maximum unicode code points to be recognised for the three types of
	 * <a href="CharacterReference.html#Unterminated">unterminated</a> character reference in a given context.
	 * <p>
12 3 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -