📄 starttagtype.java

📁 HTML解析器是一个Java库
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 3.0
// Copyright (C) 2007 Martin Jericho
// http://jerichohtml.sourceforge.net/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of either one of the following licences:
//
// 1. The Eclipse Public License (EPL) version 1.0,
// included in this distribution in the file licence-epl-1.0.html
// or available at http://www.eclipse.org/legal/epl-v10.html
//
// 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
// included in this distribution in the file licence-lgpl-2.1.txt
// or available at http://www.gnu.org/licenses/lgpl.txt
//
// This library is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the individual licence texts for more details.

package net.htmlparser.jericho;

import java.util.*;

/**
 * Defines the syntax for a start tag type.
 * <p>
 * A start tag type is any {@link TagType} that {@linkplain #getStartDelimiter() starts} with the character '<code>&lt;</code>'
 * (as with all tag types), but whose second character is <b>not</b> '<code>/</code>'.
 * <p>
 * This includes types for many tags which stand alone, without a {@linkplain #getCorrespondingEndTagType() corresponding end tag},
 * and would not intuitively be categorised as a "start tag".  For example, an HTML {@linkplain #COMMENT comment} in a document
 * is represented as a single start tag that spans the whole comment, and does not have an end tag at all.
 * <p>
 * The singleton instances of all the <a href="TagType.html#Standard">standard</a> start tag types are available in this class as static
 * <a href="#field_summary">fields</a>.
 * <p>
 * Because all <code>StartTagType</code> instances must be singletons, the '<code>==</code>' operator can be used to test for a particular tag type
 * instead of the <code>equals(Object)</code> method.
 *
 * @see EndTagType
 */
public abstract class StartTagType extends TagType {
	// Per-instance properties of a start tag type. The three field names mirror the
	// CorrespondingEndTagType, HasAttributes and IsNameAfterPrefixRequired rows of the
	// property tables in the constant documentation further down this class.
	// NOTE(review): the constructor and accessors are not visible in this part of the
	// file; presumably these are set once at construction and exposed through
	// getCorrespondingEndTagType(), hasAttributes() and isNameAfterPrefixRequired() - confirm.

	// End tag type associated with this start tag type. The property tables list null
	// for stand-alone types such as COMMENT and EndTagType.NORMAL for NORMAL.
	private final EndTagType correspondingEndTagType;
	// Listed as true for NORMAL and XML_DECLARATION, false for UNREGISTERED and COMMENT.
	private final boolean hasAttributes;
	// Listed as true for NORMAL, false for UNREGISTERED, COMMENT and XML_DECLARATION.
	private final boolean isNameAfterPrefixRequired;

	// The '<' character that, per the class documentation, begins the start delimiter
	// of every tag type. Declared without an access modifier, so it is package-private.
	static final String START_DELIMITER_PREFIX="<";

	/**
	 * The tag type given to an {@linkplain Tag#isUnregistered() unregistered} {@linkplain StartTag start tag}
	 * (<code>&lt;<var> &#46;&#46;&#46; </var>&gt;</code>).
	 * <p>
	 * See the documentation of the {@link Tag#isUnregistered()} method for details.
	 * <p>
	 * <dl>
	 *  <dt>Properties:</dt>
	 *   <dd>
	 *    <table class="bordered" style="margin: 15px" cellspacing="0">
	 *     <tr><th>Property<th>Value
	 *     <tr><td>{@link #getDescription() Description}<td>unregistered
	 *     <tr><td>{@link #getStartDelimiter() StartDelimiter}<td><code>&lt;</code>
	 *     <tr><td>{@link #getClosingDelimiter() ClosingDelimiter}<td><code>&gt;</code>
	 *     <tr><td>{@link #isServerTag() IsServerTag}<td><code>false</code>
	 *     <tr><td>{@link #getNamePrefix() NamePrefix}<td><i>(empty string)</i>
	 *     <tr><td>{@link #getCorrespondingEndTagType() CorrespondingEndTagType}<td><code>null</code>
	 *     <tr><td>{@link #hasAttributes() HasAttributes}<td><code>false</code>
	 *     <tr><td>{@link #isNameAfterPrefixRequired() IsNameAfterPrefixRequired}<td><code>false</code>
	 *    </table>
	 *  <dt>Example:</dt>
	 *   <dd><code>&lt;"This is not recognised as any of the predefined tag types in this library"&gt;</code></dd>
	 * </dl>
	 * @see EndTagType#UNREGISTERED
	 */
	public static final StartTagType UNREGISTERED=StartTagTypeUnregistered.INSTANCE;

	/**
	 * The tag type given to a normal HTML or XML {@linkplain StartTag start tag}
	 * (<code>&lt;<var>name</var><var> &#46;&#46;&#46; </var>&gt;</code>).
	 * <p>
	 * <dl>
	 *  <dt>Properties:</dt>
	 *   <dd>
	 *    <table class="bordered" style="margin: 15px" cellspacing="0">
	 *     <tr><th>Property<th>Value
	 *     <tr><td>{@link #getDescription() Description}<td>normal
	 *     <tr><td>{@link #getStartDelimiter() StartDelimiter}<td><code>&lt;</code>
	 *     <tr><td>{@link #getClosingDelimiter() ClosingDelimiter}<td><code>&gt;</code>
	 *     <tr><td>{@link #isServerTag() IsServerTag}<td><code>false</code>
	 *     <tr><td>{@link #getNamePrefix() NamePrefix}<td><i>(empty string)</i>
	 *     <tr><td>{@link #getCorrespondingEndTagType() CorrespondingEndTagType}<td>{@link EndTagType#NORMAL}
	 *     <tr><td>{@link #hasAttributes() HasAttributes}<td><code>true</code>
	 *     <tr><td>{@link #isNameAfterPrefixRequired() IsNameAfterPrefixRequired}<td><code>true</code>
	 *    </table>
	 *  <dt>Example:</dt>
	 *   <dd><code>&lt;div class="NormalDivTag"&gt;</code></dd>
	 * </dl>
	 */
	public static final StartTagType NORMAL=StartTagTypeNormal.INSTANCE;

	/**
	 * The tag type given to an HTML <a target="_blank" href="http://www.w3.org/TR/html401/intro/sgmltut.html#h-3.2.4">comment</a>
	 * (<code>&lt;&#33;--<var> &#46;&#46;&#46; </var>--&gt;</code>).
	 * <p>
	 * An HTML comment is an area of the source document enclosed by the delimiters
	 * <code>&lt;!--</code> on the left and <code>--&gt;</code> on the right.
	 * <p>
	 * The <a target="_blank" href="http://www.w3.org/TR/html401/intro/sgmltut.html#h-3.2.4">HTML 4.01 specification section 3.2.4</a>
	 * states that the end of comment delimiter may contain white space between the "<code>--</code>" and "<code>&gt;</code>" characters,
	 * but this library does not recognise end of comment delimiters containing white space.
	 * <p>
	 * In the default configuration, any non-{@linkplain #isServerTag() server} tag appearing within an HTML comment is ignored
	 * by the parser.
	 * See the documentation of the <a href="Tag.html#ParsingProcess">tag parsing process</a> for more information.
	 * <p>
	 * <dl>
	 *  <dt>Properties:</dt>
	 *   <dd>
	 *    <table class="bordered" style="margin: 15px" cellspacing="0">
	 *     <tr><th>Property<th>Value
	 *     <tr><td>{@link #getDescription() Description}<td>comment
	 *     <tr><td>{@link #getStartDelimiter() StartDelimiter}<td><code>&lt;!--</code>
	 *     <tr><td>{@link #getClosingDelimiter() ClosingDelimiter}<td><code>--&gt;</code>
	 *     <tr><td>{@link #isServerTag() IsServerTag}<td><code>false</code>
	 *     <tr><td>{@link #getNamePrefix() NamePrefix}<td><code>!--</code>
	 *     <tr><td>{@link #getCorrespondingEndTagType() CorrespondingEndTagType}<td><code>null</code>
	 *     <tr><td>{@link #hasAttributes() HasAttributes}<td><code>false</code>
	 *     <tr><td>{@link #isNameAfterPrefixRequired() IsNameAfterPrefixRequired}<td><code>false</code>
	 *    </table>
	 *  <dt>Example:</dt>
	 *   <dd><code>&lt;!-- This is a comment --&gt;</code></dd>
	 * </dl>
	 */
	public static final StartTagType COMMENT=StartTagTypeComment.INSTANCE;

	/**
	 * The tag type given to an <a target="_blank" href="http://www.w3.org/TR/REC-xml/#sec-prolog-dtd">XML declaration</a>
	 * (<code>&lt;&#63;xml<var> &#46;&#46;&#46; </var>&#63;&gt;</code>).
	 * <p>
	 * An XML declaration is often referred to in texts as a special type of processing instruction with the reserved
	 * <a target="_blank" href="http://www.w3.org/TR/REC-xml/#NT-PITarget">PITarget</a> name of "<code>xml</code>".
	 * Technically it is not an {@linkplain #XML_PROCESSING_INSTRUCTION XML processing instruction} at all, but is still a type of
	 * <a target="_blank" href="http://www.w3.org/TR/html401/appendix/notes.html#h-B.3.6">SGML processing instruction</a>.
	 * <p>
	 * According to section <a target="_blank" href="http://www.w3.org/TR/REC-xml/#sec-prolog-dtd">2.8</a> of the XML 1.0 specification,
	 * a valid XML declaration can specify only "version", "encoding" and "standalone" attributes in that order.
	 * This library parses the {@linkplain Attributes attributes} of an XML declaration in the same way as those of a
	 * {@linkplain #NORMAL normal} tag, without checking that they conform to the specification. 
	 * <p>
	 * <dl>
	 *  <dt>Properties:</dt>
	 *   <dd>
	 *    <table class="bordered" style="margin: 15px" cellspacing="0">
	 *     <tr><th>Property<th>Value
	 *     <tr><td>{@link #getDescription() Description}<td>XML declaration
	 *     <tr><td>{@link #getStartDelimiter() StartDelimiter}<td><code>&lt;?xml</code>
	 *     <tr><td>{@link #getClosingDelimiter() ClosingDelimiter}<td><code>?&gt;</code>
	 *     <tr><td>{@link #isServerTag() IsServerTag}<td><code>false</code>
	 *     <tr><td>{@link #getNamePrefix() NamePrefix}<td><code>?xml</code>
	 *     <tr><td>{@link #getCorrespondingEndTagType() CorrespondingEndTagType}<td><code>null</code>
	 *     <tr><td>{@link #hasAttributes() HasAttributes}<td><code>true</code>
	 *     <tr><td>{@link #isNameAfterPrefixRequired() IsNameAfterPrefixRequired}<td><code>false</code>
	 *    </table>
	 *  <dt>Example:</dt>
	 *   <dd><code>&lt;?xml version="1.0" encoding="UTF-8"?&gt;</code></dd>
	 * </dl>
	 */
	public static final StartTagType XML_DECLARATION=StartTagTypeXMLDeclaration.INSTANCE;

	/**
	 * The tag type given to an <a target="_blank" href="http://www.w3.org/TR/REC-xml#sec-pi">XML processing instruction</a>
	 * (<code>&lt;&#63;<var>PITarget</var><var> &#46;&#46;&#46; </var>&#63;&gt;</code>).
	 * <p>
	 * An XML processing instruction is a specific form of
	 * <a target="_blank" href="http://www.w3.org/TR/html401/appendix/notes.html#h-B.3.6">SGML processing instruction</a> with the following
	 * two additional constraints:
	 * <ul>
	 *  <li>it must be {@linkplain #getClosingDelimiter() closed} with '<code>?&gt;</code>' instead of just a single
	 *  '<code>&gt;</code>' character.
	 *  <li>it requires a <a target="_blank" href="http://www.w3.org/TR/REC-xml/#NT-PITarget">PITarget</a>
	 *   (essentially a {@linkplain Tag#getName() name} following the '<code>&lt;?</code>' {@linkplain #getStartDelimiter() start delimiter}).
	 * </ul>
	 * <p>
	 * This library does not include a <a href="TagType.html#Predefined">predefined</a> generic tag type for SGML processing instructions
	 * as the only forms in which they are found in HTML documents are the more specific XML processing instruction and
	 * the {@linkplain #XML_DECLARATION XML declaration}, both of which have their own dedicated predefined tag type.
	 * <p>
	 * There is no restriction on the contents of an XML processing instruction.  In particular, it cannot be assumed that the
	 * processing instruction contains {@linkplain Attributes attributes}, in contrast to the {@linkplain #XML_DECLARATION XML declaration}.
	 * <p>
	 * Note that {@linkplain #register() registering} the {@link PHPTagTypes#PHP_SHORT} tag type overrides this tag type.
	 * This is because they both have the same {@linkplain #getStartDelimiter() start delimiter},
	 * so the one registered latest takes <a href="TagType.html#Precedence">precedence</a> over the other.
	 * See the documentation of the {@link PHPTagTypes} class for more information.
	 * <p>
	 * <dl>
	 *  <dt>Properties:</dt>
	 *   <dd>
	 *    <table class="bordered" style="margin: 15px" cellspacing="0">
	 *     <tr><th>Property<th>Value
	 *     <tr><td>{@link #getDescription() Description}<td>XML processing instruction
	 *     <tr><td>{@link #getStartDelimiter() StartDelimiter}<td><code>&lt;?</code>
	 *     <tr><td>{@link #getClosingDelimiter() ClosingDelimiter}<td><code>?&gt;</code>
	 *     <tr><td>{@link #isServerTag() IsServerTag}<td><code>false</code>
	 *     <tr><td>{@link #getNamePrefix() NamePrefix}<td><code>?</code>
	 *     <tr><td>{@link #getCorrespondingEndTagType() CorrespondingEndTagType}<td><code>null</code>
	 *     <tr><td>{@link #hasAttributes() HasAttributes}<td><code>false</code>
	 *     <tr><td>{@link #isNameAfterPrefixRequired() IsNameAfterPrefixRequired}<td><code>true</code>
	 *    </table>
	 *  <dt>Example:</dt>
12 3 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -