📄 tagtype.java
字号:
* if it occurs inside a {@linkplain StartTagType#COMMENT comment}. The end of the {@linkplain StartTagType#COMMENT comment} however also
* ends the implicit treatment of the {@link HTMLElementName#SCRIPT SCRIPT} element content as CDATA.
* <p>
* Although {@link HTMLElementName#STYLE STYLE} elements should theoretically be treated in the same way as {@link HTMLElementName#SCRIPT SCRIPT} elements,
* the syntax of <a target="_blank" href="http://www.w3.org/Style/CSS/">Cascading Style Sheets</a> (CSS) does not contain any constructs that
* could be misinterpreted as HTML tags, so there is virtually no need to perform any special checks in this case.
* <p>
* IMPLEMENTATION NOTE: The rationale behind using an integer array to hold this value, rather than a scalar <code>int</code> value,
* is to emulate passing the parameter by reference.
* This value needs to be shared amongst several internal methods during the {@linkplain Source#fullSequentialParse() full sequential parse} process,
* and any one of those methods needs to be able to modify the value and pass it back to the calling method.
* This would normally be implemented by passing the parameter by reference, but because Java does not support this language construct, a container for a
* mutable integer must be passed instead.
* Because the standard Java library does not provide a class for holding a single mutable integer (the <code>java.lang.Integer</code> class is immutable),
* the easiest container to use, without creating a class especially for this purpose, is an integer array.
* The use of an array does not imply any intention to use more than a single array entry in subsequent versions.
*
* @param source the {@link Source} document.
* @param pos the character position in the source document to check.
* @param fullSequentialParseData an integer array containing data allowing this method to implement a better algorithm when a {@linkplain Source#fullSequentialParse() full sequential parse} is being performed, or <code>null</code> in <a href="Source.html#ParseOnDemand">parse on demand</a> mode.
* @return <code>true</code> if a tag of this type is valid in the specified position of the specified source document, otherwise <code>false</code>.
*/
protected boolean isValidPosition(final Source source, final int pos, final int[] fullSequentialParseData) {
	// Server tags are accepted at any position.
	if (isServerTag()) return true;

	if (fullSequentialParseData==null) {
		// Parse on demand mode: reject the position only if it lies inside a tag of one of the
		// tag types that ignore enclosed markup.
		for (final TagType listedType : getTagTypesIgnoringEnclosedMarkup()) {
			// A comment is never checked against other comments.
			// See javadocs for getTagTypesIgnoringEnclosedMarkup() for more explanation.
			// Allowing it might result in multiple comments being recognised with the same end delimiter,
			// but the risk of this occurring in a syntactically invalid document is outweighed by the benefit
			// of not recursively checking all previous comments in a document, risking stack overflow.
			if (this==StartTagType.COMMENT && listedType==StartTagType.COMMENT) continue;
			if (listedType.tagEncloses(source,pos)) return false;
		}
		return true;
	}

	// Full sequential parse: a simplified check is possible because a tag can be rejected
	// if it is inside any other tag, not just inside the specially cached tag types.
	final boolean insideScriptElement=(fullSequentialParseData[0]==Integer.MAX_VALUE);
	if (!insideScriptElement) {
		// accept the non-server tag only if it is after the end of the last found non-server tag
		return pos>=fullSequentialParseData[0];
	}

	// Inside a SCRIPT element only two kinds of tag are accepted:
	//  - a normal end tag starting with "</script", which terminates the implicit CDATA section;
	//  - a comment, which all major browsers recognise inside SCRIPT elements even though this is not
	//    technically correct, and whose end also terminates the implicit CDATA section.
	final boolean endsImplicitCData=
		(this==EndTagType.NORMAL && source.getParseText().containsAt("</script",pos))
		|| this==StartTagType.COMMENT;
	if (!endsImplicitCData) return false; // reject any other tags inside SCRIPT element
	fullSequentialParseData[0]=pos;
	return true;
}
/**
* Returns an array of all the tag types inside which the parser ignores all non-{@linkplain #isServerTag() server} tags
* in <a href="Source.html#ParseOnDemand">parse on demand</a> mode.
* <br />(<a href="TagType.html#ImplementationAssistance">implementation assistance</a> method)
* <p>
* The tag types returned by this property (referred to in the following paragraphs as the "listed types") default to
* {@link StartTagType#COMMENT} and {@link StartTagType#CDATA_SECTION}.
* <p>
* This property is used by the default implementation of the {@link #isValidPosition(Source, int pos, int[] fullSequentialParseData) isValidPosition} method
* in <a href="Source.html#ParseOnDemand">parse on demand</a> mode.
* It is not used at all during a {@linkplain Source#fullSequentialParse() full sequential parse}.
* <p>
* In the default implementation of the {@link #isValidPosition(Source, int pos, int[] fullSequentialParseData) isValidPosition} method,
* in <a href="Source.html#ParseOnDemand">parse on demand</a> mode,
* every new non-server tag found by the parser (referred to as a "new tag") undergoes a check to see whether it is enclosed
* by a tag of one of the listed types.
* This includes new tags of the listed types themselves if they are non-server tags.
* The recursive nature of this check means that <i>all</i> tags of the listed types occurring before the new tag must be found
* by the parser before it can determine whether the new tag should be ignored.
* To mitigate any performance issues arising from this process, the listed types are given special treatment in the tag cache.
* This dramatically decreases the time taken to search on these tag types, so adding a tag type to this array that
* is easily recognised and occurs infrequently only results in a small degradation in overall performance.
* <p>
* A special exception to the algorithm described above applies to {@link StartTagType#COMMENT COMMENT} tags.
* The default implementation of the {@link #isValidPosition(Source,int,int[]) isValidPosition} method
* does not check whether a {@link StartTagType#COMMENT COMMENT} tag is inside another {@link StartTagType#COMMENT COMMENT} tag,
* as this should never happen in a syntactically correct document (the characters '<code>--</code>' should not occur inside a comment).
* Skipping this check also avoids the need to recursively check every {@link StartTagType#COMMENT COMMENT} tag back to the start of the document,
* which has the potential to cause a stack overflow in a large document containing lots of comments.
* <p>
* Theoretically, non-server tags appearing inside any other tag should be ignored, which is how the parser behaves during a
* {@linkplain Source#fullSequentialParse() full sequential parse}.
* <p>
* Server tags in particular very often contain other "tags" that should not be recognised as tags by the parser.
* If this behaviour is required in <a href="Source.html#ParseOnDemand">parse on demand</a> mode, the tag type of each server tag that might be found
* in the source documents can be added to this property using the static {@link #setTagTypesIgnoringEnclosedMarkup(TagType[])} method.
* For example, the following command would prevent non-server tags from being recognised inside {@linkplain PHPTagTypes#PHP_STANDARD standard PHP} tags,
* as well as the default {@linkplain StartTagType#COMMENT comment} and {@linkplain StartTagType#CDATA_SECTION CDATA section} tags:
* <p>
* <blockquote><code>TagType.setTagTypesIgnoringEnclosedMarkup(new TagType[] {PHPTagTypes.PHP_STANDARD, StartTagType.COMMENT, StartTagType.CDATA_SECTION});</code></blockquote>
* <p>
* The only situation where a non-server tag can legitimately contain a sequence of characters that resembles a tag is within an attribute value.
* The <a target="_blank" href="http://www.w3.org/TR/html401/charset.html#h-5.3.2">HTML 4.01 specification section 5.3.2</a>
* specifically allows the presence of '<code>&lt;</code>' and '<code>&gt;</code>' characters within attribute values.
* A common occurrence of this is in <a target="_blank" href="http://www.w3.org/TR/html401/interact/scripts.html#events">event</a> attributes containing scripts,
* such as the <code><a target="_blank" href="http://www.w3.org/TR/html401/interact/scripts.html#adef-onclick">onclick</a></code> attribute.
* There is no way of preventing such "tags" from being recognised in <a href="Source.html#ParseOnDemand">parse on demand</a> mode, as adding
* {@link StartTagType#NORMAL} to this property as a listed type would be far too inefficient.
* Performing a {@linkplain Source#fullSequentialParse() full sequential parse} of the source document prevents these attribute values from being
* recognised as tags, but can be very expensive if only a few tags in the document need to be parsed.
* The penalty of not parsing every tag in the document is that the exactness of this check is compromised, but in practical terms the difference is inconsequential.
* The default listed types of {@linkplain StartTagType#COMMENT comments} and {@linkplain StartTagType#CDATA_SECTION CDATA sections} yield sensible results
* in the vast majority of practical applications with only a minor impact on performance.
* <p>
* In <a target="_blank" href="http://www.w3.org/TR/xhtml1/">XHTML</a>, '<code>&lt;</code>' and '<code>&gt;</code>' characters
* must be represented in attribute values as {@linkplain CharacterReference character references}
* (see the XML 1.0 specification section <a target="_blank" href="http://www.w3.org/TR/REC-xml#CleanAttrVals">3.1</a>),
* so the situation should never arise that a tag is found inside another tag unless one of them is a
* {@linkplain #isServerTag() server tag}.
*
* @return an array of all the tag types inside which the parser ignores all non-{@linkplain #isServerTag() server} tags.
*/
public static final TagType[] getTagTypesIgnoringEnclosedMarkup() {
	// The array is held in a nested class and is returned as is, without being copied.
	final TagType[] listedTypes=TagTypesIgnoringEnclosedMarkup.array;
	return listedTypes;
}
/**
* Sets the tag types inside which the parser ignores all non-{@linkplain #isServerTag() server} tags.
* <br />(<a href="TagType.html#ImplementationAssistance">implementation assistance</a> method)
* <p>
* See {@link #getTagTypesIgnoringEnclosedMarkup()} for the documentation of this property.
* <p>
* The specified array is copied, so modifying it after this method returns has no effect on the property.
*
* @param tagTypes an array of tag types, must not be <code>null</code>.
* @throws IllegalArgumentException if <code>tagTypes</code> is <code>null</code>.
*/
public static final void setTagTypesIgnoringEnclosedMarkup(TagType[] tagTypes) {
	if (tagTypes==null) throw new IllegalArgumentException("tagTypes argument must not be null");
	// Store a private copy so that later changes to the caller's array cannot silently alter the parser configuration.
	TagTypesIgnoringEnclosedMarkup.array=tagTypes.clone();
}
/**
* Constructs a tag of this type at the specified position in the specified source document if it matches all of the required features.
* <br />(<a href="TagType.html#AbstractImplementation">abstract implementation</a> method)
* <p>
* The implementation of this method must check that the text at the specified position meets all of
* the criteria of this tag type, including such checks as the presence of the correct or well formed
* {@linkplain #getClosingDelimiter() closing delimiter}, {@linkplain Tag#getName() name}, {@linkplain Attributes attributes},
* {@linkplain EndTag end tag}, or any other distinguishing features.
* <p>
* It can be assumed that the specified position starts with the {@linkplain #getStartDelimiter() start delimiter} of this tag type,
* and that all other tag types with higher <a href="TagType.html#Precedence">precedence</a> (if any) have already been rejected as candidates.
* Tag types with lower precedence will be considered if this method returns <code>null</code>.
* <p>
* This method is only called after a successful check of the tag's position, i.e.
* {@link #isValidPosition(Source,int,int[]) isValidPosition(source,pos,fullSequentialParseData)}<code>==true</code>.
* <p>
* The {@link StartTagTypeGenericImplementation} and {@link EndTagTypeGenericImplementation} subclasses provide default
* implementations of this method that allow the use of much simpler <a href="TagType.html#Property">properties</a> and
* <a href="TagType.html#ImplementationAssistance">implementation assistance</a> methods to carry out the required functions.
* <p>
* An <code>IndexOutOfBoundsException</code> thrown by an implementation is caught by the internal caller, which logs the tag
* as not recognised and moves on to the next candidate tag type.
*
* @param source the {@link Source} document.
* @param pos the position in the source document.
* @return a tag of this type at the specified position in the specified source document if it meets all of the required features, or <code>null</code> if it does not meet the criteria.
*/
protected abstract Tag constructTagAt(Source source, int pos);
/**
* Indicates whether a tag of this type encloses the specified position of the specified source document.
* <br />(<a href="TagType.html#ImplementationAssistance">implementation assistance</a> method)
* <p>
* This is logically equivalent to <code>source.</code>{@link Source#getEnclosingTag(int,TagType) getEnclosingTag(pos,this)}<code>!=null</code>,
* but is safe to use within other implementation methods without the risk of causing an infinite recursion.
* <p>
* This method is called from the default implementation of the {@link #isValidPosition(Source, int pos, int[] fullSequentialParseData)} method.
*
* @param source the {@link Source} document.
* @param pos the character position in the source document to check.
* @return <code>true</code> if a tag of this type encloses the specified position of the specified source document, otherwise <code>false</code>.
*/
protected final boolean tagEncloses(final Source source, final int pos) {
	// Nothing can enclose the very first position of the document.
	if (pos==0) return false;
	// Search from pos-1 rather than pos, otherwise a tag at pos could cause infinite recursion
	// when this method is called from constructTagAt.
	final Tag candidate=source.getEnclosingTag(pos-1,this);
	if (candidate==null) return false;
	// A tag ending exactly at pos does not enclose pos. This compensates for searching from pos-1,
	// which matters if the tag in question immediately follows an end tag delimiter.
	return candidate.getEnd()!=pos;
}
/**
* Returns a string representation of this object useful for debugging purposes.
* @return a string representation of this object useful for debugging purposes.
*/
@Override
public String toString() {
	// The description of this tag type serves as its debugging representation.
	return getDescription();
}
static final Tag getTagAt(final Source source, final int pos, final boolean serverTagOnly, final boolean assumeNoNestedTags) {
	// Tries each prospective tag type at pos in iteration order and returns the first tag constructed, or null if none is.
	// The iterator yields nothing if pos is out of range, in which case the loop is skipped entirely.
	for (final TagTypeRegister.ProspectiveTagTypeIterator candidates=new TagTypeRegister.ProspectiveTagTypeIterator(source,pos); candidates.hasNext();) {
		final TagType candidateType=candidates.next();
		// When only server tags are wanted, skip every other tag type.
		if (serverTagOnly && !candidateType.isServerTag()) continue;
		// The position check is bypassed when the caller asserts that tags are not nested.
		if (!(assumeNoNestedTags || candidateType.isValidPosition(source,pos,source.fullSequentialParseData))) continue;
		try {
			final Tag constructedTag=candidateType.constructTagAt(source,pos);
			if (constructedTag!=null) return constructedTag;
		} catch (IndexOutOfBoundsException ex) {
			// Construction ran past the end of the source text: log the rejection and move on to the next candidate type.
			if (source.logger.isInfoEnabled()) source.logger.info(source.getRowColumnVector(pos).appendTo(new StringBuilder(200).append("Tag at ")).append(" not recognised as type '").append(candidateType.getDescription()).append("' because it has no end delimiter").toString());
		}
	}
	return null;
}
final char[] getStartDelimiterCharArray() {
	// Hands back the field itself rather than a copy.
	return this.startDelimiterCharArray;
}
private static final class TagTypesIgnoringEnclosedMarkup {
	// The array is kept in this separate nested class so that its static initialisation can take place
	// after the StartTagType.COMMENT and StartTagType.CDATA_SECTION members have been created.
	// The field is not final because setTagTypesIgnoringEnclosedMarkup(TagType[]) reassigns it.
	public static TagType[] array={StartTagType.COMMENT, StartTagType.CDATA_SECTION};
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -