📄 tag.java
字号:
/**
* Returns an XML representation of this tag.
* <p>
* This method is abstract in this class; the concrete behaviour is provided by the {@link StartTag} and {@link EndTag} subclasses.
* See the documentation of the {@link StartTag#tidy()} and {@link EndTag#tidy()} methods for details
* of the output produced for each kind of tag.
*
* @return an XML representation of this tag.
*/
public abstract String tidy();
/**
* Indicates whether the specified text is a valid <a target="_blank" href="http://www.w3.org/TR/REC-xml/#NT-Name">XML Name</a>.
* <p>
* This implementation first checks that the first character of the specified text is a valid XML Name start character
* as defined by the {@link #isXMLNameStartChar(char)} method, and then checks that the rest of the characters are valid
* XML Name characters as defined by the {@link #isXMLNameChar(char)} method.
* <p>
* Note that this implementation does not exactly adhere to the
* <a target="_blank" href="http://www.w3.org/TR/REC-xml/#NT-Name">formal definition of an XML Name</a>,
* but the differences are unlikely to be significant in real-world XML or HTML documents.
*
* @param text the text to test.
* @return <code>true</code> if the specified text is a valid <a target="_blank" href="http://www.w3.org/TR/REC-xml/#NT-Name">XML Name</a>, otherwise <code>false</code>.
* @see Source#getNameEnd(int pos)
*/
public static final boolean isXMLName(final CharSequence text) {
	// A null or empty sequence can never be a name.
	if (text==null) return false;
	if (text.length()==0) return false;
	// The first character is subject to the stricter "name start" rule.
	if (!isXMLNameStartChar(text.charAt(0))) return false;
	// Every remaining character only needs to satisfy the general "name character" rule.
	int index=1;
	while (index<text.length()) {
		if (!isXMLNameChar(text.charAt(index))) return false;
		index++;
	}
	return true;
}
/**
* Indicates whether the specified character is valid at the start of an
* <a target="_blank" href="http://www.w3.org/TR/REC-xml/#NT-Name">XML Name</a>.
* <p>
* The <a target="_blank" href="http://www.w3.org/TR/REC-xml/#sec-common-syn">XML 1.0 specification section 2.3</a> defines a
* <code><a target="_blank" href="http://www.w3.org/TR/REC-xml/#NT-Name">Name</a></code> as starting with one of the characters
* <br /><code>(<a target="_blank" href="http://www.w3.org/TR/REC-xml/#NT-Letter">Letter</a> | '_' | ':')</code>.
* <p>
* This method uses the expression
* <br /><code>Character.isLetter(ch) || ch=='_' || ch==':'</code>.
* <p>
* Note that there are many differences between the <code>Character.isLetter()</code> definition of a Letter and the
* <a target="_blank" href="http://www.w3.org/TR/REC-xml/#NT-Letter">XML definition of a Letter</a>,
* but these differences are unlikely to be significant in real-world XML or HTML documents.
*
* @param ch the character to test.
* @return <code>true</code> if the specified character is valid at the start of an <a target="_blank" href="http://www.w3.org/TR/REC-xml/#NT-Name">XML Name</a>, otherwise <code>false</code>.
* @see Source#getNameEnd(int pos)
*/
public static final boolean isXMLNameStartChar(final char ch) {
	switch (ch) {
		// Underscore and colon are the only non-letter characters accepted at the start of a name.
		case '_':
		case ':':
			return true;
		default:
			return Character.isLetter(ch);
	}
}
/**
* Indicates whether the specified character is valid anywhere in an
* <a target="_blank" href="http://www.w3.org/TR/REC-xml/#NT-Name">XML Name</a>.
* <p>
* The <a target="_blank" href="http://www.w3.org/TR/REC-xml/#sec-common-syn">XML 1.0 specification section 2.3</a> uses the
* entity <code><a target="_blank" href="http://www.w3.org/TR/REC-xml/#NT-NameChar">NameChar</a></code> to represent this set of
* characters, which is defined as
* <br /><code>(<a target="_blank" href="http://www.w3.org/TR/REC-xml/#NT-Letter">Letter</a>
* | <a target="_blank" href="http://www.w3.org/TR/REC-xml/#NT-Digit">Digit</a> | '.' | '-' | '_' | ':'
* | <a target="_blank" href="http://www.w3.org/TR/REC-xml/#NT-CombiningChar">CombiningChar</a>
* | <a target="_blank" href="http://www.w3.org/TR/REC-xml/#NT-Extender">Extender</a>)</code>.
* <p>
* This method uses the expression
* <br /><code>Character.isLetterOrDigit(ch) || ch=='.' || ch=='-' || ch=='_' || ch==':'</code>.
* <p>
* Note that there are many differences between these definitions,
* but these differences are unlikely to be significant in real-world XML or HTML documents.
*
* @param ch the character to test.
* @return <code>true</code> if the specified character is valid anywhere in an <a target="_blank" href="http://www.w3.org/TR/REC-xml/#NT-Name">XML Name</a>, otherwise <code>false</code>.
* @see Source#getNameEnd(int pos)
*/
public static final boolean isXMLNameChar(final char ch) {
	switch (ch) {
		// Punctuation characters explicitly permitted inside a name.
		case '.':
		case '-':
		case '_':
		case ':':
			return true;
		default:
			return Character.isLetterOrDigit(ch);
	}
}
/**
* This method has been deprecated as of version 3.0 in order to apply a consistent naming convention across all <a href="Tag.html#TagSearchMethods">tag search methods</a>.
* @deprecated Replaced by {@link #getNextTag()}.
*/
@Deprecated
public Tag findNextTag() {
	// Deprecated alias: forwards directly to the replacement method.
	final Tag next=getNextTag();
	return next;
}
/**
* This method has been deprecated as of version 3.0 in order to apply a consistent naming convention across all <a href="Tag.html#TagSearchMethods">tag search methods</a>.
* @deprecated Replaced by {@link #getPreviousTag()}.
*/
@Deprecated
public Tag findPreviousTag() {
	// Deprecated alias: forwards directly to the replacement method.
	final Tag previous=getPreviousTag();
	return previous;
}
// *** consider making public
/**
 * Returns the first {@link StartTag} found by repeatedly following {@link #getNextTag()} from this tag.
 *
 * @return the next start tag, or <code>null</code> if the chain of next tags ends before one is found.
 */
StartTag getNextStartTag() {
	for (Tag candidate=getNextTag(); candidate!=null; candidate=candidate.getNextTag()) {
		if (candidate instanceof StartTag) return (StartTag)candidate;
	}
	return null;
}
// *** consider making public
/**
 * Returns the first {@link StartTag} found by repeatedly following {@link #getPreviousTag()} from this tag.
 *
 * @return the previous start tag, or <code>null</code> if the chain of previous tags ends before one is found.
 */
StartTag getPreviousStartTag() {
	for (Tag candidate=getPreviousTag(); candidate!=null; candidate=candidate.getPreviousTag()) {
		if (candidate instanceof StartTag) return (StartTag)candidate;
	}
	return null;
}
// *** consider making public
/**
 * Returns the next tag after this one that has the specified tag type.
 *
 * @param tagType the tag type to look for, or <code>null</code> to accept any tag.
 * @return the next tag of the specified type, or <code>null</code> if there is none.
 */
Tag getNextTag(final TagType tagType) {
	if (tagType==null) return getNextTag();
	// Unregistered types are always looked up via the source-level search rather than the nextTag links.
	final boolean unregisteredType=(tagType==StartTagType.UNREGISTERED || tagType==EndTagType.UNREGISTERED);
	if (unregisteredType) return getNextTag(source,begin+1,tagType);
	for (Tag current=this; ; ) {
		final Tag following=current.nextTag;
		// If the link has not been populated, fall back to a source-level search starting just after the current tag.
		if (following==NOT_CACHED) return getNextTag(source,current.begin+1,tagType);
		if (following==null) return null;
		if (following.getTagType()==tagType) return following;
		current=following;
	}
}
// *** consider making public
/**
 * Returns the closest tag before this one that has the specified tag type.
 *
 * @param tagType the tag type to look for, or <code>null</code> to accept any tag.
 * @return the previous tag of the specified type, or <code>null</code> if there is none.
 */
Tag getPreviousTag(final TagType tagType) {
	if (tagType==null) return getPreviousTag();
	// Unregistered types are always looked up via the source-level search rather than the previousTag links.
	final boolean unregisteredType=(tagType==StartTagType.UNREGISTERED || tagType==EndTagType.UNREGISTERED);
	if (unregisteredType) return getPreviousTag(source,begin-1,tagType);
	for (Tag current=this; ; ) {
		final Tag preceding=current.previousTag;
		// If the link has not been populated, fall back to a source-level search starting just before the current tag.
		if (preceding==NOT_CACHED) return getPreviousTag(source,current.begin-1,tagType);
		if (preceding==null) return null;
		if (preceding.getTagType()==tagType) return preceding;
		current=preceding;
	}
}
final boolean includeInSearch() {
	// When the global switch is on, every tag takes part in searches; otherwise only tags that are not unregistered do.
	if (INCLUDE_UNREGISTERED_IN_SEARCH) return true;
	return !isUnregistered();
}
static final Tag getPreviousTag(final Source source, final int pos) {
	// returns null if pos is out of range.
	// Use the cache when the source caches all tag types, otherwise search the parse text directly.
	if (source.useAllTypesCache) return source.cache.getPreviousTag(pos);
	return getPreviousTagUncached(source,pos,ParseText.NO_BREAK);
}
static final Tag getNextTag(final Source source, final int pos) {
	// returns null if pos is out of range.
	// Use the cache when the source caches all tag types, otherwise search the parse text directly.
	if (source.useAllTypesCache) return source.cache.getNextTag(pos);
	return getNextTagUncached(source,pos,ParseText.NO_BREAK);
}
/**
 * Searches backwards through the parse text, without consulting the tag cache for the search itself,
 * for the closest tag that begins at or before the specified position and is eligible for inclusion in searches.
 *
 * @param source the source to search.
 * @param pos the position at which to start searching backwards.
 * @param breakAtPos passed through to <code>ParseText.lastIndexOf</code> as its break position.
 * @return the tag found, or <code>null</code> if pos is out of range or no eligible tag is found.
 * @throws AssertionError if an {@link IndexOutOfBoundsException} occurs internally; the original exception is attached as the cause.
 */
static final Tag getPreviousTagUncached(final Source source, final int pos, final int breakAtPos) {
	// returns null if pos is out of range.
	try {
		final ParseText parseText=source.getParseText();
		int begin=pos;
		do {
			begin=parseText.lastIndexOf('<',begin,breakAtPos); // this assumes that all tags start with '<'
			// parseText.lastIndexOf and indexOf return -1 if pos is out of range.
			if (begin==-1) return null;
			final Tag tag=getTagAt(source,begin,false);
			if (tag!=null && tag.includeInSearch()) return tag;
		} while ((begin-=1)>=0);
	} catch (IndexOutOfBoundsException ex) {
		// Keep the original exception as the cause so that the failing index and stack trace are not lost.
		// initCause is used instead of the (String,Throwable) constructor so the code also compiles on pre-Java 7 class libraries.
		final AssertionError error=new AssertionError("Unexpected internal exception");
		error.initCause(ex);
		throw error;
	}
	return null;
}
static final Tag getNextTagUncached(final Source source, final int pos, final int breakAtPos) {
	// returns null if pos is out of range.
	try {
		final ParseText parseText=source.getParseText();
		int searchFrom=pos;
		while (true) {
			// this assumes that all tags start with '<'
			// parseText.lastIndexOf and indexOf return -1 if pos is out of range.
			final int candidatePos=parseText.indexOf('<',searchFrom,breakAtPos);
			if (candidatePos==-1) return null;
			final Tag candidate=getTagAt(source,candidatePos,false);
			if (candidate!=null && candidate.includeInSearch()) return candidate;
			// Resume the search one character further on, unless that is at or beyond the end of the source.
			searchFrom=candidatePos+1;
			if (searchFrom>=source.end) return null;
		}
	} catch (IndexOutOfBoundsException ignored) {
		// this should only happen when the end of file is reached in the middle of a tag.
		// we don't have to do anything to handle it as there are no more tags anyway.
	}
	return null;
}
static final Tag getPreviousTag(final Source source, final int pos, final TagType tagType) {
	// returns null if pos is out of range.
	// Use the cache when the source caches special tag types, otherwise search the parse text directly.
	return source.useSpecialTypesCache
		? source.cache.getPreviousTag(pos,tagType)
		: getPreviousTagUncached(source,pos,tagType,ParseText.NO_BREAK);
}
static final Tag getNextTag(final Source source, final int pos, final TagType tagType) {
	// returns null if pos is out of range.
	// Use the cache when the source caches special tag types, otherwise search the parse text directly.
	return source.useSpecialTypesCache
		? source.cache.getNextTag(pos,tagType)
		: getNextTagUncached(source,pos,tagType,ParseText.NO_BREAK);
}
static final Tag getPreviousTagUncached(final Source source, final int pos, final TagType tagType, final int breakAtPos) {
	// returns null if pos is out of range.
	if (tagType==null) return getPreviousTagUncached(source,pos,breakAtPos);
	final char[] startDelimiter=tagType.getStartDelimiterCharArray();
	// An IndexOutOfBoundsException should never happen during a get previous operation,
	// so no handler is installed here and any such exception propagates to the caller unchanged.
	final ParseText parseText=source.getParseText();
	int searchFrom=pos;
	while (true) {
		// parseText.lastIndexOf and indexOf return -1 if pos is out of range.
		final int candidatePos=parseText.lastIndexOf(startDelimiter,searchFrom,breakAtPos);
		if (candidatePos==-1) return null;
		final Tag candidate=getTagAt(source,candidatePos,false);
		if (candidate!=null && candidate.getTagType()==tagType) return candidate;
		// Resume the search one character earlier, unless that would move before the start of the source.
		searchFrom=candidatePos-1;
		if (searchFrom<0) return null;
	}
}
static final Tag getNextTagUncached(final Source source, final int pos, final TagType tagType, final int breakAtPos) {
	// returns null if pos is out of range.
	if (tagType==null) return getNextTagUncached(source,pos,breakAtPos);
	final char[] startDelimiter=tagType.getStartDelimiterCharArray();
	try {
		final ParseText parseText=source.getParseText();
		int searchFrom=pos;
		while (true) {
			// parseText.lastIndexOf and indexOf return -1 if pos is out of range.
			final int candidatePos=parseText.indexOf(startDelimiter,searchFrom,breakAtPos);
			if (candidatePos==-1) return null;
			final Tag candidate=getTagAt(source,candidatePos,false);
			if (candidate!=null && candidate.getTagType()==tagType) return candidate;
			// Resume the search one character further on, unless that is at or beyond the end of the source.
			searchFrom=candidatePos+1;
			if (searchFrom>=source.end) return null;
		}
	} catch (IndexOutOfBoundsException ignored) {
		// this should only happen when the end of file is reached in the middle of a tag.
		// we don't have to do anything to handle it as there are no more tags anyway.
	}
	return null;
}
static final Tag getTagAt(final Source source, final int pos, final boolean serverTagOnly) {
	// returns null if pos is out of range.
	// Use the cache when the source caches all tag types, otherwise parse at the position directly.
	if (source.useAllTypesCache) return source.cache.getTagAt(pos,serverTagOnly);
	return getTagAtUncached(source,pos,serverTagOnly);
}
static final Tag getTagAtUncached(final Source source, final int pos, final boolean serverTagOnly) {
	// returns null if pos is out of range.
	// The final argument is the same flag that parseAll passes through as assumeNoNestedTags; it is always off here.
	final boolean assumeNoNestedTags=false;
	return TagType.getTagAt(source,pos,serverTagOnly,assumeNoNestedTags);
}
/**
 * Performs a sequential parse of the entire source, collecting every tag found and storing the results on the source.
 * <p>
 * Fields of <code>source</code> assigned by this method: <code>fullSequentialParseData</code> (replaced with a new single-element array
 * before parsing starts), <code>allTagsArray</code>, <code>allTags</code> and <code>allStartTags</code>.
 * The collected tags are also handed to <code>source.cache.loadAllTags</code>.
 * Finally the <code>previousTag</code> and <code>nextTag</code> fields of every element of the returned array are set to its
 * neighbours in that array, or to <code>null</code> at either end.
 *
 * @param source the source to parse.
 * @param assumeNoNestedTags if <code>true</code>, the search for the next tag resumes at the end of each tag that is not unregistered, instead of one character after its beginning.
 * @return the array passed to <code>loadAllTags</code> as the registered tags array; its length is the number of tags found for which <code>isUnregistered()</code> is <code>false</code>.
 */
static final Tag[] parseAll(final Source source, final boolean assumeNoNestedTags) {
int registeredTagCount=0;
int registeredStartTagCount=0;
final ArrayList<Tag> list=new ArrayList<Tag>();
source.fullSequentialParseData=new int[1]; // fullSequentialParseData is simply a holder for a single mutable integer. It holds the end position of the last normal tag (ie one that ignores enclosed markup), or MAX_VALUE if we are in a SCRIPT element.
// Nothing to search in an empty source.
if (source.end!=0) {
final ParseText parseText=source.getParseText();
Tag tag=parseAllgetNextTag(source,parseText,0,assumeNoNestedTags);
while (tag!=null) {
// Every tag found goes into the list; only tags that are not unregistered are counted, to size the arrays below.
list.add(tag);
if (!tag.isUnregistered()) {
registeredTagCount++;
if (tag instanceof StartTag) registeredStartTagCount++;
}
// Look for next tag after end of next tag if we're assuming tags don't appear inside other tags, as long as the last tag found was not an unregistered tag:
final int pos=(assumeNoNestedTags && !tag.isUnregistered()) ? tag.end : tag.begin+1;
// Stop once the next search position is exactly the end of the source.
if (pos==source.end) break;
tag=parseAllgetNextTag(source,parseText,pos,assumeNoNestedTags);
}
}
final Tag[] allRegisteredTags=new Tag[registeredTagCount];
final StartTag[] allRegisteredStartTags=new StartTag[registeredStartTagCount];
// NOTE(review): loadAllTags presumably fills both arrays from the list - confirm against the cache implementation.
source.cache.loadAllTags(list,allRegisteredTags,allRegisteredStartTags);
source.allTagsArray=allRegisteredTags;
source.allTags=Arrays.asList(allRegisteredTags);
source.allStartTags=Arrays.asList(allRegisteredStartTags);
// Link each tag in the array to its immediate neighbours; the first and last get null at the open end.
final int lastIndex=allRegisteredTags.length-1;
for (int i=0; i<allRegisteredTags.length; i++) {
final Tag tag=allRegisteredTags[i];
tag.previousTag=i>0 ? allRegisteredTags[i-1] : null;
tag.nextTag=i<lastIndex ? allRegisteredTags[i+1] : null;
}
return allRegisteredTags;
}
private static final Tag parseAllgetNextTag(final Source source, final ParseText parseText, final int pos, final boolean assumeNoNestedTags) {
	try {
		int searchFrom=pos;
		while (true) {
			final int candidatePos=parseText.indexOf('<',searchFrom); // this assumes that all tags start with '<'
			if (candidatePos==-1) return null;
			final Tag found=TagType.getTagAt(source,candidatePos,false,assumeNoNestedTags);
			if (found==null) {
				// No tag at this '<': resume one character further on, unless that is at or beyond the end of the source.
				searchFrom=candidatePos+1;
				if (searchFrom>=source.end) return null;
				continue;
			}
			// The parse data is only maintained when nested tags are possible.
			if (assumeNoNestedTags) return found;
			final TagType foundType=found.getTagType();
			final int[] parseData=source.fullSequentialParseData;
			// Only a tag ending beyond the recorded position can move it.
			if (found.end<=parseData[0]) return found;
			// DOCTYPE declarations and unregistered tags never update the recorded position.
			if (foundType==StartTagType.DOCTYPE_DECLARATION
					|| foundType==StartTagType.UNREGISTERED
					|| foundType==EndTagType.UNREGISTERED) return found;
			// A normal SCRIPT start tag is recorded as MAX_VALUE; any other tag records its own end position.
			// The name comparison is deliberately by identity, exactly as in the original code.
			if (foundType==StartTagType.NORMAL && found.name==HTMLElementName.SCRIPT) {
				parseData[0]=Integer.MAX_VALUE;
			} else {
				parseData[0]=found.end;
			}
			return found;
		}
	} catch (IndexOutOfBoundsException ignored) {
		// this should only happen when the end of file is reached in the middle of a tag.
		// we don't have to do anything to handle it as there are no more tags anyway.
	}
	return null;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -