📄 segment.java
字号:
*/
public Attributes parseAttributes() {
	// The source document does the parsing; this segment only supplies its own bounds.
	final Attributes attributes=source.parseAttributes(begin,end);
	return attributes;
}
/**
* Causes this segment to be ignored when parsing.
* <p>
* Ignored segments are treated as blank spaces by the parsing mechanism, but are included as normal text in all other functions.
* <p>
* This method was originally the only means of preventing {@linkplain TagType#isServerTag() server tags} located inside
* {@linkplain StartTagType#NORMAL normal} tags from interfering with the parsing of the tags
* (such as where an {@linkplain Attribute attribute} of a normal tag uses a server tag to dynamically set its value),
* as well as preventing non-server tags from being recognised inside server tags.
* <p>
* It is not necessary to use this method to ignore {@linkplain TagType#isServerTag() server tags} located inside normal tags,
* as the attributes parser automatically ignores any server tags.
* <p>
* It is not necessary to use this method to ignore non-server tags inside server tags, or the contents of {@link HTMLElementName#SCRIPT SCRIPT} elements,
* as the parser does this automatically when performing a {@linkplain Source#fullSequentialParse() full sequential parse}.
* <p>
* This leaves only very few scenarios where calling this method still provides a significant benefit.
* <p>
* One such case is where XML-style server tags are used inside {@linkplain StartTagType#NORMAL normal} tags.
* Here is an example using an XML-style JSP tag:
* <blockquote class="code"><code>&lt;a href="&lt;i18n:resource path="/Portal"/&gt;?BACK=TRUE"&gt;back&lt;/a&gt;</code></blockquote>
* The first double-quote of <code>"/Portal"</code> will be interpreted as the end quote for the <code>href</code> attribute,
* as there is no way for the parser to recognise the <code>i18n:resource</code> element as a server tag.
* Such use of XML-style server tags inside {@linkplain StartTagType#NORMAL normal} tags is generally seen as bad practice,
* but it is nevertheless valid JSP. The only way to ensure that this library is able to parse the normal tag surrounding it is to
* find these server tags first and call the <code>ignoreWhenParsing</code> method to ignore them before parsing the rest of the document.
* <p>
* It is important to understand the difference between ignoring the segment when parsing and removing the segment completely.
* Any text inside a segment that is ignored when parsing is treated by most functions as content, and as such is included in the output of
* tools such as {@link TextExtractor} and {@link Renderer}.
* <p>
* To remove segments completely, create an {@link OutputDocument} and call its {@link OutputDocument#remove(Segment) remove(Segment)} or
* {@link OutputDocument#replaceWithSpaces(int,int) replaceWithSpaces(int begin, int end)} method for each segment.
* Then create a new source document using {@link Source#Source(CharSequence) new Source(outputDocument.toString())}
* and perform the desired operations on this new source object.
* <p>
* Calling this method after the {@link Source#fullSequentialParse()} method has been called is not permitted and throws an <code>IllegalStateException</code>.
* <p>
* Any tags appearing in this segment that are found before this method is called will remain in the {@linkplain Source#getCacheDebugInfo() tag cache},
* and so will continue to be found by the <a href="Tag.html#TagSearchMethods">tag search methods</a>.
* If this is undesirable, the {@link Source#clearCache()} method can be called to remove them from the cache.
* Calling the {@link Source#fullSequentialParse()} method after this method clears the cache automatically.
* <p>
* For best performance, this method should be called on all segments that need to be ignored without calling
* any of the <a href="Tag.html#TagSearchMethods">tag search methods</a> in between.
*
* @see Source#ignoreWhenParsing(Collection segments)
*/
public void ignoreWhenParsing() {
	// Forward this segment's begin and end positions to the source document.
	final int from=begin;
	final int to=end;
	source.ignoreWhenParsing(from,to);
}
/**
* Compares this <code>Segment</code> object to another object.
* <p>
* If the argument is not a <code>Segment</code>, a <code>ClassCastException</code> is thrown.
* <p>
* A segment is considered to be before another segment if its begin position is earlier,
* or in the case that both segments begin at the same position, its end position is earlier.
* <p>
* Segments that begin and end at the same position are considered equal for
* the purposes of this comparison, even if they relate to different source documents.
* <p>
* Note: this class has a natural ordering that is inconsistent with equals.
* This means that this method may return zero in some cases where calling the
* {@link #equals(Object)} method with the same argument returns <code>false</code>.
*
* @param segment the segment to be compared
* @return a negative integer, zero, or a positive integer as this segment is before, equal to, or after the specified segment.
* @throws ClassCastException if the argument is not a <code>Segment</code>
*/
public int compareTo(final Segment segment) {
	// Identity short-circuit: a segment always compares equal to itself.
	if (segment==this) return 0;
	// Primary key is the begin position.
	if (begin!=segment.begin) return begin<segment.begin ? -1 : 1;
	// Begin positions tie, so the end position decides.
	if (end!=segment.end) return end<segment.end ? -1 : 1;
	// Same begin and same end: equal for ordering purposes.
	return 0;
}
/**
* Indicates whether this segment consists entirely of {@linkplain #isWhiteSpace(char) white space}.
* @return <code>true</code> if this segment consists entirely of {@linkplain #isWhiteSpace(char) white space}, otherwise <code>false</code>.
*/
public final boolean isWhiteSpace() {
	// Walk the segment from begin (inclusive) to end (exclusive) and stop at the
	// first character that is not white space. An empty segment yields true.
	int pos=begin;
	while (pos<end) {
		if (!isWhiteSpace(source.charAt(pos))) return false;
		pos++;
	}
	return true;
}
/**
* Indicates whether the specified character is <a target="_blank" href="http://www.w3.org/TR/html401/struct/text.html#h-9.1">white space</a>.
* <p>
* The <a target="_blank" href="http://www.w3.org/TR/html401/struct/text.html#h-9.1">HTML 4.01 specification section 9.1</a>
* specifies the following white space characters:
* <ul>
* <li>space (U+0020)
* <li>tab (U+0009)
* <li>form feed (U+000C)
* <li>line feed (U+000A)
* <li>carriage return (U+000D)
* <li>zero-width space (U+200B)
* </ul>
* <p>
* Despite the explicit inclusion of the zero-width space in the HTML specification, Microsoft IE6 does not
* recognise it as white space and renders it as an unprintable character (empty square).
* Even zero-width spaces included using the numeric character reference <code>&amp;#x200B;</code> are rendered this way.
*
* @param ch the character to test.
* @return <code>true</code> if the specified character is <a target="_blank" href="http://www.w3.org/TR/html401/struct/text.html#h-9.1">white space</a>, otherwise <code>false</code>.
*/
public static final boolean isWhiteSpace(final char ch) {
	// Linear scan of the WHITESPACE table; stops at the first match.
	boolean found=false;
	for (char candidate : WHITESPACE) {
		if (candidate==ch) {
			found=true;
			break;
		}
	}
	return found;
}
/**
* Returns a string representation of this object useful for debugging purposes.
* @return a string representation of this object useful for debugging purposes.
*/
public String getDebugInfo() {
	// Output shape: "(" + begin row/column + "-" + end row/column + ")".
	// 50 is only the builder's initial capacity, not a limit on the result length.
	final StringBuilder info=new StringBuilder(50).append('(');
	source.getRowColumnVector(begin).appendTo(info);
	info.append('-');
	source.getRowColumnVector(end).appendTo(info);
	return info.append(')').toString();
}
/**
* Returns the character at the specified index.
* <p>
* This is logically equivalent to <code>toString().charAt(index)</code>
* for valid argument values <code>0 &lt;= index &lt; length()</code>.
* <p>
* However because this implementation works directly on the underlying document source string,
* it should not be assumed that an <code>IndexOutOfBoundsException</code> is thrown
* for an invalid argument value.
*
* @param index the index of the character.
* @return the character at the specified index.
*/
public final char charAt(final int index) {
	// Translate the segment-relative index into an offset in the underlying source string.
	// No check is made against this segment's own end position.
	final int sourceIndex=begin+index;
	return source.string.charAt(sourceIndex);
}
/**
* Returns a new character sequence that is a subsequence of this sequence.
* <p>
* This is logically equivalent to <code>toString().subSequence(beginIndex,endIndex)</code>
* for valid values of <code>beginIndex</code> and <code>endIndex</code>.
* <p>
* However because this implementation works directly on the underlying document source string,
* it should not be assumed that an <code>IndexOutOfBoundsException</code> is thrown
* for invalid argument values as described in the <code>String.subSequence(int,int)</code> method.
*
* @param beginIndex the begin index, inclusive.
* @param endIndex the end index, exclusive.
* @return a new character sequence that is a subsequence of this sequence.
*/
public final CharSequence subSequence(final int beginIndex, final int endIndex) {
	// Both indices are relative to this segment, so each is shifted by the segment's begin position.
	// No check is made against this segment's own end position.
	final int from=begin+beginIndex;
	final int to=begin+endIndex;
	return source.string.subSequence(from,to);
}
/**
* This method has been deprecated as of version 3.0 in order to apply a consistent naming convention across all <a href="Tag.html#TagSearchMethods">tag search methods</a>.
* @deprecated Replaced by {@link #getAllTags()}.
*/
@Deprecated
public List<Tag> findAllTags() {
	// Legacy alias: forwards unchanged to the renamed getAllTags().
	final List<Tag> tags=getAllTags();
	return tags;
}
/**
* This method has been deprecated as of version 3.0 in order to apply a consistent naming convention across all <a href="Tag.html#TagSearchMethods">tag search methods</a>.
* @deprecated Replaced by {@link #getAllTags(TagType)}.
*/
@Deprecated
public List<Tag> findAllTags(final TagType tagType) {
	// Legacy alias: forwards unchanged to the renamed getAllTags(TagType).
	final List<Tag> tags=getAllTags(tagType);
	return tags;
}
/**
* This method has been deprecated as of version 3.0 in order to apply a consistent naming convention across all <a href="Tag.html#TagSearchMethods">tag search methods</a>.
* @deprecated Replaced by {@link #getAllStartTags()}.
*/
@Deprecated
public List<StartTag> findAllStartTags() {
	// Legacy alias: forwards unchanged to the renamed getAllStartTags().
	final List<StartTag> startTags=getAllStartTags();
	return startTags;
}
/**
* This method has been deprecated as of version 3.0 in order to apply a consistent naming convention across all <a href="Tag.html#TagSearchMethods">tag search methods</a>.
* @deprecated Replaced by {@link #getAllStartTags(String name)}.
*/
@Deprecated
public List<StartTag> findAllStartTags(String name) {
	// Legacy alias: forwards unchanged to the renamed getAllStartTags(String).
	final List<StartTag> startTags=getAllStartTags(name);
	return startTags;
}
/**
* This method has been deprecated as of version 3.0 in order to apply a consistent naming convention across all <a href="Tag.html#TagSearchMethods">tag search methods</a>.
* @deprecated Replaced by {@link #getAllStartTags(String attributeName, String value, boolean valueCaseSensitive)}.
*/
@Deprecated
public List<StartTag> findAllStartTags(final String attributeName, final String value, final boolean valueCaseSensitive) {
	// Legacy alias: forwards all three arguments unchanged to the renamed getAllStartTags overload.
	final List<StartTag> startTags=getAllStartTags(attributeName,value,valueCaseSensitive);
	return startTags;
}
/**
* This method has been deprecated as of version 3.0 in order to apply a consistent naming convention across all <a href="Tag.html#TagSearchMethods">tag search methods</a>.
* @deprecated Replaced by {@link #getAllElements(String name)}.
*/
@Deprecated
public List<Element> findAllElements(String name) {
	// Legacy alias: forwards unchanged to the renamed getAllElements(String).
	final List<Element> elements=getAllElements(name);
	return elements;
}
/**
* This method has been deprecated as of version 3.0 in order to apply a consistent naming convention across all <a href="Tag.html#TagSearchMethods">tag search methods</a>.
* @deprecated Replaced by {@link #getAllElements(StartTagType)}.
*/
@Deprecated
public List<Element> findAllElements(final StartTagType startTagType) {
	// Legacy alias: forwards unchanged to the renamed getAllElements(StartTagType).
	final List<Element> elements=getAllElements(startTagType);
	return elements;
}
/**
* This method has been deprecated as of version 3.0 in order to apply a consistent naming convention across all <a href="Tag.html#TagSearchMethods">tag search methods</a>.
* @deprecated Replaced by {@link #getAllElements(String attributeName, String value, boolean valueCaseSensitive)}.
*/
@Deprecated
public List<Element> findAllElements(final String attributeName, final String value, final boolean valueCaseSensitive) {
	// Legacy alias: forwards all three arguments unchanged to the renamed getAllElements overload.
	final List<Element> elements=getAllElements(attributeName,value,valueCaseSensitive);
	return elements;
}
/**
* This method has been deprecated as of version 3.0 in order to apply a consistent naming convention across all <a href="Tag.html#TagSearchMethods">tag search methods</a>.
* @deprecated Replaced by {@link #getAllCharacterReferences()}.
*/
@Deprecated
public List<CharacterReference> findAllCharacterReferences() {
	// Legacy alias: forwards unchanged to the renamed getAllCharacterReferences().
	final List<CharacterReference> characterReferences=getAllCharacterReferences();
	return characterReferences;
}
/**
* This method has been deprecated as of version 3.0 in order to apply a consistent naming convention across all <a href="Tag.html#TagSearchMethods">tag search methods</a>.
* @deprecated Replaced by {@link #getFormControls()}.
*/
@Deprecated
public List<FormControl> findFormControls() {
	// Legacy alias: forwards unchanged to the renamed getFormControls().
	final List<FormControl> formControls=getFormControls();
	return formControls;
}
/**
* This method has been deprecated as of version 3.0 in order to apply a consistent naming convention across all <a href="Tag.html#TagSearchMethods">tag search methods</a>.
* @deprecated Replaced by {@link #getFormFields()}.
*/
@Deprecated
public FormFields findFormFields() {
	// Legacy alias: forwards unchanged to the renamed getFormFields().
	final FormFields formFields=getFormFields();
	return formFields;
}
/**
* Collapses the {@linkplain #isWhiteSpace(char) white space} in the specified text.
* All leading and trailing white space is omitted, and any sections of internal white space are replaced by a single space.
*/
static final StringBuilder appendCollapseWhiteSpace(final StringBuilder sb, final CharSequence text) {
	final int length=text.length();
	// Step past any leading white space; if the text is empty or entirely white space,
	// the loop below never runs and sb is returned untouched.
	int pos=0;
	while (pos<length && isWhiteSpace(text.charAt(pos))) pos++;
	// A run of white space is only remembered here, and turned into a single space
	// once a later non-white-space character is seen. Trailing white space therefore
	// never reaches the output.
	boolean pendingSpace=false;
	for (; pos<length; pos++) {
		final char ch=text.charAt(pos);
		if (isWhiteSpace(ch)) {
			pendingSpace=true;
			continue;
		}
		if (pendingSpace) {
			sb.append(' ');
			pendingSpace=false;
		}
		sb.append(ch);
	}
	// The same builder that was passed in is returned so calls can be chained.
	return sb;
}
/**
 * Filters a tag by this segment's end position.
 * <p>
 * Only the end position is examined here; the tag's begin position is not checked.
 *
 * @param tag  the tag to check, may be <code>null</code>.
 * @return the same tag if it is non-null and ends at or before this segment's end, otherwise <code>null</code>.
 */
private Tag checkEnclosure(final Tag tag) {
	final boolean endsWithinSegment=tag!=null && tag.end<=end;
	return endsWithinSegment ? tag : null;
}
/**
 * Asks the source document for its next character reference from the given position,
 * and discards the result if it extends past the end of this segment.
 * <p>
 * Only the end position of the result is examined here; its begin position is not checked.
 *
 * @param pos  the position passed through to <code>source.getNextCharacterReference(int)</code>.
 * @return the character reference returned by the source if it is non-null and ends at or before this segment's end, otherwise <code>null</code>.
 */
private CharacterReference getNextCharacterReference(final int pos) {
	final CharacterReference found=source.getNextCharacterReference(pos);
	final boolean endsWithinSegment=found!=null && found.end<=end;
	return endsWithinSegment ? found : null;
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -