📄 htmltokenizer.java
字号:
} else {
content();
}
} else if (_isStyleContext) {
if ( startsWith("</style") && (isWhitespace(_pos + 7) || isChar(_pos + 7, '>')) ) {
tagEnd();
} else {
content();
}
} else {
if ( startsWith("<!doctype") ) {
if ( !_isLateForDoctype ) {
doctype();
_isLateForDoctype = true;
} else {
ignore();
}
} else if ( startsWith("</") && isIdentifierStartChar(_pos + 2) ) {
_isLateForDoctype = true;
tagEnd();
} else if ( startsWith("<!--") ) {
comment();
} else if ( startsWith("<") && isIdentifierStartChar(_pos + 1) ) {
_isLateForDoctype = true;
tagStart();
} else {
content();
}
}
}
_reader.close();
}
/**
 * Tokenizes an opening tag.
 * On entry the current position must be on the "<" that is immediately
 * followed by the tag's name.
 *
 * The "<" is saved and consumed, the name is read with identifier(), and a
 * new TagNode becomes the current tag token. When the name could not be read
 * (_asExpected is false) everything saved so far is handed to
 * addSavedAsContent(). Otherwise attributes are parsed, the token is added,
 * and the terminator is consumed: ">" (which also switches on the script or
 * style context for those tag names) or "/>" (which additionally adds an
 * EndTagToken for the same name).
 *
 * @throws IOException
 */
private void tagStart() throws IOException {
    // keep the "<" in the saved buffer and step over it
    saveCurrent();
    go();

    if ( isAllRead() ) {
        return;
    }

    final String name = identifier();
    _currentTagToken = new TagNode(name);

    if ( !_asExpected ) {
        // no valid identifier after "<" - hand the saved text over as content
        addSavedAsContent();
        return;
    }

    skipWhitespaces();
    tagAttributes();

    // snapshot of what has been saved so far; the terminator is appended below
    final String savedSource = _saved.toString();
    addToken(_currentTagToken);

    String terminator = "";
    if ( isChar('>') ) {
        go();
        if ( "script".equalsIgnoreCase(name) ) {
            _isScriptContext = true;
        } else if ( "style".equalsIgnoreCase(name) ) {
            _isStyleContext = true;
        }
        terminator = ">";
    } else if ( startsWith("/>") ) {
        go(2);
        // self-closing form: emit the matching end token right away
        addToken( new EndTagToken(name) );
        terminator = "/>";
    }

    _currentTagToken.setOriginalSource(savedSource + terminator);
    _currentTagToken = null;
}
/**
 * Tokenizes a closing tag.
 * On entry the current position must be on the "<" that is followed by "/"
 * and then the tag's name.
 *
 * The "</" is saved and consumed, the name is read with identifier(), and a
 * new EndTagToken becomes the current tag token. When the name could not be
 * read (_asExpected is false) everything saved so far is handed to
 * addSavedAsContent(). Otherwise attributes are parsed, the token is added,
 * a trailing ">" is consumed if present, and the script or style context is
 * switched off for those tag names - whether or not the ">" was found.
 *
 * @throws IOException
 */
private void tagEnd() throws IOException {
    // keep the "</" in the saved buffer and step over it
    saveCurrent(2);
    go(2);

    if ( isAllRead() ) {
        return;
    }

    final String name = identifier();
    _currentTagToken = new EndTagToken(name);

    if ( !_asExpected ) {
        // no valid identifier after "</" - hand the saved text over as content
        addSavedAsContent();
        return;
    }

    skipWhitespaces();
    tagAttributes();

    String source = _saved.toString();
    addToken(_currentTagToken);

    if ( isChar('>') ) {
        go();
        source = source + ">";
    }

    // leaving a script/style element ends the corresponding raw-text context
    if ( "script".equalsIgnoreCase(name) ) {
        _isScriptContext = false;
    } else if ( "style".equalsIgnoreCase(name) ) {
        _isStyleContext = false;
    }

    _currentTagToken.setOriginalSource(source);
    _currentTagToken = null;
}
/**
 * Reads an identifier starting at the current position.
 *
 * Sets _asExpected to true when the current character can start an
 * identifier and to false otherwise. Every character consumed is also
 * saved via saveCurrent().
 *
 * @return the identifier text, or null when the current character is not
 *         an identifier start character
 * @throws IOException
 */
private String identifier() throws IOException {
    _asExpected = true;

    if ( isIdentifierStartChar() ) {
        StringBuffer name = new StringBuffer(16);
        while (true) {
            if ( isAllRead() || !isIdentifierChar() ) {
                break;
            }
            saveCurrent();
            name.append( _working[_pos] );
            go();
        }
        return name.toString();
    }

    // nothing identifier-like here; let the caller know via the flag
    _asExpected = false;
    return null;
}
/**
 * Parses the list of tag attributes from the current position and adds
 * them to the current tag token.
 *
 * Parsing continues until the input is exhausted, ">" or "/>" is reached,
 * or _asExpected is left false (which happens when an unexpected "<" is
 * met where an attribute name should start). An attribute without "=" gets
 * its own name as its value.
 *
 * @throws IOException
 */
private void tagAttributes() throws IOException {
    while ( !isAllRead() && _asExpected && !isChar('>') && !startsWith("/>") ) {
        skipWhitespaces();
        final String name = identifier();

        if (_asExpected) {
            // value defaults to the attribute name when no "=value" follows
            String value = name;
            skipWhitespaces();
            if ( isChar('=') ) {
                saveCurrent();
                go();
                value = attributeValue();
            }

            if (_asExpected) {
                _currentTagToken.addAttribute(name, value);
            }
        } else {
            // not an attribute name: step over one stray character unless it
            // is a tag delimiter
            if ( !( isChar('<') || isChar('>') || startsWith("/>") ) ) {
                saveCurrent();
                go();
            }

            // a "<" keeps the flag false, which ends the loop; anything else
            // resumes attribute parsing
            if ( !isChar('<') ) {
                _asExpected = true;
            }
        }
    }
}
/**
 * Parses the value part of a tag attribute, i.e. what follows the "=" in
 * one of the forms:
 *      name=value
 *      name="value"
 *      name='value'
 *
 * Leading whitespace is skipped. A quoted value runs up to the matching
 * quote character (which is consumed); an unquoted value runs up to
 * whitespace, ">" or "/>". All consumed characters are also saved via
 * saveCurrent().
 *
 * @return the value without surrounding quotes; an empty string when the
 *         current position is at "<", ">" or "/>"
 * @throws IOException
 */
private String attributeValue() throws IOException {
    skipWhitespaces();

    if ( isChar('<') || isChar('>') || startsWith("/>") ) {
        return "";
    }

    // opening quote character, or 0 when the value is not quoted
    char quote = 0;
    if ( isChar('\'') ) {
        quote = '\'';
    } else if ( isChar('\"') ) {
        quote = '\"';
    }
    final boolean quoted = quote != 0;

    if (quoted) {
        // step over the opening quote
        saveCurrent();
        go();
    }

    StringBuffer value = new StringBuffer();
    while ( !isAllRead() ) {
        final boolean insideValue;
        if (quoted) {
            insideValue = !isChar(quote);
        } else {
            insideValue = !isWhitespace() && !isChar('>') && !startsWith("/>");
        }
        if (!insideValue) {
            break;
        }

        value.append( _working[_pos] );
        saveCurrent();
        go();
    }

    if ( quoted && isChar(quote) ) {
        // step over the closing quote
        saveCurrent();
        go();
    }

    return value.toString();
}
/**
 * Consumes plain content: saves and steps over at least one character
 * (if any input is left), continuing until the current character is "<"
 * or the input is exhausted, then passes the saved text to
 * addSavedAsContent().
 *
 * @throws IOException
 */
private void content() throws IOException {
    if ( !isAllRead() ) {
        do {
            saveCurrent();
            go();
        } while ( !isChar('<') && !isAllRead() );
    }

    addSavedAsContent();
}
/**
 * Skips input without saving it: steps over at least one character
 * (if any input is left) and continues until the current character is "<"
 * or the input is exhausted.
 *
 * @throws IOException
 */
private void ignore() throws IOException {
    if ( isAllRead() ) {
        return;
    }

    do {
        go();
    } while ( !isChar('<') && !isAllRead() );
}
/**
 * Tokenizes a comment.
 * On entry the current position must be on the "<" of the opening
 * "&lt;!--" sequence.
 *
 * The comment body is collected up to the closing "--&gt;" (or the end of
 * input). Unless the cleaner is configured to omit comments, a CommentToken
 * is added in which every "--" pair, as well as a leading or trailing "-",
 * is replaced using the cleaner's hyphen replacement string. A comment with
 * an empty body produces no token.
 *
 * @throws IOException
 */
private void comment() throws IOException {
    // step over the opening "<!--"
    go(4);
    while ( !isAllRead() && !startsWith("-->") ) {
        saveCurrent();
        go();
    }

    if ( startsWith("-->") ) {
        go(3);
    }

    if ( _saved.length() > 0 ) {
        if ( !cleaner.isOmitComments() ) {
            String hyphenRepl = cleaner.getHyphenReplacementInComment();

            // Literal replacement on purpose: replaceAll() would interpret "$" and
            // "\" inside the configured replacement as regex group references or
            // escapes and either throw or corrupt the comment text.
            String comment = _saved.toString().replace("--", hyphenRepl + hyphenRepl);

            // a single hyphen touching either comment delimiter is replaced as well
            if ( comment.length() > 0 && comment.charAt(0) == '-' ) {
                comment = hyphenRepl + comment.substring(1);
            }
            int len = comment.length();
            if ( len > 0 && comment.charAt(len - 1) == '-' ) {
                comment = comment.substring(0, len - 1) + hyphenRepl;
            }

            addToken( new CommentToken(comment) );
        }
        _saved.delete(0, _saved.length());
    }
}
/**
 * Tokenizes a doctype declaration.
 * On entry the current position must be on the "<" of "&lt;!doctype".
 *
 * After stepping over those nine characters, up to four whitespace-separated
 * parts are read - the first two with identifier(), the last two with
 * attributeValue() - and the rest of the declaration is skipped with
 * ignore(). The resulting DoctypeToken is registered with the cleaner only
 * when it reports itself as valid.
 *
 * @throws IOException
 */
private void doctype() throws IOException {
    // step over "<!doctype"
    go(9);

    skipWhitespaces();
    final String first = identifier();

    skipWhitespaces();
    final String second = identifier();

    skipWhitespaces();
    final String third = attributeValue();

    skipWhitespaces();
    final String fourth = attributeValue();

    ignore();

    final DoctypeToken token = new DoctypeToken(first, second, third, fourth);
    if ( token.isValid() ) {
        cleaner.setDoctype(token);
    }
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -