📄 fastpageparser.java
字号:
/* * Title: FastPageParser * Description: * * This software is published under the terms of the OpenSymphony Software * License version 1.1, of which a copy has been included with this * distribution in the LICENSE.txt file. */package com.opensymphony.module.sitemesh.parser;import com.opensymphony.module.sitemesh.Page;import com.opensymphony.module.sitemesh.PageParser;import com.opensymphony.module.sitemesh.html.util.CharArray;import com.opensymphony.module.sitemesh.util.CharArrayReader;import java.io.IOException;import java.io.Reader;import java.util.Collections;import java.util.HashMap;import java.util.Map;/** * Very fast PageParser implementation for parsing HTML. * * <p>Produces FastPage.</p> * * @author <a href="mailto:salaman@qoretech.com">Victor Salaman</a> * @version $Revision: 1.13 $ * @deprecated Use HTMLPageParser instead - it performs better and is more extensible. */public final class FastPageParser implements PageParser{ private static final int TOKEN_NONE = -0; private static final int TOKEN_EOF = -1; private static final int TOKEN_TEXT = -2; private static final int TOKEN_TAG = -3; private static final int TOKEN_COMMENT = -4; private static final int TOKEN_CDATA = -5; private static final int TOKEN_SCRIPT = -6; private static final int TOKEN_DOCTYPE = -7; private static final int TOKEN_EMPTYTAG = -8; private static final int STATE_EOF = -1; private static final int STATE_TEXT = -2; private static final int STATE_TAG = -3; private static final int STATE_COMMENT = -4; private static final int STATE_TAG_QUOTE = -5; private static final int STATE_CDATA = -6; private static final int STATE_SCRIPT = -7; private static final int STATE_DOCTYPE = -8; private static final int TAG_STATE_NONE = 0; private static final int TAG_STATE_HTML = -1; private static final int TAG_STATE_HEAD = -2; private static final int TAG_STATE_TITLE = -3; private static final int TAG_STATE_BODY = -4; private static final int TAG_STATE_XML = -6; private static final int TAG_STATE_XMP = -7; // These hashcodes are hardcoded because swtich statements can only // switch on compile-time constants. // In theory it is possible for there to be a hashcode collision with // other HTML tags, however in practice it is *very* unlikely because // tags are generally only a few characters long and hence are likely // to produce unique values. private static final int SLASH_XML_HASH = 1518984; // "/xml".hashCode(); private static final int XML_HASH = 118807; // "xml".hashCode(); private static final int SLASH_XMP_HASH = 1518988; // "/xmp".hashCode(); private static final int XMP_HASH = 118811; // "xmp".hashCode(); private static final int HTML_HASH = 3213227; // "html".hashCode(); private static final int SLASH_HTML_HASH = 46618714; // "/html".hashCode(); private static final int HEAD_HASH = 3198432; // "head".hashCode(); private static final int TITLE_HASH = 110371416; // "title".hashCode(); private static final int SLASH_TITLE_HASH = 1455941513; // "/title".hashCode(); private static final int PARAMETER_HASH = 1954460585; // "parameter".hashCode(); private static final int META_HASH = 3347973; // "meta".hashCode(); private static final int SLASH_HEAD_HASH = 46603919; // "/head".hashCode(); private static final int FRAMESET_HASH = -1644953643; // "frameset".hashCode(); private static final int FRAME_HASH = 97692013; // "frame".hashCode(); private static final int BODY_HASH = 3029410; // "body".hashCode(); private static final int SLASH_BODY_HASH = 46434897; // "/body".hashCode(); private static final int CONTENT_HASH = 951530617; // "content".hashCode(); public Page parse(char[] data) throws IOException { FastPage page = internalParse(new CharArrayReader(data)); page.setVerbatimPage(data); return page; } public Page parse(Reader reader) { return internalParse(reader); } private FastPage internalParse(Reader reader) { CharArray _buffer = new CharArray(4096); CharArray _body = new CharArray(4096); CharArray _head = new CharArray(512); CharArray _title = new CharArray(128); Map _htmlProperties = null; Map _metaProperties = new HashMap(6); Map _sitemeshProperties = new HashMap(6); Map _bodyProperties = null; CharArray _currentTaggedContent = new CharArray(1024); String _contentTagId = null; boolean tagged = false; boolean _frameSet = false; int _state = STATE_TEXT; int _tokenType = TOKEN_NONE; int _pushBack = 0; int _comment = 0; int _quote = 0; boolean hide = false; int state = TAG_STATE_NONE; int laststate = TAG_STATE_NONE; boolean doneTitle = false; // This tag object gets reused each iteration. Tag tagObject = new Tag(); while (_tokenType != TOKEN_EOF) { if(tagged) { if(_tokenType == TOKEN_TAG || _tokenType == TOKEN_EMPTYTAG) { if(_buffer==null || _buffer.length()==0) { _tokenType=TOKEN_NONE; continue; } if (parseTag(tagObject, _buffer) == null) continue; if (_buffer.compareLowerSubstr("/content")) // Note that the '/' survives the | 32 operation { tagged = false; if(_contentTagId != null) { state = TAG_STATE_NONE; _sitemeshProperties.put(_contentTagId, _currentTaggedContent.toString()); _currentTaggedContent.setLength(0); _contentTagId = null; } } else { _currentTaggedContent.append('<').append(_buffer).append('>'); } } else { if(_buffer.length() > 0) _currentTaggedContent.append(_buffer); } } else { if(_tokenType == TOKEN_TAG || _tokenType == TOKEN_EMPTYTAG) { if(_buffer==null || _buffer.length()==0) { _tokenType=TOKEN_NONE; continue; } if(parseTag(tagObject, _buffer) == null) { _tokenType=TOKEN_TEXT; continue; } int tagHash = _buffer.substrHashCode(); if(state == TAG_STATE_XML || state == TAG_STATE_XMP) { writeTag(state, laststate, hide, _head, _buffer, _body); if( (state == TAG_STATE_XML && tagHash == SLASH_XML_HASH) ||(state == TAG_STATE_XMP && tagHash == SLASH_XMP_HASH) ) { state = laststate; } } else { boolean doDefault = false; switch (tagHash) { case HTML_HASH: if (!_buffer.compareLowerSubstr("html")) { // skip any accidental hash collisions doDefault = true; break; } state = TAG_STATE_HTML; _htmlProperties = parseProperties(tagObject, _buffer).properties; break; case HEAD_HASH: if (!_buffer.compareLowerSubstr("head")) { // skip any accidental hash collisions doDefault = true; break; } state = TAG_STATE_HEAD; break; case XML_HASH: if (!_buffer.compareLowerSubstr("xml")) { // skip any accidental hash collisions doDefault = true; break; } laststate = state; writeTag(state, laststate, hide, _head, _buffer, _body); state = TAG_STATE_XML; break; case XMP_HASH: if (!_buffer.compareLowerSubstr("xmp")) { // skip any accidental hash collisions doDefault = true; break; } laststate = state; writeTag(state, laststate, hide, _head, _buffer, _body); state = TAG_STATE_XMP; break; case TITLE_HASH: if (!_buffer.compareLowerSubstr("title")) { // skip any accidental hash collisions doDefault = true; break; } if (doneTitle) { hide = true; } else { laststate = state; state = TAG_STATE_TITLE; } break; case SLASH_TITLE_HASH: if (!_buffer.compareLowerSubstr("/title")) { // skip any accidental hash collisions doDefault = true; break; } if (doneTitle) { hide = false; } else { doneTitle = true; state = laststate; } break; case PARAMETER_HASH: if (!_buffer.compareLowerSubstr("parameter")) { // skip any accidental hash collisions doDefault = true; break; } parseProperties(tagObject, _buffer); String name = (String) tagObject.properties.get("name"); String value = (String) tagObject.properties.get("value"); if (name != null && value != null) { _sitemeshProperties.put(name, value); } break; case META_HASH: if (!_buffer.compareLowerSubstr("meta")) { // skip any accidental hash collisions doDefault = true; break; } CharArray metaDestination = state == TAG_STATE_HEAD ? _head : _body; metaDestination.append('<'); metaDestination.append(_buffer); metaDestination.append('>'); parseProperties(tagObject, _buffer); name = (String) tagObject.properties.get("name"); value = (String) tagObject.properties.get("content"); if (name == null) { String httpEquiv = (String) tagObject.properties.get("http-equiv"); if (httpEquiv != null) { name = "http-equiv." + httpEquiv; } } if (name != null && value != null) { _metaProperties.put(name, value); } break; case SLASH_HEAD_HASH: if (!_buffer.compareLowerSubstr("/head")) { // skip any accidental hash collisions doDefault = true; break; } state = TAG_STATE_HTML; break; case FRAME_HASH: if (!_buffer.compareLowerSubstr("frame")) { // skip any accidental hash collisions doDefault = true; break; } _frameSet = true; break; case FRAMESET_HASH: if (!_buffer.compareLowerSubstr("frameset")) { // skip any accidental hash collisions doDefault = true; break; } _frameSet = true; break; case BODY_HASH: if (!_buffer.compareLowerSubstr("body")) { // skip any accidental hash collisions doDefault = true; break; } if (_tokenType == TOKEN_EMPTYTAG) { state = TAG_STATE_BODY; } _bodyProperties = parseProperties(tagObject, _buffer).properties; break; case CONTENT_HASH: if (!_buffer.compareLowerSubstr("content")) { // skip any accidental hash collisions doDefault = true; break; } state = TAG_STATE_NONE; Map props = parseProperties(tagObject, _buffer).properties; if (props != null) { tagged = true; _contentTagId = (String) props.get("tag"); } break; case SLASH_XMP_HASH: if (!_buffer.compareLowerSubstr("/xmp")) { // skip any accidental hash collisions doDefault = true; break; } hide = false; break; case SLASH_BODY_HASH: if (!_buffer.compareLowerSubstr("/body")) { // skip any accidental hash collisions doDefault = true; break; } state = TAG_STATE_NONE; hide = true; break; case SLASH_HTML_HASH: if (!_buffer.compareLowerSubstr("/html")) { // skip any accidental hash collisions doDefault = true; break; } state = TAG_STATE_NONE; hide = true; break; default: doDefault = true; } if (doDefault) writeTag(state, laststate, hide, _head, _buffer, _body); } } else if (!hide) { if (_tokenType == TOKEN_TEXT) { if (state == TAG_STATE_TITLE) { _title.append(_buffer); } else if (shouldWriteToHead(state, laststate)) { _head.append(_buffer); } else { _body.append(_buffer); } } else if (_tokenType == TOKEN_COMMENT) { final CharArray commentDestination = shouldWriteToHead(state, laststate) ? _head : _body; commentDestination.append("<!--"); commentDestination.append(_buffer); commentDestination.append("-->"); } else if (_tokenType == TOKEN_CDATA) { final CharArray commentDestination = state == TAG_STATE_HEAD ? _head : _body; commentDestination.append("<![CDATA["); commentDestination.append(_buffer); commentDestination.append("]]>"); } else if (_tokenType == TOKEN_SCRIPT) { final CharArray commentDestination = state == TAG_STATE_HEAD ? _head : _body; commentDestination.append('<'); commentDestination.append(_buffer); } } } _buffer.setLength(0); start: while (true) { int c; if(_pushBack != 0) { c = _pushBack;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -