📄 fastpageparser.java
字号:
_pushBack = 0; } else { try { c = reader.read(); } catch(IOException e) { _tokenType = TOKEN_EOF; break start; } } if(c < 0) { int tmpstate = _state; _state = STATE_EOF; if(_buffer.length() > 0 && tmpstate == STATE_TEXT) { _tokenType = TOKEN_TEXT; break start; } else { _tokenType = TOKEN_EOF; break start; } } switch(_state) { case STATE_TAG: { int buflen = _buffer.length(); if(c == '>') { if (_buffer.length() > 1 && _buffer.charAt(_buffer.length() - 1) == '/') { _tokenType = TOKEN_EMPTYTAG; } else { _tokenType = TOKEN_TAG; } _state = STATE_TEXT; break start; } else if(c == '/') { _buffer.append('/'); } else if(c == '<' && buflen == 0) { _buffer.append("<<"); _state = STATE_TEXT; } else if(c == '-' && buflen == 2 && _buffer.charAt(1) == '-' && _buffer.charAt(0) == '!') { _buffer.setLength(0); _state = STATE_COMMENT; } else if(c == '[' && buflen == 7 && _buffer.charAt(0) == '!' && _buffer.charAt(1) == '[' && _buffer.compareLower("cdata", 2)) { _buffer.setLength(0); _state = STATE_CDATA; } else if((c == 'e' || c == 'E') && buflen == 7 && _buffer.charAt(0) == '!' && _buffer.compareLower("doctyp", 1)) { _buffer.append((char)c); _state = STATE_DOCTYPE; } else if((c == 'T' || c == 't') && buflen == 5 && _buffer.compareLower("scrip", 0)) { _buffer.append((char)c); _state = STATE_SCRIPT; } else if(c == '"' || c == '\'') { _quote = c; _buffer.append(( char ) c); _state = STATE_TAG_QUOTE; } else { _buffer.append(( char ) c); } } break; case STATE_TEXT: { if(c == '<') { _state = STATE_TAG; if(_buffer.length() > 0) { _tokenType = TOKEN_TEXT; break start; } } else { _buffer.append(( char ) c); } } break; case STATE_TAG_QUOTE: { if(c == '>') { _pushBack = c; _state = STATE_TAG; } else { _buffer.append(( char ) c); if(c == _quote) { _state = STATE_TAG; } } } break; case STATE_COMMENT: { if(c == '>' && _comment >= 2) { _buffer.setLength(_buffer.length() - 2); _comment = 0; _state = STATE_TEXT; _tokenType = TOKEN_COMMENT; break start; } else if(c == '-') { _comment++; } else { _comment = 0; } _buffer.append(( char ) c); } break; case STATE_CDATA: { if(c == '>' && _comment >= 2) { _buffer.setLength(_buffer.length() - 2); _comment = 0; _state = STATE_TEXT; _tokenType = TOKEN_CDATA; break start; } else if(c == ']') { _comment++; } else { _comment = 0; } _buffer.append(( char ) c); } break; case STATE_SCRIPT: { _buffer.append((char) c); if (c == '<') { _comment = 0; } else if ((c == '/' && _comment == 0) ||((c == 's' || c == 'S' ) && _comment == 1) ||((c == 'c' || c == 'C' ) && _comment == 2) ||((c == 'r' || c == 'R' ) && _comment == 3) ||((c == 'i' || c == 'I' ) && _comment == 4) ||((c == 'p' || c == 'P' ) && _comment == 5) ||((c == 't' || c == 'T' ) && _comment == 6) ) { _comment++; } else if(c == '>' && _comment >= 7) { _comment = 0; _state = STATE_TEXT; _tokenType = TOKEN_SCRIPT; break start; } } break; case STATE_DOCTYPE: { _buffer.append((char) c); if (c == '>') { _state = STATE_TEXT; _tokenType = TOKEN_DOCTYPE; break start; } else { _comment = 0; } } break; } } } // Help the GC _currentTaggedContent = null; _buffer = null; return new FastPage(_sitemeshProperties, _htmlProperties, _metaProperties, _bodyProperties, _title.toString().trim(), _head.toString().trim(), _body.toString().trim(), _frameSet); } private static void writeTag(int state, int laststate, boolean hide, CharArray _head, CharArray _buffer, CharArray _body) { if (!hide) { if (shouldWriteToHead(state, laststate)) { _head.append('<').append(_buffer).append('>'); } else { _body.append('<').append(_buffer).append('>'); } } } private static boolean shouldWriteToHead(int state, int laststate) { return state == TAG_STATE_HEAD ||(laststate == TAG_STATE_HEAD && (state == TAG_STATE_XML || state == TAG_STATE_XMP)); } /** * Populates a {@link Tag} object using data from the supplied {@link CharArray}. * * The supplied tag parameter is reset and reused - this avoids excess object * creation which hwlps performance. * * @return the same tag instance that was passed in, except it will be populated * with a new <tt>name</tt> value (and the corresponding <tt>nameEndIdx</tt> value). * However if the tag contained nathing but whitespace, this method will return * <tt>null</tt>. */ private Tag parseTag(Tag tag, CharArray buf) { int len = buf.length(); int idx = 0; int begin; // Skip over any leading whitespace in the tag while (idx < len && Character.isWhitespace(buf.charAt(idx))) idx++; if(idx == len) return null; // Find out where the non-whitespace characters end. This will give us the tag name. begin = idx; while (idx < len && !Character.isWhitespace(buf.charAt(idx))) idx++; // Mark the tag name as a substring within the buffer. This allows us to perform // a substring comparison against it at a later date buf.setSubstr(begin, buf.charAt(idx - 1) == '/' ? idx - 1 : idx); // Remember where the name finishes so we can pull out the properties later if need be tag.nameEndIdx = idx; return tag; } /** * This is called when we need to extract the properties for the tag from the tag's HTML. * We only call this when necessary since it has quite a lot of overhead. * * @param tag the tag that is currently being processed. This should be the * tag that was returned as a result of a call to {@link #parseTag(FastPageParser.Tag, CharArray)} * (ie, it has the <tt>name</tt> and <tt>nameEndIdx</tt> fields set correctly for the * tag in question. The <tt>properties</tt> field can be in an undefined state - it * will get replaced regardless). * @param buffer a <tt>CharArray</tt> containing the entire tag that is being parsed. * @return the same tag instance that was passed in, only it will now be populated * with any properties that were specified in the tag's HTML. */ private static Tag parseProperties(Tag tag, CharArray buffer) { int len = buffer.length(); int idx = tag.nameEndIdx; // Start with an empty hashmap. A new HashMap is lazy-created if we happen to find any properties tag.properties = Collections.EMPTY_MAP; int begin; while (idx < len) { // Skip forward to the next non-whitespace character while (idx < len && Character.isWhitespace(buffer.charAt(idx))) idx++; if(idx == len) continue; begin = idx; if(buffer.charAt(idx) == '"') { idx++; while (idx < len && buffer.charAt(idx) != '"') idx++; if(idx == len) continue; idx++; } else if(buffer.charAt(idx) == '\'') { idx++; while (idx < len && buffer.charAt(idx) != '\'') idx++; if(idx == len) continue; idx++; } else { while (idx < len && !Character.isWhitespace(buffer.charAt(idx)) && buffer.charAt(idx) != '=') idx++; } // Mark the substring. This is the attribute name buffer.setSubstr(begin, idx); if(idx < len && Character.isWhitespace(buffer.charAt(idx))) { while (idx < len && Character.isWhitespace(buffer.charAt(idx))) idx++; } if(idx == len || buffer.charAt(idx) != '=') continue; idx++; if(idx == len) continue; while(idx < len && (buffer.charAt(idx) == '\n' || buffer.charAt(idx) == '\r')) idx++; if(buffer.charAt(idx) == ' ') { while (idx < len && Character.isWhitespace(buffer.charAt(idx))) idx++; if(idx == len || (buffer.charAt(idx) != '"' && buffer.charAt(idx) != '"')) continue; } begin = idx; int end; if(buffer.charAt(idx) == '"') { idx++; begin = idx; while (idx < len && buffer.charAt(idx) != '"') idx++; if(idx == len) continue; end = idx; idx++; } else if(buffer.charAt(idx) == '\'') { idx++; begin = idx; while (idx < len && buffer.charAt(idx) != '\'') idx++; if(idx == len) continue; end = idx; idx++; } else { while (idx < len && !Character.isWhitespace(buffer.charAt(idx))) idx++; end = idx; } // Extract the name and value as String objects and add them to the property map String name = buffer.getLowerSubstr(); String value = buffer.substring(begin, end); tag.addProperty(name, value); } return tag; } private class Tag { // The index where the name string ends. This is used as the starting // offet if we need to continue processing to find the tag's properties public int nameEndIdx = 0; // This holds a map of the various properties for a particular tag. // This map is only populated when required - normally it will remain empty public Map properties = Collections.EMPTY_MAP; /** * Adds a name/value property pair to this tag. Each property that is * added represents a property that was parsed from the tag's HTML. */ public void addProperty(String name, String value) { if(properties==Collections.EMPTY_MAP) { properties = new HashMap(8); } properties.put(name, value); } }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -