📄 htmltoken.cpp
字号:
} } else if ( startTag) { startTag = false; if (*src == '/') { // Start of an End-Tag if (pending == LFPending) pending = NonePending; // Ignore leading LFs } else if ( ((*src >= 'a') && (*src <='z')) || ((*src >= 'A') && (*src <='Z')) ) { // Start of a Start-Tag } else if ( *src == '!') { // <!-- comment --> } else if ( *src == '?') { // <? meta stuff ?> } else { // Invalid tag // Add as is if (pending) addPending(); *dest = '<'; dest++; *dest++ = *src++; continue; } if (pending) addPending(); if ( dest > buffer ) { *dest = 0; appendToken( buffer, dest-buffer ); dest = buffer; } *dest = TAG_ESCAPE; dest++; *dest = '<'; dest++; tag = true; searchCount = 1; // Look for '<!--' sequence to start comment // No 'src++' add '*src' in a second pass with 'startTag=false' } else if ( *src == '&' ) { src++; discard = NoneDiscard; if (pending) addPending(); charEntity = true; searchBuffer[0] = TAG_ESCAPE; searchBuffer[1] = '&'; searchCount = 1; } else if ( *src == '<' && !tag) { src++; startTag = true; discard = NoneDiscard; } else if ( *src == '>' && tag && !tquote ) { searchCount = 0; // Stop looking for '<!--' sequence *dest = '>'; *(dest+1) = 0; // make the tag lower case char *ptr = buffer+2; if (*ptr == '/') { // End Tag discard = NoneDiscard; } else { // Start Tag // Ignore CR/LF's after a start tag discard = LFDiscard; } while ( *ptr && *ptr != ' ' ) { *ptr = tolower( *ptr ); ptr++; } appendToken( buffer, dest-buffer+1 ); dest = buffer; tag = false; pending = NonePending; // Ignore pending spaces src++; if ( strncmp( buffer+2, "pre", 3 ) == 0 ) { prePos = 0; pre = true; } else if ( strncmp( buffer+2, "/pre", 4 ) == 0 ) { pre = false; } else if ( strncmp( buffer+2, "textarea", 8 ) == 0 ) { textarea = true; } else if ( strncmp( buffer+2, "/textarea", 9 ) == 0 ) { textarea = false; } else if ( strncmp( buffer+2, "title", 5 ) == 0 ) { title = true; } else if ( strncmp( buffer+2, "/title", 6 ) == 0 ) { title = false; } else if ( strncmp( buffer+2, "script", 6 ) == 0 ) { script = true; searchCount = 0; searchFor = scriptEnd; scriptCode = new char[ 1024 ]; scriptCodeSize = 0; scriptCodeMaxSize = 1024; } else if ( strncmp( buffer+2, "style", 5 ) == 0 ) { style = true; searchCount = 0; searchFor = styleEnd; scriptCode = new char[ 1024 ]; scriptCodeSize = 0; scriptCodeMaxSize = 1024; } else if ( strncmp( buffer+2, "select", 6 ) == 0 ) { select = true; } else if ( strncmp( buffer+2, "/select", 7 ) == 0 ) { select = false; } else if ( strncmp( buffer+2, "frameset", 8 ) == 0 ) { blocking.append( new BlockingToken( BlockingToken::FrameSet, last ) ); } else if ( strncmp( buffer+2, "cell", 4 ) == 0 ) { blocking.append( new BlockingToken(BlockingToken::Cell, last) ); } else if ( strncmp( buffer+2, "table", 5 ) == 0 ) { blocking.append( new BlockingToken( BlockingToken::Table, last ) ); } else if ( !blocking.isEmpty() && strncasecmp( buffer+1, blocking.getLast()->tokenName(), strlen( blocking.getLast()->tokenName() ) ) == 0 ) { blocking.removeLast(); } } else if (( *src == '\n' ) || ( *src == '\r' )) { if ( tquote) { if (discard == NoneDiscard) pending = SpacePending; } else if ( tag ) { searchCount = 0; // Stop looking for '<!--' sequence if (discard == NoneDiscard) pending = SpacePending; // Treat LFs inside tags as spaces } else if ( pre || textarea) { if (discard == LFDiscard) { // Ignore this LF discard = NoneDiscard; // We have discarded 1 LF } else { // Process this LF if (pending) addPending(); pending = LFPending; } } else { if (discard == LFDiscard) { // Ignore this LF discard = NoneDiscard; // We have discarded 1 LF } else { // Process this LF if (pending == NonePending) pending = LFPending; } } /* Check for MS-DOS CRLF sequence */ if (*src == '\r') { skipLF = true; } src++; } else if (( *src == ' ' ) || ( *src == '\t')) { if ( tquote) { if (discard == NoneDiscard) pending = SpacePending; } else if ( tag ) { searchCount = 0; // Stop looking for '<!--' sequence if (discard == NoneDiscard) pending = SpacePending; } else if ( pre || textarea) { if (pending) addPending(); if (*src == ' ') pending = SpacePending; else pending = TabPending; } else { pending = SpacePending; } src++; } else if ( *src == '\"' || *src == '\'') { // we treat " & ' the same in tags discard = NoneDiscard; if ( tag ) { searchCount = 0; // Stop looking for '<!--' sequence if ( ((tquote == SINGLE_QUOTE) && (*src == '\"')) || ((tquote == DOUBLE_QUOTE) && (*src == '\''))) { // just add it *dest++ = *src; } else if ( *(dest-1) == '=' && !tquote ) { // according to HTML4 DTD, we can simplify // strings like " my \nstring " to "my string" discard = SpaceDiscard; // ignore leading spaces pending = NonePending; if (*src == '\"') tquote = DOUBLE_QUOTE; else tquote = SINGLE_QUOTE; *dest++ = *src; } else if ( tquote ) { tquote = NO_QUOTE; *dest++ = *src; pending = SpacePending; // Add space automatically } else { // Ignore stray "\'" } src++; } else { if (pending) addPending(); if ( pre ) prePos++; *dest++ = *src++; } } else if ( *src == '=' ) { src++; discard = NoneDiscard; if ( tag ) { searchCount = 0; // Stop looking for '<!--' sequence *dest++ = '='; if ( !tquote ) { pending = NonePending; // Discard spaces before '=' discard = SpaceDiscard; // Ignore following spaces } } else { if (pending) addPending(); if ( pre ) prePos++; *dest++ = '='; } } else { discard = NoneDiscard; if (pending) addPending(); if (tag) { if (searchCount > 0) { if (*src == commentStart[searchCount]) { searchCount++; if (searchCount == 4) { // Found '<!--' sequence comment = true; dest = buffer; // ignore the previous part of this tag tag = false; searchCount = 0; continue; } } else { searchCount = 0; // Stop looking for '<!--' sequence } } } else if ( pre ) { prePos++; } *dest++ = *src++; } } if ( srcPtr ) delete [] srcPtr;}void HTMLTokenizer::end(){ if ( buffer == 0 ) return; if ( dest > buffer ) { *dest = 0; appendToken( buffer, dest-buffer ); } delete [] buffer; buffer = 0; // if there are still blocking tokens then the HTML is illegal - remove // blocks anyway and hope for the best blocking.clear();}void HTMLTokenizer::appendTokenBuffer( int min_size){ int newBufSize = TOKEN_BUFFER_SIZE; // If we were using a buffer, mark it's end if (next) { // Mark current buffer end *next = '\0'; } if (min_size > newBufSize) { // Wow! This surely is a big token... newBufSize += min_size; } HTMLTokenBuffer *newBuffer = (HTMLTokenBuffer *) new char [ newBufSize + 1]; tokenBufferList.append( newBuffer); next = newBuffer->first(); tokenBufferSizeRemaining = newBufSize; if (!curr) { curr = tokenBufferList.at(0)->first(); tokenBufferCurrIndex = 0; }} void HTMLTokenizer::nextTokenBuffer(){ tokenBufferCurrIndex++; if (tokenBufferCurrIndex < tokenBufferList.count()) { curr = tokenBufferList.at(tokenBufferCurrIndex)->first(); } else { // Should never occur. printf("ERROR in HTMLTokenize::nextToken()\n"); }}void HTMLTokenizer::first(){ tokenBufferCurrIndex = 0; curr = 0; if (tokenBufferList.count()) { HTMLTokenBuffer *tokenBufferCurr = tokenBufferList.at(tokenBufferCurrIndex); if (tokenBufferCurr) { curr = tokenBufferCurr->first(); } }}HTMLTokenizer::~HTMLTokenizer(){ reset();}//-----------------------------------------------------------------------------StringTokenizer::StringTokenizer(){ buffer = 0; pos = 0; end = 0; bufLen = 0;}void StringTokenizer::tokenize( const char *str, const char *_separators ){ if ( *str == '\0' ) { pos = 0; return; } int strLength = strlen( str ) + 1; if ( bufLen < strLength ) { delete [] buffer; buffer = new char[ strLength ]; bufLen = strLength; } const char *src = str; end = buffer; int quoted = NO_QUOTE; for ( ; *src != '\0'; src++ ) { char *x = strchr( _separators, *src ); if (( *src == '\"' ) && !quoted) quoted = DOUBLE_QUOTE; else if (( *src == '\'') && !quoted) quoted = SINGLE_QUOTE; else if ( (( *src == '\"') && (quoted == DOUBLE_QUOTE)) || (( *src == '\'') && (quoted == SINGLE_QUOTE))) quoted = NO_QUOTE; else if ( x && !quoted ) *end++ = 0; else *end++ = *src; } *end = 0; if ( end - buffer <= 1 ) pos = 0; // no tokens else pos = buffer;}const char* StringTokenizer::nextToken(){ if ( pos == 0 ) return 0; char *ret = pos; pos += strlen( ret ) + 1; if ( pos >= end ) pos = 0; return ret;}StringTokenizer::~StringTokenizer(){ if ( buffer != 0 ) delete [] buffer;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -