📄 qregexp.cpp
字号:
else { // case sensitive if ( pl && *p == c ) { p++; pl--; } } d++; } else if ( *d & MCC ) { // match char class if ( pl && matchcharclass( d, *p ) ) { p++; pl--; } d += (*d & MVL) + 1; } else if ( *d == PWS ) { if ( pl && p->isSpace() ) { p++; pl--; } d++; } else if ( *d == PDG ) { if ( pl && p->isDigit() ) { p++; pl--; } d++; } else if ( *d == ANY ) { if ( pl ) { p++; pl--; } d++; } else { return -1; // error } d++; // skip OPT's END while ( p >= first_p ) { // go backwards int end = matchstring( d, p, pl, bol, cs ); if ( end >= 0 ) return ( p - start ) + end; if ( !p ) return -1; --p; ++pl; } } return -1; default: // error return -1; } } return p - start;}/*! \internal Recursively match string.*/// This is obsolete now, but since it is protected (not private), it// is still implemented on the off-chance that somebody has made a// class derived from QRegExp and calls this directly.// Qt 3.0: Remove this?const QChar *QRegExp::matchstr( uint *rxd, const QChar *str, uint strlength, const QChar *bol ) const{ int len = matchstring( rxd, str, strlength, bol, cs ); if ( len < 0 ) return 0; return str + len;}/*! Attempts to match in \e str, starting from position \e index. Returns the position of the match, or -1 if there was no match. If \e len is not a null pointer, the length of the match is stored in \e *len. If \e indexIsStart is TRUE (the default), the position \e index in the string will match the start-of-input primitive (^) in the regexp, if present. Otherwise, position 0 in \e str will match. Example: \code QRegExp r("[0-9]*\\.[0-9]+"); // matches floating point int len; r.match("pi = 3.1416", 0, &len); // returns 5, len == 6 \endcode \note In Qt 3.0, this function will be replaced by find().*/int QRegExp::match( const QString &str, int index, int *len, bool indexIsStart ) const{ if ( !isValid() || isEmpty() ) return -1; if ( str.length() < (uint)index ) return -1; const QChar *start = str.unicode(); const QChar *p = start + index; uint pl = str.length() - index; uint *d = rxdata; int ep = -1; if ( *d == BOL ) { // match from beginning of line ep = matchstring( d, p, pl, indexIsStart ? p : start, cs ); } else { if ( *d & CHR ) { QChar c( *d ); if ( !cs && !c.row() ) { // case sensitive, # only 8bit while ( pl && ( p->row() || tolower(p->cell()) != c.cell() ) ) { p++; pl--; } } else { // case insensitive while ( pl && *p != c ) { p++; pl--; } } } while( 1 ) { // regular match ep = matchstring( d, p, pl, indexIsStart ? start+index : start, cs ); if ( ep >= 0 ) break; if ( !pl ) break; p++; pl--; } } if ( len ) *len = ep >= 0 ? ep : 0; // No match -> 0, for historical reasons return ep >= 0 ? (int)(p - start) : -1; // return index;}/*! \fn int QRegExp::find( const QString& str, int index ) Attempts to match in \e str, starting from position \e index. Returns the position of the match, or -1 if there was no match. \sa match()*///// Translate wildcard pattern to standard regexp pattern.// Ex: *.cpp ==> ^.*\.cpp$//static QString wc2rx( const QString &pattern ){ int patlen = (int)pattern.length(); QString wcpattern = QString::fromLatin1("^"); QChar c; for( int i = 0; i < patlen; i++ ) { c = pattern[i]; switch ( (char)c ) { case '*': // '*' ==> '.*' wcpattern += '.'; break; case '?': // '?' ==> '.' c = '.'; break; case '.': // quote special regexp chars case '+': case '\\': case '$': case '^': wcpattern += '\\'; break; case '[': if ( (char)pattern[i+1] == '^' ) { // don't quote '^' after '[' wcpattern += '['; c = pattern[i+1]; i++; } break; } wcpattern += c; } wcpattern += '$'; return wcpattern; // return new regexp pattern}//// Internal: Get char value and increment pointer.//static uint char_val( const QChar **str, uint *strlength ) // get char value{ const QChar *p = *str; uint pl = *strlength; uint len = 1; uint v = 0; if ( (char)*p == '\\' ) { // escaped code p++; pl--; if ( !pl ) { // it is just a '\' (*str)++; (*strlength)--; return '\\'; } len++; // length at least 2 int i; char c; char ch = tolower((char)*p); switch ( ch ) { case 'b': v = '\b'; break; // bell case 'f': v = '\f'; break; // form feed case 'n': v = '\n'; break; // newline case 'r': v = '\r'; break; // return case 't': v = '\t'; break; // tab case 's': v = PWS; break; // whitespace charclass case 'd': v = PDG; break; // digit charclass case '<': v = BOW; break; // word beginning matcher case '>': v = EOW; break; // word ending matcher case 'x': { // hex code p++; pl--; for ( i = 0; (i < 4) && pl; i++ ) { //up to 4 hex digits c = tolower((char)*p); bool a = ( c >= 'a' && c <= 'f' ); if ( (c >= '0' && c <= '9') || a ) { v <<= 4; v += a ? 10 + c - 'a' : c - '0'; len++; } else { break; } p++; pl--; } } break; default: { if ( ch >= '0' && ch <= '7' ) { //octal code len--; for ( i = 0; (i < 3) && pl; i++ ) { // up to 3 oct digits c = (char)*p; if ( c >= '0' && c <= '7' ) { v <<= 3; v += c - '0'; len++; } else { break; } p++; pl--; } } else { // not an octal number v = (((uint)(p->row())) << 8) | ((uint)p->cell()); } } } } else { v = (((uint)(p->row())) << 8) | ((uint)p->cell()); } *str += len; *strlength -= len; return v;}#if defined(DEBUG)static uint *dump( uint *p ){ while ( *p != END ) { if ( *p & CHR ) { QChar uc = (QChar)*p; char c = (char)uc; uint u = (((uint)(uc.row())) << 8) | ((uint)uc.cell()); qDebug( "\tCHR\tU%04x (%c)", u, (c ? c : ' ')); p++; } else if ( *p & MCC ) { uint clcode = *p & MCD; uint numFields = *p & MVL; if ( clcode == CCL ) qDebug( "\tCCL\t%i", numFields ); else if ( clcode == CCN ) qDebug( "\tCCN\t%i", numFields ); else qDebug("coding error!"); for ( int i = 0; i < (int)numFields; i++ ) { p++; if ( *p == PWS ) qDebug( "\t\tPWS" ); else if ( *p == PDG ) qDebug( "\t\tPDG" ); else { uint from = ( *p & MCD ) >> 16; uint to = *p & MVL; char fc = (char)QChar(from); char tc = (char)QChar(to); qDebug( "\t\tU%04x (%c) - U%04x (%c)", from, (fc ? fc : ' '), to, (tc ? tc : ' ') ); } } p++; } else switch ( *p++ ) { case PWS: qDebug( "\tPWS" ); break; case PDG: qDebug( "\tPDG" ); break; case BOL: qDebug( "\tBOL" ); break; case EOL: qDebug( "\tEOL" ); break; case BOW: qDebug( "\tBOW" ); break; case EOW: qDebug( "\tEOW" ); break; case ANY: qDebug( "\tANY" ); break; case CLO: qDebug( "\tCLO" ); p = dump( p ); break; case OPT: qDebug( "\tOPT" ); p = dump( p ); break; } } qDebug( "\tEND" ); return p+1;}#endif // DEBUGstatic const int maxlen = 1024; // max length of regexp arraystatic uint rxarray[ maxlen ]; // tmp regexp array/*! \internal Compiles the regular expression and stores the result in rxdata. The 'error' flag is set to non-zero if an error is detected. NOTE! This function is not reentrant!*/void QRegExp::compile(){ if ( rxdata ) { // delete old data delete [] rxdata; rxdata = 0; } if ( rxstring.isEmpty() ) { // no regexp pattern set error = PatNull; return; } error = PatOk; // assume pattern is ok QString pattern; if ( wc ) pattern = wc2rx(rxstring); else pattern = rxstring; const QChar *start = pattern.unicode(); // pattern pointer const QChar *p = start; // pattern pointer uint pl = pattern.length(); uint *d = rxarray; // data pointer uint *prev_d = 0;#define GEN(x) *d++ = (x) while ( pl ) { char ch = (char)*p; switch ( ch ) { case '^': // beginning of line prev_d = d; GEN( p == start ? BOL : (CHR | ch) ); p++; pl--; break; case '$': // end of line prev_d = d; GEN( pl == 1 ? EOL : (CHR | ch) ); p++; pl--; break; case '.': // any char prev_d = d; GEN( ANY ); p++; pl--; break; case '[': // character class { prev_d = d; p++; pl--; if ( !pl ) { error = PatSyntax; return; } bool firstIsEscaped = ( (char)*p == '\\' ); uint cch = char_val( &p, &pl ); if ( cch == '^' && !firstIsEscaped ) { // negate! GEN( CCN ); if ( !pl ) { error = PatSyntax; return; } cch = char_val( &p, &pl ); } else { GEN( CCL ); } uint numFields = 0; while ( pl ) { if ((pl>2) && ((char)*p == '-') && ((char)*(p+1) != ']')) { // Found a range char_val( &p, &pl ); // Read the '-' uint cch2 = char_val( &p, &pl ); // Read the range end if ( cch > cch2 ) { // swap start and stop int tmp = cch; cch = cch2; cch2 = tmp; } GEN( (cch << 16) | cch2 ); // from < to numFields++; } else { // Found a single character if ( cch & MCD ) // It's a code; will not be mistaken GEN( cch ); // for a range, since from > to else GEN( (cch << 16) | cch ); // from == to range numFields++; } if ( d >= rxarray + maxlen ) { // pattern too long error = PatOverflow; return; } if ( !pl ) { // At least ']' should be left error = PatSyntax; return; } bool nextIsEscaped = ( (char)*p == '\\' ); cch = char_val( &p, &pl ); if ( cch == (uint)']' && !nextIsEscaped ) break; if ( !pl ) { // End, should have seen ']' error = PatSyntax; return; } } *prev_d |= numFields; // Store number of fields } break; case '*': // Kleene closure, or case '+': // positive closure, or case '?': // optional closure { if ( prev_d == 0 ) { // no previous expression error = PatSyntax; // empty closure return; } switch ( *prev_d ) { // test if invalid closure case BOL: case BOW: case EOW: case CLO: case OPT: error = PatSyntax; return; } int ddiff = d - prev_d; if ( *p == '+' ) { // convert to Kleene closure if ( d + ddiff >= rxarray + maxlen ) { error = PatOverflow; // pattern too long return; } memcpy( d, prev_d, ddiff*sizeof(uint) ); d += ddiff; prev_d += ddiff; } memmove( prev_d+1, prev_d, ddiff*sizeof(uint) ); *prev_d = ch == '?' ? OPT : CLO; d++; GEN( END ); p++; pl--; } break; default: { prev_d = d; uint cv = char_val( &p, &pl ); if ( cv & MCD ) { // It's a code GEN( cv ); } else { if ( !cs && cv <= 0xff ) // #only 8bit support cv = tolower( cv ); GEN( CHR | cv ); } } } if ( d >= rxarray + maxlen ) { // oops! error = PatOverflow; // pattern too long return; } } GEN( END ); int len = d - rxarray; rxdata = new uint[ len ]; // copy from rxarray to rxdata CHECK_PTR( rxdata ); memcpy( rxdata, rxarray, len*sizeof(uint) );#if defined(DEBUG) //dump( rxdata ); // uncomment this line for debugging#endif}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -