📄 yyreg.cpp
字号:
/**********************************************************************
** Copyright (C) 2000 Trolltech AS.  All rights reserved.
**
** This file is part of Qt Designer.
**
** This file may be distributed and/or modified under the terms of the
** GNU General Public License version 2 as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL included in the
** packaging of this file.
**
** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
**
** See http://www.trolltech.com/gpl/ for GPL licensing information.
**
** Contact info@trolltech.com if any conditions of this licensing are
** not clear to you.
**
**********************************************************************/

#include <qregexp.h>

#include <ctype.h>
#include <stdio.h>
#include <string.h>

#include "yyreg.h"

/*
  First comes the tokenizer. We don't need something that knows much
  about C++. However, we need something that gives tokens from the
  end of the file to the start, which is tricky.

  If you are not familiar with hand-written tokenizers and parsers,
  you might want to read other simpler parsers written in the same
  style:

      $(QTDIR)/src/tools/qregexp.cpp
      $(QTDIR)/tools/inspector/cppparser.cpp

  You might also want to read Section 2 in the Dragon Book.
*/

/*
  Those are the tokens we are interested in. Tok_Something represents
  any C++ token that does not interest us, but it's dangerous to
  ignore tokens completely.
*/
enum { Tok_Boi, Tok_Ampersand, Tok_Aster, Tok_LeftParen, Tok_RightParen,
       Tok_Equal, Tok_LeftBrace, Tok_RightBrace, Tok_Semicolon, Tok_Colon,
       Tok_LeftAngle, Tok_RightAngle, Tok_Comma, Tok_Ellipsis,
       Tok_Gulbrandsen, Tok_LeftBracket, Tok_RightBracket, Tok_Tilde,
       Tok_Something, Tok_Comment, Tok_Ident,

       Tok_char, Tok_const, Tok_double, Tok_int, Tok_long, Tok_operator,
       Tok_short, Tok_signed, Tok_unsigned };

/*
  The following variables store the lexical analyzer state. The best
  way to understand them is to implement a function myGetToken() that
  calls getToken(), to add some qDebug() statements in there and then
  to #define getToken() myGetToken().
*/
static QString *yyIn; // the input stream
static int yyPos; // the position of the current token in yyIn
static int yyCurPos; // the position of the next lookahead character
static char *yyLexBuf; // the lexeme buffer
static const int YYLexBufSize = 65536; // big enough for long comments
static char *yyLex; // the lexeme itself (a pointer into yyLexBuf)
static int yyCh; // the lookbehind character

/*
  The <ctype.h> classifiers are only defined for EOF and for values
  that fit in an unsigned char, whereas yyCh is filled from
  QChar::unicode() and may be larger than 255. These two wrappers
  reject anything outside 0..255 before consulting <ctype.h>, so
  characters outside that range are simply treated as uninteresting.
*/
static inline bool isSpaceChar( int ch )
{
    return ch >= 0 && ch <= 0xff && isspace( ch );
}

static inline bool isIdentChar( int ch )
{
    return ch == '_' || ( ch >= 0 && ch <= 0xff && isalnum(ch) );
}

/*
  Moves back to the previous character in the input stream and
  updates the tokenizer state. This function is to be used only by
  getToken(), which provides the right abstraction.

  The character that was in yyCh is prepended to the lexeme (as long
  as there is room left in yyLexBuf), then yyCh is reloaded from
  position yyCurPos, or set to EOF once the start of the input has
  been passed. Once yyCh is EOF, further calls do nothing.
*/
static inline void readChar()
{
    if ( yyCh == EOF )
        return;

    // the lexeme grows towards the front of the buffer
    if ( yyLex > yyLexBuf )
        *--yyLex = (char) yyCh;

    if ( yyCurPos < 0 )
        yyCh = EOF;
    else
        yyCh = (*yyIn)[yyCurPos].unicode();
    yyCurPos--;
}

/*
  Sets up the tokenizer.

  Takes a private copy of the input, allocates the lexeme buffer and
  positions the tokenizer on the last character of the input.
*/
static void startTokenizer( const QString& in )
{
    yyIn = new QString;
    *yyIn = in;
    yyPos = yyIn->length() - 1;
    yyCurPos = yyPos;

    yyLexBuf = new char[YYLexBufSize];
    yyLex = yyLexBuf + YYLexBufSize - 1;
    *yyLex = '\0';

    // prime yyCh with a dummy character so that readChar() loads the
    // real last character of the input
    yyCh = '\0';
    readChar();
}

/*
  Frees resources allocated by the tokenizer.
*/
static void stopTokenizer()
{
    delete yyIn;
    delete[] yyLexBuf;
    yyLexBuf = 0;
}

/*
  These two macros implement quick-and-dirty hashing for telling
  apart keywords fast.

  HASH() combines the first character of a lexeme with its length.
  CHECK() leaves the enclosing switch when the lexeme is not exactly
  the expected keyword.
*/
#define HASH( ch, len ) ( (ch) | ((len) << 8) )
#define CHECK( target ) \
    if ( strcmp((target), yyLex) != 0 ) \
        break;

/*
  Returns the previous token in the abstract token stream. The parser
  deals only with tokens, not with characters.

  The return value is one of the Tok_* codes; Tok_Boi is returned
  once the beginning of the input has been reached. Whitespace and
  comments are skipped. For identifiers and keywords, yyLex points
  to the text of the token when the function returns.
*/
static int getToken()
{
    // why "+ 2"? try putting some qDebug()'s and see
    yyPos = yyCurPos + 2;

    for ( ;; ) {
        /*
          See if the previous token is interesting. If it isn't, we
          will loop anyway and go to the token before the previous
          token, and so on.
        */
        yyLex = yyLexBuf + YYLexBufSize - 1;
        *yyLex = '\0';

        if ( yyCh == EOF ) {
            break;
        } else if ( isSpaceChar(yyCh) ) {
            bool metNL = FALSE;
            do {
                metNL = ( metNL || yyCh == '\n' );
                readChar();
            } while ( isSpaceChar(yyCh) );

            if ( metNL ) {
                /*
                  C++ style comments are tricky. In left-to-right
                  thinking, C++ comments start with "//" and end with
                  '\n'. In right-to-left thinking, they start with a
                  '\n'; but of course not every '\n' starts a comment.

                  When we meet the '\n', we look behind, on the same
                  line, for a "//", and if there is one we mess
                  around with the tokenizer state to effectively
                  ignore the comment. Beware of off-by-one and
                  off-by-two bugs when you modify this code by adding
                  qDebug()'s here and there.
                */
                if ( yyCurPos >= 0 ) {
                    int lineStart = yyIn->findRev( QChar('\n'), yyCurPos ) + 1;
                    QString line = yyIn->mid( lineStart,
                                              yyCurPos - lineStart + 2 );
                    int commentStart = line.find( QString("//") );
                    if ( commentStart != -1 ) {
                        yyCurPos = lineStart + commentStart - 1;
                        yyPos = yyCurPos + 2;
                        readChar();
                    }
                }
            }
        } else if ( isIdentChar(yyCh) ) {
            do {
                readChar();
            } while ( isIdentChar(yyCh) );

            switch ( HASH(yyLex[0], strlen(yyLex)) ) {
            case HASH( 'c', 4 ):
                CHECK( "char" );
                return Tok_char;
            case HASH( 'c', 5 ):
                CHECK( "const" );
                return Tok_const;
            case HASH( 'd', 6 ):
                CHECK( "double" );
                return Tok_double;
            case HASH( 'i', 3 ):
                CHECK( "int" );
                return Tok_int;
            case HASH( 'l', 4 ):
                CHECK( "long" );
                return Tok_long;
            case HASH( 'o', 8 ):
                CHECK( "operator" );
                return Tok_operator;
            case HASH( 's', 5 ):
                CHECK( "short" );
                return Tok_short;
            case HASH( 's', 6 ):
                CHECK( "signed" );
                return Tok_signed;
            // a bare 'u' label is HASH('u', 0) and can never match a
            // non-empty lexeme; the length has to be hashed in as well
            case HASH( 'u', 8 ):
                CHECK( "unsigned" );
                return Tok_unsigned;
            }

            // something starting with a digit is a number, not a name
            if ( isdigit((unsigned char) *yyLex) )
                return Tok_Something;
            else
                return Tok_Ident;
        } else {
            int quote;

            switch ( yyCh ) {
            case '!':
            case '%':
            case '^':
            case '+':
            case '-':
            case '?':
            case '|':
                readChar();
                return Tok_Something;
            case '"':
            case '\'':
                quote = yyCh;
                readChar();

                // walk back to the opening quote; a quote that is
                // preceded by a backslash does not end the literal
                while ( yyCh != EOF && yyCh != '\n' ) {
                    if ( yyCh == quote ) {
                        readChar();
                        if ( yyCh != '\\' )
                            break;
                    } else {
                        readChar();
                    }
                }
                return Tok_Something;
            case '&':
                readChar();
                if ( yyCh == '&' ) {
                    readChar();
                    return Tok_Something;
                } else {
                    return Tok_Ampersand;
                }
            case '(':
                readChar();
                return Tok_LeftParen;
            case ')':
                readChar();
                return Tok_RightParen;
            case '*':
                readChar();
                return Tok_Aster;
            case ',':
                readChar();
                return Tok_Comma;
            case '.':
                readChar();
                if ( yyCh == '.' ) {
                    do {
                        readChar();
                    } while ( yyCh == '.' );
                    return Tok_Ellipsis;
                } else {
                    return Tok_Something;
                }
            case '/':
                /*
                  C-style comments are symmetric. C++-style comments
                  are handled elsewhere.
                */
                readChar();
                if ( yyCh == '*' ) {
                    bool metAster = FALSE;
                    bool metAsterSlash = FALSE;

                    readChar();

                    while ( !metAsterSlash ) {
                        if ( yyCh == EOF )
                            break;

                        if ( yyCh == '*' )
                            metAster = TRUE;
                        else if ( metAster && yyCh == '/' )
                            metAsterSlash = TRUE;
                        else
                            metAster = FALSE;
                        readChar();
                    }
                    // the comment is skipped: leave the switch and
                    // go round the outer loop again
                    break;
                    // return Tok_Comment;
                } else {
                    return Tok_Something;
                }
            case ':':
                readChar();
                if ( yyCh == ':' ) {
                    readChar();
                    return Tok_Gulbrandsen;
                } else {
                    return Tok_Colon;
                }
            case ';':
                readChar();
                return Tok_Semicolon;
            case '<':
                readChar();
                return Tok_LeftAngle;
            case '=':
                readChar();
                return Tok_Equal;
            case '>':
                readChar();
                return Tok_RightAngle;
            case '[':
                readChar();
                return Tok_LeftBracket;
            case ']':
                readChar();
                return Tok_RightBracket;
            case '{':
                readChar();
                return Tok_LeftBrace;
            case '}':
                readChar();
                return Tok_RightBrace;
            case '~':
                readChar();
                return Tok_Tilde;
            default:
                readChar();
            }
        }
    }
    return Tok_Boi;
}

/*
  Follow the member function(s) of CppFunction.
*/

/*
  Returns the prototype for the C++ function, without the semicolon.

  The result is built from returnType() (if not empty), scopedName(),
  the entries of parameterList() separated by ", " inside
  parentheses, and a trailing " const" when isConst() is true.
*/
QString CppFunction::prototype() const
{
    QString proto;

    if ( !returnType().isEmpty() )
        proto = returnType() + QChar( ' ' );
    proto += scopedName();
    proto += QChar( '(' );
    if ( !parameterList().isEmpty() ) {
        QStringList::ConstIterator p = parameterList().begin();
        proto += *p;
        ++p;
        while ( p != parameterList().end() ) {
            proto += QString( ", " );
            proto += *p;
            ++p;
        }
    }
    proto += QChar( ')' );
    if ( isConst() )
        proto += QString( " const" );
    return proto;
}

/*
  The parser follows. We are not really parsing C++, just trying to
  find the start and end of function definitions.

  One important pitfall is that the parsed code needs not be valid.
  Parsing from right to left helps cope with that, as explained in
  comments below.

  In the examples, we will use the symbol @ to stand for the position
  in the token stream. In "int @ x ;", the lookahead token (yyTok) is
  'int'.
*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -