📄 tokenize.cpp
Font size:
/* * Cppcheck - A tool for static C/C++ code analysis * Copyright (C) 2007-2009 Daniel Marjamäki, Reijo Tomperi, Nicolas Le Cam, * Leandro Penz, Kimmo Varis, Vesa Pikki * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/ *///---------------------------------------------------------------------------#include "tokenize.h"#include "filelister.h"#include <locale>#include <fstream>#include <string>#include <cstring>#include <iostream>#include <sstream>#include <list>#include <algorithm>#include <cstdlib>#include <cctype>//---------------------------------------------------------------------------Tokenizer::Tokenizer(){ _tokens = 0; _tokensBack = 0;}Tokenizer::~Tokenizer(){ DeallocateTokens();}//---------------------------------------------------------------------------// Helper functions..//---------------------------------------------------------------------------const Token *Tokenizer::tokens() const{ return _tokens;}const std::vector<std::string> *Tokenizer::getFiles() const{ return &_files;}//---------------------------------------------------------------------------//---------------------------------------------------------------------------// addtoken// add a token. 
Used by 'Tokenizer'//---------------------------------------------------------------------------void Tokenizer::addtoken(const char str[], const unsigned int lineno, const unsigned int fileno){ if (str[0] == 0) return; // Replace hexadecimal value with decimal std::ostringstream str2; if (strncmp(str, "0x", 2) == 0) { str2 << std::strtoul(str + 2, NULL, 16); } else { str2 << str; } if (_tokensBack) { _tokensBack->insertToken(str2.str().c_str()); _tokensBack = _tokensBack->next(); } else { _tokens = new Token; _tokensBack = _tokens; _tokensBack->str(str2.str().c_str()); } _tokensBack->linenr(lineno); _tokensBack->fileIndex(fileno);}//---------------------------------------------------------------------------//---------------------------------------------------------------------------// SizeOfType - gives the size of a type//---------------------------------------------------------------------------int Tokenizer::SizeOfType(const char type[]) const{ if (!type) return 0; std::map<std::string, unsigned int>::const_iterator it = _typeSize.find(type); if (it == _typeSize.end()) return 0; return it->second;}//---------------------------------------------------------------------------//---------------------------------------------------------------------------// InsertTokens - Copy and insert tokens//---------------------------------------------------------------------------void Tokenizer::InsertTokens(Token *dest, Token *src, unsigned int n){ while (n > 0) { dest->insertToken(src->aaaa()); dest = dest->next(); dest->fileIndex(src->fileIndex()); dest->linenr(src->linenr()); dest->varId(src->varId()); src = src->next(); --n; }}//---------------------------------------------------------------------------//---------------------------------------------------------------------------// Tokenize - tokenizes a given file.//---------------------------------------------------------------------------void Tokenizer::tokenize(std::istream &code, const char FileName[]){ // The "_files" 
vector remembers what files have been tokenized.. _files.push_back(FileLister::simplifyPath(FileName)); // line number in parsed code unsigned int lineno = 1; // The current token being parsed std::string CurrentToken; // lineNumbers holds line numbers for files in fileIndexes // every time an include file is complitely parsed, last item in the vector // is removed and lineno is set to point to that value. std::vector<unsigned int> lineNumbers; // fileIndexes holds index for _files vector about currently parsed files // every time an include file is complitely parsed, last item in the vector // is removed and FileIndex is set to point to that value. std::vector<unsigned int> fileIndexes; // FileIndex. What file in the _files vector is read now? unsigned int FileIndex = 0; // Read one byte at a time from code and create tokens for (char ch = (char)code.get(); code.good(); ch = (char)code.get()) { // We are not handling UTF and stuff like that. Code is supposed to plain simple text. if (ch < 0) continue; if (ch == '\n') { // Add current token.. addtoken(CurrentToken.c_str(), lineno++, FileIndex); CurrentToken.clear(); continue; } // char/string.. if (ch == '\'' || ch == '\"') { // Add previous token addtoken(CurrentToken.c_str(), lineno, FileIndex); CurrentToken.clear(); // read char bool special = false; char c = ch; do { // Append token.. CurrentToken += c; if (c == '\n') ++lineno; // Special sequence '\.' if (special) special = false; else special = (c == '\\'); // Get next character c = (char)code.get(); } while (code.good() && (special || c != ch)); CurrentToken += ch; // Add token and start on next.. 
addtoken(CurrentToken.c_str(), lineno, FileIndex); CurrentToken.clear(); continue; } if (ch == '#' && CurrentToken.empty()) { // If previous token was "#" then append this to create a "##" token if (Token::simpleMatch(_tokensBack, "#")) { _tokensBack->str("##"); continue; } std::string line("#"); { char chPrev = '#'; bool skip = false; while (code.good()) { ch = (char)code.get(); if (chPrev != '\\' && ch == '\n') break; if (chPrev == '\\') line += chPrev; if (chPrev == '#' && ch == '#') { addtoken("##", lineno, FileIndex); skip = true; break; } if (ch != ' ') chPrev = ch; if (ch != '\\' && ch != '\n') { line += ch; } if (ch == '\n') ++lineno; } if (skip) continue; } if (strncmp(line.c_str(), "#file", 5) == 0 && line.find("\"") != std::string::npos) { // Extract the filename line.erase(0, line.find("\"") + 1); if (line.find("\"") != std::string::npos) line.erase(line.find("\"")); // Has this file been tokenized already? ++lineno; bool foundOurfile = false; fileIndexes.push_back(FileIndex); for (unsigned int i = 0; i < _files.size(); i++) { if (FileLister::SameFileName(_files[i].c_str(), line.c_str())) { // Use this index foundOurfile = true; FileIndex = i; } } if (!foundOurfile) { // The "_files" vector remembers what files have been tokenized.. _files.push_back(FileLister::simplifyPath(line.c_str())); FileIndex = static_cast<unsigned int>(_files.size() - 1); } lineNumbers.push_back(lineno); lineno = 1; continue; } else if (strncmp(line.c_str(), "#endfile", 8) == 0) { if (lineNumbers.empty() || fileIndexes.empty()) { std::cerr << "####### Preprocessor bug! #######\n"; std::exit(0); } lineno = lineNumbers.back(); lineNumbers.pop_back(); FileIndex = fileIndexes.back(); fileIndexes.pop_back(); continue; } else { addtoken(line.c_str(), lineno, FileIndex); } } if (strchr("#+-*/%&|^?!=<>[](){};:,.~", ch)) { if (ch == '.' 
&& CurrentToken.length() > 0 && std::isdigit(CurrentToken[0])) { // Don't separate doubles "5.4" } else if (strchr("+-", ch) && CurrentToken.length() > 0 && std::isdigit(CurrentToken[0]) && CurrentToken[CurrentToken.length()-1] == 'e') { // Don't separate doubles "4.2e+10" } else { addtoken(CurrentToken.c_str(), lineno, FileIndex); CurrentToken.clear(); CurrentToken += ch; // Add "++", "--" or ">>" token if ((ch == '+' || ch == '-' || ch == '>') && (code.peek() == ch)) CurrentToken += (char)code.get(); addtoken(CurrentToken.c_str(), lineno, FileIndex); CurrentToken.clear(); continue; } } if (std::isspace(ch) || std::iscntrl(ch)) { addtoken(CurrentToken.c_str(), lineno, FileIndex); CurrentToken.clear(); continue; } CurrentToken += ch; } addtoken(CurrentToken.c_str(), lineno, FileIndex); // Combine tokens.. for (Token *tok = _tokens; tok && tok->next(); tok = tok->next()) { static const char* combineWithNext[][3] = { { "<", "<", "<<" }, { "&", "&", "&&" }, { "|", "|", "||" }, { "+", "=", "+=" }, { "-", "=", "-=" }, { "*", "=", "*=" }, { "/", "=", "/=" }, { "&", "=", "&=" }, { "|", "=", "|=" }, { "=", "=", "==" }, { "!", "=", "!=" }, { "<", "=", "<=" }, { ">", "=", ">=" }, { ":", ":", "::" }, { "-", ">", "." }, // Replace "->" with "." { "private", ":", "private:" }, { "protected", ":", "protected:" }, { "public", ":", "public:" } }; for (unsigned ui = 0; ui < sizeof(combineWithNext) / sizeof(combineWithNext[0]); ui++) { if (tok->str() == combineWithNext[ui][0] && tok->next()->str() == combineWithNext[ui][1]) { tok->str(combineWithNext[ui][2]); tok->deleteNext(); } } } // typedef.. 
for (Token *tok = _tokens; tok;) { if (Token::Match(tok, "typedef %type% %type% ;")) { const char *type1 = tok->strAt(1); const char *type2 = tok->strAt(2); tok = const_cast<Token*>(tok->tokAt(4)); for (Token *tok2 = tok; tok2; tok2 = tok2->next()) { if (tok2->str() == type2) tok2->str(type1); } continue; } else if (Token::Match(tok, "typedef %type% %type% %type% ;")) { const char *type1 = tok->strAt(1); const char *type2 = tok->strAt(2); const char *type3 = tok->strAt(3); tok = const_cast<Token*>(tok->tokAt(5)); for (Token *tok2 = tok; tok2; tok2 = tok2->next()) { if (tok2->str() == type3) { tok2->str(type1); tok2->insertToken(type2); tok2 = tok2->next(); } } continue; } tok = tok->next(); } // Remove __asm.. for (Token *tok = _tokens; tok; tok = tok->next()) { if (Token::simpleMatch(tok->next(), "__asm {")) { while (tok->next()) { bool last = Token::simpleMatch(tok->next(), "}"); // Unlink and delete tok->next() tok->deleteNext(); // break if this was the last token to delete.. if (last) break; } } } // Remove "volatile" while (Token::simpleMatch(_tokens, "volatile")) { Token *tok = _tokens; _tokens = _tokens->next(); delete tok; } for (Token *tok = _tokens; tok; tok = tok->next()) { while (Token::simpleMatch(tok->next(), "volatile")) { tok->deleteNext(); } } // Remove "mutable" while (Token::simpleMatch(_tokens, "mutable")) { Token *tok = _tokens; _tokens = _tokens->next(); delete tok; } for (Token *tok = _tokens; tok; tok = tok->next()) { while (Token::simpleMatch(tok->next(), "mutable")) { tok->deleteNext(); } }}//---------------------------------------------------------------------------void Tokenizer::setVarId(){ // Clear all variable ids
⌨️ Keyboard shortcuts
Copy code
Ctrl + C
Search code
Ctrl + F
Full-screen mode
F11
Toggle theme
Ctrl + Shift + D
Show shortcuts
?
Increase font size
Ctrl + =
Decrease font size
Ctrl + -