📄 tokenize.cpp
Font size:
/* * Cppcheck - A tool for static C/C++ code analysis * Copyright (C) 2007-2009 Daniel Marjamäki, Reijo Tomperi, Nicolas Le Cam, * Leandro Penz, Kimmo Varis, Vesa Pikki * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/ *///---------------------------------------------------------------------------#include "tokenize.h"#include "filelister.h"#include <locale>#include <fstream>#include <string>#include <cstring>#include <iostream>#include <sstream>#include <list>#include <algorithm>#include <cstdlib>#include <cctype>//---------------------------------------------------------------------------Tokenizer::Tokenizer(){ _tokens = 0; _tokensBack = 0;}Tokenizer::~Tokenizer(){ DeallocateTokens();}//---------------------------------------------------------------------------// Helper functions..//---------------------------------------------------------------------------const Token *Tokenizer::tokens() const{ return _tokens;}const std::vector<std::string> *Tokenizer::getFiles() const{ return &_files;}//---------------------------------------------------------------------------//---------------------------------------------------------------------------// addtoken// add a token. 
Used by 'Tokenizer'//---------------------------------------------------------------------------void Tokenizer::addtoken(const char str[], const unsigned int lineno, const unsigned int fileno){ if (str[0] == 0) return; // Replace hexadecimal value with decimal std::ostringstream str2; if (strncmp(str, "0x", 2) == 0) { str2 << std::strtoul(str + 2, NULL, 16); } else { str2 << str; } if (_tokensBack) { _tokensBack->insertToken(str2.str().c_str()); _tokensBack = _tokensBack->next(); } else { _tokens = new Token; _tokensBack = _tokens; _tokensBack->str(str2.str().c_str()); } _tokensBack->linenr(lineno); _tokensBack->fileIndex(fileno);}//---------------------------------------------------------------------------//---------------------------------------------------------------------------// SizeOfType - gives the size of a type//---------------------------------------------------------------------------int Tokenizer::SizeOfType(const char type[]) const{ if (!type) return 0; std::map<std::string, unsigned int>::const_iterator it = _typeSize.find(type); if (it == _typeSize.end()) return 0; return it->second;}//---------------------------------------------------------------------------//---------------------------------------------------------------------------// InsertTokens - Copy and insert tokens//---------------------------------------------------------------------------void Tokenizer::InsertTokens(Token *dest, Token *src, unsigned int n){ while (n > 0) { dest->insertToken(src->aaaa()); dest = dest->next(); dest->fileIndex(src->fileIndex()); dest->linenr(src->linenr()); dest->varId(src->varId()); src = src->next(); --n; }}//---------------------------------------------------------------------------//---------------------------------------------------------------------------// Tokenize - tokenizes a given file.//---------------------------------------------------------------------------void Tokenizer::tokenize(std::istream &code, const char FileName[]){ // The "_files" 
vector remembers what files have been tokenized.. _files.push_back(FileLister::simplifyPath(FileName)); // line number in parsed code unsigned int lineno = 1; // The current token being parsed std::string CurrentToken; // lineNumbers holds line numbers for files in fileIndexes // every time an include file is complitely parsed, last item in the vector // is removed and lineno is set to point to that value. std::vector<unsigned int> lineNumbers; // fileIndexes holds index for _files vector about currently parsed files // every time an include file is complitely parsed, last item in the vector // is removed and FileIndex is set to point to that value. std::vector<unsigned int> fileIndexes; // FileIndex. What file in the _files vector is read now? unsigned int FileIndex = 0; // Read one byte at a time from code and create tokens for (char ch = (char)code.get(); code.good(); ch = (char)code.get()) { // We are not handling UTF and stuff like that. Code is supposed to plain simple text. if (ch < 0) continue; if (ch == '\n') { // Add current token.. addtoken(CurrentToken.c_str(), lineno++, FileIndex); CurrentToken.clear(); continue; } // char/string.. if (ch == '\'' || ch == '\"') { // Add previous token addtoken(CurrentToken.c_str(), lineno, FileIndex); CurrentToken.clear(); // read char bool special = false; char c = ch; do { // Append token.. CurrentToken += c; if (c == '\n') ++lineno; // Special sequence '\.' if (special) special = false; else special = (c == '\\'); // Get next character c = (char)code.get(); } while (code.good() && (special || c != ch)); CurrentToken += ch; // Add token and start on next.. 
addtoken(CurrentToken.c_str(), lineno, FileIndex); CurrentToken.clear(); continue; } if (ch == '#' && CurrentToken.empty()) { // If previous token was "#" then append this to create a "##" token if (Token::simpleMatch(_tokensBack, "#")) { _tokensBack->str("##"); continue; } std::string line("#"); { char chPrev = '#'; bool skip = false; while (code.good()) { ch = (char)code.get(); if (chPrev != '\\' && ch == '\n') break; if (chPrev == '\\') line += chPrev; if (chPrev == '#' && ch == '#') { addtoken("##", lineno, FileIndex); skip = true; break; } if (ch != ' ') chPrev = ch; if (ch != '\\' && ch != '\n') { line += ch; } if (ch == '\n') ++lineno; } if (skip) continue; } if (strncmp(line.c_str(), "#file", 5) == 0 && line.find("\"") != std::string::npos) { // Extract the filename line.erase(0, line.find("\"") + 1); if (line.find("\"") != std::string::npos) line.erase(line.find("\"")); // Has this file been tokenized already? ++lineno; bool foundOurfile = false; fileIndexes.push_back(FileIndex); for (unsigned int i = 0; i < _files.size(); i++) { if (FileLister::SameFileName(_files[i].c_str(), line.c_str())) { // Use this index foundOurfile = true; FileIndex = i; } } if (!foundOurfile) { // The "_files" vector remembers what files have been tokenized.. _files.push_back(FileLister::simplifyPath(line.c_str())); FileIndex = static_cast<unsigned int>(_files.size() - 1); } lineNumbers.push_back(lineno); lineno = 1; continue; } else if (strncmp(line.c_str(), "#endfile", 8) == 0) { if (lineNumbers.empty() || fileIndexes.empty()) { std::cerr << "####### Preprocessor bug! #######\n"; std::exit(0); } lineno = lineNumbers.back(); lineNumbers.pop_back(); FileIndex = fileIndexes.back(); fileIndexes.pop_back(); continue; } else { addtoken(line.c_str(), lineno, FileIndex); } } if (strchr("#+-*/%&|^?!=<>[](){};:,.~", ch)) { if (ch == '.' 
&& CurrentToken.length() > 0 && std::isdigit(CurrentToken[0])) { // Don't separate doubles "5.4" } else if (strchr("+-", ch) && CurrentToken.length() > 0 && std::isdigit(CurrentToken[0]) && CurrentToken[CurrentToken.length()-1] == 'e') { // Don't separate doubles "4.2e+10" } else { addtoken(CurrentToken.c_str(), lineno, FileIndex); CurrentToken.clear(); CurrentToken += ch; // Add "++", "--" or ">>" token if ((ch == '+' || ch == '-' || ch == '>') && (code.peek() == ch)) CurrentToken += (char)code.get(); addtoken(CurrentToken.c_str(), lineno, FileIndex); CurrentToken.clear(); continue; } } if (std::isspace(ch) || std::iscntrl(ch)) { addtoken(CurrentToken.c_str(), lineno, FileIndex); CurrentToken.clear(); continue; } CurrentToken += ch; } addtoken(CurrentToken.c_str(), lineno, FileIndex); // Combine tokens.. for (Token *tok = _tokens; tok && tok->next(); tok = tok->next()) { static const char* combineWithNext[][3] = { { "<", "<", "<<" }, { "&", "&", "&&" }, { "|", "|", "||" }, { "+", "=", "+=" }, { "-", "=", "-=" }, { "*", "=", "*=" }, { "/", "=", "/=" }, { "&", "=", "&=" }, { "|", "=", "|=" }, { "=", "=", "==" }, { "!", "=", "!=" }, { "<", "=", "<=" }, { ">", "=", ">=" }, { ":", ":", "::" }, { "-", ">", "." }, // Replace "->" with "." { "private", ":", "private:" }, { "protected", ":", "protected:" }, { "public", ":", "public:" } }; for (unsigned ui = 0; ui < sizeof(combineWithNext) / sizeof(combineWithNext[0]); ui++) { if (tok->str() == combineWithNext[ui][0] && tok->next()->str() == combineWithNext[ui][1]) { tok->str(combineWithNext[ui][2]); tok->deleteNext(); } } } // typedef.. 
for (Token *tok = _tokens; tok;) { if (Token::Match(tok, "typedef %type% %type% ;")) { const char *type1 = tok->strAt(1); const char *type2 = tok->strAt(2); tok = const_cast<Token*>(tok->tokAt(4)); for (Token *tok2 = tok; tok2; tok2 = tok2->next()) { if (tok2->str() == type2) tok2->str(type1); } continue; } else if (Token::Match(tok, "typedef %type% %type% %type% ;")) { const char *type1 = tok->strAt(1); const char *type2 = tok->strAt(2); const char *type3 = tok->strAt(3); tok = const_cast<Token*>(tok->tokAt(5)); for (Token *tok2 = tok; tok2; tok2 = tok2->next()) { if (tok2->str() == type3) { tok2->str(type1); tok2->insertToken(type2); tok2 = tok2->next(); } } continue; } tok = tok->next(); } // Remove __asm.. for (Token *tok = _tokens; tok; tok = tok->next()) { if (Token::simpleMatch(tok->next(), "__asm {")) { while (tok->next()) { bool last = Token::simpleMatch(tok->next(), "}"); // Unlink and delete tok->next() tok->deleteNext(); // break if this was the last token to delete.. if (last) break; } } } // Remove "volatile" while (Token::simpleMatch(_tokens, "volatile")) { Token *tok = _tokens; _tokens = _tokens->next(); delete tok; } for (Token *tok = _tokens; tok; tok = tok->next()) { while (Token::simpleMatch(tok->next(), "volatile")) { tok->deleteNext(); } } // Remove "mutable" while (Token::simpleMatch(_tokens, "mutable")) { Token *tok = _tokens; _tokens = _tokens->next(); delete tok; } for (Token *tok = _tokens; tok; tok = tok->next()) { while (Token::simpleMatch(tok->next(), "mutable")) { tok->deleteNext(); } }}//---------------------------------------------------------------------------void Tokenizer::setVarId(){ // Clear all variable ids
⌨️ Keyboard shortcuts
Copy code
Ctrl + C
Search code
Ctrl + F
Full-screen mode
F11
Toggle theme
Ctrl + Shift + D
Show shortcuts
?
Increase font size
Ctrl + =
Decrease font size
Ctrl + -