Cppcheck
tokenize.h
Go to the documentation of this file.
00001 /*
00002  * Cppcheck - A tool for static C/C++ code analysis
00003  * Copyright (C) 2007-2013 Daniel Marjamäki and Cppcheck team.
00004  *
00005  * This program is free software: you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation, either version 3 of the License, or
00008  * (at your option) any later version.
00009  *
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  *
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
00017  */
00018 
00019 
00020 //---------------------------------------------------------------------------
00021 #ifndef tokenizeH
00022 #define tokenizeH
00023 //---------------------------------------------------------------------------
00024 
00025 #include "errorlogger.h"
00026 #include "tokenlist.h"
00027 #include "config.h"
00028 
00029 #include <string>
00030 #include <map>
00031 #include <list>
00032 
00033 class Settings;
00034 class SymbolDatabase;
00035 class TimerResults;
00036 
00037 /// @addtogroup Core
00038 /// @{
00039 
00040 /** @brief The main purpose of this class is to tokenize the source code. It also has functions that simplify the token list. */
00041 class CPPCHECKLIB Tokenizer {
00042 public:
00043     Tokenizer();
00044     Tokenizer(const Settings * settings, ErrorLogger *errorLogger);
00045     ~Tokenizer();
00046 
00047     void setTimerResults(TimerResults *tr) {
00048         m_timerResults = tr; // only the pointer is stored here
00049     }
00050 
00051     /** Returns the source file path, e.g. "file.cpp" */
00052     const std::string& getSourceFilePath() const;
00053 
00054     /** Is the code C? Used for bailouts. */
00055     bool isC() const;
00056 
00057     /** Is the code CPP? Used for bailouts. */
00058     bool isCPP() const;
00059 
00060     /**
00061      * Check if inner scope ends with a call to a noreturn function
00062      * \param endScopeToken The '}' token
00063      * \param unknown optional (defaults to 0); set to true if it's unknown whether the scope is noreturn
00064      * \return true if scope ends with a function call that might be 'noreturn'
00065      */
00066     static bool IsScopeNoReturn(const Token *endScopeToken, bool *unknown = 0);
00067 
00068     /**
00069      * Tokenize code
00070      * @param code input stream for code, e.g.
00071      * \code
00072      * #file "p.h"
00073      * class Foo
00074      * {
00075      * private:
00076      * void Bar();
00077      * };
00078      *
00079      * #endfile
00080      * void Foo::Bar()
00081      * {
00082      * }
00083      * \endcode
00084      *
00085      * @param FileName The filename
00086      * @param configuration E.g. "A" for code where "#ifdef A" is true (defaults to "")
00087      * @return false if source code contains syntax errors
00088      */
00089     bool tokenize(std::istream &code,
00090                   const char FileName[],
00091                   const std::string &configuration = "");
00092 
00093     /**
00094      * Tokenize a condition and run simple simplifications on it.
00095      * @param code the condition code to tokenize
00096      * @return true on success.
00097      */
00098     bool tokenizeCondition(const std::string &code);
00099 
00100     /** Set variable id */
00101     void setVarId();
00102 
00103     /**
00104      * Simplify tokenlist
00105      *
00106      * @return false if there is an error that requires aborting
00107      * the checking of this file.
00108      */
00109     bool simplifyTokenList();
00110 
00111     /**
00112      * Deletes dead code between 'begin' and 'end'.
00113      * In general not everything can be erased, such as:
00114      * - code after labels;
00115      * - code outside the scope where the function is called;
00116      * - code after a change of scope caused by 'switch(...);'
00117      *   instructions, like 'case %any%;' or 'default;'
00118      * Also, if the dead code contains a 'switch' block
00119      * and inside it there's a label, the function removes all
00120      * the 'switch(..)' tokens and every occurrence of 'case %any%; | default;'
00121      * expression, so that the 'switch' block is reduced to a simple block.
00122      *
00123      * @param begin Tokens after this have a possibility to be erased.
00124      * @param end Tokens before this have a possibility to be erased.
00125      */
00126     static void eraseDeadCode(Token *begin, const Token *end);
00127 
00128     /**
00129      * Simplify '* & ( %var% ) =' or any combination of '* &' and '()'
00130      * parentheses around '%var%' to '%var% ='
00131      */
00132     void simplifyMulAndParens();
00133 
00134     /**
00135      * Calculates sizeof value for given type.
00136      * @param type Token which will contain e.g. "int", "*", or string.
00137      * @return sizeof for given type, or 0 if it can't be calculated.
00138      */
00139     unsigned int sizeOfType(const Token *type) const;
00140 
00141     /**
00142      * Try to determine if function parameter is passed by value by looking
00143      * at the function declaration.
00144      * @param fpar token for function parameter in the function call
00145      * @return true if the parameter is passed by value. If unsure, false is returned.
00146      */
00147     bool isFunctionParameterPassedByValue(const Token *fpar) const;
00148 
00149     /**
00150      * Get the error messages that the tokenizer generates.
00151      */
00152     static void getErrorMessages(ErrorLogger *errorLogger, const Settings *settings);
00153 
00154     /** Simplify assignment in function call "f(x=g());" => "x=g();f(x);"
00155      */
00156     void simplifyAssignmentInFunctionCall();
00157 
00158     /** Simplify assignment where rhs is a block : "x=({123;});" => "{x=123;}" */
00159     void simplifyAssignmentBlock();
00160 
00161     /**
00162      * Simplify constant calculations such as "1+2" => "3"
00163      * @return true if modifications to token-list are done.
00164      *         false if no modifications are done.
00165      */
00166     bool simplifyCalculations();
00167 
00168     /** Insert array size where it isn't given */
00169     void arraySize();
00170 
00171     /** Simplify labels and 'case|default' syntaxes.
00172       * @return true if found nothing or the syntax is correct.
00173       *         false if syntax is found to be wrong.
00174       */
00175     bool simplifyLabelsCaseDefault();
00176 
00177     /** Remove macros in global scope */
00178     void removeMacrosInGlobalScope();
00179 
00180     /** Remove unknown macro in variable declarations: PROGMEM char x; */
00181     void removeMacroInVarDecl();
00182 
00183     /** Remove redundant assignment */
00184     void removeRedundantAssignment();
00185 
00186     /** Simplifies some realloc usage like
00187       * 'x = realloc (0, n);' => 'x = malloc(n);'
00188       * 'x = realloc (y, 0);' => 'x = 0; free(y);'
00189       */
00190     void simplifyRealloc();
00191 
00192     /**
00193      * Replace sizeof() with the appropriate size.
00194      * @return true if modifications to token-list are done.
00195      *         false if no modifications are done.
00196      */
00197     bool simplifySizeof();
00198 
00199     /**
00200      * Simplify variable declarations (split up)
00201      * \param only_k_r_fpar Only simplify K&R function parameters
00202      */
00203     void simplifyVarDecl(bool only_k_r_fpar);
00204 
00205     /**
00206      * Simplify variable initialization
00207      * '; int *p(0);' => '; int *p = 0;'
00208      */
00209     void simplifyInitVar();
00210     Token * initVar(Token * tok); // NOTE(review): undocumented; presumably the per-token helper of simplifyInitVar() - confirm
00211 
00212     /**
00213      * Convert platform dependent types to standard types.
00214      * 32 bits: size_t -> unsigned long
00215      * 64 bits: size_t -> unsigned long long
00216      */
00217     void simplifyPlatformTypes();
00218 
00219     /**
00220      * Collapse compound standard types into a single token.
00221      * unsigned long long int => long _isUnsigned=true,_isLong=true
00222      */
00223     void simplifyStdType();
00224 
00225     /**
00226      * Simplify easy constant '?:' operation
00227      * Example: 0 ? (2/0) : 0 => 0
00228      * @return true if something is modified
00229      *         false if nothing is done.
00230      */
00231     bool simplifyConstTernaryOp();
00232 
00233     /**
00234      * Simplify compound assignments
00235      * Example: ";a+=b;" => ";a=a+b;"
00236      */
00237     void simplifyCompoundAssignment();
00238 
00239     /**
00240      * Simplify if-assignments
00241      * Example: "if(a=b);" => "a=b;if(a);"
00242      */
00243     void simplifyIfAssign();
00244 
00245     /**
00246      * Simplify multiple assignments.
00247      * Example: "a = b = c = 0;" => "a = 0; b = 0; c = 0;"
00248      */
00249     void simplifyVariableMultipleAssign();
00250 
00251     /**
00252      * Simplify if-not
00253      * Example: "if(0==x);" => "if(!x);"
00254      */
00255     void simplifyIfNot();
00256 
00257     /**
00258      * Simplify if-not NULL
00259      * Example: "if(0!=x);" => "if(x);"
00260      * Special case: 'x = (0 != x);' is removed.
00261      */
00262     void simplifyIfNotNull();
00263 
00264     /** @brief Simplify if (a) { if (a) */
00265     void simplifyIfSameInnerCondition();
00266 
00267     /**
00268      * Simplify the "not" and "and" keywords to "!" and "&&"
00269      * respectively.
00270      * Examples:
00271      * - "if (not p)" => "if (!p)"
00272      * - "if (p and q)" => "if (p && q)"
00273      */
00274     bool simplifyLogicalOperators(); // NOTE(review): meaning of the bool result is not documented here - confirm against the implementation
00275 
00276     /**
00277      * Simplify comma into a semicolon when possible:
00278      * - "delete a, delete b" => "delete a; delete b;"
00279      * - "a = 0, b = 0;" => "a = 0; b = 0;"
00280      * - "return a(), b;" => "a(); return b;"
00281      */
00282     void simplifyComma();
00283 
00284     /** Add braces to an if-block, for-block, etc.
00285      * @return true if no syntax errors
00286      */
00287     bool simplifyAddBraces();
00288 
00289     /** Add braces to an if-block, for-block, etc.
00290      * for command starting at token including else-block
00291      * @return last token of command
00292      *         or input token in case of an error where no braces are added
00293      *         or NULL when syntaxError is called
00294      */
00295     Token * simplifyAddBracesToCommand(Token * tok);
00296 
00297     /** Add a pair of braces to a single if-block, else-block, for-block, etc.
00298      * for command starting at token
00299      * @return last token of command
00300      *         or input token in case of an error where no braces are added
00301      *         or NULL when syntaxError is called
00302      */
00303     Token * simplifyAddBracesPair(Token *tok, bool commandWithCondition);
00304 
00305     /**
00306      * typedef A mytype;
00307      * mytype c;
00308      *
00309      * Becomes:
00310      * typedef A mytype;
00311      * A c;
00312      */
00313     void simplifyTypedef();
00314 
00315     /**
00316      * Simplify casts
00317      */
00318     void simplifyCasts();
00319 
00320     /**
00321      * Change (multiple) arrays to (multiple) pointers.
00322      */
00323     void simplifyUndefinedSizeArray();
00324 
00325     /**
00326      * A simplify function that replaces a variable with its value in cases
00327      * when the value is known. e.g. "x=10; if(x)" => "x=10;if(10)"
00328      *
00329      * @return true if modifications to token-list are done.
00330      *         false if no modifications are done.
00331      */
00332     bool simplifyKnownVariables();
00333 
00334     /**
00335      * Utility function for simplifyKnownVariables. Get data about an
00336      * assigned variable.
00337      */
00338     static bool simplifyKnownVariablesGetData(unsigned int varid, Token **_tok2, Token **_tok3, std::string &value, unsigned int &valueVarId, bool &valueIsPointer, bool floatvar);
00339 
00340     /**
00341      * Utility function for simplifyKnownVariables. Perform simplification
00342      * of a given variable.
00343      */
00344     bool simplifyKnownVariablesSimplify(Token **tok2, Token *tok3, unsigned int varid, const std::string &structname, std::string &value, unsigned int valueVarId, bool valueIsPointer, const Token * const valueToken, int indentlevel) const;
00345 
00346     /** Replace a "goto" with the statements */
00347     void simplifyGoto();
00348 
00349     /** Simplify useless C++ empty namespaces, like: 'namespace %var% { }' */
00350     void simplifyEmptyNamespaces();
00351 
00352     /** Simplify redundant code placed after control flow statements:
00353      * 'return', 'throw', 'goto', 'break' and 'continue'
00354      */
00355     void simplifyFlowControl();
00356 
00357     /** Expand nested strcat() calls. */
00358     void simplifyNestedStrcat();
00359 
00360     /** Simplify "if else" */
00361     void elseif();
00362 
00363     /**
00364      * Simplify the operator "?:"
00365      */
00366     void simplifyConditionOperator();
00367 
00368     /** Simplify conditions
00369      * @return true if something is modified
00370      *         false if nothing is done.
00371      */
00372     bool simplifyConditions();
00373 
00374     /** Remove redundant code, e.g. if( false ) { int a; } should be
00375      * removed, because it is never executed.
00376      * @return true if something is modified
00377      *         false if nothing is done.
00378      */
00379     bool removeRedundantConditions();
00380 
00381     /**
00382      * Remove redundant for:
00383      * "for (x=0;x<1;x++) { }" => "{ x = 1; }"
00384      */
00385     void removeRedundantFor();
00386 
00387 
00388     /**
00389      * Reduces "; ;" to ";", except in "( ; ; )"
00390      */
00391     void removeRedundantSemicolons();
00392 
00393     /** Simplify function calls - constant return value
00394      * @return true if something is modified
00395      *         false if nothing is done.
00396      */
00397     bool simplifyFunctionReturn();
00398 
00399     /** Struct initialization */
00400     void simplifyStructInit();
00401 
00402     /** Struct simplification
00403      * "struct S { } s;" => "struct S { }; S s;"
00404      */
00405 
00406     void simplifyStructDecl();
00407 
00408     /**
00409      * Remove redundant parentheses:
00410      * - "((x))" => "(x)"
00411      * - "(function())" => "function()"
00412      * - "(delete x)" => "delete x"
00413      * - "(delete [] x)" => "delete [] x"
00414      * @return true if modifications to token-list are done.
00415      *         false if no modifications are done.
00416      */
00417     bool simplifyRedundantParentheses();
00418 
00419     void simplifyCharAt();
00420 
00421     /** Simplify references */
00422     void simplifyReference();
00423 
00424     /**
00425      * Simplify functions like "void f(x) int x; {"
00426      * into "void f(int x) {"
00427      * @return false only if there's a syntax error
00428      */
00429     bool simplifyFunctionParameters();
00430 
00431     /**
00432      * Simplify templates
00433      */
00434     void simplifyTemplates();
00435 
00436     void simplifyDoublePlusAndDoubleMinus();
00437 
00438     void simplifyRedundantConsecutiveBraces();
00439 
00440     void simplifyArrayAccessSyntax();
00441 
00442     void simplifyParameterVoid();
00443 
00444     void fillTypeSizes();
00445 
00446     void combineOperators();
00447 
00448     void combineStrings();
00449 
00450     void concatenateDoubleSharp();
00451 
00452     void simplifyFileAndLineMacro();
00453 
00454     void simplifyNull();
00455 
00456     void concatenateNegativeNumberAndAnyPositive();
00457 
00458     void simplifyExternC();
00459 
00460     void simplifyRoundCurlyParentheses();
00461 
00462     void simplifyDebugNew();
00463 
00464     void simplifySQL();
00465 
00466     bool hasEnumsWithTypedef(); // NOTE(review): undocumented; confirm what a true result means against the implementation
00467 
00468     void simplifyDefaultAndDeleteInsideClass();
00469 
00470     bool hasComplicatedSyntaxErrorsInTemplates(); // NOTE(review): undocumented; confirm what a true result means against the implementation
00471 
00472     /**
00473      * Simplify e.g. 'atol("0")' into '0'
00474      */
00475     void simplifyMathFunctions();
00476 
00477     /**
00478      * Simplify e.g. 'sin(0)' into '0'
00479      */
00480     void simplifyMathExpressions();
00481 
00482     /**
00483      * Modify strings in the token list by replacing hex and oct
00484      * values. E.g. "\x61" -> "a" and "\000" -> "\0"
00485      * @param source The string to be modified, e.g. "\x61"
00486      * @return Modified string, e.g. "a"
00487      */
00488     static std::string simplifyString(const std::string &source);
00489 
00490     /**
00491      * Use "<" comparison instead of ">"
00492      * Use "<=" comparison instead of ">="
00493      */
00494     void simplifyComparisonOrder();
00495 
00496     /**
00497      * Change "int const x;" into "const int x;"
00498      */
00499     void simplifyConst();
00500 
00501     /**
00502      * Simplify "while (0)"
00503      */
00504     void simplifyWhile0();
00505 
00506     /**
00507      * Simplify while(func() && errno==EINTR)
00508      */
00509     void simplifyErrNoInWhile();
00510 
00511     /**
00512      * Simplify while(func(f))
00513      */
00514     void simplifyFuncInWhile();
00515 
00516     /**
00517      * Replace enum with constant value
00518      */
00519     void simplifyEnum();
00520 
00521     /**
00522      * Remove "std::" before some function names
00523      */
00524     void simplifyStd();
00525 
00526     /** Simplify pointer to standard type (C only) */
00527     void simplifyPointerToStandardType();
00528 
00529     /** Simplify function pointers */
00530     void simplifyFunctionPointers();
00531 
00532     /**
00533      * Remove exception specifications.
00534      */
00535     void removeExceptionSpecifications();
00536 
00537 
00538     /**
00539      * Send error message to error logger about internal bug.
00540      * @param tok the token that this bug concerns.
00541      */
00542     void cppcheckError(const Token *tok) const;
00543 
00544     /**
00545      * Set up links for tokens so that one can call Token::link().
00546      *
00547      * @return false if there was a mismatch with tokens, this
00548      * should mean that source code was not valid.
00549      */
00550     bool createLinks();
00551 
00552     /**
00553      * Set up links between < and >.
00554      */
00555     void createLinks2();
00556 
00557     /** Syntax error */
00558     void syntaxError(const Token *tok) const;
00559 
00560     /** Syntax error. Example: invalid number of ')' */
00561     void syntaxError(const Token *tok, char c) const;
00562 
00563     /** Report that there is an unhandled "class x y {" code */
00564     void unhandled_macro_class_x_y(const Token *tok);
00565 
00566     /**
00567      * Assert that tokens are ok - used during debugging for example
00568      * to catch problems in simplifyTokenList.
00569      * @return always true.
00570      */
00571     bool validate() const;
00572 
00573     /**
00574      * Remove __declspec()
00575      */
00576     void simplifyDeclspec();
00577 
00578     /**
00579      * Remove calling convention
00580      */
00581     void simplifyCallingConvention();
00582 
00583     /**
00584      * Remove __attribute__ ((?))
00585      */
00586     void simplifyAttribute();
00587 
00588     /**
00589      * Remove keywords "volatile", "inline", "register", and "restrict"
00590      */
00591     void simplifyKeyword();
00592 
00593     /**
00594      * Remove __asm
00595      */
00596     void simplifyAsm();
00597 
00598     /**
00599      * Simplify bitfields - the field width is removed as we don't use it.
00600      */
00601     void simplifyBitfields();
00602 
00603     /**
00604      * Remove __builtin_expect(...), likely(...), and unlikely(...)
00605      */
00606     void simplifyBuiltinExpect();
00607 
00608     /**
00609      * Remove unnecessary member qualification
00610      */
00611     void removeUnnecessaryQualification();
00612 
00613     /**
00614      * Unnecessary member qualification error
00615      */
00616     void unnecessaryQualificationError(const Token *tok, const std::string &qualification) const;
00617 
00618     /**
00619      * Add std:: in front of std classes when "using namespace std;" was given
00620      */
00621     void simplifyNamespaceStd();
00622 
00623     /**
00624      * Remove Microsoft MFC 'DECLARE_MESSAGE_MAP()'
00625      */
00626     void simplifyMicrosoftMFC();
00627 
00628     /**
00629      * Convert Microsoft memory functions
00630      * CopyMemory(dst, src, len) -> memcpy(dst, src, len)
00631      * FillMemory(dst, len, val) -> memset(dst, val, len)
00632      * MoveMemory(dst, src, len) -> memmove(dst, src, len)
00633      * ZeroMemory(dst, len) -> memset(dst, 0, len)
00634      */
00635     void simplifyMicrosoftMemoryFunctions();
00636 
00637     /**
00638      * Convert Microsoft string functions
00639      * _tcscpy -> strcpy
00640      */
00641     void simplifyMicrosoftStringFunctions();
00642 
00643     /**
00644      * Remove Borland code
00645      */
00646     void simplifyBorland();
00647 
00648     /**
00649      * Remove Qt signals and slots
00650      */
00651     void simplifyQtSignalsSlots();
00652 
00653     /**
00654      * Collapse operator name tokens into single token
00655      * operator = => operator=
00656      */
00657     void simplifyOperatorName();
00658 
00659     /**
00660      * Check for duplicate enum definition
00661      */
00662     bool duplicateDefinition(Token **tokPtr, const Token *name) const;
00663 
00664     /**
00665      * Report error message
00666      */
00667     void reportError(const Token* tok, const Severity::SeverityType severity, const std::string& id, const std::string& msg, bool inconclusive = false) const;
00668     void reportError(const std::list<const Token*>& callstack, Severity::SeverityType severity, const std::string& id, const std::string& msg, bool inconclusive = false) const;
00669 
00670     /**
00671      * Duplicate enum definition error
00672      */
00673     void duplicateEnumError(const Token *tok1, const Token *tok2, const std::string & type) const;
00674 
00675     bool duplicateTypedef(Token **tokPtr, const Token *name, const Token *typeDef, bool undefinedStruct) const; // NOTE(review): undocumented; presumably the typedef counterpart of duplicateDefinition() - confirm
00676     void duplicateTypedefError(const Token *tok1, const Token *tok2, const std::string & type) const;
00677 
00678     /**
00679      * Report error - duplicate declarations
00680      */
00681     void duplicateDeclarationError(const Token *tok1, const Token *tok2, const std::string &type) const;
00682 
00683     void unsupportedTypedef(const Token *tok) const;
00684 
00685     /** Were there templates in the code? */
00686     bool codeWithTemplates() const {
00687         return _codeWithTemplates;
00688     }
00689 
00690     void setSettings(const Settings *settings) {
00691         _settings = settings;
00692         list.setSettings(settings); // pass the same settings on to the token list
00693     }
00694 
00695     const SymbolDatabase *getSymbolDatabase() const {
00696         return _symbolDatabase;
00697     }
00698     void createSymbolDatabase();
00699     void deleteSymbolDatabase();
00700 
00701     Token *deleteInvalidTypedef(Token *typeDef);
00702 
00703     /**
00704      * Get variable count.
00705      * @return number of variables
00706      */
00707     unsigned int varIdCount() const {
00708         return _varId;
00709     }
00710 
00711     /**
00712      * Simplify e.g. 'return(strncat(temp,"a",1));' into
00713      * strncat(temp,"a",1); return temp;
00714      */
00715     void simplifyReturnStrncat();
00716 
00717     /**
00718      * Output list of unknown types.
00719      */
00720     void printUnknownTypes();
00721 
00722 
00723     /**
00724      * Token list: stores all tokens.
00725      */
00726     TokenList list;
00727     // Implement tokens() as a wrapper for convenience when using the TokenList
00728     const Token* tokens() const {
00729         return list.front();
00730     }
00731 
00732     /**
00733      * Copy tokens.
00734      * @param dest destination token where copied tokens will be inserted after
00735      * @param first first token to copy
00736      * @param last last token to copy
00737      * @param one_line true=>copy all tokens to the same line as dest. false=>copy all tokens to dest while keeping the 'line breaks'
00738      * @return new location of last token copied
00739      */
00740     static Token *copyTokens(Token *dest, const Token *first, const Token *last, bool one_line = true);
00741 
00742 private:
00743     /** Disable copy constructor, no implementation */
00744     Tokenizer(const Tokenizer &);
00745 
00746     /** Disable assignment operator, no implementation */
00747     Tokenizer &operator=(const Tokenizer &);
00748 
00749     /** Settings */
00750     const Settings * _settings;
00751 
00752     /** Error logger */
00753     ErrorLogger * const _errorLogger;
00754 
00755     /** Symbol database that all checks etc can use */
00756     SymbolDatabase *_symbolDatabase;
00757 
00758     /** E.g. "A" for code where "#ifdef A" is true. This is used to
00759         print additional information in error situations. */
00760     std::string _configuration;
00761 
00762     /** sizeof information for known types */
00763     std::map<std::string, unsigned int> _typeSize;
00764 
00765     /** Variable count */
00766     unsigned int _varId;
00767 
00768     /**
00769      * Were there any templates? Templates that are "unused" are
00770      * removed from the token list.
00771      */
00772     bool _codeWithTemplates;
00773 
00774     /**
00775      * TimerResults
00776      */
00777     TimerResults *m_timerResults;
00778 };
00779 
00780 /// @}
00781 
00782 //---------------------------------------------------------------------------
00783 #endif