|
Cppcheck
|
00001 /* 00002 * Cppcheck - A tool for static C/C++ code analysis 00003 * Copyright (C) 2007-2013 Daniel Marjamäki and Cppcheck team. 00004 * 00005 * This program is free software: you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation, either version 3 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00017 */ 00018 00019 00020 //--------------------------------------------------------------------------- 00021 #ifndef tokenizeH 00022 #define tokenizeH 00023 //--------------------------------------------------------------------------- 00024 00025 #include "errorlogger.h" 00026 #include "tokenlist.h" 00027 #include "config.h" 00028 00029 #include <string> 00030 #include <map> 00031 #include <list> 00032 00033 class Settings; 00034 class SymbolDatabase; 00035 class TimerResults; 00036 00037 /// @addtogroup Core 00038 /// @{ 00039 00040 /** @brief The main purpose is to tokenize the source code. It also has functions that simplify the token list */ 00041 class CPPCHECKLIB Tokenizer { 00042 public: 00043 Tokenizer(); 00044 Tokenizer(const Settings * settings, ErrorLogger *errorLogger); 00045 ~Tokenizer(); 00046 00047 void setTimerResults(TimerResults *tr) { 00048 m_timerResults = tr; 00049 } 00050 00051 /** Returns the source file path. e.g. "file.cpp" */ 00052 const std::string& getSourceFilePath() const; 00053 00054 /** Is the code C. Used for bailouts */ 00055 bool isC() const; 00056 00057 /** Is the code CPP. Used for bailouts */ 00058 bool isCPP() const; 00059 00060 /** 00061 * Check if inner scope ends with a call to a noreturn function 00062 * \param endScopeToken The '}' token 00063 * \param unknown set to true if it's unknown if the scope is noreturn 00064 * \return true if scope ends with a function call that might be 'noreturn' 00065 */ 00066 static bool IsScopeNoReturn(const Token *endScopeToken, bool *unknown = 0); 00067 00068 /** 00069 * Tokenize code 00070 * @param code input stream for code, e.g. 00071 * \code 00072 * #file "p.h" 00073 * class Foo 00074 * { 00075 * private: 00076 * void Bar(); 00077 * }; 00078 * 00079 * #endfile 00080 * void Foo::Bar() 00081 * { 00082 * } 00083 * \endcode 00084 * 00085 * @param FileName The filename 00086 * @param configuration E.g. "A" for code where "#ifdef A" is true 00087 * @return false if source code contains syntax errors 00088 */ 00089 bool tokenize(std::istream &code, 00090 const char FileName[], 00091 const std::string &configuration = ""); 00092 00093 /** 00094 * tokenize condition and run simple simplifications on it 00095 * @param code code 00096 * @return true if success. 00097 */ 00098 bool tokenizeCondition(const std::string &code); 00099 00100 /** Set variable id */ 00101 void setVarId(); 00102 00103 /** 00104 * Simplify tokenlist 00105 * 00106 * @return false if there is an error that requires aborting 00107 * the checking of this file. 00108 */ 00109 bool simplifyTokenList(); 00110 00111 /** 00112 * Deletes dead code between 'begin' and 'end'. 00113 * In general not everything can be erased, such as: 00114 * - code after labels; 00115 * - code outside the scope where the function is called; 00116 * - code after a change of scope caused by 'switch(...);' 00117 * instructions, like 'case %any%;' or 'default;' 00118 * Also, if the dead code contains a 'switch' block 00119 * and inside it there's a label, the function removes all 00120 * the 'switch(..)' tokens and every occurrence of 'case %any%; | default;' 00121 * expression, such as the 'switch' block is reduced to a simple block. 00122 * 00123 * @param begin Tokens after this have a possibility to be erased. 00124 * @param end Tokens before this have a possibility to be erased. 00125 */ 00126 static void eraseDeadCode(Token *begin, const Token *end); 00127 00128 /** 00129 * Simplify '* & ( %var% ) =' or any combination of '* &' and '()' 00130 * parentheses around '%var%' to '%var% =' 00131 */ 00132 void simplifyMulAndParens(); 00133 00134 /** 00135 * Calculates sizeof value for given type. 00136 * @param type Token which will contain e.g. "int", "*", or string. 00137 * @return sizeof for given type, or 0 if it can't be calculated. 00138 */ 00139 unsigned int sizeOfType(const Token *type) const; 00140 00141 /** 00142 * Try to determine if function parameter is passed by value by looking 00143 * at the function declaration. 00144 * @param fpar token for function parameter in the function call 00145 * @return true if the parameter is passed by value. if unsure, false is returned 00146 */ 00147 bool isFunctionParameterPassedByValue(const Token *fpar) const; 00148 00149 /** 00150 * get error messages that the tokenizer generate 00151 */ 00152 static void getErrorMessages(ErrorLogger *errorLogger, const Settings *settings); 00153 00154 /** Simplify assignment in function call "f(x=g());" => "x=g();f(x);" 00155 */ 00156 void simplifyAssignmentInFunctionCall(); 00157 00158 /** Simplify assignment where rhs is a block : "x=({123;});" => "{x=123;}" */ 00159 void simplifyAssignmentBlock(); 00160 00161 /** 00162 * Simplify constant calculations such as "1+2" => "3" 00163 * @return true if modifications to token-list are done. 00164 * false if no modifications are done. 00165 */ 00166 bool simplifyCalculations(); 00167 00168 /** Insert array size where it isn't given */ 00169 void arraySize(); 00170 00171 /** Simplify labels and 'case|default' syntaxes. 00172 * @return true if found nothing or the syntax is correct. 00173 * false if syntax is found to be wrong. 00174 */ 00175 bool simplifyLabelsCaseDefault(); 00176 00177 /** Remove macros in global scope */ 00178 void removeMacrosInGlobalScope(); 00179 00180 /** Remove unknown macro in variable declarations: PROGMEM char x; */ 00181 void removeMacroInVarDecl(); 00182 00183 /** Remove redundant assignment */ 00184 void removeRedundantAssignment(); 00185 00186 /** Simplifies some realloc usage like 00187 * 'x = realloc (0, n);' => 'x = malloc(n);' 00188 * 'x = realloc (y, 0);' => 'x = 0; free(y);' 00189 */ 00190 void simplifyRealloc(); 00191 00192 /** 00193 * Replace sizeof() to appropriate size. 00194 * @return true if modifications to token-list are done. 00195 * false if no modifications are done. 00196 */ 00197 bool simplifySizeof(); 00198 00199 /** 00200 * Simplify variable declarations (split up) 00201 * \param only_k_r_fpar Only simplify K&R function parameters 00202 */ 00203 void simplifyVarDecl(bool only_k_r_fpar); 00204 00205 /** 00206 * Simplify variable initialization 00207 * '; int *p(0);' => '; int *p = 0;' 00208 */ 00209 void simplifyInitVar(); 00210 Token * initVar(Token * tok); 00211 00212 /** 00213 * Convert platform dependent types to standard types. 00214 * 32 bits: size_t -> unsigned long 00215 * 64 bits: size_t -> unsigned long long 00216 */ 00217 void simplifyPlatformTypes(); 00218 00219 /** 00220 * Collapse compound standard types into a single token. 00221 * unsigned long long int => long _isUnsigned=true,_isLong=true 00222 */ 00223 void simplifyStdType(); 00224 00225 /** 00226 * Simplify easy constant '?:' operation 00227 * Example: 0 ? (2/0) : 0 => 0 00228 * @return true if something is modified 00229 * false if nothing is done. 00230 */ 00231 bool simplifyConstTernaryOp(); 00232 00233 /** 00234 * Simplify compound assignments 00235 * Example: ";a+=b;" => ";a=a+b;" 00236 */ 00237 void simplifyCompoundAssignment(); 00238 00239 /** 00240 * simplify if-assignments 00241 * Example: "if(a=b);" => "a=b;if(a);" 00242 */ 00243 void simplifyIfAssign(); 00244 00245 /** 00246 * Simplify multiple assignments. 00247 * Example: "a = b = c = 0;" => "a = 0; b = 0; c = 0;" 00248 */ 00249 void simplifyVariableMultipleAssign(); 00250 00251 /** 00252 * simplify if-not 00253 * Example: "if(0==x);" => "if(!x);" 00254 */ 00255 void simplifyIfNot(); 00256 00257 /** 00258 * simplify if-not NULL 00259 * Example: "if(0!=x);" => "if(x);" 00260 * Special case: 'x = (0 != x);' is removed. 00261 */ 00262 void simplifyIfNotNull(); 00263 00264 /** @brief simplify if (a) { if (a) */ 00265 void simplifyIfSameInnerCondition(); 00266 00267 /** 00268 * Simplify the "not" and "and" keywords to "!" and "&&" 00269 * accordingly. 00270 * Examples: 00271 * - "if (not p)" => "if (!p)" 00272 * - "if (p and q)" => "if (p && q)" 00273 */ 00274 bool simplifyLogicalOperators(); 00275 00276 /** 00277 * Simplify comma into a semicolon when possible: 00278 * - "delete a, delete b" => "delete a; delete b;" 00279 * - "a = 0, b = 0;" => "a = 0; b = 0;" 00280 * - "return a(), b;" => "a(); return b;" 00281 */ 00282 void simplifyComma(); 00283 00284 /** Add braces to an if-block, for-block, etc. 00285 * @return true if no syntax errors 00286 */ 00287 bool simplifyAddBraces(); 00288 00289 /** Add braces to an if-block, for-block, etc. 00290 * for command starting at token including else-block 00291 * @return last token of command 00292 * or input token in case of an error where no braces are added 00293 * or NULL when syntaxError is called 00294 */ 00295 Token * simplifyAddBracesToCommand(Token * tok); 00296 00297 /** Add pair of braces to an single if-block, else-block, for-block, etc. 00298 * for command starting at token 00299 * @return last token of command 00300 * or input token in case of an error where no braces are added 00301 * or NULL when syntaxError is called 00302 */ 00303 Token * simplifyAddBracesPair(Token *tok, bool commandWithCondition); 00304 00305 /** 00306 * typedef A mytype; 00307 * mytype c; 00308 * 00309 * Becomes: 00310 * typedef A mytype; 00311 * A c; 00312 */ 00313 void simplifyTypedef(); 00314 00315 /** 00316 * Simplify casts 00317 */ 00318 void simplifyCasts(); 00319 00320 /** 00321 * Change (multiple) arrays to (multiple) pointers. 00322 */ 00323 void simplifyUndefinedSizeArray(); 00324 00325 /** 00326 * A simplify function that replaces a variable with its value in cases 00327 * when the value is known. e.g. "x=10; if(x)" => "x=10;if(10)" 00328 * 00329 * @return true if modifications to token-list are done. 00330 * false if no modifications are done. 00331 */ 00332 bool simplifyKnownVariables(); 00333 00334 /** 00335 * Utility function for simplifyKnownVariables. Get data about an 00336 * assigned variable. 00337 */ 00338 static bool simplifyKnownVariablesGetData(unsigned int varid, Token **_tok2, Token **_tok3, std::string &value, unsigned int &valueVarId, bool &valueIsPointer, bool floatvar); 00339 00340 /** 00341 * utility function for simplifyKnownVariables. Perform simplification 00342 * of a given variable 00343 */ 00344 bool simplifyKnownVariablesSimplify(Token **tok2, Token *tok3, unsigned int varid, const std::string &structname, std::string &value, unsigned int valueVarId, bool valueIsPointer, const Token * const valueToken, int indentlevel) const; 00345 00346 /** Replace a "goto" with the statements */ 00347 void simplifyGoto(); 00348 00349 /** Simplify useless C++ empty namespaces, like: 'namespace %var% { }'*/ 00350 void simplifyEmptyNamespaces(); 00351 00352 /** Simplify redundant code placed after control flow statements : 00353 * 'return', 'throw', 'goto', 'break' and 'continue' 00354 */ 00355 void simplifyFlowControl(); 00356 00357 /** Expand nested strcat() calls. */ 00358 void simplifyNestedStrcat(); 00359 00360 /** Simplify "if else" */ 00361 void elseif(); 00362 00363 /** 00364 * Simplify the operator "?:" 00365 */ 00366 void simplifyConditionOperator(); 00367 00368 /** Simplify conditions 00369 * @return true if something is modified 00370 * false if nothing is done. 00371 */ 00372 bool simplifyConditions(); 00373 00374 /** Remove redundant code, e.g. if( false ) { int a; } should be 00375 * removed, because it is never executed. 00376 * @return true if something is modified 00377 * false if nothing is done. 00378 */ 00379 bool removeRedundantConditions(); 00380 00381 /** 00382 * Remove redundant for: 00383 * "for (x=0;x<1;x++) { }" => "{ x = 1; }" 00384 */ 00385 void removeRedundantFor(); 00386 00387 00388 /** 00389 * Reduces "; ;" to ";", except in "( ; ; )" 00390 */ 00391 void removeRedundantSemicolons(); 00392 00393 /** Simplify function calls - constant return value 00394 * @return true if something is modified 00395 * false if nothing is done. 00396 */ 00397 bool simplifyFunctionReturn(); 00398 00399 /** Struct initialization */ 00400 void simplifyStructInit(); 00401 00402 /** Struct simplification 00403 * "struct S { } s;" => "struct S { }; S s;" 00404 */ 00405 00406 void simplifyStructDecl(); 00407 00408 /** 00409 * Remove redundant parentheses: 00410 * - "((x))" => "(x)" 00411 * - "(function())" => "function()" 00412 * - "(delete x)" => "delete x" 00413 * - "(delete [] x)" => "delete [] x" 00414 * @return true if modifications to token-list are done. 00415 * false if no modifications are done. 00416 */ 00417 bool simplifyRedundantParentheses(); 00418 00419 void simplifyCharAt(); 00420 00421 /** Simplify references */ 00422 void simplifyReference(); 00423 00424 /** 00425 * Simplify functions like "void f(x) int x; {" 00426 * into "void f(int x) {" 00427 * @return false only if there's a syntax error 00428 */ 00429 bool simplifyFunctionParameters(); 00430 00431 /** 00432 * Simplify templates 00433 */ 00434 void simplifyTemplates(); 00435 00436 void simplifyDoublePlusAndDoubleMinus(); 00437 00438 void simplifyRedundantConsecutiveBraces(); 00439 00440 void simplifyArrayAccessSyntax(); 00441 00442 void simplifyParameterVoid(); 00443 00444 void fillTypeSizes(); 00445 00446 void combineOperators(); 00447 00448 void combineStrings(); 00449 00450 void concatenateDoubleSharp(); 00451 00452 void simplifyFileAndLineMacro(); 00453 00454 void simplifyNull(); 00455 00456 void concatenateNegativeNumberAndAnyPositive(); 00457 00458 void simplifyExternC(); 00459 00460 void simplifyRoundCurlyParentheses(); 00461 00462 void simplifyDebugNew(); 00463 00464 void simplifySQL(); 00465 00466 bool hasEnumsWithTypedef(); 00467 00468 void simplifyDefaultAndDeleteInsideClass(); 00469 00470 bool hasComplicatedSyntaxErrorsInTemplates(); 00471 00472 /** 00473 * Simplify e.g. 'atol("0")' into '0' 00474 */ 00475 void simplifyMathFunctions(); 00476 00477 /** 00478 * Simplify e.g. 'sin(0)' into '0' 00479 */ 00480 void simplifyMathExpressions(); 00481 00482 /** 00483 * Modify strings in the token list by replacing hex and oct 00484 * values. E.g. "\x61" -> "a" and "\000" -> "\0" 00485 * @param source The string to be modified, e.g. "\x61" 00486 * @return Modified string, e.g. "a" 00487 */ 00488 static std::string simplifyString(const std::string &source); 00489 00490 /** 00491 * Use "<" comparison instead of ">" 00492 * Use "<=" comparison instead of ">=" 00493 */ 00494 void simplifyComparisonOrder(); 00495 00496 /** 00497 * Change "int const x;" into "const int x;" 00498 */ 00499 void simplifyConst(); 00500 00501 /** 00502 * simplify "while (0)" 00503 */ 00504 void simplifyWhile0(); 00505 00506 /** 00507 * Simplify while(func() && errno==EINTR) 00508 */ 00509 void simplifyErrNoInWhile(); 00510 00511 /** 00512 * Simplify while(func(f)) 00513 */ 00514 void simplifyFuncInWhile(); 00515 00516 /** 00517 * Replace enum with constant value 00518 */ 00519 void simplifyEnum(); 00520 00521 /** 00522 * Remove "std::" before some function names 00523 */ 00524 void simplifyStd(); 00525 00526 /** Simplify pointer to standard type (C only) */ 00527 void simplifyPointerToStandardType(); 00528 00529 /** Simplify function pointers */ 00530 void simplifyFunctionPointers(); 00531 00532 /** 00533 * Remove exception specifications. 00534 */ 00535 void removeExceptionSpecifications(); 00536 00537 00538 /** 00539 * Send error message to error logger about internal bug. 00540 * @param tok the token that this bug concerns. 00541 */ 00542 void cppcheckError(const Token *tok) const; 00543 00544 /** 00545 * Setup links for tokens so that one can call Token::link(). 00546 * 00547 * @return false if there was a mismatch with tokens, this 00548 * should mean that source code was not valid. 00549 */ 00550 bool createLinks(); 00551 00552 /** 00553 * Setup links between < and >. 00554 */ 00555 void createLinks2(); 00556 00557 /** Syntax error */ 00558 void syntaxError(const Token *tok) const; 00559 00560 /** Syntax error. Example: invalid number of ')' */ 00561 void syntaxError(const Token *tok, char c) const; 00562 00563 /** Report that there is an unhandled "class x y {" code */ 00564 void unhandled_macro_class_x_y(const Token *tok); 00565 00566 /** 00567 * assert that tokens are ok - used during debugging for example 00568 * to catch problems in simplifyTokenList. 00569 * @return always true. 00570 */ 00571 bool validate() const; 00572 00573 /** 00574 * Remove __declspec() 00575 */ 00576 void simplifyDeclspec(); 00577 00578 /** 00579 * Remove calling convention 00580 */ 00581 void simplifyCallingConvention(); 00582 00583 /** 00584 * Remove __attribute__ ((?)) 00585 */ 00586 void simplifyAttribute(); 00587 00588 /** 00589 * Remove keywords "volatile", "inline", "register", and "restrict" 00590 */ 00591 void simplifyKeyword(); 00592 00593 /** 00594 * Remove __asm 00595 */ 00596 void simplifyAsm(); 00597 00598 /** 00599 * Simplify bitfields - the field width is removed as we don't use it. 00600 */ 00601 void simplifyBitfields(); 00602 00603 /** 00604 * Remove __builtin_expect(...), likely(...), and unlikely(...) 00605 */ 00606 void simplifyBuiltinExpect(); 00607 00608 /** 00609 * Remove unnecessary member qualification 00610 */ 00611 void removeUnnecessaryQualification(); 00612 00613 /** 00614 * unnecessary member qualification error 00615 */ 00616 void unnecessaryQualificationError(const Token *tok, const std::string &qualification) const; 00617 00618 /** 00619 * Add std:: in front of std classes, when using namespace std; was given 00620 */ 00621 void simplifyNamespaceStd(); 00622 00623 /** 00624 * Remove Microsoft MFC 'DECLARE_MESSAGE_MAP()' 00625 */ 00626 void simplifyMicrosoftMFC(); 00627 00628 /** 00629 * Convert Microsoft memory functions 00630 * CopyMemory(dst, src, len) -> memcpy(dst, src, len) 00631 * FillMemory(dst, len, val) -> memset(dst, val, len) 00632 * MoveMemory(dst, src, len) -> memmove(dst, src, len) 00633 * ZeroMemory(dst, len) -> memset(dst, 0, len) 00634 */ 00635 void simplifyMicrosoftMemoryFunctions(); 00636 00637 /** 00638 * Convert Microsoft string functions 00639 * _tcscpy -> strcpy 00640 */ 00641 void simplifyMicrosoftStringFunctions(); 00642 00643 /** 00644 * Remove Borland code 00645 */ 00646 void simplifyBorland(); 00647 00648 /** 00649 * Remove Qt signals and slots 00650 */ 00651 void simplifyQtSignalsSlots(); 00652 00653 /** 00654 * Collapse operator name tokens into single token 00655 * operator = => operator= 00656 */ 00657 void simplifyOperatorName(); 00658 00659 /** 00660 * check for duplicate enum definition 00661 */ 00662 bool duplicateDefinition(Token **tokPtr, const Token *name) const; 00663 00664 /** 00665 * report error message 00666 */ 00667 void reportError(const Token* tok, const Severity::SeverityType severity, const std::string& id, const std::string& msg, bool inconclusive = false) const; 00668 void reportError(const std::list<const Token*>& callstack, Severity::SeverityType severity, const std::string& id, const std::string& msg, bool inconclusive = false) const; 00669 00670 /** 00671 * duplicate enum definition error 00672 */ 00673 void duplicateEnumError(const Token *tok1, const Token *tok2, const std::string & type) const; 00674 00675 bool duplicateTypedef(Token **tokPtr, const Token *name, const Token *typeDef, bool undefinedStruct) const; 00676 void duplicateTypedefError(const Token *tok1, const Token *tok2, const std::string & type) const; 00677 00678 /** 00679 * Report error - duplicate declarations 00680 */ 00681 void duplicateDeclarationError(const Token *tok1, const Token *tok2, const std::string &type) const; 00682 00683 void unsupportedTypedef(const Token *tok) const; 00684 00685 /** Was there templates in the code? */ 00686 bool codeWithTemplates() const { 00687 return _codeWithTemplates; 00688 } 00689 00690 void setSettings(const Settings *settings) { 00691 _settings = settings; 00692 list.setSettings(settings); 00693 } 00694 00695 const SymbolDatabase *getSymbolDatabase() const { 00696 return _symbolDatabase; 00697 } 00698 void createSymbolDatabase(); 00699 void deleteSymbolDatabase(); 00700 00701 Token *deleteInvalidTypedef(Token *typeDef); 00702 00703 /** 00704 * Get variable count. 00705 * @return number of variables 00706 */ 00707 unsigned int varIdCount() const { 00708 return _varId; 00709 } 00710 00711 /** 00712 * Simplify e.g. 'return(strncat(temp,"a",1));' into 00713 * strncat(temp,"a",1); return temp; 00714 */ 00715 void simplifyReturnStrncat(); 00716 00717 /** 00718 * Output list of unknown types. 00719 */ 00720 void printUnknownTypes(); 00721 00722 00723 /** 00724 * Token list: stores all tokens. 00725 */ 00726 TokenList list; 00727 // Implement tokens() as a wrapper for convinience when using the TokenList 00728 const Token* tokens() const { 00729 return list.front(); 00730 } 00731 00732 /** 00733 * Copy tokens. 00734 * @param dest destination token where copied tokens will be inserted after 00735 * @param first first token to copy 00736 * @param last last token to copy 00737 * @param one_line true=>copy all tokens to the same line as dest. false=>copy all tokens to dest while keeping the 'line breaks' 00738 * @return new location of last token copied 00739 */ 00740 static Token *copyTokens(Token *dest, const Token *first, const Token *last, bool one_line = true); 00741 00742 private: 00743 /** Disable copy constructor, no implementation */ 00744 Tokenizer(const Tokenizer &); 00745 00746 /** Disable assignment operator, no implementation */ 00747 Tokenizer &operator=(const Tokenizer &); 00748 00749 /** settings */ 00750 const Settings * _settings; 00751 00752 /** errorlogger */ 00753 ErrorLogger * const _errorLogger; 00754 00755 /** Symbol database that all checks etc can use */ 00756 SymbolDatabase *_symbolDatabase; 00757 00758 /** E.g. "A" for code where "#ifdef A" is true. This is used to 00759 print additional information in error situations. */ 00760 std::string _configuration; 00761 00762 /** sizeof information for known types */ 00763 std::map<std::string, unsigned int> _typeSize; 00764 00765 /** variable count */ 00766 unsigned int _varId; 00767 00768 /** 00769 * was there any templates? templates that are "unused" are 00770 * removed from the token list 00771 */ 00772 bool _codeWithTemplates; 00773 00774 /** 00775 * TimerResults 00776 */ 00777 TimerResults *m_timerResults; 00778 }; 00779 00780 /// @} 00781 00782 //--------------------------------------------------------------------------- 00783 #endif
1.7.6.1