Cppcheck
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
tokenize.h
Go to the documentation of this file.
1 /*
2  * Cppcheck - A tool for static C/C++ code analysis
3  * Copyright (C) 2007-2014 Daniel Marjamäki and Cppcheck team.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 //---------------------------------------------------------------------------
20 #ifndef tokenizeH
21 #define tokenizeH
22 //---------------------------------------------------------------------------
23 
24 #include "errorlogger.h"
25 #include "tokenlist.h"
26 #include "config.h"
27 
28 #include <string>
29 #include <map>
30 #include <list>
31 #include <ctime>
32 
33 class Settings;
34 class SymbolDatabase;
35 class TimerResults;
36 
37 /// @addtogroup Core
38 /// @{
39 
40 /** @brief The main purpose is to tokenize the source code. It also has functions that simplify the token list */
42 public:
43  Tokenizer();
44  Tokenizer(const Settings * settings, ErrorLogger *errorLogger);
45  ~Tokenizer();
46 
48  m_timerResults = tr;
49  }
50 
51  /** Returns the source file path. e.g. "file.cpp" */
52  const std::string& getSourceFilePath() const;
53 
54  /** Is the code C. Used for bailouts */
55  bool isC() const;
56 
57  /** Is the code CPP. Used for bailouts */
58  bool isCPP() const;
59 
60  /**
61  * Check if inner scope ends with a call to a noreturn function
62  * \param endScopeToken The '}' token
63  * \param unknown set to true if it's unknown if the scope is noreturn
64  * \return true if scope ends with a function call that might be 'noreturn'
65  */
66  bool IsScopeNoReturn(const Token *endScopeToken, bool *unknown = nullptr) const;
67 
68  /**
69  * Tokenize code
70  * @param code input stream for code, e.g.
71  * \code
72  * #file "p.h"
73  * class Foo
74  * {
75  * private:
76  * void Bar();
77  * };
78  *
79  * #endfile
80  * void Foo::Bar()
81  * {
82  * }
83  * \endcode
84  *
85  * @param FileName The filename
86  * @param configuration E.g. "A" for code where "#ifdef A" is true
87  * @return false if source code contains syntax errors
88  */
89  bool tokenize(std::istream &code,
90  const char FileName[],
91  const std::string &configuration = "");
92 
93  /**
94  * tokenize condition and run simple simplifications on it
95  * @param code code
96  * @return true if success.
97  */
98  bool tokenizeCondition(const std::string &code);
99 
100  /** Set variable id */
101  void setVarId();
102 
103  /**
104  * Basic simplification of tokenlist
105  *
106  * @param FileName The filename to run; used to do
107  * markup checks.
108  *
109  * @return false if there is an error that requires aborting
110  * the checking of this file.
111  */
112  bool simplifyTokenList1(const char FileName[]);
113 
114  /**
115  * Most aggressive simplification of tokenlist
116  *
117  * @return false if there is an error that requires aborting
118  * the checking of this file.
119  */
120  bool simplifyTokenList2();
121 
122  /**
123  * Deletes dead code between 'begin' and 'end'.
124  * In general not everything can be erased, such as:
125  * - code after labels;
126  * - code outside the scope where the function is called;
127  * - code after a change of scope caused by 'switch(...);'
128  * instructions, like 'case %any%;' or 'default;'
129  * Also, if the dead code contains a 'switch' block
130  * and inside it there's a label, the function removes all
131  * the 'switch(..)' tokens and every occurrence of 'case %any%; | default;'
132  * expression, such as the 'switch' block is reduced to a simple block.
133  *
134  * @param begin Tokens after this have a possibility to be erased.
135  * @param end Tokens before this have a possibility to be erased.
136  */
137  static void eraseDeadCode(Token *begin, const Token *end);
138 
139  /**
140  * Simplify '* & ( %var% ) =' or any combination of '* &' and '()'
141  * parentheses around '%var%' to '%var% ='
142  */
143  void simplifyMulAndParens();
144 
145  /**
146  * Calculates sizeof value for given type.
147  * @param type Token which will contain e.g. "int", "*", or string.
148  * @return sizeof for given type, or 0 if it can't be calculated.
149  */
150  unsigned int sizeOfType(const Token *type) const;
151 
152  /**
153  * Try to determine if function parameter is passed by value by looking
154  * at the function declaration.
155  * @param fpar token for function parameter in the function call
156  * @return true if the parameter is passed by value. if unsure, false is returned
157  */
158  bool isFunctionParameterPassedByValue(const Token *fpar) const;
159 
160  /**
161  * Get the error messages that the tokenizer generates.
162  */
163  static void getErrorMessages(ErrorLogger *errorLogger, const Settings *settings);
164 
165  /** Simplify assignment in function call "f(x=g());" => "x=g();f(x);"
166  */
167  void simplifyAssignmentInFunctionCall();
168 
169  /** Simplify assignment where rhs is a block : "x=({123;});" => "{x=123;}" */
170  void simplifyAssignmentBlock();
171 
172  /**
173  * Simplify constant calculations such as "1+2" => "3"
174  * @return true if modifications to token-list are done.
175  * false if no modifications are done.
176  */
177  bool simplifyCalculations();
178 
179  /**
180  * Simplify dereferencing a pointer offset by a number:
181  * "*(ptr + num)" => "ptr[num]"
182  * "*(ptr - num)" => "ptr[-num]"
183  */
184  void simplifyOffsetPointerDereference();
185 
186  /** Insert array size where it isn't given */
187  void arraySize();
188 
189  /** Simplify labels and 'case|default' syntaxes.
190  * NOTE(review): this function is declared void, so it does not report
191  * success or failure through a return value; confirm how wrong syntax is signalled.
192  */
193  void simplifyLabelsCaseDefault();
194 
195  /** Remove macros in global scope */
196  void removeMacrosInGlobalScope();
197 
198  /** Remove unknown macro in variable declarations: PROGMEM char x; */
199  void removeMacroInVarDecl();
200 
201  /** Remove redundant assignment */
202  void removeRedundantAssignment();
203 
204  /** Simplifies some realloc usage like
205  * 'x = realloc (0, n);' => 'x = malloc(n);'
206  * 'x = realloc (y, 0);' => 'x = 0; free(y);'
207  */
208  void simplifyRealloc();
209 
210  /** Add parentheses for sizeof: sizeof x => sizeof(x) */
211  void sizeofAddParentheses();
212 
213  /**
214  * Replace sizeof() to appropriate size.
215  * @return true if modifications to token-list are done.
216  * false if no modifications are done.
217  */
218  bool simplifySizeof();
219 
220  /**
221  * Simplify variable declarations (split up)
222  * \param only_k_r_fpar Only simplify K&R function parameters
223  */
224  void simplifyVarDecl(bool only_k_r_fpar);
225  void simplifyVarDecl(Token * tokBegin, Token * tokEnd, bool only_k_r_fpar);
226 
227  /**
228  * Simplify variable initialization
229  * '; int *p(0);' => '; int *p = 0;'
230  */
231  void simplifyInitVar();
232  Token * initVar(Token * tok);
233 
234  /**
235  * Convert platform dependent types to standard types.
236  * 32 bits: size_t -> unsigned long
237  * 64 bits: size_t -> unsigned long long
238  */
239  void simplifyPlatformTypes();
240 
241  /**
242  * Collapse compound standard types into a single token.
243  * unsigned long long int => long _isUnsigned=true,_isLong=true
244  */
245  void simplifyStdType();
246 
247  /**
248  * Simplify easy constant '?:' operation
249  * Example: 0 ? (2/0) : 0 => 0
250  * @return true if something is modified
251  * false if nothing is done.
252  */
253  bool simplifyConstTernaryOp();
254 
255  /**
256  * Simplify compound assignments
257  * Example: ";a+=b;" => ";a=a+b;"
258  */
259  void simplifyCompoundAssignment();
260 
261  /**
262  * Simplify assignments in "if" and "while" conditions
263  * Example: "if(a=b);" => "a=b;if(a);"
264  * Example: "while(a=b) { f(a); }" => "a = b; while(a){ f(a); a = b; }"
265  * Example: "do { f(a); } while(a=b);" => "do { f(a); a = b; } while(a);"
266  */
267  void simplifyIfAndWhileAssign();
268 
269  /**
270  * Simplify multiple assignments.
271  * Example: "a = b = c = 0;" => "a = 0; b = 0; c = 0;"
272  */
273  void simplifyVariableMultipleAssign();
274 
275  /**
276  * simplify if-not
277  * Example: "if(0==x);" => "if(!x);"
278  */
279  void simplifyIfNot();
280 
281  /**
282  * simplify if-not NULL
283  * Example: "if(0!=x);" => "if(x);"
284  * Special case: 'x = (0 != x);' is removed.
285  */
286  void simplifyIfNotNull();
287 
288  /** @brief simplify if (a) { if (a) */
289  void simplifyIfSameInnerCondition();
290 
291  /**
292  * Simplify the 'C Alternative Tokens'. NOTE(review): the meaning of the bool result is not documented here - confirm in the implementation.
293  * Examples:
294  * "if(s and t)" => "if(s && t)"
295  * "while((r bitand s) and not t)" => "while((r & s) && !t)"
296  * "a and_eq b;" => "a &= b;"
297  */
298  bool simplifyCAlternativeTokens();
299 
300  /**
301  * Simplify comma into a semicolon when possible:
302  * - "delete a, delete b" => "delete a; delete b;"
303  * - "a = 0, b = 0;" => "a = 0; b = 0;"
304  * - "return a(), b;" => "a(); return b;"
305  */
306  void simplifyComma();
307 
308  /** Add braces to an if-block, for-block, etc.
309  * @return true if no syntax errors
310  */
311  bool simplifyAddBraces();
312 
313  /** Add braces to an if-block, for-block, etc.
314  * for command starting at token including else-block
315  * @return last token of command
316  * or input token in case of an error where no braces are added
317  * or NULL when syntaxError is called
318  */
319  Token * simplifyAddBracesToCommand(Token * tok);
320 
321  /** Add a pair of braces to a single if-block, else-block, for-block, etc.
322  * for command starting at token
323  * @return last token of command
324  * or input token in case of an error where no braces are added
325  * or NULL when syntaxError is called
326  */
327  Token * simplifyAddBracesPair(Token *tok, bool commandWithCondition);
328 
329  /**
330  * typedef A mytype;
331  * mytype c;
332  *
333  * Becomes:
334  * typedef A mytype;
335  * A c;
336  */
337  void simplifyTypedef();
338 
339  /**
340  * Simplify float casts (float)1 => 1.0
341  */
342  void simplifyFloatCasts();
343 
344  /**
345  * Simplify casts
346  */
347  void simplifyCasts();
348 
349  /**
350  * Change (multiple) arrays to (multiple) pointers.
351  */
352  void simplifyUndefinedSizeArray();
353 
354  /**
355  * A simplify function that replaces a variable with its value in cases
356  * when the value is known. e.g. "x=10; if(x)" => "x=10;if(10)"
357  *
358  * @return true if modifications to token-list are done.
359  * false if no modifications are done.
360  */
361  bool simplifyKnownVariables();
362 
363  /**
364  * Utility function for simplifyKnownVariables. Get data about an
365  * assigned variable.
366  */
367  static bool simplifyKnownVariablesGetData(unsigned int varid, Token **_tok2, Token **_tok3, std::string &value, unsigned int &valueVarId, bool &valueIsPointer, bool floatvar);
368 
369  /**
370  * utility function for simplifyKnownVariables. Perform simplification
371  * of a given variable
372  */
373  bool simplifyKnownVariablesSimplify(Token **tok2, Token *tok3, unsigned int varid, const std::string &structname, std::string &value, unsigned int valueVarId, bool valueIsPointer, const Token * const valueToken, int indentlevel) const;
374 
375  /** Simplify useless C++ empty namespaces, like: 'namespace %var% { }'*/
376  void simplifyEmptyNamespaces();
377 
378  /** Simplify redundant code placed after control flow statements :
379  * 'return', 'throw', 'goto', 'break' and 'continue'
380  */
381  void simplifyFlowControl();
382 
383  /** Expand nested strcat() calls. */
384  void simplifyNestedStrcat();
385 
386  /** Simplify "if else" */
387  void elseif();
388 
389  /**
390  * Simplify the operator "?:"
391  */
392  void simplifyConditionOperator();
393 
394  /** Simplify conditions
395  * @return true if something is modified
396  * false if nothing is done.
397  */
398  bool simplifyConditions();
399 
400  /** Remove redundant code, e.g. if( false ) { int a; } should be
401  * removed, because it is never executed.
402  * @return true if something is modified
403  * false if nothing is done.
404  */
405  bool removeRedundantConditions();
406 
407  /**
408  * Remove redundant for:
409  * "for (x=0;x<1;x++) { }" => "{ x = 1; }"
410  */
411  void removeRedundantFor();
412 
413 
414  /**
415  * Reduces "; ;" to ";", except in "( ; ; )"
416  */
417  void removeRedundantSemicolons();
418 
419  /** Simplify function calls - constant return value
420  * @return true if something is modified
421  * false if nothing is done.
422  */
423  bool simplifyFunctionReturn();
424 
425  /** Struct initialization */
426  void simplifyStructInit();
427 
428  /** Struct simplification
429  * "struct S { } s;" => "struct S { }; S s;"
430  */
431 
432  void simplifyStructDecl();
433 
434  /**
435  * Remove redundant parentheses:
436  * - "((x))" => "(x)"
437  * - "(function())" => "function()"
438  * - "(delete x)" => "delete x"
439  * - "(delete [] x)" => "delete [] x"
440  * @return true if modifications to token-list are done.
441  * false if no modifications are done.
442  */
443  bool simplifyRedundantParentheses();
444 
445  void simplifyCharAt();
446 
447  /** Simplify references */
448  void simplifyReference();
449 
450  /**
451  * Simplify functions like "void f(x) int x; {"
452  * into "void f(int x) {"
453  * NOTE(review): declared void, so no status is returned; confirm how syntax errors are reported.
454  */
455  void simplifyFunctionParameters();
456 
457  /**
458  * Simplify templates
459  */
460  void simplifyTemplates();
461 
462  void simplifyDoublePlusAndDoubleMinus();
463 
464  void simplifyRedundantConsecutiveBraces();
465 
466  void simplifyArrayAccessSyntax();
467 
468  void simplifyParameterVoid();
469 
470  void fillTypeSizes();
471 
472  void combineOperators();
473 
474  void combineStrings();
475 
476  void concatenateDoubleSharp();
477 
478  void simplifyFileAndLineMacro();
479 
480  void simplifyNull();
481 
482  void concatenateNegativeNumberAndAnyPositive();
483 
484  void simplifyExternC();
485 
486  void simplifyRoundCurlyParentheses();
487 
488  void simplifyDebugNew();
489 
490  void simplifySQL();
491 
492  bool hasEnumsWithTypedef();
493 
494  void simplifyDefaultAndDeleteInsideClass();
495 
496  void findComplicatedSyntaxErrorsInTemplates();
497 
498  /**
499  * Simplify e.g. 'atol("0")' into '0'
500  * @return true if simplifications were performed, false otherwise.
501  */
502  bool simplifyMathFunctions();
503 
504  /**
505  * Simplify e.g. 'sin(0)' into '0'
506  */
507  void simplifyMathExpressions();
508 
509  /**
510  * Modify strings in the token list by replacing hex and oct
511  * values. E.g. "\x61" -> "a" and "\000" -> "\0"
512  * @param source The string to be modified, e.g. "\x61"
513  * @return Modified string, e.g. "a"
514  */
515  static std::string simplifyString(const std::string &source);
516 
517  /**
518  * Change "int const x;" into "const int x;"
519  */
520  void simplifyConst();
521 
522  /**
523  * simplify "while (0)"
524  */
525  void simplifyWhile0();
526 
527  /**
528  * Simplify while(func() && errno==EINTR)
529  */
530  void simplifyErrNoInWhile();
531 
532  /**
533  * Simplify while(func(f))
534  */
535  void simplifyFuncInWhile();
536 
537  /**
538  * Replace enum with constant value
539  */
540  void simplifyEnum();
541 
542  /**
543  * Remove "std::" before some function names
544  */
545  void simplifyStd();
546 
547  /** Simplify pointer to standard type (C only) */
548  void simplifyPointerToStandardType();
549 
550  /** Simplify function pointers */
551  void simplifyFunctionPointers();
552 
553  /**
554  * Remove exception specifications.
555  */
556  void removeExceptionSpecifications();
557 
558 
559  /**
560  * Send error message to error logger about internal bug.
561  * @param tok the token that this bug concerns.
562  */
563  void cppcheckError(const Token *tok) const;
564 
565  /**
566  * Set up links for tokens so that one can call Token::link().
567  *
568  * NOTE(review): declared void, so a token mismatch (invalid source code)
569  * is not reported through a return value; confirm how it is signalled.
570  */
571  void createLinks();
572 
573  /**
574  * Setup links between < and >.
575  */
576  void createLinks2();
577 
578  /** Syntax error */
579  void syntaxError(const Token *tok) const;
580 
581  /** Syntax error. Example: invalid number of ')' */
582  void syntaxError(const Token *tok, char c) const;
583 
584  /** Report that there is an unhandled "class x y {" code */
585  void unhandled_macro_class_x_y(const Token *tok) const;
586 
587  /**
588  * Assert that tokens are ok - used during debugging, for example
589  * to catch problems in simplifyTokenList1() / simplifyTokenList2().
590  * NOTE(review): declared void; no value is returned.
591  */
592  void validate() const;
593 
594  /**
595  * Remove __declspec()
596  */
597  void simplifyDeclspec();
598 
599  /**
600  * Remove calling convention
601  */
602  void simplifyCallingConvention();
603 
604  /**
605  * Remove __attribute__ ((?))
606  */
607  void simplifyAttribute();
608 
609  /**
610  * Remove keywords "volatile", "inline", "register", and "restrict"
611  */
612  void simplifyKeyword();
613 
614  /**
615  * Remove __asm
616  */
617  void simplifyAsm();
618 
619  /**
620  * Simplify bitfields - the field width is removed as we don't use it.
621  */
622  void simplifyBitfields();
623 
624  /**
625  * Remove __builtin_expect(...), likely(...), and unlikely(...)
626  */
627  void simplifyBuiltinExpect();
628 
629  /**
630  * Remove unnecessary member qualification
631  */
632  void removeUnnecessaryQualification();
633 
634  /**
635  * unnecessary member qualification error
636  */
637  void unnecessaryQualificationError(const Token *tok, const std::string &qualification) const;
638 
639  /**
640  * Add std:: in front of std classes, when using namespace std; was given
641  */
642  void simplifyNamespaceStd();
643 
644  /**
645  * Remove Microsoft MFC 'DECLARE_MESSAGE_MAP()'
646  */
647  void simplifyMicrosoftMFC();
648 
649  /**
650  * Convert Microsoft memory functions
651  * CopyMemory(dst, src, len) -> memcpy(dst, src, len)
652  * FillMemory(dst, len, val) -> memset(dst, val, len)
653  * MoveMemory(dst, src, len) -> memmove(dst, src, len)
654  * ZeroMemory(dst, len) -> memset(dst, 0, len)
655  */
656  void simplifyMicrosoftMemoryFunctions();
657 
658  /**
659  * Convert Microsoft string functions
660  * _tcscpy -> strcpy
661  */
662  void simplifyMicrosoftStringFunctions();
663 
664  /**
665  * Remove Borland code
666  */
667  void simplifyBorland();
668 
669  /**
670  * Remove Qt signals and slots
671  */
672  void simplifyQtSignalsSlots();
673 
674  /**
675  * Collapse operator name tokens into single token
676  * operator = => operator=
677  */
678  void simplifyOperatorName();
679 
680  /**
681  * check for duplicate enum definition
682  */
683  bool duplicateDefinition(Token **tokPtr, const Token *name) const;
684 
685  /**
686  * report error message
687  */
688  void reportError(const Token* tok, const Severity::SeverityType severity, const std::string& id, const std::string& msg, bool inconclusive = false) const;
689  void reportError(const std::list<const Token*>& callstack, Severity::SeverityType severity, const std::string& id, const std::string& msg, bool inconclusive = false) const;
690 
691  /**
692  * duplicate enum definition error
693  */
694  void duplicateEnumError(const Token *tok1, const Token *tok2, const std::string & type) const;
695 
696  bool duplicateTypedef(Token **tokPtr, const Token *name, const Token *typeDef, bool undefinedStruct) const;
697  void duplicateTypedefError(const Token *tok1, const Token *tok2, const std::string & type) const;
698 
699  /**
700  * Report error - duplicate declarations
701  */
702  void duplicateDeclarationError(const Token *tok1, const Token *tok2, const std::string &type) const;
703 
704  void unsupportedTypedef(const Token *tok) const;
705 
706  /** Were there templates in the code? Returns the _codeWithTemplates flag. */
707  bool codeWithTemplates() const {
708  return _codeWithTemplates;
709  }
710 
711  void setSettings(const Settings *settings) {
712  _settings = settings;          // remember the settings pointer (the object is not copied)
713  list.setSettings(settings);    // hand the same pointer to the token list
714  }
715 
717  return _symbolDatabase;
718  }
719  void createSymbolDatabase();
720  void deleteSymbolDatabase();
721 
722  void printDebugOutput() const;
723 
724  Token *deleteInvalidTypedef(Token *typeDef);
725 
726  /**
727  * Get variable count.
728  * @return number of variables (the current value of _varId)
729  */
730  unsigned int varIdCount() const {
731  return _varId;
732  }
733 
734  /**
735  * Simplify e.g. 'return(strncat(temp,"a",1));' into
736  * strncat(temp,"a",1); return temp;
737  */
738  void simplifyReturnStrncat();
739 
740  /**
741  * Output list of unknown types.
742  */
743  void printUnknownTypes() const;
744 
745 
746  /**
747  * Token list: stores all tokens.
748  */
750  // Implement tokens() as a wrapper for convenience when using the TokenList; returns list.front()
751  const Token* tokens() const {
752  return list.front();
753  }
754 
755  /**
756  * Copy tokens.
757  * @param dest destination token where copied tokens will be inserted after
758  * @param first first token to copy
759  * @param last last token to copy
760  * @param one_line true=>copy all tokens to the same line as dest. false=>copy all tokens to dest while keeping the 'line breaks'
761  * @return new location of last token copied
762  */
763  static Token *copyTokens(Token *dest, const Token *first, const Token *last, bool one_line = true);
764 
765  /**
766  * Helper function to check whether a number is zero (0 or 0.0 or 0E+0).
767  * @param s the string to check
768  * @return true if it is zero, false otherwise.
769  */
770  static bool isZeroNumber(const std::string &s);
771 
772  /**
773  * Helper function to check whether a number is one (1 or 0.1E+1 or 1E+0).
774  * @param s the string to check
775  * @return true if it is one, false otherwise.
776  */
777  static bool isOneNumber(const std::string &s);
778 
779  /**
780  * Helper function to check whether a number is two (2 or 0.2E+1 or 2E+0).
781  * @param s the string to check
782  * @return true if it is two, false otherwise.
783  */
784  static bool isTwoNumber(const std::string &s);
785 
786 private:
787  /** Disable copy constructor, no implementation */
788  Tokenizer(const Tokenizer &);
789 
790  /** Disable assignment operator, no implementation */
791  Tokenizer &operator=(const Tokenizer &);
792 
793  /** settings */
795 
796  /** errorlogger */
798 
799  /** Symbol database that all checks etc can use */
801 
802  /** E.g. "A" for code where "#ifdef A" is true. This is used to
803  print additional information in error situations. */
804  std::string _configuration;
805 
806  /** sizeof information for known types */
807  std::map<std::string, unsigned int> _typeSize;
808 
809  /** variable count */
810  unsigned int _varId;
811 
812  /**
813  * Were there any templates? Templates that are "unused" are
814  * removed from the token list
815  */
817 
818  /**
819  * TimerResults
820  */
822 #ifdef MAXTIME
823  /** Tokenizer maxtime */
824  std::time_t maxtime;
825 #endif
826 };
827 
828 /// @}
829 
830 //---------------------------------------------------------------------------
831 #endif // tokenizeH