|
Cppcheck
|
00001 /* 00002 * Cppcheck - A tool for static C/C++ code analysis 00003 * Copyright (C) 2007-2013 Daniel Marjamäki and Cppcheck team. 00004 * 00005 * This program is free software: you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation, either version 3 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00017 */ 00018 00019 #ifndef TokenH 00020 #define TokenH 00021 00022 #include <string> 00023 #include <vector> 00024 #include <ostream> 00025 #include "config.h" 00026 00027 class Scope; 00028 class Function; 00029 class Variable; 00030 00031 /// @addtogroup Core 00032 /// @{ 00033 00034 /** 00035 * @brief The token list that the TokenList generates is a linked-list of this class. 00036 * 00037 * Tokens are stored as strings. The "if", "while", etc are stored in plain text. 00038 * The reason the Token class is needed (instead of using the string class) is that some extra functionality is also needed for tokens: 00039 * - location of the token is stored (linenr, fileIndex) 00040 * - functions for classifying the token (isName, isNumber, isBoolean, isStandardType) 00041 * 00042 * The Token class also has other functions for management of token list, matching tokens, etc. 00043 */ 00044 class CPPCHECKLIB Token { 00045 private: 00046 Token **tokensBack; 00047 00048 // Not implemented.. 00049 Token(); 00050 Token(const Token &); 00051 Token operator=(const Token &); 00052 00053 public: 00054 enum Type { 00055 eVariable, eType, eFunction, eName, // Names: Variable (varId), Type (typeId, later), Function (FuncId, later), Name (unknown identifier) 00056 eNumber, eString, eChar, eBoolean, eLiteral, // Literals: Number, String, Character, User defined literal (C++11) 00057 eArithmeticalOp, eComparisonOp, eAssignmentOp, eLogicalOp, eBitOp, eIncDecOp, eExtendedOp, // Operators: Arithmetical, Comparison, Assignment, Logical, Bitwise, ++/--, Extended 00058 eBracket, // {, }, <, >: < and > only if link() is set. Otherwise they are comparison operators. 00059 eOther, 00060 eNone 00061 }; 00062 00063 explicit Token(Token **tokensBack); 00064 ~Token(); 00065 00066 void str(const std::string &s); 00067 00068 /** 00069 * Concatenate two (quoted) strings. Automatically cuts of the last/first character. 00070 * Example: "hello ""world" -> "hello world". Used by the token simplifier. 00071 */ 00072 void concatStr(std::string const& b); 00073 00074 const std::string &str() const { 00075 return _str; 00076 } 00077 00078 /** 00079 * Unlink and delete the next 'index' tokens. 00080 */ 00081 void deleteNext(unsigned long index = 1); 00082 00083 /** 00084 * Returns token in given index, related to this token. 00085 * For example index 1 would return next token, and 2 00086 * would return next from that one. 00087 */ 00088 const Token *tokAt(int index) const; 00089 Token *tokAt(int index) { 00090 return const_cast<Token *>(static_cast<const Token *>(this)->tokAt(index)); 00091 } 00092 00093 /** 00094 * Returns the link to the token in given index, related to this token. 00095 * For example index 1 would return the link to next token. 00096 */ 00097 const Token *linkAt(int index) const; 00098 Token *linkAt(int index) { 00099 return const_cast<Token *>(static_cast<const Token *>(this)->linkAt(index)); 00100 } 00101 00102 const std::string &strAt(int index) const; 00103 00104 /** 00105 * Match given token (or list of tokens) to a pattern list. 00106 * 00107 * Possible patterns 00108 * "someRandomText" If token contains "someRandomText". 00109 * @note Use Match() if you want to use flags in patterns 00110 * 00111 * The patterns can be also combined to compare to multiple tokens at once 00112 * by separating tokens with a space, e.g. 00113 * ") void {" will return true if first token is ')' next token 00114 * is "void" and token after that is '{'. If even one of the tokens does 00115 * not match its pattern, false is returned. 00116 * 00117 * @param tok List of tokens to be compared to the pattern 00118 * @param pattern The pattern against which the tokens are compared, 00119 * e.g. "const" or ") void {". 00120 * @return true if given token matches with given pattern 00121 * false if given token does not match with given pattern 00122 */ 00123 static bool simpleMatch(const Token *tok, const char pattern[]); 00124 00125 /** 00126 * Match given token (or list of tokens) to a pattern list. 00127 * 00128 * Possible patterns 00129 * - "%any%" any token 00130 * - "%var%" any token which is a name or type e.g. "hello" or "int" 00131 * - "%type%" Anything that can be a variable type, e.g. "int", but not "delete". 00132 * - "%num%" Any numeric token, e.g. "23" 00133 * - "%bool%" true or false 00134 * - "%char%" Any token enclosed in '-character. 00135 * - "%comp%" Any token such that isComparisonOp() returns true. 00136 * - "%str%" Any token starting with "-character (C-string). 00137 * - "%varid%" Match with parameter varid 00138 * - "%op%" Any token such that isOp() returns true. 00139 * - "%cop%" Any token such that isConstOp() returns true. 00140 * - "%or%" A bitwise-or operator '|' 00141 * - "%oror%" A logical-or operator '||' 00142 * - "[abc]" Any of the characters 'a' or 'b' or 'c' 00143 * - "int|void|char" Any of the strings, int, void or char 00144 * - "int|void|char|" Any of the strings, int, void or char or empty string 00145 * - "!!else" No tokens or any token that is not "else". 00146 * - "someRandomText" If token contains "someRandomText". 00147 * 00148 * multi-compare patterns such as "int|void|char" can contain %or%, %oror% and %op% 00149 * but it is not recommended to put such an %cmd% as the first pattern. 00150 * 00151 * It's possible to use multi-compare patterns with all the other %cmds%, 00152 * except for %varid%, and normal names, but the %cmds% should be put as 00153 * the first patterns in the list, then the normal names. 00154 * For example: "%var%|%num%|)" means yes to a variable, a number or ')'. 00155 * 00156 * @todo Make it possible to use the %cmds% and the normal names in the 00157 * multicompare list without an order. 00158 * 00159 * The patterns can be also combined to compare to multiple tokens at once 00160 * by separating tokens with a space, e.g. 00161 * ") const|void {" will return true if first token is ')' next token is either 00162 * "const" or "void" and token after that is '{'. If even one of the tokens does not 00163 * match its pattern, false is returned. 00164 * 00165 * @param tok List of tokens to be compared to the pattern 00166 * @param pattern The pattern against which the tokens are compared, 00167 * e.g. "const" or ") const|volatile| {". 00168 * @param varid if %varid% is given in the pattern the Token::varId 00169 * will be matched against this argument 00170 * @return true if given token matches with given pattern 00171 * false if given token does not match with given pattern 00172 */ 00173 static bool Match(const Token *tok, const char pattern[], unsigned int varid = 0); 00174 00175 /** 00176 * Return length of C-string. 00177 * 00178 * Should be called for %str% tokens only. 00179 * 00180 * @param tok token with C-string 00181 **/ 00182 static std::size_t getStrLength(const Token *tok); 00183 00184 /** 00185 * Return char of C-string at index (possible escaped "\\n") 00186 * 00187 * Should be called for %str% tokens only. 00188 * 00189 * @param tok token with C-string 00190 * @param index position of character 00191 **/ 00192 static std::string getCharAt(const Token *tok, std::size_t index); 00193 00194 Type type() const { 00195 return _type; 00196 } 00197 void type(Type t) { 00198 _type = t; 00199 } 00200 bool isName() const { 00201 return _type == eName || _type == eType || _type == eVariable || _type == eFunction || 00202 _type == eBoolean; // TODO: "true"/"false" aren't really a name... 00203 } 00204 bool isUpperCaseName() const; 00205 bool isLiteral() const { 00206 return _type == eNumber || _type == eString || _type == eChar || 00207 _type == eBoolean || _type == eLiteral; 00208 } 00209 bool isNumber() const { 00210 return _type == eNumber; 00211 } 00212 bool isOp() const { 00213 return (isConstOp() || 00214 isAssignmentOp() || 00215 _type == eIncDecOp); 00216 } 00217 bool isConstOp() const { 00218 return (isArithmeticalOp() || 00219 _type == eLogicalOp || 00220 _type == eComparisonOp || 00221 _type == eBitOp); 00222 } 00223 bool isExtendedOp() const { 00224 return isConstOp() || 00225 _type == eExtendedOp; 00226 } 00227 bool isArithmeticalOp() const { 00228 return _type == eArithmeticalOp; 00229 } 00230 bool isComparisonOp() const { 00231 return _type == eComparisonOp; 00232 } 00233 bool isAssignmentOp() const { 00234 return _type == eAssignmentOp; 00235 } 00236 bool isBoolean() const { 00237 return _type == eBoolean; 00238 } 00239 00240 bool isUnsigned() const { 00241 return _isUnsigned; 00242 } 00243 void isUnsigned(bool sign) { 00244 _isUnsigned = sign; 00245 } 00246 bool isSigned() const { 00247 return _isSigned; 00248 } 00249 void isSigned(bool sign) { 00250 _isSigned = sign; 00251 } 00252 bool isPointerCompare() const { 00253 return _isPointerCompare; 00254 } 00255 void isPointerCompare(bool b) { 00256 _isPointerCompare = b; 00257 } 00258 bool isLong() const { 00259 return _isLong; 00260 } 00261 void isLong(bool size) { 00262 _isLong = size; 00263 } 00264 bool isUnused() const { 00265 return _isUnused; 00266 } 00267 void isUnused(bool used) { 00268 _isUnused = used; 00269 } 00270 bool isStandardType() const { 00271 return _isStandardType; 00272 } 00273 bool isExpandedMacro() const { 00274 return _isExpandedMacro; 00275 } 00276 void setExpandedMacro(bool m) { 00277 _isExpandedMacro = m; 00278 } 00279 00280 static const Token *findsimplematch(const Token *tok, const char pattern[]); 00281 static const Token *findsimplematch(const Token *tok, const char pattern[], const Token *end); 00282 static const Token *findmatch(const Token *tok, const char pattern[], unsigned int varId = 0); 00283 static const Token *findmatch(const Token *tok, const char pattern[], const Token *end, unsigned int varId = 0); 00284 static Token *findsimplematch(Token *tok, const char pattern[]) { 00285 return const_cast<Token *>(findsimplematch(static_cast<const Token *>(tok), pattern)); 00286 } 00287 static Token *findsimplematch(Token *tok, const char pattern[], const Token *end) { 00288 return const_cast<Token *>(findsimplematch(static_cast<const Token *>(tok), pattern, end)); 00289 } 00290 static Token *findmatch(Token *tok, const char pattern[], unsigned int varId = 0) { 00291 return const_cast<Token *>(findmatch(static_cast<const Token *>(tok), pattern, varId)); 00292 } 00293 static Token *findmatch(Token *tok, const char pattern[], const Token *end, unsigned int varId = 0) { 00294 return const_cast<Token *>(findmatch(static_cast<const Token *>(tok), pattern, end, varId)); 00295 } 00296 00297 /** 00298 * Needle is build from multiple alternatives. If one of 00299 * them is equal to haystack, return value is 1. If there 00300 * are no matches, but one alternative to needle is empty 00301 * string, return value is 0. If needle was not found, return 00302 * value is -1. 00303 * 00304 * @param tok Current token 00305 * @param haystack e.g. "one|two" or "|one|two" 00306 * @param needle e.g. "one", "two" or "invalid" 00307 * @return 1 if needle is found from the haystack 00308 * 0 if needle was empty string 00309 * -1 if needle was not found 00310 */ 00311 static int multiCompare(const Token *tok, const char *haystack, const char *needle); 00312 00313 unsigned int linenr() const { 00314 return _linenr; 00315 } 00316 void linenr(unsigned int lineNumber) { 00317 _linenr = lineNumber; 00318 } 00319 00320 unsigned int fileIndex() const { 00321 return _fileIndex; 00322 } 00323 void fileIndex(unsigned int indexOfFile) { 00324 _fileIndex = indexOfFile; 00325 } 00326 00327 Token *next() const { 00328 return _next; 00329 } 00330 00331 00332 /** 00333 * Delete tokens between begin and end. E.g. if begin = 1 00334 * and end = 5, tokens 2,3 and 4 would be erased. 00335 * 00336 * @param begin Tokens after this will be erased. 00337 * @param end Tokens before this will be erased. 00338 */ 00339 static void eraseTokens(Token *begin, const Token *end); 00340 00341 /** 00342 * Insert new token after this token. This function will handle 00343 * relations between next and previous token also. 00344 * @param tokenStr String for the new token. 00345 * @param prepend Insert the new token before this token when it's not 00346 * the first one on the tokens list. 00347 */ 00348 void insertToken(const std::string &tokenStr, bool prepend=false); 00349 00350 Token *previous() const { 00351 return _previous; 00352 } 00353 00354 00355 unsigned int varId() const { 00356 return _varId; 00357 } 00358 void varId(unsigned int id) { 00359 _varId = id; 00360 if (id != 0) 00361 _type = eVariable; 00362 else 00363 update_property_info(); 00364 } 00365 00366 /** 00367 * For debugging purposes, prints token and all tokens 00368 * followed by it. 00369 * @param title Title for the printout or use default parameter or 0 00370 * for no title. 00371 */ 00372 void printOut(const char *title = 0) const; 00373 00374 /** 00375 * For debugging purposes, prints token and all tokens 00376 * followed by it. 00377 * @param title Title for the printout or use default parameter or 0 00378 * for no title. 00379 * @param fileNames Prints out file name instead of file index. 00380 * File index should match the index of the string in this vector. 00381 */ 00382 void printOut(const char *title, const std::vector<std::string> &fileNames) const; 00383 00384 /** 00385 * Replace token replaceThis with tokens between start and end, 00386 * including start and end. The replaceThis token is deleted. 00387 * @param replaceThis This token will be deleted. 00388 * @param start This will be in the place of replaceThis 00389 * @param end This is also in the place of replaceThis 00390 */ 00391 static void replace(Token *replaceThis, Token *start, Token *end); 00392 00393 /** 00394 * Stringify a token 00395 * @param os The result is shifted into that output stream 00396 * @param varid Print varids. (Style: "varname@id") 00397 * @param attributes Print attributes of tokens like "unsigned" in front of it. 00398 */ 00399 void stringify(std::ostream& os, bool varid, bool attributes) const; 00400 00401 /** 00402 * Stringify a list of token, from current instance on. 00403 * @param varid Print varids. (Style: "varname@id") 00404 * @param attributes Print attributes of tokens like "unsigned" in front of it. 00405 * @param linenumbers Print line number in front of each line 00406 * @param linebreaks Insert \n into string when line number changes 00407 * @param files print Files as numbers or as names (if fileNames is given) 00408 * @param fileNames Vector of filenames. Used (if given) to print filenames as strings instead of numbers. 00409 * @param end Stringification ends before this token is reached. 0 to stringify until end of list. 00410 * @return Stringified token list as a string 00411 */ 00412 std::string stringifyList(bool varid, bool attributes, bool linenumbers, bool linebreaks, bool files, const std::vector<std::string>* fileNames = 0, const Token* end = 0) const; 00413 std::string stringifyList(const Token* end, bool attributes = true) const; 00414 std::string stringifyList(bool varid = false) const; 00415 00416 /** 00417 * Remove the contents for this token from the token list. 00418 * 00419 * The contents are replaced with the contents of the next token and 00420 * the next token is unlinked and deleted from the token list. 00421 * 00422 * So this token will still be valid after the 'deleteThis()'. 00423 */ 00424 void deleteThis(); 00425 00426 /** 00427 * Create link to given token 00428 * @param linkToToken The token where this token should link 00429 * to. 00430 */ 00431 void link(Token *linkToToken) { 00432 _link = linkToToken; 00433 if (_str == "<" || _str == ">") 00434 update_property_info(); 00435 } 00436 00437 /** 00438 * Return token where this token links to. 00439 * Supported links are: 00440 * "{" <-> "}" 00441 * "(" <-> ")" 00442 * "[" <-> "]" 00443 * 00444 * @return The token where this token links to. 00445 */ 00446 Token *link() const { 00447 return _link; 00448 } 00449 00450 /** 00451 * Associate this token with given scope 00452 * @param s Scope to be associated 00453 */ 00454 void scope(const Scope *s) { 00455 _scope = s; 00456 } 00457 00458 /** 00459 * Returns a pointer to the scope containing this token. 00460 */ 00461 const Scope *scope() const { 00462 return _scope; 00463 } 00464 00465 /** 00466 * Associate this token with given function 00467 * @param f Function to be associated 00468 */ 00469 void function(const Function *f) { 00470 _function = f; 00471 if (f) 00472 _type = eFunction; 00473 else if (_type == eFunction) 00474 _type = eName; 00475 } 00476 00477 /** 00478 * Returns a pointer to the Function associated with this token. 00479 */ 00480 const Function *function() const { 00481 return _type == eFunction ? _function : 0; 00482 } 00483 00484 /** 00485 * Associate this token with given variable 00486 * @param v Variable to be associated 00487 */ 00488 void variable(const Variable *v) { 00489 _variable = v; 00490 if (v || _varId) 00491 _type = eVariable; 00492 else if (_type == eVariable) 00493 _type = eName; 00494 } 00495 00496 /** 00497 * Returns a pointer to the variable associated with this token. 00498 */ 00499 const Variable *variable() const { 00500 return _type == eVariable ? _variable : 0; 00501 } 00502 00503 /** 00504 * Links two elements against each other. 00505 **/ 00506 static void createMutualLinks(Token *begin, Token *end); 00507 00508 /** 00509 * This can be called only for tokens that are strings, else 00510 * the assert() is called. If Token is e.g. '"hello"', this will return 00511 * 'hello' (removing the double quotes). 00512 * @return String value 00513 */ 00514 std::string strValue() const; 00515 00516 /** 00517 * Move srcStart and srcEnd tokens and all tokens between them 00518 * into new a location. Only links between tokens are changed. 00519 * @param srcStart This is the first token to be moved 00520 * @param srcEnd The last token to be moved 00521 * @param newLocation srcStart will be placed after this token. 00522 */ 00523 static void move(Token *srcStart, Token *srcEnd, Token *newLocation); 00524 00525 /** Get progressValue */ 00526 unsigned int progressValue() const { 00527 return _progressValue; 00528 } 00529 00530 /** Calculate progress values for all tokens */ 00531 void assignProgressValues() { 00532 unsigned int total_count = 0; 00533 for (Token *tok = this; tok; tok = tok->next()) 00534 ++total_count; 00535 unsigned int count = 0; 00536 for (Token *tok = this; tok; tok = tok->next()) 00537 tok->_progressValue = count++ * 100 / total_count; 00538 } 00539 00540 /** 00541 * Returns the first token of the next argument. Does only work on argument 00542 * lists. Returns 0, if there is no next argument 00543 */ 00544 Token* nextArgument() const; 00545 00546 /** 00547 * Returns the closing bracket of opening '<'. Should only be used if link() 00548 * is unavailable. 00549 * @param closing The closing token is stored in that parameter 00550 * @return success 00551 */ 00552 bool findClosingBracket(const Token*& closing) const; 00553 bool findClosingBracket(Token*& closing) const { 00554 const Token* tok; 00555 bool retVal = findClosingBracket(tok); 00556 closing = const_cast<Token*>(tok); 00557 return retVal; 00558 } 00559 00560 private: 00561 void next(Token *nextToken) { 00562 _next = nextToken; 00563 } 00564 void previous(Token *previousToken) { 00565 _previous = previousToken; 00566 } 00567 00568 /** 00569 * Works almost like strcmp() except returns only true or false and 00570 * if str has empty space ' ' character, that character is handled 00571 * as if it were '\\0' 00572 */ 00573 static bool firstWordEquals(const char *str, const char *word); 00574 00575 /** 00576 * Works almost like strchr() except 00577 * if str has empty space ' ' character, that character is handled 00578 * as if it were '\\0' 00579 */ 00580 static const char *chrInFirstWord(const char *str, char c); 00581 00582 /** 00583 * Works almost like strlen() except 00584 * if str has empty space ' ' character, that character is handled 00585 * as if it were '\\0' 00586 */ 00587 static int firstWordLen(const char *str); 00588 00589 00590 Token *_next; 00591 Token *_previous; 00592 Token *_link; 00593 00594 // symbol database information 00595 const Scope *_scope; 00596 union { 00597 const Function *_function; 00598 const Variable *_variable; 00599 }; 00600 00601 std::string _str; 00602 unsigned int _varId; 00603 unsigned int _fileIndex; 00604 unsigned int _linenr; 00605 00606 /** 00607 * A value from 0-100 that provides a rough idea about where in the token 00608 * list this token is located. 00609 */ 00610 unsigned int _progressValue; 00611 00612 Type _type; 00613 bool _isUnsigned; 00614 bool _isSigned; 00615 bool _isPointerCompare; 00616 bool _isLong; 00617 bool _isUnused; 00618 bool _isStandardType; 00619 bool _isExpandedMacro; 00620 00621 /** Updates internal property cache like _isName or _isBoolean. 00622 Called after any _str() modification. */ 00623 void update_property_info(); 00624 00625 /** Update internal property cache about isStandardType() */ 00626 void update_property_isStandardType(); 00627 00628 // AST.. 00629 Token *_astOperand1; 00630 Token *_astOperand2; 00631 Token *_astParent; 00632 public: 00633 void astOperand1(Token *tok); 00634 void astOperand2(Token *tok); 00635 void astFunctionCall(); 00636 void astHandleParentheses(); 00637 00638 const Token * astOperand1() const { 00639 return _astOperand1; 00640 } 00641 const Token * astOperand2() const { 00642 return _astOperand2; 00643 } 00644 const Token *astTop() const { 00645 const Token *ret = this; 00646 while (ret->_astParent) 00647 ret = ret->_astParent; 00648 return ret; 00649 } 00650 00651 std::string astString() const { 00652 std::string ret; 00653 if (_astOperand1) 00654 ret = _astOperand1->astString(); 00655 if (_astOperand2) 00656 ret += _astOperand2->astString(); 00657 return ret+_str; 00658 } 00659 }; 00660 00661 /// @} 00662 00663 #endif // TokenH
1.7.6.1