Cppcheck
token.h
Go to the documentation of this file.
00001 /*
00002  * Cppcheck - A tool for static C/C++ code analysis
00003  * Copyright (C) 2007-2013 Daniel Marjamäki and Cppcheck team.
00004  *
00005  * This program is free software: you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation, either version 3 of the License, or
00008  * (at your option) any later version.
00009  *
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  *
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
00017  */
00018 
00019 #ifndef TokenH
00020 #define TokenH
00021 
00022 #include <string>
00023 #include <vector>
00024 #include <ostream>
00025 #include "config.h"
00026 
00027 class Scope;
00028 class Function;
00029 class Variable;
00030 
00031 /// @addtogroup Core
00032 /// @{
00033 
00034 /**
00035  * @brief The token list that the TokenList generates is a linked-list of this class.
00036  *
00037  * Tokens are stored as strings. The "if", "while", etc are stored in plain text.
00038  * The reason the Token class is needed (instead of using the string class) is that some extra functionality is also needed for tokens:
00039  *  - location of the token is stored (linenr, fileIndex)
00040  *  - functions for classifying the token (isName, isNumber, isBoolean, isStandardType)
00041  *
00042  * The Token class also has other functions for management of token list, matching tokens, etc.
00043  */
00044 class CPPCHECKLIB Token {
00045 private:
00046     Token **tokensBack;
00047 
00048     // Not implemented..
00049     Token();
00050     Token(const Token &);
00051     Token operator=(const Token &);
00052 
00053 public:
00054     enum Type {
00055         eVariable, eType, eFunction, eName, // Names: Variable (varId), Type (typeId, later), Function (FuncId, later), Name (unknown identifier)
00056         eNumber, eString, eChar, eBoolean, eLiteral, // Literals: Number, String, Character, User defined literal (C++11)
00057         eArithmeticalOp, eComparisonOp, eAssignmentOp, eLogicalOp, eBitOp, eIncDecOp, eExtendedOp, // Operators: Arithmetical, Comparison, Assignment, Logical, Bitwise, ++/--, Extended
00058         eBracket, // {, }, <, >: < and > only if link() is set. Otherwise they are comparison operators.
00059         eOther,
00060         eNone
00061     };
00062 
00063     explicit Token(Token **tokensBack);
00064     ~Token();
00065 
00066     void str(const std::string &s);
00067 
00068     /**
00069      * Concatenate two (quoted) strings. Automatically cuts of the last/first character.
00070      * Example: "hello ""world" -> "hello world". Used by the token simplifier.
00071      */
00072     void concatStr(std::string const& b);
00073 
00074     const std::string &str() const {
00075         return _str;
00076     }
00077 
00078     /**
00079      * Unlink and delete the next 'index' tokens.
00080      */
00081     void deleteNext(unsigned long index = 1);
00082 
00083     /**
00084      * Returns token in given index, related to this token.
00085      * For example index 1 would return next token, and 2
00086      * would return next from that one.
00087      */
00088     const Token *tokAt(int index) const;
00089     Token *tokAt(int index) {
00090         return const_cast<Token *>(static_cast<const Token *>(this)->tokAt(index));
00091     }
00092 
00093     /**
00094      * Returns the link to the token in given index, related to this token.
00095      * For example index 1 would return the link to next token.
00096      */
00097     const Token *linkAt(int index) const;
00098     Token *linkAt(int index) {
00099         return const_cast<Token *>(static_cast<const Token *>(this)->linkAt(index));
00100     }
00101 
00102     const std::string &strAt(int index) const;
00103 
00104     /**
00105      * Match given token (or list of tokens) to a pattern list.
00106      *
00107      * Possible patterns
00108      * "someRandomText" If token contains "someRandomText".
00109      * @note Use Match() if you want to use flags in patterns
00110      *
00111      * The patterns can be also combined to compare to multiple tokens at once
00112      * by separating tokens with a space, e.g.
00113      * ") void {" will return true if first token is ')' next token
00114      * is "void" and token after that is '{'. If even one of the tokens does
00115      * not match its pattern, false is returned.
00116      *
00117      * @param tok List of tokens to be compared to the pattern
00118      * @param pattern The pattern against which the tokens are compared,
00119      * e.g. "const" or ") void {".
00120      * @return true if given token matches with given pattern
00121      *         false if given token does not match with given pattern
00122      */
00123     static bool simpleMatch(const Token *tok, const char pattern[]);
00124 
00125     /**
00126      * Match given token (or list of tokens) to a pattern list.
00127      *
00128      * Possible patterns
00129      * - "%any%" any token
00130      * - "%var%" any token which is a name or type e.g. "hello" or "int"
00131      * - "%type%" Anything that can be a variable type, e.g. "int", but not "delete".
00132      * - "%num%" Any numeric token, e.g. "23"
00133      * - "%bool%" true or false
00134      * - "%char%" Any token enclosed in &apos;-character.
00135      * - "%comp%" Any token such that isComparisonOp() returns true.
00136      * - "%str%" Any token starting with &quot;-character (C-string).
00137      * - "%varid%" Match with parameter varid
00138      * - "%op%" Any token such that isOp() returns true.
00139      * - "%cop%" Any token such that isConstOp() returns true.
00140      * - "%or%" A bitwise-or operator '|'
00141      * - "%oror%" A logical-or operator '||'
00142      * - "[abc]" Any of the characters 'a' or 'b' or 'c'
00143      * - "int|void|char" Any of the strings, int, void or char
00144      * - "int|void|char|" Any of the strings, int, void or char or empty string
00145      * - "!!else" No tokens or any token that is not "else".
00146      * - "someRandomText" If token contains "someRandomText".
00147      *
00148      * multi-compare patterns such as "int|void|char" can contain %or%, %oror% and %op%
00149      * but it is not recommended to put such an %cmd% as the first pattern.
00150      *
00151      * It's possible to use multi-compare patterns with all the other %cmds%,
00152      * except for %varid%, and normal names, but the %cmds% should be put as
00153      * the first patterns in the list, then the normal names.
00154      * For example: "%var%|%num%|)" means yes to a variable, a number or ')'.
00155      *
00156      * @todo Make it possible to use the %cmds% and the normal names in the
00157      * multicompare list without an order.
00158      *
00159      * The patterns can be also combined to compare to multiple tokens at once
00160      * by separating tokens with a space, e.g.
00161      * ") const|void {" will return true if first token is ')' next token is either
00162      * "const" or "void" and token after that is '{'. If even one of the tokens does not
00163      * match its pattern, false is returned.
00164      *
00165      * @param tok List of tokens to be compared to the pattern
00166      * @param pattern The pattern against which the tokens are compared,
00167      * e.g. "const" or ") const|volatile| {".
00168      * @param varid if %varid% is given in the pattern the Token::varId
00169      * will be matched against this argument
00170      * @return true if given token matches with given pattern
00171      *         false if given token does not match with given pattern
00172      */
00173     static bool Match(const Token *tok, const char pattern[], unsigned int varid = 0);
00174 
00175     /**
00176      * Return length of C-string.
00177      *
00178      * Should be called for %str% tokens only.
00179      *
00180      * @param tok token with C-string
00181      **/
00182     static std::size_t getStrLength(const Token *tok);
00183 
00184     /**
00185      * Return char of C-string at index (possible escaped "\\n")
00186      *
00187      * Should be called for %str% tokens only.
00188      *
00189      * @param tok token with C-string
00190      * @param index position of character
00191      **/
00192     static std::string getCharAt(const Token *tok, std::size_t index);
00193 
00194     Type type() const {
00195         return _type;
00196     }
00197     void type(Type t) {
00198         _type = t;
00199     }
00200     bool isName() const {
00201         return _type == eName || _type == eType || _type == eVariable || _type == eFunction ||
00202                _type == eBoolean; // TODO: "true"/"false" aren't really a name...
00203     }
00204     bool isUpperCaseName() const;
00205     bool isLiteral() const {
00206         return _type == eNumber || _type == eString || _type == eChar ||
00207                _type == eBoolean || _type == eLiteral;
00208     }
00209     bool isNumber() const {
00210         return _type == eNumber;
00211     }
00212     bool isOp() const {
00213         return (isConstOp() ||
00214                 isAssignmentOp() ||
00215                 _type == eIncDecOp);
00216     }
00217     bool isConstOp() const {
00218         return (isArithmeticalOp() ||
00219                 _type == eLogicalOp ||
00220                 _type == eComparisonOp ||
00221                 _type == eBitOp);
00222     }
00223     bool isExtendedOp() const {
00224         return isConstOp() ||
00225                _type == eExtendedOp;
00226     }
00227     bool isArithmeticalOp() const {
00228         return _type == eArithmeticalOp;
00229     }
00230     bool isComparisonOp() const {
00231         return _type == eComparisonOp;
00232     }
00233     bool isAssignmentOp() const {
00234         return _type == eAssignmentOp;
00235     }
00236     bool isBoolean() const {
00237         return _type == eBoolean;
00238     }
00239 
00240     bool isUnsigned() const {
00241         return _isUnsigned;
00242     }
00243     void isUnsigned(bool sign) {
00244         _isUnsigned = sign;
00245     }
00246     bool isSigned() const {
00247         return _isSigned;
00248     }
00249     void isSigned(bool sign) {
00250         _isSigned = sign;
00251     }
00252     bool isPointerCompare() const {
00253         return _isPointerCompare;
00254     }
00255     void isPointerCompare(bool b) {
00256         _isPointerCompare = b;
00257     }
00258     bool isLong() const {
00259         return _isLong;
00260     }
00261     void isLong(bool size) {
00262         _isLong = size;
00263     }
00264     bool isUnused() const {
00265         return _isUnused;
00266     }
00267     void isUnused(bool used) {
00268         _isUnused = used;
00269     }
00270     bool isStandardType() const {
00271         return _isStandardType;
00272     }
00273     bool isExpandedMacro() const {
00274         return _isExpandedMacro;
00275     }
00276     void setExpandedMacro(bool m) {
00277         _isExpandedMacro = m;
00278     }
00279 
00280     static const Token *findsimplematch(const Token *tok, const char pattern[]);
00281     static const Token *findsimplematch(const Token *tok, const char pattern[], const Token *end);
00282     static const Token *findmatch(const Token *tok, const char pattern[], unsigned int varId = 0);
00283     static const Token *findmatch(const Token *tok, const char pattern[], const Token *end, unsigned int varId = 0);
00284     static Token *findsimplematch(Token *tok, const char pattern[]) {
00285         return const_cast<Token *>(findsimplematch(static_cast<const Token *>(tok), pattern));
00286     }
00287     static Token *findsimplematch(Token *tok, const char pattern[], const Token *end) {
00288         return const_cast<Token *>(findsimplematch(static_cast<const Token *>(tok), pattern, end));
00289     }
00290     static Token *findmatch(Token *tok, const char pattern[], unsigned int varId = 0) {
00291         return const_cast<Token *>(findmatch(static_cast<const Token *>(tok), pattern, varId));
00292     }
00293     static Token *findmatch(Token *tok, const char pattern[], const Token *end, unsigned int varId = 0) {
00294         return const_cast<Token *>(findmatch(static_cast<const Token *>(tok), pattern, end, varId));
00295     }
00296 
00297     /**
00298      * Needle is build from multiple alternatives. If one of
00299      * them is equal to haystack, return value is 1. If there
00300      * are no matches, but one alternative to needle is empty
00301      * string, return value is 0. If needle was not found, return
00302      * value is -1.
00303      *
00304      * @param tok Current token
00305      * @param haystack e.g. "one|two" or "|one|two"
00306      * @param needle e.g. "one", "two" or "invalid"
00307      * @return 1 if needle is found from the haystack
00308      *         0 if needle was empty string
00309      *        -1 if needle was not found
00310      */
00311     static int multiCompare(const Token *tok, const char *haystack, const char *needle);
00312 
00313     unsigned int linenr() const {
00314         return _linenr;
00315     }
00316     void linenr(unsigned int lineNumber) {
00317         _linenr = lineNumber;
00318     }
00319 
00320     unsigned int fileIndex() const {
00321         return _fileIndex;
00322     }
00323     void fileIndex(unsigned int indexOfFile) {
00324         _fileIndex = indexOfFile;
00325     }
00326 
00327     Token *next() const {
00328         return _next;
00329     }
00330 
00331 
00332     /**
00333      * Delete tokens between begin and end. E.g. if begin = 1
00334      * and end = 5, tokens 2,3 and 4 would be erased.
00335      *
00336      * @param begin Tokens after this will be erased.
00337      * @param end Tokens before this will be erased.
00338      */
00339     static void eraseTokens(Token *begin, const Token *end);
00340 
00341     /**
00342      * Insert new token after this token. This function will handle
00343      * relations between next and previous token also.
00344      * @param tokenStr String for the new token.
00345      * @param prepend Insert the new token before this token when it's not
00346      * the first one on the tokens list.
00347      */
00348     void insertToken(const std::string &tokenStr, bool prepend=false);
00349 
00350     Token *previous() const {
00351         return _previous;
00352     }
00353 
00354 
00355     unsigned int varId() const {
00356         return _varId;
00357     }
00358     void varId(unsigned int id) {
00359         _varId = id;
00360         if (id != 0)
00361             _type = eVariable;
00362         else
00363             update_property_info();
00364     }
00365 
00366     /**
00367      * For debugging purposes, prints token and all tokens
00368      * followed by it.
00369      * @param title Title for the printout or use default parameter or 0
00370      * for no title.
00371      */
00372     void printOut(const char *title = 0) const;
00373 
00374     /**
00375      * For debugging purposes, prints token and all tokens
00376      * followed by it.
00377      * @param title Title for the printout or use default parameter or 0
00378      * for no title.
00379      * @param fileNames Prints out file name instead of file index.
00380      * File index should match the index of the string in this vector.
00381      */
00382     void printOut(const char *title, const std::vector<std::string> &fileNames) const;
00383 
00384     /**
00385      * Replace token replaceThis with tokens between start and end,
00386      * including start and end. The replaceThis token is deleted.
00387      * @param replaceThis This token will be deleted.
00388      * @param start This will be in the place of replaceThis
00389      * @param end This is also in the place of replaceThis
00390      */
00391     static void replace(Token *replaceThis, Token *start, Token *end);
00392 
00393     /**
00394      * Stringify a token
00395      * @param os The result is shifted into that output stream
00396      * @param varid Print varids. (Style: "varname@id")
00397      * @param attributes Print attributes of tokens like "unsigned" in front of it.
00398      */
00399     void stringify(std::ostream& os, bool varid, bool attributes) const;
00400 
00401     /**
00402      * Stringify a list of token, from current instance on.
00403      * @param varid Print varids. (Style: "varname@id")
00404      * @param attributes Print attributes of tokens like "unsigned" in front of it.
00405      * @param linenumbers Print line number in front of each line
00406      * @param linebreaks Insert \n into string when line number changes
00407      * @param files print Files as numbers or as names (if fileNames is given)
00408      * @param fileNames Vector of filenames. Used (if given) to print filenames as strings instead of numbers.
00409      * @param end Stringification ends before this token is reached. 0 to stringify until end of list.
00410      * @return Stringified token list as a string
00411      */
00412     std::string stringifyList(bool varid, bool attributes, bool linenumbers, bool linebreaks, bool files, const std::vector<std::string>* fileNames = 0, const Token* end = 0) const;
00413     std::string stringifyList(const Token* end, bool attributes = true) const;
00414     std::string stringifyList(bool varid = false) const;
00415 
00416     /**
00417      * Remove the contents for this token from the token list.
00418      *
00419      * The contents are replaced with the contents of the next token and
00420      * the next token is unlinked and deleted from the token list.
00421      *
00422      * So this token will still be valid after the 'deleteThis()'.
00423      */
00424     void deleteThis();
00425 
00426     /**
00427      * Create link to given token
00428      * @param linkToToken The token where this token should link
00429      * to.
00430      */
00431     void link(Token *linkToToken) {
00432         _link = linkToToken;
00433         if (_str == "<" || _str == ">")
00434             update_property_info();
00435     }
00436 
00437     /**
00438      * Return token where this token links to.
00439      * Supported links are:
00440      * "{" <-> "}"
00441      * "(" <-> ")"
00442      * "[" <-> "]"
00443      *
00444      * @return The token where this token links to.
00445      */
00446     Token *link() const {
00447         return _link;
00448     }
00449 
00450     /**
00451      * Associate this token with given scope
00452      * @param s Scope to be associated
00453      */
00454     void scope(const Scope *s) {
00455         _scope = s;
00456     }
00457 
00458     /**
00459      * Returns a pointer to the scope containing this token.
00460      */
00461     const Scope *scope() const {
00462         return _scope;
00463     }
00464 
00465     /**
00466      * Associate this token with given function
00467      * @param f Function to be associated
00468      */
00469     void function(const Function *f) {
00470         _function = f;
00471         if (f)
00472             _type = eFunction;
00473         else if (_type == eFunction)
00474             _type = eName;
00475     }
00476 
00477     /**
00478      * Returns a pointer to the Function associated with this token.
00479      */
00480     const Function *function() const {
00481         return _type == eFunction ? _function : 0;
00482     }
00483 
00484     /**
00485      * Associate this token with given variable
00486      * @param v Variable to be associated
00487      */
00488     void variable(const Variable *v) {
00489         _variable = v;
00490         if (v || _varId)
00491             _type = eVariable;
00492         else if (_type == eVariable)
00493             _type = eName;
00494     }
00495 
00496     /**
00497      * Returns a pointer to the variable associated with this token.
00498      */
00499     const Variable *variable() const {
00500         return _type == eVariable ? _variable : 0;
00501     }
00502 
00503     /**
00504      * Links two elements against each other.
00505      **/
00506     static void createMutualLinks(Token *begin, Token *end);
00507 
00508     /**
00509      * This can be called only for tokens that are strings, else
00510      * the assert() is called. If Token is e.g. '"hello"', this will return
00511      * 'hello' (removing the double quotes).
00512      * @return String value
00513      */
00514     std::string strValue() const;
00515 
00516     /**
00517      * Move srcStart and srcEnd tokens and all tokens between them
00518      * into new a location. Only links between tokens are changed.
00519      * @param srcStart This is the first token to be moved
00520      * @param srcEnd The last token to be moved
00521      * @param newLocation srcStart will be placed after this token.
00522      */
00523     static void move(Token *srcStart, Token *srcEnd, Token *newLocation);
00524 
00525     /** Get progressValue */
00526     unsigned int progressValue() const {
00527         return _progressValue;
00528     }
00529 
00530     /** Calculate progress values for all tokens */
00531     void assignProgressValues() {
00532         unsigned int total_count = 0;
00533         for (Token *tok = this; tok; tok = tok->next())
00534             ++total_count;
00535         unsigned int count = 0;
00536         for (Token *tok = this; tok; tok = tok->next())
00537             tok->_progressValue = count++ * 100 / total_count;
00538     }
00539 
00540     /**
00541      * Returns the first token of the next argument. Does only work on argument
00542      * lists. Returns 0, if there is no next argument
00543      */
00544     Token* nextArgument() const;
00545 
00546     /**
00547      * Returns the closing bracket of opening '<'. Should only be used if link()
00548      * is unavailable.
00549      * @param closing The closing token is stored in that parameter
00550      * @return success
00551      */
00552     bool findClosingBracket(const Token*& closing) const;
00553     bool findClosingBracket(Token*& closing) const {
00554         const Token* tok;
00555         bool retVal = findClosingBracket(tok);
00556         closing = const_cast<Token*>(tok);
00557         return retVal;
00558     }
00559 
00560 private:
00561     void next(Token *nextToken) {
00562         _next = nextToken;
00563     }
00564     void previous(Token *previousToken) {
00565         _previous = previousToken;
00566     }
00567 
00568     /**
00569      * Works almost like strcmp() except returns only true or false and
00570      * if str has empty space &apos; &apos; character, that character is handled
00571      * as if it were &apos;\\0&apos;
00572      */
00573     static bool firstWordEquals(const char *str, const char *word);
00574 
00575     /**
00576      * Works almost like strchr() except
00577      * if str has empty space &apos; &apos; character, that character is handled
00578      * as if it were &apos;\\0&apos;
00579      */
00580     static const char *chrInFirstWord(const char *str, char c);
00581 
00582     /**
00583      * Works almost like strlen() except
00584      * if str has empty space &apos; &apos; character, that character is handled
00585      * as if it were &apos;\\0&apos;
00586      */
00587     static int firstWordLen(const char *str);
00588 
00589 
00590     Token *_next;
00591     Token *_previous;
00592     Token *_link;
00593 
00594     // symbol database information
00595     const Scope *_scope;
00596     union {
00597         const Function *_function;
00598         const Variable *_variable;
00599     };
00600 
00601     std::string _str;
00602     unsigned int _varId;
00603     unsigned int _fileIndex;
00604     unsigned int _linenr;
00605 
00606     /**
00607      * A value from 0-100 that provides a rough idea about where in the token
00608      * list this token is located.
00609      */
00610     unsigned int _progressValue;
00611 
00612     Type _type;
00613     bool _isUnsigned;
00614     bool _isSigned;
00615     bool _isPointerCompare;
00616     bool _isLong;
00617     bool _isUnused;
00618     bool _isStandardType;
00619     bool _isExpandedMacro;
00620 
00621     /** Updates internal property cache like _isName or _isBoolean.
00622         Called after any _str() modification. */
00623     void update_property_info();
00624 
00625     /** Update internal property cache about isStandardType() */
00626     void update_property_isStandardType();
00627 
00628     // AST..
00629     Token *_astOperand1;
00630     Token *_astOperand2;
00631     Token *_astParent;
00632 public:
00633     void astOperand1(Token *tok);
00634     void astOperand2(Token *tok);
00635     void astFunctionCall();
00636     void astHandleParentheses();
00637 
00638     const Token * astOperand1() const {
00639         return _astOperand1;
00640     }
00641     const Token * astOperand2() const {
00642         return _astOperand2;
00643     }
00644     const Token *astTop() const {
00645         const Token *ret = this;
00646         while (ret->_astParent)
00647             ret = ret->_astParent;
00648         return ret;
00649     }
00650 
00651     std::string astString() const {
00652         std::string ret;
00653         if (_astOperand1)
00654             ret = _astOperand1->astString();
00655         if (_astOperand2)
00656             ret += _astOperand2->astString();
00657         return ret+_str;
00658     }
00659 };
00660 
00661 /// @}
00662 
00663 #endif // TokenH