diff options
Diffstat (limited to 'include/expr/Lexer.h')
-rw-r--r-- | include/expr/Lexer.h | 114 |
1 files changed, 114 insertions, 0 deletions
// diff --git a/include/expr/Lexer.h b/include/expr/Lexer.h (new file mode 100644, index 00000000..4ae760a0)
//===-- Lexer.h -------------------------------------------------*- C++ -*-===//
//
//                     The KLEE Symbolic Virtual Machine
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#ifndef KLEE_EXPR_LEXER_H
#define KLEE_EXPR_LEXER_H

#include <string>

namespace llvm {
  class MemoryBuffer;
}

namespace klee {
namespace expr {
  /// Token - A single lexed token: its kind plus the span of source text
  /// it covers and the position at which it starts.
  struct Token {
    enum Kind {
      At,         ///< '@'
      Arrow,      ///< '->'
      Colon,      ///< ':'
      Comma,      ///< ','
      Comment,    ///< #[^\n]+
      EndOfFile,  ///< <end of file>
      Equals,     ///< ' = '
      Identifier, ///< [a-zA-Z_][a-zA-Z0-9._]*
      KWFalse,    ///< 'false'
      KWQuery,    ///< 'query'
      KWReserved, ///< fp[0-9]+([.].*)?, i[0-9]+
      KWTrue,     ///< 'true'
      KWWidth,    ///< w[0-9]+
      LBrace,     ///< '{'
      LParen,     ///< '('
      LSquare,    ///< '['
      Number,     ///< [+-]?[0-9][a-zA-Z0-9_]+
      RBrace,     ///< '}'
      RParen,     ///< ')'
      RSquare,    ///< ']'
      Semicolon,  ///< ';'
      Unknown,    ///< <other>

      // Inclusive bounds of the keyword kinds. All KW* enumerators must
      // stay contiguous and inside this range; isKeyword() relies on it.
      KWKindFirst = KWFalse,
      KWKindLast = KWWidth
    };

    Kind        kind;   ///< The token kind.
    const char *start;  ///< The beginning of the token string.
    unsigned    length; ///< The length of the token.
    unsigned    line;   ///< The line number of the start of this token.
    unsigned    column; ///< The column number at the start of this token.

    /// getKindName - The name of this token's kind.
    const char *getKindName() const;

    /// getString - The string spanned by this token. This is not
    /// particularly efficient, use start and length when reasonable.
    std::string getString() const { return std::string(start, length); }

    /// isKeyword - True if this token is a keyword, i.e. any of the KW*
    /// kinds (including KWWidth, which the previous upper bound of KWTrue
    /// accidentally left out).
    bool isKeyword() const {
      return kind >= KWKindFirst && kind <= KWKindLast;
    }

    /// dump - Dump the token to stderr.
    void dump();
  };

  /// Lexer - Interface for lexing tokens from a .pc language file.
  class Lexer {
    const char *BufferPos;    ///< The current lexer position.
    const char *BufferEnd;    ///< The buffer end position.
    unsigned    LineNumber;   ///< The current line.
    unsigned    ColumnNumber; ///< The current column.

    /// GetNextChar - Eat a character or -1 from the stream.
    int GetNextChar();

    /// PeekNextChar - Return the next character without consuming it
    /// from the stream. This does not perform newline
    /// canonicalization.
    int PeekNextChar();

    /// SetTokenKind - Set the token kind and length (using the
    /// token's start pointer, which must have been initialized).
    Token &SetTokenKind(Token &Result, Token::Kind k);

    /// SetIdentifierTokenKind - Set an identifier's token kind. This
    /// has the same requirements as SetTokenKind and additionally takes
    /// care of keyword recognition.
    Token &SetIdentifierTokenKind(Token &Result);

    /// SkipToEndOfLine - Skip ahead to the end of the current line.
    /// NOTE(review): the exact newline handling lives in the
    /// implementation file, which is not visible from this header.
    void SkipToEndOfLine();

    /// LexNumber - Lex a number which does not have a base specifier.
    Token &LexNumber(Token &Result);

    /// LexIdentifier - Lex an identifier.
    Token &LexIdentifier(Token &Result);

  public:
    /// Construct a lexer over the contents of the given memory buffer.
    explicit Lexer(const llvm::MemoryBuffer *_buf);
    ~Lexer();

    /// Lex - Return the next token from the file or EOF continually
    /// when the end of the file is reached. The input argument is
    /// used as the result, for convenience.
    Token &Lex(Token &Result);
  };
}
}

#endif