diff options
author | Daniel Dunbar <daniel@zuster.org> | 2009-05-21 04:36:41 +0000 |
---|---|---|
committer | Daniel Dunbar <daniel@zuster.org> | 2009-05-21 04:36:41 +0000 |
commit | 6f290d8f9e9d7faac295cb51fc96884a18f4ded4 (patch) | |
tree | 46e7d426abc0c9f06ac472ac6f7f9e661b5d78cb /lib/Expr/Lexer.cpp | |
parent | a55960edd4dcd7535526de8d2277642522aa0209 (diff) | |
download | klee-6f290d8f9e9d7faac295cb51fc96884a18f4ded4.tar.gz |
Initial KLEE checkin.
- Lots more tweaks, documentation, and web page content is needed, but this should compile & work on OS X & Linux. git-svn-id: https://llvm.org/svn/llvm-project/klee/trunk@72205 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Expr/Lexer.cpp')
-rw-r--r-- | lib/Expr/Lexer.cpp | 261 |
1 files changed, 261 insertions, 0 deletions
diff --git a/lib/Expr/Lexer.cpp b/lib/Expr/Lexer.cpp new file mode 100644 index 00000000..77e25f62 --- /dev/null +++ b/lib/Expr/Lexer.cpp @@ -0,0 +1,261 @@ +//===-- Lexer.cpp ---------------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "expr/Lexer.h" + +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Streams.h" + +#include <iomanip> +#include <iostream> +#include <string.h> + +using namespace llvm; +using namespace klee; +using namespace klee::expr; + +/// + +const char *Token::getKindName() const { + switch (kind) { + default: + case Unknown: return "Unknown"; + case Arrow: return "Arrow"; + case At: return "At"; + case Colon: return "Colon"; + case Comma: return "Comma"; + case Comment: return "Comment"; + case EndOfFile: return "EndOfFile"; + case Equals: return "Equals"; + case Identifier: return "Identifier"; + case KWFalse: return "KWFalse"; + case KWQuery: return "KWQuery"; + case KWReserved: return "KWReserved"; + case KWTrue: return "KWTrue"; + case KWWidth: return "KWWidth"; + case LBrace: return "LBrace"; + case LParen: return "LParen"; + case LSquare: return "LSquare"; + case Number: return "Number"; + case RBrace: return "RBrace"; + case RParen: return "RParen"; + case RSquare: return "RSquare"; + case Semicolon: return "Semicolon"; + } +} + +void Token::dump() { + llvm::cerr << "(Token \"" << getKindName() << "\" " + << (void*) start << " " << length << " " + << line << " " << column << ")"; +} + +/// + +static inline bool isInternalIdentifierChar(int Char) { + return isalnum(Char) || Char == '_' || Char == '.'; +} + +Lexer::Lexer(const llvm::MemoryBuffer *MB) + : BufferPos(MB->getBufferStart()), BufferEnd(MB->getBufferEnd()), + LineNumber(1), ColumnNumber(0) { +} + +Lexer::~Lexer() { +} + +int Lexer::PeekNextChar() { + if (BufferPos == BufferEnd) + return -1; + return *BufferPos; +} + +int Lexer::GetNextChar() { + if (BufferPos == BufferEnd) + return -1; + + // Handle DOS/Mac newlines here, by stripping duplicates and by + // returning '\n' for both. + char Result = *BufferPos++; + if (Result == '\n' || Result == '\r') { + if (BufferPos != BufferEnd && *BufferPos == ('\n' + '\r' - Result)) + ++BufferPos; + Result = '\n'; + } + + if (Result == '\n') { + ++LineNumber; + ColumnNumber = 0; + } else { + ++ColumnNumber; + } + + return Result; +} + +Token &Lexer::SetTokenKind(Token &Result, Token::Kind k) { + Result.kind = k; + Result.length = BufferPos - Result.start; + return Result; +} + +static bool isReservedKW(const char *Str, unsigned N) { + unsigned i; + + // Check for i[0-9]+ + if (N>1 && Str[0] == 'i') { + for (i=1; i<N; ++i) + if (!isdigit(Str[i])) + break; + if (i==N) + return true; + } + + // Check for fp[0-9]+([.].*)?$ + if (N>3 && Str[0]=='f' && Str[1]=='p' && isdigit(Str[2])) { + for (i=3; i<N; ++i) + if (!isdigit(Str[i])) + break; + if (i==N || Str[i]=='.') + return true; + } + + return false; +} +static bool isWidthKW(const char *Str, unsigned N) { + if (N<2 || Str[0] != 'w') + return false; + for (unsigned i=1; i<N; ++i) + if (!isdigit(Str[i])) + return false; + return true; +} +Token &Lexer::SetIdentifierTokenKind(Token &Result) { + unsigned Length = BufferPos - Result.start; + switch (Length) { + case 3: + if (memcmp("def", Result.start, 3) == 0) + return SetTokenKind(Result, Token::KWReserved); + if (memcmp("var", Result.start, 3) == 0) + return SetTokenKind(Result, Token::KWReserved); + break; + + case 4: + if (memcmp("true", Result.start, 4) == 0) + return SetTokenKind(Result, Token::KWTrue); + break; + + case 5: + if (memcmp("array", Result.start, 5) == 0) + return SetTokenKind(Result, Token::KWReserved); + if (memcmp("false", Result.start, 5) == 0) + return SetTokenKind(Result, Token::KWFalse); + if (memcmp("query", Result.start, 5) == 0) + return SetTokenKind(Result, Token::KWQuery); + break; + + case 6: + if (memcmp("define", Result.start, 6) == 0) + return SetTokenKind(Result, Token::KWReserved); + break; + + case 7: + if (memcmp("declare", Result.start, 7) == 0) + return SetTokenKind(Result, Token::KWReserved); + break; + } + + if (isReservedKW(Result.start, Length)) + return SetTokenKind(Result, Token::KWReserved); + if (isWidthKW(Result.start, Length)) + return SetTokenKind(Result, Token::KWWidth); + + return SetTokenKind(Result, Token::Identifier); +} + +void Lexer::SkipToEndOfLine() { + for (;;) { + int Char = GetNextChar(); + if (Char == -1 || Char =='\n') + break; + } +} + +Token &Lexer::LexNumber(Token &Result) { + while (isalnum(PeekNextChar()) || PeekNextChar()=='_') + GetNextChar(); + return SetTokenKind(Result, Token::Number); +} + +Token &Lexer::LexIdentifier(Token &Result) { + while (isInternalIdentifierChar(PeekNextChar())) + GetNextChar(); + + // Recognize keywords specially. + return SetIdentifierTokenKind(Result); +} + +Token &Lexer::Lex(Token &Result) { + Result.kind = Token::Unknown; + Result.length = 0; + Result.start = BufferPos; + + // Skip whitespace. + while (isspace(PeekNextChar())) + GetNextChar(); + + Result.start = BufferPos; + Result.line = LineNumber; + Result.column = ColumnNumber; + int Char = GetNextChar(); + switch (Char) { + case -1: return SetTokenKind(Result, Token::EndOfFile); + + case '(': return SetTokenKind(Result, Token::LParen); + case ')': return SetTokenKind(Result, Token::RParen); + case ',': return SetTokenKind(Result, Token::Comma); + case ':': return SetTokenKind(Result, Token::Colon); + case ';': return SetTokenKind(Result, Token::Semicolon); + case '=': return SetTokenKind(Result, Token::Equals); + case '@': return SetTokenKind(Result, Token::At); + case '[': return SetTokenKind(Result, Token::LSquare); + case ']': return SetTokenKind(Result, Token::RSquare); + case '{': return SetTokenKind(Result, Token::LBrace); + case '}': return SetTokenKind(Result, Token::RBrace); + + case '#': + SkipToEndOfLine(); + return SetTokenKind(Result, Token::Comment); + + case '+': { + if (isdigit(PeekNextChar())) + return LexNumber(Result); + else + return SetTokenKind(Result, Token::Unknown); + } + + case '-': { + int Next = PeekNextChar(); + if (Next == '>') + return GetNextChar(), SetTokenKind(Result, Token::Arrow); + else if (isdigit(Next)) + return LexNumber(Result); + else + return SetTokenKind(Result, Token::Unknown); + break; + } + + default: + if (isdigit(Char)) + return LexNumber(Result); + else if (isalpha(Char) || Char == '_') + return LexIdentifier(Result); + return SetTokenKind(Result, Token::Unknown); + } +} |