aboutsummaryrefslogtreecommitdiffhomepage
path: root/lib/Expr/Lexer.cpp
diff options
context:
space:
mode:
authorDaniel Dunbar <daniel@zuster.org>2009-05-21 04:36:41 +0000
committerDaniel Dunbar <daniel@zuster.org>2009-05-21 04:36:41 +0000
commit6f290d8f9e9d7faac295cb51fc96884a18f4ded4 (patch)
tree46e7d426abc0c9f06ac472ac6f7f9e661b5d78cb /lib/Expr/Lexer.cpp
parenta55960edd4dcd7535526de8d2277642522aa0209 (diff)
downloadklee-6f290d8f9e9d7faac295cb51fc96884a18f4ded4.tar.gz
Initial KLEE checkin.
- Lots more tweaks, documentation, and web page content is needed, but this should compile & work on OS X & Linux. git-svn-id: https://llvm.org/svn/llvm-project/klee/trunk@72205 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Expr/Lexer.cpp')
-rw-r--r--lib/Expr/Lexer.cpp261
1 files changed, 261 insertions, 0 deletions
diff --git a/lib/Expr/Lexer.cpp b/lib/Expr/Lexer.cpp
new file mode 100644
index 00000000..77e25f62
--- /dev/null
+++ b/lib/Expr/Lexer.cpp
@@ -0,0 +1,261 @@
+//===-- Lexer.cpp ---------------------------------------------------------===//
+//
+// The KLEE Symbolic Virtual Machine
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "expr/Lexer.h"
+
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Streams.h"
+
+#include <iomanip>
+#include <iostream>
+#include <string.h>
+
+using namespace llvm;
+using namespace klee;
+using namespace klee::expr;
+
+///
+
+const char *Token::getKindName() const {
+ switch (kind) {
+ default:
+ case Unknown: return "Unknown";
+ case Arrow: return "Arrow";
+ case At: return "At";
+ case Colon: return "Colon";
+ case Comma: return "Comma";
+ case Comment: return "Comment";
+ case EndOfFile: return "EndOfFile";
+ case Equals: return "Equals";
+ case Identifier: return "Identifier";
+ case KWFalse: return "KWFalse";
+ case KWQuery: return "KWQuery";
+ case KWReserved: return "KWReserved";
+ case KWTrue: return "KWTrue";
+ case KWWidth: return "KWWidth";
+ case LBrace: return "LBrace";
+ case LParen: return "LParen";
+ case LSquare: return "LSquare";
+ case Number: return "Number";
+ case RBrace: return "RBrace";
+ case RParen: return "RParen";
+ case RSquare: return "RSquare";
+ case Semicolon: return "Semicolon";
+ }
+}
+
+void Token::dump() {
+ llvm::cerr << "(Token \"" << getKindName() << "\" "
+ << (void*) start << " " << length << " "
+ << line << " " << column << ")";
+}
+
+///
+
/// Return true if \p Char may appear inside an identifier (after its first
/// character): an alphanumeric character, '_' or '.'.
///
/// \param Char  A character value as returned by the lexer's peek/get
///              routines; may be -1 (end of buffer).
///
/// Values outside the unsigned-char range (including the -1 end-of-buffer
/// sentinel and sign-extended high bytes) are never identifier characters.
/// They are filtered out before calling isalnum(), whose behaviour is
/// undefined for such arguments.
static inline bool isInternalIdentifierChar(int Char) {
  // A value that does not survive a round trip through unsigned char is not
  // a valid argument for the <ctype.h> classification functions.
  if (Char != static_cast<unsigned char>(Char))
    return false;

  return isalnum(Char) || Char == '_' || Char == '.';
}
+
+Lexer::Lexer(const llvm::MemoryBuffer *MB)
+ : BufferPos(MB->getBufferStart()), BufferEnd(MB->getBufferEnd()),
+ LineNumber(1), ColumnNumber(0) {
+}
+
+Lexer::~Lexer() {
+}
+
+int Lexer::PeekNextChar() {
+ if (BufferPos == BufferEnd)
+ return -1;
+ return *BufferPos;
+}
+
+int Lexer::GetNextChar() {
+ if (BufferPos == BufferEnd)
+ return -1;
+
+ // Handle DOS/Mac newlines here, by stripping duplicates and by
+ // returning '\n' for both.
+ char Result = *BufferPos++;
+ if (Result == '\n' || Result == '\r') {
+ if (BufferPos != BufferEnd && *BufferPos == ('\n' + '\r' - Result))
+ ++BufferPos;
+ Result = '\n';
+ }
+
+ if (Result == '\n') {
+ ++LineNumber;
+ ColumnNumber = 0;
+ } else {
+ ++ColumnNumber;
+ }
+
+ return Result;
+}
+
+Token &Lexer::SetTokenKind(Token &Result, Token::Kind k) {
+ Result.kind = k;
+ Result.length = BufferPos - Result.start;
+ return Result;
+}
+
/// Return true if the \p N characters starting at \p Str spell a reserved
/// type-like keyword.
///
/// Two families are recognized:
///   - i[0-9]+               e.g. "i1", "i32"
///   - fp[0-9]+([.].*)?$     e.g. "fp8", "fp80", "fp32.suffix"
///
/// \param Str  Start of the identifier text (need not be NUL-terminated).
/// \param N    Number of characters in the identifier.
static bool isReservedKW(const char *Str, unsigned N) {
  unsigned i;

  // Check for i[0-9]+
  if (N > 1 && Str[0] == 'i') {
    for (i = 1; i < N; ++i)
      // Cast: passing a negative char to isdigit() is undefined behaviour.
      if (!isdigit(static_cast<unsigned char>(Str[i])))
        break;
    if (i == N)
      return true;
  }

  // Check for fp[0-9]+([.].*)?$
  //
  // At least one digit must follow "fp", so three characters are enough
  // ("fp8"); the digit run may be followed by a '.' and arbitrary text.
  if (N > 2 && Str[0] == 'f' && Str[1] == 'p') {
    for (i = 2; i < N; ++i)
      if (!isdigit(static_cast<unsigned char>(Str[i])))
        break;
    // i > 2 means at least one digit was seen after "fp".
    if (i > 2 && (i == N || Str[i] == '.'))
      return true;
  }

  return false;
}
/// Return true if the \p N characters starting at \p Str spell a width
/// keyword, i.e. match w[0-9]+ (for example "w8" or "w32").
///
/// \param Str  Start of the identifier text (need not be NUL-terminated).
/// \param N    Number of characters in the identifier.
static bool isWidthKW(const char *Str, unsigned N) {
  // Need the leading 'w' plus at least one digit.
  if (N < 2 || Str[0] != 'w')
    return false;

  for (unsigned i = 1; i < N; ++i) {
    // Cast: passing a negative char to isdigit() is undefined behaviour.
    if (!isdigit(static_cast<unsigned char>(Str[i])))
      return false;
  }

  return true;
}
+Token &Lexer::SetIdentifierTokenKind(Token &Result) {
+ unsigned Length = BufferPos - Result.start;
+ switch (Length) {
+ case 3:
+ if (memcmp("def", Result.start, 3) == 0)
+ return SetTokenKind(Result, Token::KWReserved);
+ if (memcmp("var", Result.start, 3) == 0)
+ return SetTokenKind(Result, Token::KWReserved);
+ break;
+
+ case 4:
+ if (memcmp("true", Result.start, 4) == 0)
+ return SetTokenKind(Result, Token::KWTrue);
+ break;
+
+ case 5:
+ if (memcmp("array", Result.start, 5) == 0)
+ return SetTokenKind(Result, Token::KWReserved);
+ if (memcmp("false", Result.start, 5) == 0)
+ return SetTokenKind(Result, Token::KWFalse);
+ if (memcmp("query", Result.start, 5) == 0)
+ return SetTokenKind(Result, Token::KWQuery);
+ break;
+
+ case 6:
+ if (memcmp("define", Result.start, 6) == 0)
+ return SetTokenKind(Result, Token::KWReserved);
+ break;
+
+ case 7:
+ if (memcmp("declare", Result.start, 7) == 0)
+ return SetTokenKind(Result, Token::KWReserved);
+ break;
+ }
+
+ if (isReservedKW(Result.start, Length))
+ return SetTokenKind(Result, Token::KWReserved);
+ if (isWidthKW(Result.start, Length))
+ return SetTokenKind(Result, Token::KWWidth);
+
+ return SetTokenKind(Result, Token::Identifier);
+}
+
+void Lexer::SkipToEndOfLine() {
+ for (;;) {
+ int Char = GetNextChar();
+ if (Char == -1 || Char =='\n')
+ break;
+ }
+}
+
+Token &Lexer::LexNumber(Token &Result) {
+ while (isalnum(PeekNextChar()) || PeekNextChar()=='_')
+ GetNextChar();
+ return SetTokenKind(Result, Token::Number);
+}
+
+Token &Lexer::LexIdentifier(Token &Result) {
+ while (isInternalIdentifierChar(PeekNextChar()))
+ GetNextChar();
+
+ // Recognize keywords specially.
+ return SetIdentifierTokenKind(Result);
+}
+
+Token &Lexer::Lex(Token &Result) {
+ Result.kind = Token::Unknown;
+ Result.length = 0;
+ Result.start = BufferPos;
+
+ // Skip whitespace.
+ while (isspace(PeekNextChar()))
+ GetNextChar();
+
+ Result.start = BufferPos;
+ Result.line = LineNumber;
+ Result.column = ColumnNumber;
+ int Char = GetNextChar();
+ switch (Char) {
+ case -1: return SetTokenKind(Result, Token::EndOfFile);
+
+ case '(': return SetTokenKind(Result, Token::LParen);
+ case ')': return SetTokenKind(Result, Token::RParen);
+ case ',': return SetTokenKind(Result, Token::Comma);
+ case ':': return SetTokenKind(Result, Token::Colon);
+ case ';': return SetTokenKind(Result, Token::Semicolon);
+ case '=': return SetTokenKind(Result, Token::Equals);
+ case '@': return SetTokenKind(Result, Token::At);
+ case '[': return SetTokenKind(Result, Token::LSquare);
+ case ']': return SetTokenKind(Result, Token::RSquare);
+ case '{': return SetTokenKind(Result, Token::LBrace);
+ case '}': return SetTokenKind(Result, Token::RBrace);
+
+ case '#':
+ SkipToEndOfLine();
+ return SetTokenKind(Result, Token::Comment);
+
+ case '+': {
+ if (isdigit(PeekNextChar()))
+ return LexNumber(Result);
+ else
+ return SetTokenKind(Result, Token::Unknown);
+ }
+
+ case '-': {
+ int Next = PeekNextChar();
+ if (Next == '>')
+ return GetNextChar(), SetTokenKind(Result, Token::Arrow);
+ else if (isdigit(Next))
+ return LexNumber(Result);
+ else
+ return SetTokenKind(Result, Token::Unknown);
+ break;
+ }
+
+ default:
+ if (isdigit(Char))
+ return LexNumber(Result);
+ else if (isalpha(Char) || Char == '_')
+ return LexIdentifier(Result);
+ return SetTokenKind(Result, Token::Unknown);
+ }
+}