about summary refs log tree commit diff homepage
path: root/lib/Expr/Lexer.cpp
diff options
context:
space:
mode:
author Daniel Dunbar <daniel@zuster.org> 2009-05-21 04:36:41 +0000
committer Daniel Dunbar <daniel@zuster.org> 2009-05-21 04:36:41 +0000
commit 6f290d8f9e9d7faac295cb51fc96884a18f4ded4 (patch)
tree 46e7d426abc0c9f06ac472ac6f7f9e661b5d78cb /lib/Expr/Lexer.cpp
parent a55960edd4dcd7535526de8d2277642522aa0209 (diff)
download klee-6f290d8f9e9d7faac295cb51fc96884a18f4ded4.tar.gz
Initial KLEE checkin.
 - Lots more tweaks, documentation, and web page content is needed,
   but this should compile & work on OS X & Linux.


git-svn-id: https://llvm.org/svn/llvm-project/klee/trunk@72205 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Expr/Lexer.cpp')
-rw-r--r-- lib/Expr/Lexer.cpp 261
1 files changed, 261 insertions, 0 deletions
diff --git a/lib/Expr/Lexer.cpp b/lib/Expr/Lexer.cpp
new file mode 100644
index 00000000..77e25f62
--- /dev/null
+++ b/lib/Expr/Lexer.cpp
@@ -0,0 +1,261 @@
+//===-- Lexer.cpp ---------------------------------------------------------===//
+//
+//                     The KLEE Symbolic Virtual Machine
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "expr/Lexer.h"
+
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Streams.h"
+
+#include <iomanip>
+#include <iostream>
+#include <string.h>
+
+using namespace llvm;
+using namespace klee;
+using namespace klee::expr;
+
+///
+
+/// Return a human-readable name for this token's kind.
+///
+/// The name is the spelling of the corresponding enumerator. Any kind value
+/// that has no explicit case below is reported as "Unknown".
+const char *Token::getKindName() const {
+  switch (kind) {
+  // Punctuation.
+  case Arrow:      return "Arrow";
+  case At:         return "At";
+  case Colon:      return "Colon";
+  case Comma:      return "Comma";
+  case Equals:     return "Equals";
+  case Semicolon:  return "Semicolon";
+  case LBrace:     return "LBrace";
+  case RBrace:     return "RBrace";
+  case LParen:     return "LParen";
+  case RParen:     return "RParen";
+  case LSquare:    return "LSquare";
+  case RSquare:    return "RSquare";
+
+  // Keywords.
+  case KWFalse:    return "KWFalse";
+  case KWQuery:    return "KWQuery";
+  case KWReserved: return "KWReserved";
+  case KWTrue:     return "KWTrue";
+  case KWWidth:    return "KWWidth";
+
+  // Everything else.
+  case Comment:    return "Comment";
+  case EndOfFile:  return "EndOfFile";
+  case Identifier: return "Identifier";
+  case Number:     return "Number";
+
+  case Unknown:
+  default:         return "Unknown";
+  }
+}
+
+/// Write a one-line debugging description of this token to llvm::cerr:
+///
+///   (Token "<kind name>" <start pointer> <length> <line> <column>)
+///
+/// No trailing newline is written.
+void Token::dump() {
+  llvm::cerr << "(Token \"" << getKindName() << "\" ";
+  llvm::cerr << (void*) start << " " << length << " ";
+  llvm::cerr << line << " " << column << ")";
+}
+
+///
+
+/// Return true if \p Char may appear inside an identifier after its first
+/// character: an alphanumeric character, '_' or '.'.
+///
+/// \p Char is an int so that the lexer's -1 end-of-buffer sentinel can be
+/// passed in directly. Values that are not representable as an unsigned char
+/// (the sentinel, or a high byte that was sign-extended from a signed char)
+/// are rejected up front: passing such a value to isalnum() is undefined
+/// behavior unless it happens to equal EOF.
+static inline bool isInternalIdentifierChar(int Char) {
+  const unsigned char Byte = static_cast<unsigned char>(Char);
+  if (Byte != Char)
+    return false;
+  return isalnum(Byte) || Char == '_' || Char == '.';
+}
+
+/// Construct a lexer over the contents of \p MB.
+///
+/// The lexer stores pointers to the buffer's start and end rather than
+/// copying the text. Position tracking begins at line 1, column 0.
+// NOTE(review): presumably MB must outlive the lexer and any tokens that
+// point into it -- confirm against callers.
+Lexer::Lexer(const llvm::MemoryBuffer *MB) 
+  : BufferPos(MB->getBufferStart()), BufferEnd(MB->getBufferEnd()), 
+    LineNumber(1), ColumnNumber(0) {
+}
+
+/// Destroy the lexer. The body is empty: nothing is released here, and in
+/// particular the buffer passed to the constructor is not freed.
+Lexer::~Lexer() {
+}
+
+/// Return the next character in the buffer without consuming it, or -1 if
+/// the end of the buffer has been reached.
+///
+/// No newline translation is done here; a '\r' is returned as-is.
+///
+/// The byte is widened through unsigned char so the result is never
+/// negative for real input. Where plain char is signed, a 0xFF byte would
+/// otherwise come back as -1 and be mistaken for end-of-buffer, and other
+/// high bytes would reach the <ctype.h> classifiers as negative values,
+/// which is undefined behavior.
+int Lexer::PeekNextChar() {
+  if (BufferPos == BufferEnd)
+    return -1;
+  return static_cast<unsigned char>(*BufferPos);
+}
+
+/// Consume and return the next character, or return -1 (consuming nothing)
+/// if the end of the buffer has been reached.
+///
+/// Newlines are normalized: '\n', '\r', "\r\n" and "\n\r" are each consumed
+/// as a unit and reported as a single '\n'. A newline advances LineNumber and
+/// resets ColumnNumber to 0; any other character advances ColumnNumber.
+///
+/// The result is widened through unsigned char so that a high byte is never
+/// returned as a negative value. Where plain char is signed, a 0xFF byte
+/// would otherwise be returned as -1 and be indistinguishable from the
+/// end-of-buffer sentinel.
+int Lexer::GetNextChar() {
+  if (BufferPos == BufferEnd)
+    return -1;
+
+  // Handle DOS/Mac newlines here, by stripping duplicates and by
+  // returning '\n' for both.
+  char Result = *BufferPos++;
+  if (Result == '\n' || Result == '\r') {
+    // '\n' + '\r' - Result is the *other* newline character, so only a
+    // mixed pair is merged; "\n\n" still counts as two line breaks.
+    if (BufferPos != BufferEnd && *BufferPos == ('\n' + '\r' - Result))
+      ++BufferPos;
+    Result = '\n';
+  }
+
+  if (Result == '\n') {
+    ++LineNumber;
+    ColumnNumber = 0;
+  } else {
+    ++ColumnNumber;
+  }
+
+  return static_cast<unsigned char>(Result);
+}
+
+/// Finish \p Result: its length becomes the distance from its start to the
+/// current buffer position, and its kind becomes \p k.
+///
+/// \returns \p Result, so callers can write `return SetTokenKind(...)`.
+Token &Lexer::SetTokenKind(Token &Result, Token::Kind k) {
+  Result.length = BufferPos - Result.start;
+  Result.kind = k;
+  return Result;
+}
+
+/// Return true if the \p N characters starting at \p Str spell one of the
+/// reserved type-like keywords:
+///
+///   i[0-9]+             for example "i32"
+///   fp[0-9]+([.].*)?    for example "fp8", "fp80" or "fp80.x"
+///
+/// Only Str[0..N-1] is read, so \p Str does not have to be NUL-terminated.
+static bool isReservedKW(const char *Str, unsigned N) {
+  // Check for i[0-9]+
+  if (N > 1 && Str[0] == 'i') {
+    unsigned i = 1;
+    // isdigit() is undefined for negative values, so go through
+    // unsigned char first.
+    while (i < N && isdigit(static_cast<unsigned char>(Str[i])))
+      ++i;
+    if (i == N)
+      return true;
+  }
+
+  // Check for fp[0-9]+([.].*)?$
+  // One digit is enough, so the shortest match has three characters.
+  if (N > 2 && Str[0] == 'f' && Str[1] == 'p' &&
+      isdigit(static_cast<unsigned char>(Str[2]))) {
+    unsigned i = 3;
+    while (i < N && isdigit(static_cast<unsigned char>(Str[i])))
+      ++i;
+    // Either the digits run to the end, or a '.' starts a free-form suffix.
+    if (i == N || Str[i] == '.')
+      return true;
+  }
+
+  return false;
+}
+/// Return true if the \p N characters starting at \p Str spell a width
+/// keyword, w[0-9]+ (for example "w32").
+///
+/// Only Str[0..N-1] is read, so \p Str does not have to be NUL-terminated.
+static bool isWidthKW(const char *Str, unsigned N) {
+  // Need the 'w' plus at least one digit.
+  if (N < 2 || Str[0] != 'w')
+    return false;
+  for (unsigned i = 1; i < N; ++i) {
+    // isdigit() is undefined for negative values, so go through
+    // unsigned char first.
+    if (!isdigit(static_cast<unsigned char>(Str[i])))
+      return false;
+  }
+  return true;
+}
+/// Classify the identifier-shaped text that starts at Result.start and ends
+/// at the current buffer position, and finish the token via SetTokenKind().
+///
+/// Exact keyword spellings are tried first, then the patterns accepted by
+/// isReservedKW() and isWidthKW(); anything else is a plain Identifier.
+Token &Lexer::SetIdentifierTokenKind(Token &Result) {
+  // Fixed keyword spellings and the token kind each one maps to.
+  static const struct {
+    const char *Spelling;
+    Token::Kind Kind;
+  } Keywords[] = {
+    { "def",     Token::KWReserved },
+    { "var",     Token::KWReserved },
+    { "true",    Token::KWTrue     },
+    { "array",   Token::KWReserved },
+    { "false",   Token::KWFalse    },
+    { "query",   Token::KWQuery    },
+    { "define",  Token::KWReserved },
+    { "declare", Token::KWReserved }
+  };
+  const unsigned NumKeywords = sizeof(Keywords) / sizeof(Keywords[0]);
+
+  const char *Text = Result.start;
+  const unsigned Length = BufferPos - Text;
+
+  // The token text is not NUL-terminated, so compare length and bytes.
+  for (unsigned Idx = 0; Idx != NumKeywords; ++Idx) {
+    if (Length == strlen(Keywords[Idx].Spelling) &&
+        memcmp(Keywords[Idx].Spelling, Text, Length) == 0)
+      return SetTokenKind(Result, Keywords[Idx].Kind);
+  }
+
+  Token::Kind Kind = Token::Identifier;
+  if (isReservedKW(Text, Length))
+    Kind = Token::KWReserved;
+  else if (isWidthKW(Text, Length))
+    Kind = Token::KWWidth;
+
+  return SetTokenKind(Result, Kind);
+}
+
+/// Consume characters up to and including the next newline, or up to the end
+/// of the buffer if no newline remains.
+void Lexer::SkipToEndOfLine() {
+  int Consumed;
+  do {
+    Consumed = GetNextChar();
+  } while (Consumed != -1 && Consumed != '\n');
+}
+
+/// Consume the remainder of a number token and mark \p Result as a Number.
+///
+/// Every following alphanumeric character or '_' is taken as part of the
+/// number; no validation of the digits is done here.
+Token &Lexer::LexNumber(Token &Result) {
+  for (;;) {
+    const int Next = PeekNextChar();
+    if (!isalnum(Next) && Next != '_')
+      break;
+    GetNextChar();
+  }
+  return SetTokenKind(Result, Token::Number);
+}
+
+/// Consume the remainder of an identifier-shaped token (alphanumerics, '_'
+/// and '.') and classify it as a keyword or a plain identifier.
+Token &Lexer::LexIdentifier(Token &Result) {
+  for (;;) {
+    if (!isInternalIdentifierChar(PeekNextChar()))
+      break;
+    GetNextChar();
+  }
+
+  // Keywords share the identifier syntax; sort them out now.
+  return SetIdentifierTokenKind(Result);
+}
+
+/// Lex the next token from the buffer into \p Result and return it.
+///
+/// Leading whitespace is skipped. The token's start, line and column describe
+/// the position of its first character. At the end of the buffer an
+/// EndOfFile token of length 0 is produced. A '#' starts a Comment token that
+/// runs through the end of the line. Characters that start no known token
+/// yield an Unknown token.
+Token &Lexer::Lex(Token &Result) {
+  Result.kind = Token::Unknown;
+  Result.length = 0;
+  Result.start = BufferPos;
+
+  // Skip whitespace.
+  for (;;) {
+    if (!isspace(PeekNextChar()))
+      break;
+    GetNextChar();
+  }
+
+  // Record where the token proper begins before consuming anything.
+  Result.start = BufferPos;
+  Result.line = LineNumber;
+  Result.column = ColumnNumber;
+
+  const int Char = GetNextChar();
+  switch (Char) {
+  case -1:  return SetTokenKind(Result, Token::EndOfFile);
+
+  // Single-character punctuation.
+  case '(': return SetTokenKind(Result, Token::LParen);
+  case ')': return SetTokenKind(Result, Token::RParen);
+  case '[': return SetTokenKind(Result, Token::LSquare);
+  case ']': return SetTokenKind(Result, Token::RSquare);
+  case '{': return SetTokenKind(Result, Token::LBrace);
+  case '}': return SetTokenKind(Result, Token::RBrace);
+  case ',': return SetTokenKind(Result, Token::Comma);
+  case ':': return SetTokenKind(Result, Token::Colon);
+  case ';': return SetTokenKind(Result, Token::Semicolon);
+  case '=': return SetTokenKind(Result, Token::Equals);
+  case '@': return SetTokenKind(Result, Token::At);
+
+  case '#':
+    SkipToEndOfLine();
+    return SetTokenKind(Result, Token::Comment);
+
+  case '-':
+    // "->" is the only token starting with '-' that is not a number.
+    if (PeekNextChar() == '>') {
+      GetNextChar();
+      return SetTokenKind(Result, Token::Arrow);
+    }
+    // Otherwise '-' is treated exactly like '+': fall through.
+  case '+':
+    // A sign only starts a token when a digit follows it directly.
+    if (isdigit(PeekNextChar()))
+      return LexNumber(Result);
+    return SetTokenKind(Result, Token::Unknown);
+
+  default:
+    break;
+  }
+
+  if (isdigit(Char))
+    return LexNumber(Result);
+  if (isalpha(Char) || Char == '_')
+    return LexIdentifier(Result);
+  return SetTokenKind(Result, Token::Unknown);
+}