//===-- Lexer.cpp ---------------------------------------------------------===//
//
//                     The KLEE Symbolic Virtual Machine
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "expr/Lexer.h"

#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"

#include <iomanip>
#include <string.h>

using namespace llvm;
using namespace klee;
using namespace klee::expr;

///

const char *Token::getKindName() const {
  switch (kind) {
  default:
  case Unknown:    return "Unknown";
  case Arrow:      return "Arrow";
  case At:         return "At";
  case Colon:      return "Colon";
  case Comma:      return "Comma";
  case Comment:    return "Comment";
  case EndOfFile:  return "EndOfFile";
  case Equals:     return "Equals";
  case Identifier: return "Identifier";
  case KWArray:    return "KWArray";
  case KWFalse:    return "KWFalse";
  case KWQuery:    return "KWQuery";
  case KWReserved: return "KWReserved";
  case KWSymbolic: return "KWSymbolic";
  case KWTrue:     return "KWTrue";
  case KWWidth:    return "KWWidth";
  case LBrace:     return "LBrace";
  case LParen:     return "LParen";
  case LSquare:    return "LSquare";
  case Number:     return "Number";
  case RBrace:     return "RBrace";
  case RParen:     return "RParen";
  case RSquare:    return "RSquare";
  case Semicolon:  return "Semicolon";
  }
}

void Token::dump() {
  llvm::errs() << "(Token \"" << getKindName() << "\" "
               << (const void*) start << " " << length << " "
               << line << " " << column << ")";
}

///

static inline bool isInternalIdentifierChar(int Char) {
  return isalnum(Char) || Char == '_' || Char == '.' || Char == '-';
}

Lexer::Lexer(const llvm::MemoryBuffer *MB) 
  : BufferPos(MB->getBufferStart()), BufferEnd(MB->getBufferEnd()), 
    LineNumber(1), ColumnNumber(0) {
}

Lexer::~Lexer() {
}

int Lexer::PeekNextChar() {
  if (BufferPos == BufferEnd)
    return -1;
  return *BufferPos;
}

int Lexer::GetNextChar() {
  if (BufferPos == BufferEnd)
    return -1;

  // Handle DOS/Mac newlines here, by stripping duplicates and by
  // returning '\n' for both.
  char Result = *BufferPos++;
  if (Result == '\n' || Result == '\r') {
    if (BufferPos != BufferEnd && *BufferPos == ('\n' + '\r' - Result))
      ++BufferPos;
    Result = '\n';
  }

  if (Result == '\n') {
    ++LineNumber;
    ColumnNumber = 0;
  } else {
    ++ColumnNumber;
  }

  return Result;
}

Token &Lexer::SetTokenKind(Token &Result, Token::Kind k) {
  Result.kind = k;
  Result.length = BufferPos - Result.start;
  return Result;
}

static bool isReservedKW(const char *Str, unsigned N) {
    unsigned i;

  // Check for i[0-9]+
  if (N>1 && Str[0] == 'i') {
    for (i=1; i<N; ++i)
      if (!isdigit(Str[i]))
        break;
    if (i==N)
      return true;
  }

  // Check for fp[0-9]+([.].*)?$
  if (N>3 && Str[0]=='f' && Str[1]=='p' && isdigit(Str[2])) {
    for (i=3; i<N; ++i)
      if (!isdigit(Str[i]))
        break;
    if (i==N || Str[i]=='.')
      return true;
  }
  
  return false;
}
static bool isWidthKW(const char *Str, unsigned N) {
  if (N<2 || Str[0] != 'w')
    return false;
  for (unsigned i=1; i<N; ++i)
    if (!isdigit(Str[i]))
      return false;
  return true;
}
Token &Lexer::SetIdentifierTokenKind(Token &Result) {
  unsigned Length = BufferPos - Result.start;
  switch (Length) {
  case 3:
    if (memcmp("def", Result.start, 3) == 0)
      return SetTokenKind(Result, Token::KWReserved);
    if (memcmp("var", Result.start, 3) == 0)
      return SetTokenKind(Result, Token::KWReserved);
    break;

  case 4:
    if (memcmp("true", Result.start, 4) == 0)
      return SetTokenKind(Result, Token::KWTrue);
    break;

  case 5:
    if (memcmp("array", Result.start, 5) == 0)
      return SetTokenKind(Result, Token::KWArray);
    if (memcmp("false", Result.start, 5) == 0)
      return SetTokenKind(Result, Token::KWFalse);
    if (memcmp("query", Result.start, 5) == 0)
      return SetTokenKind(Result, Token::KWQuery);
    break;      
    
  case 6:
    if (memcmp("define", Result.start, 6) == 0)
      return SetTokenKind(Result, Token::KWReserved);
    break;

  case 7:
    if (memcmp("declare", Result.start, 7) == 0)
      return SetTokenKind(Result, Token::KWReserved);
    break;

  case 8:
    if (memcmp("symbolic", Result.start, 8) == 0)
      return SetTokenKind(Result, Token::KWSymbolic);
    break;
  }

  if (isReservedKW(Result.start, Length))
    return SetTokenKind(Result, Token::KWReserved);
  if (isWidthKW(Result.start, Length))
    return SetTokenKind(Result, Token::KWWidth);

  return SetTokenKind(Result, Token::Identifier);
}

void Lexer::SkipToEndOfLine() {
  for (;;) {
    int Char = GetNextChar();
    if (Char == -1 || Char =='\n')
      break;
  }
}

Token &Lexer::LexNumber(Token &Result) {
  while (isalnum(PeekNextChar()) || PeekNextChar()=='_')
    GetNextChar();
  return SetTokenKind(Result, Token::Number);
}

Token &Lexer::LexIdentifier(Token &Result) {
  while (isInternalIdentifierChar(PeekNextChar()))
    GetNextChar();

  // Recognize keywords specially.
  return SetIdentifierTokenKind(Result);
}

Token &Lexer::Lex(Token &Result) {
  Result.kind = Token::Unknown;
  Result.length = 0;
  Result.start = BufferPos;
  
  // Skip whitespace.
  while (isspace(PeekNextChar()))
    GetNextChar();

  Result.start = BufferPos;
  Result.line = LineNumber;
  Result.column = ColumnNumber;
  int Char = GetNextChar();
  switch (Char) {
  case -1:  return SetTokenKind(Result, Token::EndOfFile);
    
  case '(': return SetTokenKind(Result, Token::LParen);
  case ')': return SetTokenKind(Result, Token::RParen);
  case ',': return SetTokenKind(Result, Token::Comma);
  case ':': return SetTokenKind(Result, Token::Colon);
  case ';': return SetTokenKind(Result, Token::Semicolon);
  case '=': return SetTokenKind(Result, Token::Equals);
  case '@': return SetTokenKind(Result, Token::At);
  case '[': return SetTokenKind(Result, Token::LSquare);
  case ']': return SetTokenKind(Result, Token::RSquare);
  case '{': return SetTokenKind(Result, Token::LBrace);
  case '}': return SetTokenKind(Result, Token::RBrace);

  case '#':
    SkipToEndOfLine();
    return SetTokenKind(Result, Token::Comment);

  case '+': {
    if (isdigit(PeekNextChar()))
      return LexNumber(Result);
    else
      return SetTokenKind(Result, Token::Unknown);
  }

  case '-': {
    int Next = PeekNextChar();
    if (Next == '>')
      return GetNextChar(), SetTokenKind(Result, Token::Arrow);
    else if (isdigit(Next))
      return LexNumber(Result);
    else
      return SetTokenKind(Result, Token::Unknown);
    break;
  }

  default:
    if (isdigit(Char))
      return LexNumber(Result);
    else if (isalpha(Char) || Char == '_')
      return LexIdentifier(Result);
    return SetTokenKind(Result, Token::Unknown);
  }
}