about summary refs log tree commit diff homepage
path: root/lib/Module/KModule.cpp
diff options
context:
space:
mode:
authorDaniel Dunbar <daniel@zuster.org>2009-05-21 04:36:41 +0000
committerDaniel Dunbar <daniel@zuster.org>2009-05-21 04:36:41 +0000
commit6f290d8f9e9d7faac295cb51fc96884a18f4ded4 (patch)
tree46e7d426abc0c9f06ac472ac6f7f9e661b5d78cb /lib/Module/KModule.cpp
parenta55960edd4dcd7535526de8d2277642522aa0209 (diff)
downloadklee-6f290d8f9e9d7faac295cb51fc96884a18f4ded4.tar.gz
Initial KLEE checkin.
 - Lots more tweaks, documentation, and web page content is needed,
   but this should compile & work on OS X & Linux.


git-svn-id: https://llvm.org/svn/llvm-project/klee/trunk@72205 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Module/KModule.cpp')
-rw-r--r--lib/Module/KModule.cpp506
1 files changed, 506 insertions, 0 deletions
diff --git a/lib/Module/KModule.cpp b/lib/Module/KModule.cpp
new file mode 100644
index 00000000..5d88fbda
--- /dev/null
+++ b/lib/Module/KModule.cpp
@@ -0,0 +1,506 @@
+//===-- KModule.cpp -------------------------------------------------------===//
+//
+//                     The KLEE Symbolic Virtual Machine
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// FIXME: This does not belong here.
+#include "../Core/Common.h"
+
+#include "klee/Internal/Module/KModule.h"
+
+#include "Passes.h"
+
+#include "klee/Interpreter.h"
+#include "klee/Internal/Module/Cell.h"
+#include "klee/Internal/Module/KInstruction.h"
+#include "klee/Internal/Module/InstructionInfoTable.h"
+#include "klee/Internal/Support/ModuleUtil.h"
+
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/ValueSymbolTable.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/System/Path.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Scalar.h"
+
+#include <sstream>
+
+using namespace llvm;
+using namespace klee;
+
+namespace {
+  enum SwitchImplType {
+    eSwitchTypeSimple,
+    eSwitchTypeLLVM,
+    eSwitchTypeInternal
+  };
+
+  cl::list<std::string>
+  MergeAtExit("merge-at-exit");
+    
+  cl::opt<bool>
+  NoTruncateSourceLines("no-truncate-source-lines",
+                        cl::desc("Don't truncate long lines in the output source"));
+
+  cl::opt<bool>
+  OutputSource("output-source",
+               cl::desc("Write the assembly for the final transformed source"),
+               cl::init(true));
+
+  cl::opt<bool>
+  OutputModule("output-module",
+               cl::desc("Write the bitcode for the final transformed module"),
+               cl::init(false));
+
+  cl::opt<SwitchImplType>
+  SwitchType("switch-type", cl::desc("Select the implementation of switch"),
+             cl::values(clEnumValN(eSwitchTypeSimple, "simple", 
+                                   "lower to ordered branches"),
+                        clEnumValN(eSwitchTypeLLVM, "llvm", 
+                                   "lower using LLVM"),
+                        clEnumValN(eSwitchTypeInternal, "internal", 
+                                   "execute switch internally"),
+                        clEnumValEnd),
+             cl::init(eSwitchTypeInternal));
+  
+  cl::opt<bool>
+  DebugPrintEscapingFunctions("debug-print-escaping-functions", 
+                              cl::desc("Print functions whose address is taken."));
+}
+
+KModule::KModule(Module *_module) 
+  : module(_module),
+    targetData(new TargetData(module)),
+    dbgStopPointFn(0),
+    kleeMergeFn(0),
+    infos(0),
+    constantTable(0) {
+}
+
+KModule::~KModule() {
+  delete[] constantTable;
+  delete infos;
+
+  for (std::vector<KFunction*>::iterator it = functions.begin(), 
+         ie = functions.end(); it != ie; ++it)
+    delete *it;
+
+  delete targetData;
+  delete module;
+}
+
+/***/
+
+namespace llvm {
+extern void Optimize(Module*);
+}
+
+// what a hack
+static Function *getStubFunctionForCtorList(Module *m,
+                                            GlobalVariable *gv, 
+                                            std::string name) {
+  assert(!gv->isDeclaration() && !gv->hasInternalLinkage() &&
+         "do not support old LLVM style constructor/destructor lists");
+  
+  std::vector<const Type*> nullary;
+
+  Function *fn = Function::Create(FunctionType::get(Type::VoidTy, 
+						    nullary, false),
+				  GlobalVariable::InternalLinkage, 
+				  name,
+                              m);
+  BasicBlock *bb = BasicBlock::Create("entry", fn);
+  
+  // From lli:
+  // Should be an array of '{ int, void ()* }' structs.  The first value is
+  // the init priority, which we ignore.
+  ConstantArray *arr = dyn_cast<ConstantArray>(gv->getInitializer());
+  if (arr) {
+    for (unsigned i=0; i<arr->getNumOperands(); i++) {
+      ConstantStruct *cs = cast<ConstantStruct>(arr->getOperand(i));
+      assert(cs->getNumOperands()==2 && "unexpected element in ctor initializer list");
+      
+      Constant *fp = cs->getOperand(1);      
+      if (!fp->isNullValue()) {
+        if (llvm::ConstantExpr *ce = dyn_cast<llvm::ConstantExpr>(fp))
+          fp = ce->getOperand(0);
+
+        if (Function *f = dyn_cast<Function>(fp)) {
+	  CallInst::Create(f, "", bb);
+        } else {
+          assert(0 && "unable to get function pointer from ctor initializer list");
+        }
+      }
+    }
+  }
+  
+  ReturnInst::Create(bb);
+
+  return fn;
+}
+
+static void injectStaticConstructorsAndDestructors(Module *m) {
+  GlobalVariable *ctors = m->getNamedGlobal("llvm.global_ctors");
+  GlobalVariable *dtors = m->getNamedGlobal("llvm.global_dtors");
+  
+  if (ctors || dtors) {
+    Function *mainFn = m->getFunction("main");
+    assert(mainFn && "unable to find main function");
+
+    if (ctors)
+    CallInst::Create(getStubFunctionForCtorList(m, ctors, "klee.ctor_stub"),
+		     "", mainFn->begin()->begin());
+    if (dtors) {
+      Function *dtorStub = getStubFunctionForCtorList(m, dtors, "klee.dtor_stub");
+      for (Function::iterator it = mainFn->begin(), ie = mainFn->end();
+           it != ie; ++it) {
+        if (isa<ReturnInst>(it->getTerminator()))
+	  CallInst::Create(dtorStub, "", it->getTerminator());
+      }
+    }
+  }
+}
+
+static void forceImport(Module *m, const char *name, const Type *retType, ...) {
+  // If module lacks an externally visible symbol for the name then we
+  // need to create one. We have to look in the symbol table because
+  // we want to check everything (global variables, functions, and
+  // aliases).
+
+  Value *v = m->getValueSymbolTable().lookup(name);
+  GlobalValue *gv = dyn_cast_or_null<GlobalValue>(v);
+
+  if (!gv || gv->hasInternalLinkage()) {
+    va_list ap;
+
+    va_start(ap, retType);
+    std::vector<const Type *> argTypes;
+    while (const Type *t = va_arg(ap, const Type*))
+      argTypes.push_back(t);
+    va_end(ap);
+
+    m->getOrInsertFunction(name, FunctionType::get(retType, argTypes, false));
+  }
+}
+
+void KModule::prepare(const Interpreter::ModuleOptions &opts,
+                      InterpreterHandler *ih) {
+  if (!MergeAtExit.empty()) {
+    Function *mergeFn = module->getFunction("klee_merge");
+    if (!mergeFn) {
+      const llvm::FunctionType *Ty = 
+        FunctionType::get(Type::VoidTy, std::vector<const Type*>(), false);
+      mergeFn = Function::Create(Ty, GlobalVariable::ExternalLinkage,
+				 "klee_merge",
+				 module);
+    }
+
+    for (cl::list<std::string>::iterator it = MergeAtExit.begin(), 
+           ie = MergeAtExit.end(); it != ie; ++it) {
+      std::string &name = *it;
+      Function *f = module->getFunction(name);
+      if (!f) {
+        klee_error("cannot insert merge-at-exit for: %s (cannot find)",
+                   name.c_str());
+      } else if (f->isDeclaration()) {
+        klee_error("cannot insert merge-at-exit for: %s (external)",
+                   name.c_str());
+      }
+
+      BasicBlock *exit = BasicBlock::Create("exit", f);
+      PHINode *result = 0;
+      if (f->getReturnType() != Type::VoidTy)
+        result = PHINode::Create(f->getReturnType(), "retval", exit);
+      CallInst::Create(mergeFn, "", exit);
+      ReturnInst::Create(result, exit);
+
+      llvm::cerr << "KLEE: adding klee_merge at exit of: " << name << "\n";
+      for (llvm::Function::iterator bbit = f->begin(), bbie = f->end(); 
+           bbit != bbie; ++bbit) {
+        if (&*bbit != exit) {
+          Instruction *i = bbit->getTerminator();
+          if (i->getOpcode()==Instruction::Ret) {
+            if (result) {
+              result->addIncoming(i->getOperand(0), bbit);
+            }
+            i->eraseFromParent();
+	    BranchInst::Create(exit, bbit);
+          }
+        }
+      }
+    }
+  }
+
+  // Inject checks prior to optimization... we also perform the
+  // invariant transformations that we will end up doing later so that
+  // optimize is seeing what is as close as possible to the final
+  // module.
+  PassManager pm;
+  pm.add(new RaiseAsmPass());
+  if (opts.CheckDivZero) pm.add(new DivCheckPass());
+  // FIXME: This false here is to work around a bug in
+  // IntrinsicLowering which caches values which may eventually be
+  // deleted (via RAUW). This can be removed once LLVM fixes this
+  // issue.
+  pm.add(new IntrinsicCleanerPass(*targetData, false));
+  pm.run(*module);
+
+  if (opts.Optimize)
+    Optimize(module);
+
+  // Force importing functions required by intrinsic lowering. Kind of
+  // unfortunate clutter when we don't need them but we won't know
+  // that until after all linking and intrinsic lowering is
+  // done. After linking and passes we just try to manually trim these
+  // by name. We only add them if such a function doesn't exist to
+  // avoid creating stale uses.
+
+  forceImport(module, "memcpy", PointerType::getUnqual(Type::Int8Ty),
+              PointerType::getUnqual(Type::Int8Ty),
+              PointerType::getUnqual(Type::Int8Ty),
+              targetData->getIntPtrType(), (Type*) 0);
+  forceImport(module, "memmove", PointerType::getUnqual(Type::Int8Ty),
+              PointerType::getUnqual(Type::Int8Ty),
+              PointerType::getUnqual(Type::Int8Ty),
+              targetData->getIntPtrType(), (Type*) 0);
+  forceImport(module, "memset", PointerType::getUnqual(Type::Int8Ty),
+              PointerType::getUnqual(Type::Int8Ty),
+              Type::Int32Ty,
+              targetData->getIntPtrType(), (Type*) 0);
+
+  // FIXME: Missing force import for various math functions.
+
+  // FIXME: Find a way that we can test programs without requiring
+  // this to be linked in, it makes low level debugging much more
+  // annoying.
+  llvm::sys::Path path(opts.LibraryDir);
+  path.appendComponent("libintrinsic.bca");
+  module = linkWithLibrary(module, path.c_str());
+
+  // Needs to happen after linking (since ctors/dtors can be modified)
+  // and optimization (since global optimization can rewrite lists).
+  injectStaticConstructorsAndDestructors(module);
+
+  // Finally, run the passes that maintain invariants we expect during
+  // interpretation. We run the intrinsic cleaner just in case we
+  // linked in something with intrinsics but any external calls are
+  // going to be unresolved. We really need to handle the intrinsics
+  // directly I think?
+  PassManager pm3;
+  pm3.add(createCFGSimplificationPass());
+  switch(SwitchType) {
+  case eSwitchTypeInternal: break;
+  case eSwitchTypeSimple: pm3.add(new LowerSwitchPass()); break;
+  case eSwitchTypeLLVM:  pm3.add(createLowerSwitchPass()); break;
+  default: klee_error("invalid --switch-type");
+  }
+  pm3.add(new IntrinsicCleanerPass(*targetData));
+  pm3.add(new PhiCleanerPass());
+  pm3.run(*module);
+
+  // For cleanliness see if we can discard any of the functions we
+  // forced to import.
+  Function *f;
+  f = module->getFunction("memcpy");
+  if (f && f->use_empty()) f->eraseFromParent();
+  f = module->getFunction("memmove");
+  if (f && f->use_empty()) f->eraseFromParent();
+  f = module->getFunction("memset");
+  if (f && f->use_empty()) f->eraseFromParent();
+
+
+  // Write out the .ll assembly file. We truncate long lines to work
+  // around a kcachegrind parsing bug (it puts them on new lines), so
+  // that source browsing works.
+  if (OutputSource) {
+    std::ostream *os = ih->openOutputFile("assembly.ll");
+    assert(os && os->good() && "unable to open source output");
+
+    // We have an option for this in case the user wants a .ll they
+    // can compile.
+    if (NoTruncateSourceLines) {
+      *os << *module;
+    } else {
+      bool truncated = false;
+      std::stringstream buffer;
+      buffer << *module;
+      std::string string = buffer.str();
+      const char *position = string.c_str();
+
+      for (;;) {
+        const char *end = index(position, '\n');
+        if (!end) {
+          *os << position;
+          break;
+        } else {
+          unsigned count = (end - position) + 1;
+          if (count<255) {
+            os->write(position, count);
+          } else {
+            os->write(position, 254);
+            *os << "\n";
+            truncated = true;
+          }
+          position = end+1;
+        }
+      }
+    }
+
+    delete os;
+  }
+
+  if (OutputModule) {
+    std::ostream *f = ih->openOutputFile("final.bc");
+    WriteBitcodeToFile(module, *f);
+    delete f;
+  }
+
+  dbgStopPointFn = module->getFunction("llvm.dbg.stoppoint");
+  kleeMergeFn = module->getFunction("klee_merge");
+
+  /* Build shadow structures */
+
+  infos = new InstructionInfoTable(module);  
+  
+  for (Module::iterator it = module->begin(), ie = module->end();
+       it != ie; ++it) {
+    if (it->isDeclaration())
+      continue;
+
+    KFunction *kf = new KFunction(it, this);
+    
+    for (unsigned i=0; i<kf->numInstructions; ++i) {
+      KInstruction *ki = kf->instructions[i];
+      ki->info = &infos->getInfo(ki->inst);
+    }
+
+    functions.push_back(kf);
+    functionMap.insert(std::make_pair(it, kf));
+  }
+
+  /* Compute various interesting properties */
+
+  for (std::vector<KFunction*>::iterator it = functions.begin(), 
+         ie = functions.end(); it != ie; ++it) {
+    KFunction *kf = *it;
+    if (functionEscapes(kf->function))
+      escapingFunctions.insert(kf->function);
+  }
+
+  if (DebugPrintEscapingFunctions && !escapingFunctions.empty()) {
+    llvm::cerr << "KLEE: escaping functions: [";
+    for (std::set<Function*>::iterator it = escapingFunctions.begin(), 
+         ie = escapingFunctions.end(); it != ie; ++it) {
+      llvm::cerr << (*it)->getName() << ", ";
+    }
+    llvm::cerr << "]\n";
+  }
+}
+
+KConstant* KModule::getKConstant(Constant *c) {
+  std::map<llvm::Constant*, KConstant*>::iterator it = constantMap.find(c);
+  if (it != constantMap.end())
+    return it->second;
+  return NULL;
+}
+
+unsigned KModule::getConstantID(Constant *c, KInstruction* ki) {
+  KConstant *kc = getKConstant(c);
+  if (kc)
+    return kc->id;  
+
+  unsigned id = constants.size();
+  kc = new KConstant(c, id, ki);
+  constantMap.insert(std::make_pair(c, kc));
+  constants.push_back(c);
+  return id;
+}
+
+/***/
+
+KConstant::KConstant(llvm::Constant* _ct, unsigned _id, KInstruction* _ki) {
+  ct = _ct;
+  id = _id;
+  ki = _ki;
+}
+
+/***/
+
+KFunction::KFunction(llvm::Function *_function,
+                     KModule *km) 
+  : function(_function),
+    numArgs(function->arg_size()),
+    numInstructions(0),
+    trackCoverage(true) {
+  for (llvm::Function::iterator bbit = function->begin(), 
+         bbie = function->end(); bbit != bbie; ++bbit) {
+    BasicBlock *bb = bbit;
+    basicBlockEntry[bb] = numInstructions;
+    numInstructions += bb->size();
+  }
+
+  instructions = new KInstruction*[numInstructions];
+
+  std::map<Instruction*, unsigned> registerMap;
+
+  // The first arg_size() registers are reserved for formals.
+  unsigned rnum = numArgs;
+  for (llvm::Function::iterator bbit = function->begin(), 
+         bbie = function->end(); bbit != bbie; ++bbit) {
+    for (llvm::BasicBlock::iterator it = bbit->begin(), ie = bbit->end();
+         it != ie; ++it)
+      registerMap[it] = rnum++;
+  }
+  numRegisters = rnum;
+  
+  unsigned i = 0;
+  for (llvm::Function::iterator bbit = function->begin(), 
+         bbie = function->end(); bbit != bbie; ++bbit) {
+    for (llvm::BasicBlock::iterator it = bbit->begin(), ie = bbit->end();
+         it != ie; ++it) {
+      KInstruction *ki;
+
+      switch(it->getOpcode()) {
+      case Instruction::GetElementPtr:
+        ki = new KGEPInstruction(); break;
+      default:
+        ki = new KInstruction(); break;
+      }
+
+      unsigned numOperands = it->getNumOperands();
+      ki->inst = it;      
+      ki->operands = new int[numOperands];
+      ki->dest = registerMap[it];
+      for (unsigned j=0; j<numOperands; j++) {
+        Value *v = it->getOperand(j);
+        
+        if (Instruction *inst = dyn_cast<Instruction>(v)) {
+          ki->operands[j] = registerMap[inst];
+        } else if (Argument *a = dyn_cast<Argument>(v)) {
+          ki->operands[j] = a->getArgNo();
+        } else if (isa<BasicBlock>(v) || isa<InlineAsm>(v)) {
+          ki->operands[j] = -1;
+        } else {
+          assert(isa<Constant>(v));
+          Constant *c = cast<Constant>(v);
+          ki->operands[j] = -(km->getConstantID(c, ki) + 2);
+        }
+      }
+
+      instructions[i++] = ki;
+    }
+  }
+}
+
+KFunction::~KFunction() {
+  for (unsigned i=0; i<numInstructions; ++i)
+    delete instructions[i];
+  delete[] instructions;
+}