about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
authorDan Liew <daniel.liew@imperial.ac.uk>2017-05-24 12:57:48 +0100
committerDan Liew <delcypher@gmail.com>2017-05-24 14:24:47 +0100
commit5dd3eee423c866aac6659dc2db44310737cf201d (patch)
tree900559852fc063cb5acf4d4ad594a41234858678
parenta3e82239a74cdc43c44bd5200434cb48c7dd1edb (diff)
downloadklee-5dd3eee423c866aac6659dc2db44310737cf201d.tar.gz
Rearchitect ExternalDispatcher
Previous changes for LLVM 3.6 using the MCJIT were incredibly hacky.
Those changes required creating and destroying the ExternalDispatcher
for every call to an external function. This is really bad

* It's very poor design. The Executor should not need to know
  about the internal implementation details of the ExternalDispatcher.
* It's likely very inefficient to keep creating and destroying the
  external dispatcher.

The new code does several things.

* Moves all of the implementation details into a `ExternalDispatcherImpl`
  class so that implementation details are not exposed in
  `ExternalDispatcher.h`.
* When using the MCJIT a module is compiled for every (instruction, function)
  tuple. This is necessary because the MCJIT compiles whole modules at a
  time and once a module is compiled it cannot be modified and
  re-compiled. Doing this means we get to reuse already generated code
  for call sites which hopefully will reduce the overhead of repeatedly
  executing the same call site.

A consequence of this change is that now the dispatcher function name
needs to be unique across all modules. To do this we just append the
module name because we guarantee that the module name is unique by
construction.

The code has also been clang-formatted.
-rw-r--r--lib/Core/Executor.cpp9
-rw-r--r--lib/Core/ExternalDispatcher.cpp267
-rw-r--r--lib/Core/ExternalDispatcher.h53
3 files changed, 197 insertions, 132 deletions
diff --git a/lib/Core/Executor.cpp b/lib/Core/Executor.cpp
index 1cdb770b..d71d767d 100644
--- a/lib/Core/Executor.cpp
+++ b/lib/Core/Executor.cpp
@@ -3091,16 +3091,7 @@ void Executor::callExternalFunction(ExecutionState &state,
     else
       klee_warning_once(function, "%s", os.str().c_str());
   }
-#if LLVM_VERSION_CODE >= LLVM_VERSION(3, 6)
-  // MCJIT needs unique module, so we create quick external dispatcher for call.
-  // reference:
-  // http://blog.llvm.org/2013/07/using-mcjit-with-kaleidoscope-tutorial.html
-  ExternalDispatcher *e = new ExternalDispatcher(function->getContext());
-  bool success = e->executeCall(function, target->inst, args);
-  delete e;
-#else
   bool success = externalDispatcher->executeCall(function, target->inst, args);
-#endif
   if (!success) {
     terminateStateOnError(state, "failed external call: " + function->getName(),
                           External);
diff --git a/lib/Core/ExternalDispatcher.cpp b/lib/Core/ExternalDispatcher.cpp
index 9701d35a..df0dd9a9 100644
--- a/lib/Core/ExternalDispatcher.cpp
+++ b/lib/Core/ExternalDispatcher.cpp
@@ -11,17 +11,17 @@
 #include "klee/Config/Version.h"
 
 #if LLVM_VERSION_CODE >= LLVM_VERSION(3, 3)
-#include "llvm/IR/Module.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
 #else
-#include "llvm/Module.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Instructions.h"
 #include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
 #endif
 #if LLVM_VERSION_CODE >= LLVM_VERSION(3, 6)
 #include "llvm/ExecutionEngine/MCJIT.h"
@@ -60,12 +60,48 @@ extern "C" {
 static void sigsegv_handler(int signal, siginfo_t *info, void *context) {
   longjmp(escapeCallJmpBuf, 1);
 }
+}
 
+namespace klee {
+
+class ExternalDispatcherImpl {
+private:
+  typedef std::map<const llvm::Instruction *, llvm::Function *> dispatchers_ty;
+  dispatchers_ty dispatchers;
+  llvm::Function *createDispatcher(llvm::Function *f, llvm::Instruction *i,
+                                   llvm::Module *module);
+  llvm::ExecutionEngine *executionEngine;
+  LLVMContext &ctx;
+  std::map<std::string, void *> preboundFunctions;
+  bool runProtectedCall(llvm::Function *f, uint64_t *args);
+  llvm::Module *singleDispatchModule;
+  std::vector<std::string> moduleIDs;
+  std::string &getFreshModuleID();
+
+public:
+  ExternalDispatcherImpl(llvm::LLVMContext &ctx);
+  ~ExternalDispatcherImpl();
+  bool executeCall(llvm::Function *function, llvm::Instruction *i,
+                   uint64_t *args);
+  void *resolveSymbol(const std::string &name);
+};
+
+std::string &ExternalDispatcherImpl::getFreshModuleID() {
+  // We store the module IDs because `llvm::Module` constructor takes the
+  // module ID as a StringRef so it doesn't own the ID.  Therefore we need to
+  // own the ID.
+  static uint64_t counter = 0;
+  std::string underlyingString;
+  llvm::raw_string_ostream ss(underlyingString);
+  ss << "ExternalDispatcherModule_" << counter;
+  moduleIDs.push_back(ss.str()); // moduleIDs now has a copy
+  ++counter;                     // Increment for next call
+  return moduleIDs.back();
 }
 
-void *ExternalDispatcher::resolveSymbol(const std::string &name) {
+void *ExternalDispatcherImpl::resolveSymbol(const std::string &name) {
   assert(executionEngine);
-  
+
   const char *str = name.c_str();
 
   // We use this to validate that function names can be resolved so we
@@ -79,10 +115,10 @@ void *ExternalDispatcher::resolveSymbol(const std::string &name) {
   void *addr = sys::DynamicLibrary::SearchForAddressOfSymbol(str);
   if (addr)
     return addr;
-  
+
   // If it has an asm specifier and starts with an underscore we retry
   // without the underscore. I (DWD) don't know why.
-  if (name[0] == 1 && str[0]=='_') { 
+  if (name[0] == 1 && str[0] == '_') {
     ++str;
     addr = sys::DynamicLibrary::SearchForAddressOfSymbol(str);
   }
@@ -90,16 +126,24 @@ void *ExternalDispatcher::resolveSymbol(const std::string &name) {
   return addr;
 }
 
-ExternalDispatcher::ExternalDispatcher(LLVMContext &ctx) {
-  dispatchModule = new Module("ExternalDispatcher", ctx);
-
+ExternalDispatcherImpl::ExternalDispatcherImpl(LLVMContext &ctx) : ctx(ctx) {
   std::string error;
-#if LLVM_VERSION_CODE >= LLVM_VERSION(3, 6)
-  dispatchModule_uniptr.reset(dispatchModule);
-  executionEngine = EngineBuilder(std::move(dispatchModule_uniptr)).create();
+  singleDispatchModule = new Module(getFreshModuleID(), ctx);
+#if LLVM_VERSION_CODE < LLVM_VERSION(3, 6)
+  // Use old JIT
+  executionEngine = ExecutionEngine::createJIT(singleDispatchModule, &error);
 #else
-  executionEngine = ExecutionEngine::createJIT(dispatchModule, &error);
+  // Use MCJIT.
+  // The MCJIT JITs whole modules at a time rather than individual functions
+  // so we will let it manage the modules.
+  // Note that we don't do anything with `singleDispatchModule`. This is just
+  // so we can use the EngineBuilder API.
+  auto dispatchModuleUniq = std::unique_ptr<Module>(singleDispatchModule);
+  executionEngine = EngineBuilder(std::move(dispatchModuleUniq))
+                        .setEngineKind(EngineKind::JIT)
+                        .create();
 #endif
+
   if (!executionEngine) {
     llvm::errs() << "unable to make jit: " << error << "\n";
     abort();
@@ -121,65 +165,90 @@ ExternalDispatcher::ExternalDispatcher(LLVMContext &ctx) {
   }
 
 #ifdef WINDOWS
-  preboundFunctions["getpid"] = (void*) (long) getpid;
-  preboundFunctions["putchar"] = (void*) (long) putchar;
-  preboundFunctions["printf"] = (void*) (long) printf;
-  preboundFunctions["fprintf"] = (void*) (long) fprintf;
-  preboundFunctions["sprintf"] = (void*) (long) sprintf;
+  preboundFunctions["getpid"] = (void *)(long)getpid;
+  preboundFunctions["putchar"] = (void *)(long)putchar;
+  preboundFunctions["printf"] = (void *)(long)printf;
+  preboundFunctions["fprintf"] = (void *)(long)fprintf;
+  preboundFunctions["sprintf"] = (void *)(long)sprintf;
 #endif
 }
 
-ExternalDispatcher::~ExternalDispatcher() {
+ExternalDispatcherImpl::~ExternalDispatcherImpl() {
   delete executionEngine;
+  // NOTE: the `executionEngine` owns all modules so
+  // we don't need to delete any of them.
 }
 
-bool ExternalDispatcher::executeCall(Function *f, Instruction *i, uint64_t *args) {
+bool ExternalDispatcherImpl::executeCall(Function *f, Instruction *i,
+                                         uint64_t *args) {
   dispatchers_ty::iterator it = dispatchers.find(i);
-  Function *dispatcher;
+  if (it != dispatchers.end()) {
+    // Code already JIT'ed for this
+    return runProtectedCall(it->second, args);
+  }
 
-  if (it == dispatchers.end()) {
+  // Code for this not JIT'ed. Do this now.
+  Function *dispatcher;
 #ifdef WINDOWS
-    std::map<std::string, void*>::iterator it2 = 
-      preboundFunctions.find(f->getName()));
-
-    if (it2 != preboundFunctions.end()) {
-      // only bind once
-      if (it2->second) {
-        executionEngine->addGlobalMapping(f, it2->second);
-        it2->second = 0;
-      }
+  std::map<std::string, void *>::iterator it2 =
+      preboundFunctions.find(f->getName());
+
+  if (it2 != preboundFunctions.end()) {
+    // only bind once
+    if (it2->second) {
+      executionEngine->addGlobalMapping(f, it2->second);
+      it2->second = 0;
     }
+  }
 #endif
 
-    dispatcher = createDispatcher(f,i);
-
-    dispatchers.insert(std::make_pair(i, dispatcher));
-
-    if (dispatcher) {
-      // Force the JIT execution engine to go ahead and build the function. This
-      // ensures that any errors or assertions in the compilation process will
-      // trigger crashes instead of being caught as aborts in the external
-      // function.
+  Module *dispatchModule = NULL;
 #if LLVM_VERSION_CODE >= LLVM_VERSION(3, 6)
-      executionEngine->finalizeObject();
+  // The MCJIT generates whole modules at a time so for every call that we
+  // haven't made before we need to create a new Module.
+  dispatchModule = new Module(getFreshModuleID(), ctx);
 #else
-      executionEngine->recompileAndRelinkFunction(dispatcher);
+  dispatchModule = this->singleDispatchModule;
 #endif
-    }
+  dispatcher = createDispatcher(f, i, dispatchModule);
+  dispatchers.insert(std::make_pair(i, dispatcher));
+
+// Force the JIT execution engine to go ahead and build the function. This
+// ensures that any errors or assertions in the compilation process will
+// trigger crashes instead of being caught as aborts in the external
+// function.
+#if LLVM_VERSION_CODE >= LLVM_VERSION(3, 6)
+  if (dispatcher) {
+    // The dispatchModule is now ready so tell MCJIT to generate the code for
+    // it.
+    auto dispatchModuleUniq = std::unique_ptr<Module>(dispatchModule);
+    executionEngine->addModule(
+        std::move(dispatchModuleUniq)); // MCJIT takes ownership
+    // Force code generation
+    uint64_t fnAddr =
+        executionEngine->getFunctionAddress(dispatcher->getName());
+    executionEngine->finalizeObject();
+    assert(fnAddr && "failed to get function address");
+    (void)fnAddr;
   } else {
-    dispatcher = it->second;
+    // MCJIT didn't take ownership of the module so delete it.
+    delete dispatchModule;
   }
-
+#else
+  if (dispatcher) {
+    // Old JIT works on a function at a time so compile the function.
+    executionEngine->recompileAndRelinkFunction(dispatcher);
+  }
+#endif
   return runProtectedCall(dispatcher, args);
 }
 
 // FIXME: This is not reentrant.
 static uint64_t *gTheArgsP;
-
-bool ExternalDispatcher::runProtectedCall(Function *f, uint64_t *args) {
+bool ExternalDispatcherImpl::runProtectedCall(Function *f, uint64_t *args) {
   struct sigaction segvAction, segvActionOld;
   bool res;
-  
+
   if (!f)
     return false;
 
@@ -203,86 +272,87 @@ bool ExternalDispatcher::runProtectedCall(Function *f, uint64_t *args) {
   return res;
 }
 
+// FIXME: This might have been relevant for the old JIT but the MCJIT
+// has a completly different implementation so this comment below is
+// likely irrelevant and misleading.
+//
 // For performance purposes we construct the stub in such a way that the
 // arguments pointer is passed through the static global variable gTheArgsP in
 // this file. This is done so that the stub function prototype trivially matches
 // the special cases that the JIT knows how to directly call. If this is not
 // done, then the jit will end up generating a nullary stub just to call our
 // stub, for every single function call.
-Function *ExternalDispatcher::createDispatcher(Function *target, Instruction *inst) {
+Function *ExternalDispatcherImpl::createDispatcher(Function *target,
+                                                   Instruction *inst,
+                                                   Module *module) {
   if (!resolveSymbol(target->getName()))
     return 0;
 
-  LLVMContext &ctx = target->getContext();
   CallSite cs;
-  if (inst->getOpcode()==Instruction::Call) {
+  if (inst->getOpcode() == Instruction::Call) {
     cs = CallSite(cast<CallInst>(inst));
   } else {
     cs = CallSite(cast<InvokeInst>(inst));
   }
 
-  Value **args = new Value*[cs.arg_size()];
+  Value **args = new Value *[cs.arg_size()];
 
-  std::vector<LLVM_TYPE_Q Type*> nullary;
-  
-  // MCJIT functions need unique names, or wrong function can be called
-  Function *dispatcher = Function::Create(FunctionType::get(Type::getVoidTy(ctx),
-							    nullary, false),
-					  GlobalVariable::ExternalLinkage, 
-					  "dispatcher_" + target->getName().str(),
-					  dispatchModule);
+  std::vector<LLVM_TYPE_Q Type *> nullary;
 
+  // MCJIT functions need unique names, or wrong function can be called.
+  // The module identifier is included because for the MCJIT we need
+  // unique function names across all `llvm::Modules`s.
+  std::string fnName =
+      "dispatcher_" + target->getName().str() + module->getModuleIdentifier();
+  Function *dispatcher =
+      Function::Create(FunctionType::get(Type::getVoidTy(ctx), nullary, false),
+                       GlobalVariable::ExternalLinkage, fnName, module);
 
   BasicBlock *dBB = BasicBlock::Create(ctx, "entry", dispatcher);
 
   // Get a Value* for &gTheArgsP, as an i64**.
-  Instruction *argI64sp = 
-    new IntToPtrInst(ConstantInt::get(Type::getInt64Ty(ctx),
-                                      (uintptr_t) (void*) &gTheArgsP),
-                     PointerType::getUnqual(PointerType::getUnqual(Type::getInt64Ty(ctx))),
-                     "argsp", dBB);
-  Instruction *argI64s = new LoadInst(argI64sp, "args", dBB); 
-  
+  Instruction *argI64sp = new IntToPtrInst(
+      ConstantInt::get(Type::getInt64Ty(ctx), (uintptr_t)(void *)&gTheArgsP),
+      PointerType::getUnqual(PointerType::getUnqual(Type::getInt64Ty(ctx))),
+      "argsp", dBB);
+  Instruction *argI64s = new LoadInst(argI64sp, "args", dBB);
+
   // Get the target function type.
-  LLVM_TYPE_Q FunctionType *FTy =
-    cast<FunctionType>(cast<PointerType>(target->getType())->getElementType());
+  LLVM_TYPE_Q FunctionType *FTy = cast<FunctionType>(
+      cast<PointerType>(target->getType())->getElementType());
 
   // Each argument will be passed by writing it into gTheArgsP[i].
   unsigned i = 0, idx = 2;
-  for (CallSite::arg_iterator ai = cs.arg_begin(), ae = cs.arg_end();
-       ai!=ae; ++ai, ++i) {
+  for (CallSite::arg_iterator ai = cs.arg_begin(), ae = cs.arg_end(); ai != ae;
+       ++ai, ++i) {
     // Determine the type the argument will be passed as. This accomodates for
     // the corresponding code in Executor.cpp for handling calls to bitcasted
     // functions.
-    LLVM_TYPE_Q Type *argTy = (i < FTy->getNumParams() ? FTy->getParamType(i) : 
-                               (*ai)->getType());
-    Instruction *argI64p = 
-      GetElementPtrInst::Create(argI64s, 
-                                ConstantInt::get(Type::getInt32Ty(ctx), idx),
-                                "", dBB);
-
-    Instruction *argp = new BitCastInst(argI64p, PointerType::getUnqual(argTy),
-                                        "", dBB);
+    LLVM_TYPE_Q Type *argTy =
+        (i < FTy->getNumParams() ? FTy->getParamType(i) : (*ai)->getType());
+    Instruction *argI64p = GetElementPtrInst::Create(
+        argI64s, ConstantInt::get(Type::getInt32Ty(ctx), idx), "", dBB);
+
+    Instruction *argp =
+        new BitCastInst(argI64p, PointerType::getUnqual(argTy), "", dBB);
     args[i] = new LoadInst(argp, "", dBB);
 
     unsigned argSize = argTy->getPrimitiveSizeInBits();
-    idx += ((!!argSize ? argSize : 64) + 63)/64;
+    idx += ((!!argSize ? argSize : 64) + 63) / 64;
   }
 
-  Constant *dispatchTarget =
-    dispatchModule->getOrInsertFunction(target->getName(), FTy,
-                                        target->getAttributes());
+  Constant *dispatchTarget = module->getOrInsertFunction(
+      target->getName(), FTy, target->getAttributes());
 #if LLVM_VERSION_CODE >= LLVM_VERSION(3, 0)
-  Instruction *result = CallInst::Create(dispatchTarget,
-                                         llvm::ArrayRef<Value *>(args, args+i),
-                                         "", dBB);
+  Instruction *result = CallInst::Create(
+      dispatchTarget, llvm::ArrayRef<Value *>(args, args + i), "", dBB);
 #else
-  Instruction *result = CallInst::Create(dispatchTarget, args, args+i, "", dBB);
+  Instruction *result =
+      CallInst::Create(dispatchTarget, args, args + i, "", dBB);
 #endif
   if (result->getType() != Type::getVoidTy(ctx)) {
-    Instruction *resp = 
-      new BitCastInst(argI64s, PointerType::getUnqual(result->getType()), 
-                      "", dBB);
+    Instruction *resp = new BitCastInst(
+        argI64s, PointerType::getUnqual(result->getType()), "", dBB);
     new StoreInst(result, resp, dBB);
   }
 
@@ -292,3 +362,18 @@ Function *ExternalDispatcher::createDispatcher(Function *target, Instruction *in
 
   return dispatcher;
 }
+
+ExternalDispatcher::ExternalDispatcher(llvm::LLVMContext &ctx)
+    : impl(new ExternalDispatcherImpl(ctx)) {}
+
+ExternalDispatcher::~ExternalDispatcher() { delete impl; }
+
+bool ExternalDispatcher::executeCall(llvm::Function *function,
+                                     llvm::Instruction *i, uint64_t *args) {
+  return impl->executeCall(function, i, args);
+}
+
+void *ExternalDispatcher::resolveSymbol(const std::string &name) {
+  return impl->resolveSymbol(name);
+}
+}
diff --git a/lib/Core/ExternalDispatcher.h b/lib/Core/ExternalDispatcher.h
index d869eb6f..c64dc7d8 100644
--- a/lib/Core/ExternalDispatcher.h
+++ b/lib/Core/ExternalDispatcher.h
@@ -14,44 +14,33 @@
 
 #include <map>
 #include <memory>
-#include <string>
 #include <stdint.h>
+#include <string>
 
 namespace llvm {
-  class ExecutionEngine;
-  class Instruction;
-  class LLVMContext;
-  class Function;
-  class FunctionType;
-  class Module;
+class Instruction;
+class LLVMContext;
+class Function;
 }
 
 namespace klee {
-  class ExternalDispatcher {
-  private:
-    typedef std::map<const llvm::Instruction*,llvm::Function*> dispatchers_ty;
-    dispatchers_ty dispatchers;
-#if LLVM_VERSION_CODE >= LLVM_VERSION(3, 6)
-    std::unique_ptr<llvm::Module> dispatchModule_uniptr;
-#endif
-    llvm::Module *dispatchModule;
-    llvm::ExecutionEngine *executionEngine;
-    std::map<std::string, void*> preboundFunctions;
-    
-    llvm::Function *createDispatcher(llvm::Function *f, llvm::Instruction *i);
-    bool runProtectedCall(llvm::Function *f, uint64_t *args);
-    
-  public:
-    ExternalDispatcher(llvm::LLVMContext &ctx);
-    ~ExternalDispatcher();
-
-    /* Call the given function using the parameter passing convention of
-     * ci with arguments in args[1], args[2], ... and writing the result
-     * into args[0].
-     */
-    bool executeCall(llvm::Function *function, llvm::Instruction *i, uint64_t *args);
-    void *resolveSymbol(const std::string &name);
-  };  
+class ExternalDispatcherImpl;
+class ExternalDispatcher {
+private:
+  ExternalDispatcherImpl *impl;
+
+public:
+  ExternalDispatcher(llvm::LLVMContext &ctx);
+  ~ExternalDispatcher();
+
+  /* Call the given function using the parameter passing convention of
+   * ci with arguments in args[1], args[2], ... and writing the result
+   * into args[0].
+   */
+  bool executeCall(llvm::Function *function, llvm::Instruction *i,
+                   uint64_t *args);
+  void *resolveSymbol(const std::string &name);
+};
 }
 
 #endif