about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
authorDan Liew <delcypher@gmail.com>2014-02-14 14:16:35 +0000
committerDan Liew <delcypher@gmail.com>2014-02-14 14:16:35 +0000
commit2ad968e1c05cfa40c75d0e5ba689beb91ed9d572 (patch)
tree4c5c66a2792379e9c87fcb113d8061cd6408c575
parent3c24ce15982948b911bb7910f6ab4481aced8818 (diff)
parentfd0c6614ec5eb93fb00de029a79de4247511d0ef (diff)
downloadklee-2ad968e1c05cfa40c75d0e5ba689beb91ed9d572.tar.gz
Merge pull request #70 from MartinNowack/feature_reading_archive
Add support for archive and single bc file linking
-rw-r--r--lib/Core/SpecialFunctionHandler.cpp39
-rw-r--r--lib/Core/SpecialFunctionHandler.h33
-rw-r--r--lib/Module/ModuleUtil.cpp360
3 files changed, 414 insertions, 18 deletions
diff --git a/lib/Core/SpecialFunctionHandler.cpp b/lib/Core/SpecialFunctionHandler.cpp
index 04f32780..ca9f7b63 100644
--- a/lib/Core/SpecialFunctionHandler.cpp
+++ b/lib/Core/SpecialFunctionHandler.cpp
@@ -38,20 +38,14 @@ using namespace klee;
 
 ///
 
-struct HandlerInfo {
-  const char *name;
-  SpecialFunctionHandler::Handler handler;
-  bool doesNotReturn; /// Intrinsic terminates the process
-  bool hasReturnValue; /// Intrinsic has a return value
-  bool doNotOverride; /// Intrinsic should not be used if already defined
-};
+
 
 // FIXME: We are more or less committed to requiring an intrinsic
 // library these days. We can move some of this stuff there,
 // especially things like realloc which have complicated semantics
 // w.r.t. forking. Among other things this makes delayed query
 // dispatch easier to implement.
-HandlerInfo handlerInfo[] = {
+static SpecialFunctionHandler::HandlerInfo handlerInfo[] = {
 #define add(name, handler, ret) { name, \
                                   &SpecialFunctionHandler::handler, \
                                   false, ret, false }
@@ -117,12 +111,37 @@ HandlerInfo handlerInfo[] = {
 #undef add  
 };
 
+/// Returns an iterator positioned on the first entry of the static
+/// handlerInfo table.
+SpecialFunctionHandler::const_iterator SpecialFunctionHandler::begin() {
+  const_iterator first(handlerInfo);
+  return first;
+}
+
+/// Returns the past-the-end iterator. It is represented by a null base
+/// pointer, which is the state operator++ puts an iterator into once it has
+/// stepped past the last table entry.
+SpecialFunctionHandler::const_iterator SpecialFunctionHandler::end() {
+  const_iterator sentinel(0); // NULL base pointer marks the end
+  return sentinel;
+}
+
+/// Pre-increment: move to the next table entry. Stepping past the last
+/// entry resets the iterator to the sentinel state (null base, index 0),
+/// which is the same state end() returns.
+SpecialFunctionHandler::const_iterator& SpecialFunctionHandler::const_iterator::operator++() {
+  ++index;
+  if (index < SpecialFunctionHandler::size())
+    return *this; // still inside the table
+
+  // Ran off the end of the table: become the sentinel
+  base = 0;
+  index = 0;
+  return *this;
+}
+
+/// Number of entries in the static handlerInfo table.
+int SpecialFunctionHandler::size() {
+  const int numHandlers = sizeof(handlerInfo) / sizeof(handlerInfo[0]);
+  return numHandlers;
+}
+
 SpecialFunctionHandler::SpecialFunctionHandler(Executor &_executor) 
   : executor(_executor) {}
 
 
 void SpecialFunctionHandler::prepare() {
-  unsigned N = sizeof(handlerInfo)/sizeof(handlerInfo[0]);
+  unsigned N = size();
 
   for (unsigned i=0; i<N; ++i) {
     HandlerInfo &hi = handlerInfo[i];
@@ -715,3 +734,5 @@ void SpecialFunctionHandler::handleMarkGlobal(ExecutionState &state,
     mo->isGlobal = true;
   }
 }
+
+
diff --git a/lib/Core/SpecialFunctionHandler.h b/lib/Core/SpecialFunctionHandler.h
index 02e70ed4..f68c6edb 100644
--- a/lib/Core/SpecialFunctionHandler.h
+++ b/lib/Core/SpecialFunctionHandler.h
@@ -10,6 +10,7 @@
 #ifndef KLEE_SPECIALFUNCTIONHANDLER_H
 #define KLEE_SPECIALFUNCTIONHANDLER_H
 
+#include <iterator>
 #include <map>
 #include <vector>
 #include <string>
@@ -37,6 +38,38 @@ namespace klee {
     handlers_ty handlers;
     class Executor &executor;
 
+    /// One entry of the special-function table: the name of an intrinsic,
+    /// the member function that handles it, and flags describing it.
+    struct HandlerInfo {
+      const char *name;
+      SpecialFunctionHandler::Handler handler;
+      bool doesNotReturn; /// Intrinsic terminates the process
+      bool hasReturnValue; /// Intrinsic has a return value
+      bool doNotOverride; /// Intrinsic should not be used if already defined
+    };
+
+    // const_iterator to iterate over stored HandlerInfo.
+    //
+    // The past-the-end position is represented by a null base pointer with
+    // index 0; operator++ switches to that state after the last entry.
+    //
+    // FIXME: The class advertises random_access_iterator_tag but only
+    // provides increment, dereference, indexing and (in)equality.
+    // Implement >, >=, <=, < and the arithmetic operators.
+    class const_iterator : public std::iterator<std::random_access_iterator_tag, HandlerInfo>
+    {
+      private:
+        value_type* base; // first element of the table, or 0 for the sentinel
+        int index;        // offset of the current element from base
+      public:
+      const_iterator(value_type* hi) : base(hi), index(0) {}
+      const_iterator& operator++();  // pre-fix
+      // post-fix: advance and return a copy of the previous position
+      const_iterator operator++(int) { const_iterator prev(*this); ++(*this); return prev; }
+      const value_type& operator*() const { return base[index];}
+      const value_type* operator->() const { return &(base[index]);}
+      // Element i positions after the current one
+      const value_type& operator[](int i) const { return base[index + i];}
+      // The right-hand side is taken by const reference so that temporaries
+      // (for example the value returned by end()) can be compared directly.
+      bool operator==(const const_iterator& rhs) const { return (rhs.base + rhs.index) == (this->base + this->index);}
+      bool operator!=(const const_iterator& rhs) const { return !(*this == rhs);}
+    };
+
+    static const_iterator begin();
+    static const_iterator end();
+    static int size();
+
+
+
   public:
     SpecialFunctionHandler(Executor &_executor);
 
diff --git a/lib/Module/ModuleUtil.cpp b/lib/Module/ModuleUtil.cpp
index 9ae72936..58096de4 100644
--- a/lib/Module/ModuleUtil.cpp
+++ b/lib/Module/ModuleUtil.cpp
@@ -11,6 +11,7 @@
 #include "klee/Config/Version.h"
 // FIXME: This does not belong here.
 #include "../Core/Common.h"
+#include "../Core/SpecialFunctionHandler.h"
 
 #if LLVM_VERSION_CODE >= LLVM_VERSION(3, 3)
 #include "llvm/Bitcode/ReaderWriter.h"
@@ -19,6 +20,12 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IRReader/IRReader.h"
 #include "llvm/IR/Module.h"
+#include "llvm/Object/Archive.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/Error.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/raw_os_ostream.h"
+#include "llvm/IR/ValueSymbolTable.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/DataStream.h"
 #else
@@ -32,12 +39,14 @@
 #include "llvm/Assembly/AssemblyAnnotationWriter.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/InstIterator.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Support/Path.h"
 
 #include <map>
+#include <set>
 #include <iostream>
 #include <fstream>
 #include <sstream>
@@ -46,18 +55,349 @@
 using namespace llvm;
 using namespace klee;
 
+#if LLVM_VERSION_CODE >= LLVM_VERSION(3, 3)
+/// Based on GetAllUndefinedSymbols() from LLVM3.2
+///
+/// GetAllUndefinedSymbols - calculates the set of undefined symbols that still
+/// exist in an LLVM module. This is a bit tricky because there may be two
+/// symbols with the same name but different LLVM types that will be resolved to
+/// each other but aren't currently (thus we need to treat it as resolved).
+///
+/// Names starting with "llvm." (LLVM intrinsics) and names listed in
+/// SpecialFunctionHandler's handler table (KLEE intrinsics) are never
+/// reported as undefined.
+///
+/// Inputs:
+///  M - The module in which to find undefined symbols.
+///
+/// Outputs:
+///  UndefinedSymbols - A set of C++ strings containing the name of all
+///                     undefined symbols. Previous contents are discarded.
+///
+static void
+GetAllUndefinedSymbols(Module *M, std::set<std::string> &UndefinedSymbols) {
+  static const std::string llvmIntrinsicPrefix="llvm.";
+  std::set<std::string> DefinedSymbols;
+  UndefinedSymbols.clear();
+  DEBUG_WITH_TYPE("klee_linker", dbgs() << "*** Computing undefined symbols ***\n");
+
+  // Functions: declarations are candidates for "undefined", non-local
+  // definitions count as defined.
+  for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I)
+    if (I->hasName()) {
+      if (I->isDeclaration())
+        UndefinedSymbols.insert(I->getName());
+      else if (!I->hasLocalLinkage()) {
+        assert(!I->hasDLLImportLinkage()
+               && "Found dllimported non-external symbol!");
+        DefinedSymbols.insert(I->getName());
+      }
+    }
+
+  // Global variables: same classification as for functions.
+  for (Module::global_iterator I = M->global_begin(), E = M->global_end();
+       I != E; ++I)
+    if (I->hasName()) {
+      if (I->isDeclaration())
+        UndefinedSymbols.insert(I->getName());
+      else if (!I->hasLocalLinkage()) {
+        assert(!I->hasDLLImportLinkage()
+               && "Found dllimported non-external symbol!");
+        DefinedSymbols.insert(I->getName());
+      }
+    }
+
+  // Every named alias counts as a definition.
+  for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end();
+       I != E; ++I)
+    if (I->hasName())
+      DefinedSymbols.insert(I->getName());
+
+
+  // Prune out any defined symbols from the undefined symbols set
+  // and other symbols we don't want to treat as an undefined symbol.
+  // Names are collected first and erased afterwards so that the set is not
+  // modified while it is being iterated.
+  std::vector<std::string> SymbolsToRemove;
+  for (std::set<std::string>::iterator I = UndefinedSymbols.begin();
+       I != UndefinedSymbols.end(); ++I )
+  {
+    if (DefinedSymbols.count(*I))
+    {
+      SymbolsToRemove.push_back(*I);
+      continue;
+    }
+
+    // Strip out llvm intrinsics
+    if ( (I->size() >= llvmIntrinsicPrefix.size() ) &&
+       (I->compare(0, llvmIntrinsicPrefix.size(), llvmIntrinsicPrefix) == 0) )
+    {
+      DEBUG_WITH_TYPE("klee_linker", dbgs() << "LLVM intrinsic " << *I <<
+                      " will be removed from undefined symbols"<< "\n");
+      SymbolsToRemove.push_back(*I);
+      continue;
+    }
+
+    // Symbol really is undefined
+    DEBUG_WITH_TYPE("klee_linker", dbgs() << "Symbol " << *I << " is undefined.\n");
+  }
+
+  // Remove KLEE intrinsics from set of undefined symbols
+  for (SpecialFunctionHandler::const_iterator sf = SpecialFunctionHandler::begin(),
+       se = SpecialFunctionHandler::end(); sf != se; ++sf)
+  {
+    if (UndefinedSymbols.find(sf->name) == UndefinedSymbols.end())
+      continue;
+
+    SymbolsToRemove.push_back(sf->name);
+    DEBUG_WITH_TYPE("klee_linker", dbgs() << "KLEE intrinsic " << sf->name <<
+                    " will be removed from undefined symbols"<< "\n");
+  }
+
+  // Now remove the symbols from undefined set.
+  for (size_t i = 0, j = SymbolsToRemove.size(); i < j; ++i )
+    UndefinedSymbols.erase(SymbolsToRemove[i]);
+
+  DEBUG_WITH_TYPE("klee_linker", dbgs() << "*** Finished computing undefined symbols ***\n");
+}
+
+
+/*!  A helper function for linkBCA() that frees every module still held in
+ *   \p archiveModules. Slots that were already reset to NULL are harmless
+ *   because deleting a null pointer does nothing.
+ */
+static void CleanUpLinkBCA(std::vector<Module*> &archiveModules)
+{
+  for (size_t idx = 0, count = archiveModules.size(); idx != count; ++idx)
+    delete archiveModules[idx];
+}
+
+/*! A helper function for klee::linkWithLibrary() that links in an archive of bitcode
+ *  modules into a composite bitcode module
+ *
+ *  Every archive member is parsed up front. A member is then linked into
+ *  \p composite when it defines a symbol that is currently undefined there;
+ *  passes over the members repeat until a pass leaves the set of undefined
+ *  symbols unchanged. Modules that were parsed but not linked are freed
+ *  before returning, on the success path and on every failure path.
+ *
+ *  \param[in] archive Archive of bitcode modules
+ *  \param[in,out] composite The bitcode module to link against the archive
+ *  \param[out] errorMessage Set to an error message if linking fails
+ *
+ *  \return True if linking succeeds otherwise false
+ */
+static bool linkBCA(object::Archive* archive, Module* composite, std::string& errorMessage)
+{
+  llvm::raw_string_ostream SS(errorMessage);
+  std::vector<Module*> archiveModules;
+
+  // Is this efficient? Could we use StringRef instead?
+  std::set<std::string> undefinedSymbols;
+  GetAllUndefinedSymbols(composite, undefinedSymbols);
+
+  if (undefinedSymbols.size() == 0)
+  {
+    // Nothing to do
+    DEBUG_WITH_TYPE("klee_linker", dbgs() << "No undefined symbols. Not linking anything in!\n");
+    return true;
+  }
+
+  DEBUG_WITH_TYPE("klee_linker", dbgs() << "Loading modules\n");
+  // Load all bitcode files in to memory so we can examine their symbols.
+  // Every failure exit below releases the modules parsed so far.
+  for (object::Archive::child_iterator AI = archive->begin_children(),
+       AE = archive->end_children(); AI != AE; ++AI)
+  {
+
+    StringRef memberName;
+    error_code ec = AI->getName(memberName);
+
+    if ( ec == errc::success )
+    {
+      DEBUG_WITH_TYPE("klee_linker", dbgs() << "Loading archive member " << memberName << "\n");
+    }
+    else
+    {
+      errorMessage="Archive member does not have a name!\n";
+      CleanUpLinkBCA(archiveModules);
+      return false;
+    }
+
+    OwningPtr<object::Binary> child;
+    ec = AI->getAsBinary(child);
+    if (ec != object::object_error::success)
+    {
+      // If we can't open as a binary object file it's hopefully a bitcode file
+
+      OwningPtr<MemoryBuffer> buff; // Once this is destroyed will Module still be valid??
+      Module *Result = 0;
+
+      if (error_code bufferEc = AI->getMemoryBuffer(buff))
+      {
+        SS << "Failed to get MemoryBuffer: " << bufferEc.message();
+        SS.flush();
+        CleanUpLinkBCA(archiveModules);
+        return false;
+      }
+
+      if (buff)
+      {
+        // FIXME: Maybe load bitcode file lazily? Then if we need to link, materialise the module
+        // The parser reports into a local string: errorMessage is the
+        // backing store of SS, so writing to it directly would make the
+        // parser's text appear twice in the final message.
+        std::string parseError;
+        Result = ParseBitcodeFile(buff.get(), getGlobalContext(), &parseError);
+
+        if(!Result)
+        {
+          SS << "Loading module failed : " << parseError << "\n";
+          SS.flush();
+          CleanUpLinkBCA(archiveModules);
+          return false;
+        }
+        archiveModules.push_back(Result);
+      }
+      else
+      {
+        errorMessage="Buffer was NULL!";
+        CleanUpLinkBCA(archiveModules);
+        return false;
+      }
+
+    }
+    else if (object::ObjectFile *o = dyn_cast<object::ObjectFile>(child.get()))
+    {
+      SS << "Object file " << o->getFileName().data() <<
+            " in archive is not supported";
+      SS.flush();
+      CleanUpLinkBCA(archiveModules);
+      return false;
+    }
+    else
+    {
+      // NOTE(review): ec holds the success code on this path (the member
+      // opened as a binary that is not an ObjectFile), so the appended
+      // message text is not informative.
+      SS << "Loading archive child with error "<< ec.message();
+      SS.flush();
+      CleanUpLinkBCA(archiveModules);
+      return false;
+    }
+
+  }
+
+  DEBUG_WITH_TYPE("klee_linker", dbgs() << "Loaded " << archiveModules.size() << " modules\n");
+
+
+  std::set<std::string> previouslyUndefinedSymbols;
+
+  // Walk through the modules looking for definitions of undefined symbols
+  // if we find a match we should link that module in.
+  unsigned int passCounter=0;
+  do
+  {
+    unsigned int modulesLoadedOnPass=0;
+    previouslyUndefinedSymbols = undefinedSymbols;
+
+    for (size_t i = 0, j = archiveModules.size(); i < j; ++i)
+    {
+      // skip modules that were already linked in (their slot was reset to 0)
+      if (archiveModules[i] == 0)
+        continue;
+      Module * M = archiveModules[i];
+      // Look for the undefined symbols in the composite module
+      for (std::set<std::string>::iterator S = undefinedSymbols.begin(), SE = undefinedSymbols.end();
+         S != SE; ++S)
+      {
+
+        // FIXME: We aren't handling weak symbols here!
+        // However the algorithm used in LLVM3.2 didn't seem to either
+        // so maybe it doesn't matter?
+
+        if ( GlobalValue* GV = dyn_cast_or_null<GlobalValue>(M->getValueSymbolTable().lookup(*S)))
+        {
+          if (GV->isDeclaration()) continue; // Not a definition
+
+          DEBUG_WITH_TYPE("klee_linker", dbgs() << "Found " << GV->getName() <<
+              " in " << M->getModuleIdentifier() << "\n");
+
+
+          // As above, keep the linker's message out of SS's backing string.
+          std::string linkError;
+          if (Linker::LinkModules(composite, M, Linker::DestroySource, &linkError))
+          {
+            // Linking failed
+            SS << "Linking archive module with composite failed:" << linkError;
+            SS.flush();
+            CleanUpLinkBCA(archiveModules);
+            return false;
+          }
+          else
+          {
+            // Link succeeded, now clean up
+            modulesLoadedOnPass++;
+            DEBUG_WITH_TYPE("klee_linker", dbgs() << "Linking succeeded.\n");
+
+            delete M;
+            archiveModules[i] = 0;
+
+            // We need to recompute the undefined symbols in the composite module
+            // after linking. This rebuilds the set being iterated, so the
+            // break below must follow immediately.
+            GetAllUndefinedSymbols(composite, undefinedSymbols);
+
+            break; // Look for symbols in next module
+          }
+
+        }
+      }
+    }
+
+    passCounter++;
+    DEBUG_WITH_TYPE("klee_linker", dbgs() << "Completed " << passCounter <<
+                " linker passes.\n" << modulesLoadedOnPass <<
+                " modules loaded on the last pass\n");
+  } while (undefinedSymbols != previouslyUndefinedSymbols); // Iterate until we reach a fixed point
+
+
+  // What's left in archiveModules we don't want to link in so free it
+  CleanUpLinkBCA(archiveModules);
+
+  return true;
+
+}
+#endif
+
+
 Module *klee::linkWithLibrary(Module *module, 
                               const std::string &libraryName) {
+DEBUG_WITH_TYPE("klee_linker", dbgs() << "Linking file " << libraryName << "\n");
 #if LLVM_VERSION_CODE >= LLVM_VERSION(3, 3)
-  SMDiagnostic err;
-  std::string err_str;
-  sys::Path libraryPath(libraryName);
-  Module *new_mod = ParseIRFile(libraryPath.str(), err, 
-module->getContext());
-
-  if (Linker::LinkModules(module, new_mod, Linker::DestroySource, 
-&err_str)) {
-    klee_error("Linking library %s failed", libraryName.c_str());
+  if (!sys::fs::exists(libraryName)) {
+    klee_error("Link with library %s failed. No such file.",
+        libraryName.c_str());
+  }
+
+  OwningPtr<MemoryBuffer> Buffer;
+  if (error_code ec = MemoryBuffer::getFile(libraryName,Buffer)) {
+    klee_error("Link with library %s failed: %s", libraryName.c_str(),
+        ec.message().c_str());
+  }
+
+  sys::fs::file_magic magic = sys::fs::identify_magic(Buffer->getBuffer());
+
+  LLVMContext &Context = getGlobalContext();
+  std::string ErrorMessage;
+
+  if (magic == sys::fs::file_magic::bitcode) {
+    Module *Result = 0;
+    Result = ParseBitcodeFile(Buffer.get(), Context, &ErrorMessage);
+
+
+    if (!Result || Linker::LinkModules(module, Result, Linker::DestroySource,
+        &ErrorMessage))
+      klee_error("Link with library %s failed: %s", libraryName.c_str(),
+          ErrorMessage.c_str());
+
+    delete Result;
+
+  } else if (magic == sys::fs::file_magic::archive) {
+    OwningPtr<object::Binary> arch;
+    if (error_code ec = object::createBinary(Buffer.take(), arch))
+      klee_error("Link with library %s failed: %s", libraryName.c_str(),
+          ec.message().c_str());
+
+    if (object::Archive *a = dyn_cast<object::Archive>(arch.get())) {
+      // Handle in helper
+      if (!linkBCA(a, module, ErrorMessage))
+        klee_error("Link with library %s failed: %s", libraryName.c_str(),
+            ErrorMessage.c_str());
+    }
+    else {
+    	klee_error("Link with library %s failed: Cast to archive failed", libraryName.c_str());
+    }
+
+  } else if (magic.is_object()) {
+    OwningPtr<object::Binary> obj;
+    if (object::ObjectFile *o = dyn_cast<object::ObjectFile>(obj.get())) {
+      klee_warning("Link with library: Object file %s in archive %s found. "
+          "Currently not supported.",
+          o->getFileName().data(), libraryName.c_str());
+    }
+  } else {
+    klee_error("Link with library %s failed: Unrecognized file type.",
+        libraryName.c_str());
   }
 
   return module;
@@ -75,6 +415,8 @@ module->getContext());
 #endif
 }
 
+
+
 Function *klee::getDirectCallTarget(CallSite cs) {
   Value *v = cs.getCalledValue();
   if (Function *f = dyn_cast<Function>(v)) {