about summary refs log tree commit diff homepage
path: root/lib
diff options
context:
space:
mode:
authorDan Liew <daniel.liew@imperial.ac.uk>2014-01-23 19:39:12 +0000
committerMartin Nowack <martin@se.inf.tu-dresden.de>2014-02-06 23:55:29 +0100
commit2bddb27b38b4aebee7977afb8de4f5c849384750 (patch)
tree3bdd2724729e96da8dd77bfeb025aef0feeab9cd /lib
parent9e16c443aea9104609c5d5f4016ff41190c7dd24 (diff)
downloadklee-2bddb27b38b4aebee7977afb8de4f5c849384750.tar.gz
Improved archive (of bitcode modules) linking performance for
LLVM >= 3.3 by effectively reimplementing the linking algorithm
used in LLVM <= 3.2.

The LLVM specific bitcode archive format has been removed
from LLVM >= 3.3 . Now archives are normal system archives that can
contain LLVM bitcode modules as well as regular binary object files.

The previous commit implemented an approach where ALL the bitcode
modules get linked in which can be terribly slow when klee-uclibc gets
linked (~600 LLVM modules).

Here are the options that I considered to address this:

* Use LD with LLVM gold plug-in and call as an external program.
  I Don't really want to add another dependency to KLEE. It already
  has enough!

* Use the upcomming LLVM linker (lld). Not really an option
  because at the time of writing there is no support for linking
  archives of bitcode modules.

* Don't use archives at all and just work with modules (i.e.
  replace uses of llvm-ar with llvm-link and tinker with the
  flags a little). This isn't so great because the resulting
  LLVM bitcode module we execute is bigger than it should be.

* Reimpelent bitcode archive linking ourselves in a slightly
  better way.

I've gone for the last option

This implementation unfortunately loads all bitcode modules into memory
first so we can query the module symbols tables. I would prefer to read
the archive's index and link in modules on demand but unfortunately
although the new Object::Archive interface in LLVM allows iteration over
symbols it doesn't provide a way of knowing if that symbol is
defined/undefined.

This implementation is far from perfect!
Diffstat (limited to 'lib')
-rw-r--r--lib/Module/ModuleUtil.cpp294
1 files changed, 267 insertions, 27 deletions
diff --git a/lib/Module/ModuleUtil.cpp b/lib/Module/ModuleUtil.cpp
index c1ea274f..ef2752ed 100644
--- a/lib/Module/ModuleUtil.cpp
+++ b/lib/Module/ModuleUtil.cpp
@@ -21,7 +21,10 @@
 #include "llvm/IR/Module.h"
 #include "llvm/Object/Archive.h"
 #include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/Error.h"
 #include "llvm/Support/FileSystem.h"
+#include "llvm/Support/raw_os_ostream.h"
+#include "llvm/IR/ValueSymbolTable.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/DataStream.h"
 #else
@@ -35,6 +38,7 @@
 #include "llvm/Assembly/AssemblyAnnotationWriter.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/InstIterator.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -49,8 +53,261 @@
 using namespace llvm;
 using namespace klee;
 
+#if LLVM_VERSION_CODE >= LLVM_VERSION(3, 3)
+/// Based on GetAllUndefinedSymbols() from LLVM3.2
+///
+/// GetAllUndefinedSymbols - calculates the set of undefined symbols that still
+/// exist in an LLVM module. This is a bit tricky because there may be two
+/// symbols with the same name but different LLVM types that will be resolved to
+/// each other but aren't currently (thus we need to treat it as resolved).
+///
+/// Inputs:
+///  M - The module in which to find undefined symbols.
+///
+/// Outputs:
+///  UndefinedSymbols - A set of C++ strings containing the name of all
+///                     undefined symbols.
+///
+static void
+GetAllUndefinedSymbols(Module *M, std::set<std::string> &UndefinedSymbols) {
+  std::set<std::string> DefinedSymbols;
+  UndefinedSymbols.clear();
+  DEBUG_WITH_TYPE("klee_linker", dbgs() << "*** Computing undefined symbols ***\n");
+
+  for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I)
+    if (I->hasName()) {
+      if (I->isDeclaration())
+        UndefinedSymbols.insert(I->getName());
+      else if (!I->hasLocalLinkage()) {
+        assert(!I->hasDLLImportLinkage()
+               && "Found dllimported non-external symbol!");
+        DefinedSymbols.insert(I->getName());
+      }
+    }
+
+  for (Module::global_iterator I = M->global_begin(), E = M->global_end();
+       I != E; ++I)
+    if (I->hasName()) {
+      if (I->isDeclaration())
+        UndefinedSymbols.insert(I->getName());
+      else if (!I->hasLocalLinkage()) {
+        assert(!I->hasDLLImportLinkage()
+               && "Found dllimported non-external symbol!");
+        DefinedSymbols.insert(I->getName());
+      }
+    }
+
+  for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end();
+       I != E; ++I)
+    if (I->hasName())
+      DefinedSymbols.insert(I->getName());
+
+  // Prune out any defined symbols from the undefined symbols set...
+  for (std::set<std::string>::iterator I = UndefinedSymbols.begin();
+       I != UndefinedSymbols.end(); )
+    if (DefinedSymbols.count(*I))
+      UndefinedSymbols.erase(I++);  // This symbol really is defined!
+    else
+    {
+      DEBUG_WITH_TYPE("klee_linker", dbgs() << "Symbol " << *I << " is undefined.\n");
+      ++I; // Keep this symbol in the undefined symbols list
+    }
+
+  // FIXME: Remove KLEE special functions and LLVM intrinsics
+  // from the list of undefined symbols. HandlerInfo in SpecialFunctionHandler
+  // needs to be moved so we can use it here.
+
+  DEBUG_WITH_TYPE("klee_linker", dbgs() << "*** Finished computing undefined symbols ***\n");
+}
+
+
+
+/*! A helper function for klee::linkWithLibrary() that links in an archive of bitcode
+ *  modules into a composite bitcode module
+ *
+ *  \param[in] archive Archive of bitcode modules
+ *  \param[in,out] composite The archive to link the archive with
+ *  \param[out] errorMessage Set to an error message if linking fails
+ *
+ *  \return True if linking succeeds otherwise false
+ */
+static bool linkBCA(object::Archive* archive, Module* composite, std::string& errorMessage)
+{
+  llvm::raw_string_ostream SS(errorMessage);
+  std::vector<Module*> archiveModules;
+
+  // Is this efficient? Could we use StringRef instead?
+  std::set<std::string> undefinedSymbols;
+  GetAllUndefinedSymbols(composite, undefinedSymbols);
+
+  if (undefinedSymbols.size() == 0)
+  {
+    // Nothing to do
+    DEBUG_WITH_TYPE("klee_linker", dbgs() << "No undefined symbols. Not linking anything in!\n");
+    return true;
+  }
+
+  DEBUG_WITH_TYPE("klee_linker", dbgs() << "Loading modules\n");
+  // Load all bitcode files in to memory so we can examine their symbols
+  for (object::Archive::child_iterator AI = archive->begin_children(),
+       AE = archive->end_children(); AI != AE; ++AI)
+  {
+
+    StringRef memberName;
+    error_code ec = AI->getName(memberName);
+
+    if ( ec == errc::success )
+    {
+      DEBUG_WITH_TYPE("klee_linker", dbgs() << "Loading archive member " << memberName << "\n");
+    }
+    else
+    {
+      errorMessage="Archive member does not have a name!\n";
+      return false;
+    }
+
+    OwningPtr<object::Binary> child;
+    ec = AI->getAsBinary(child);
+    if (ec != object::object_error::success)
+    {
+      // If we can't open as a binary object file its hopefully a bitcode file
+
+      OwningPtr<MemoryBuffer> buff; // Once this is destroyed will Module still be valid??
+      Module *Result = 0;
+
+      if (error_code ec = AI->getMemoryBuffer(buff))
+      {
+        SS << "Failed to get MemoryBuffer: " <<ec.message();
+        SS.flush();
+        return false;
+      }
+
+      if (buff)
+      {
+        // FIXME: Maybe load bitcode file lazily? Then if we need to link, materialise the module
+        Result = ParseBitcodeFile(buff.get(), getGlobalContext(), &errorMessage);
+
+        if(!Result)
+        {
+          SS << "Loading module failed : " << errorMessage << "\n";
+          SS.flush();
+          return false;
+        }
+        archiveModules.push_back(Result);
+      }
+      else
+      {
+        errorMessage="Buffer was NULL!";
+        return false;
+      }
+
+    }
+    else if (object::ObjectFile *o = dyn_cast<object::ObjectFile>(child.get()))
+    {
+      SS << "Object file " << o->getFileName().data() <<
+            " in archive is not supported";
+      SS.flush();
+      return false;
+    }
+    else
+    {
+      SS << "Loading archive child with error "<< ec.message();
+      SS.flush();
+      return false;
+    }
+
+  }
+
+  DEBUG_WITH_TYPE("klee_linker", dbgs() << "Loaded " << archiveModules.size() << " modules\n");
+
+
+  std::set<std::string> previouslyUndefinedSymbols;
+
+  // Walk through the modules looking for definitions of undefined symbols
+  // if we find a match we should link that module in.
+  unsigned int passCounter=0;
+  do
+  {
+    unsigned int modulesLoadedOnPass=0;
+    previouslyUndefinedSymbols = undefinedSymbols;
+
+    for (std::vector<Module*>::iterator M = archiveModules.begin(), ME = archiveModules.end();
+        M != ME ;)
+    {
+      // Look for the undefined symbols in the composite module
+      for (std::set<std::string>::iterator S = undefinedSymbols.begin(), SE = undefinedSymbols.end();
+         S != SE; ++S)
+      {
+        // FIXME: We aren't handling weak symbols here!
+        // However the algorithm used in LLVM3.2 didn't seem to either
+        // so maybe it doesn't matter?
+
+        if ( GlobalValue* GV = dyn_cast_or_null<GlobalValue>((**M).getValueSymbolTable().lookup(*S)))
+        {
+          if (GV->isDeclaration()) continue; // Not a definition
+
+          Module* moduleToLinkIn = *M;
+
+
+          DEBUG_WITH_TYPE("klee_linker", dbgs() << "Found " << GV->getName() <<
+              " in " << (**M).getModuleIdentifier() << "\n");
+
+
+          if (Linker::LinkModules(composite, moduleToLinkIn, Linker::DestroySource, &errorMessage))
+          {
+            // Linking failed
+            SS << "Linking archive module with composite failed:" << errorMessage;
+            SS.flush();
+            return false;
+          }
+          else
+          {
+            // Link succeed, now clean up
+            modulesLoadedOnPass++;
+            DEBUG_WITH_TYPE("klee_linker", dbgs() << "Linking succeeded.\n");
+
+            // Note use of postfix operator here so that the iterator gets incremented
+            // BEFORE we remove it from the vector so the iterator is not invalidated
+            archiveModules.erase(M++);
+            delete moduleToLinkIn;
+
+            // We need to recompute the undefined symbols in the composite module
+            // after linking
+            GetAllUndefinedSymbols(composite, undefinedSymbols);
+
+            break; // Look for symbols in next module
+          }
+
+        }
+      }
+
+      ++M; // Try next module
+    }
+
+    passCounter++;
+    DEBUG_WITH_TYPE("klee_linker", dbgs() << "Completed " << passCounter <<
+                " linker passes.\n" << modulesLoadedOnPass <<
+                " modules loaded on the last pass\n" <<
+                archiveModules.size() << " modules left.\n");
+  } while (undefinedSymbols != previouslyUndefinedSymbols); // Iterate until we reach a fixed point
+
+  // What's left in archiveModules we don't want to link in so free it
+  for (std::vector<Module*>::iterator I = archiveModules.begin(), E = archiveModules.end();
+      I != E; ++I)
+  {
+    delete (*I);
+  }
+
+
+  return true;
+
+}
+#endif
+
+
 Module *klee::linkWithLibrary(Module *module, 
                               const std::string &libraryName) {
+DEBUG_WITH_TYPE("klee_linker", dbgs() << "Linking file " << libraryName << "\n");
 #if LLVM_VERSION_CODE >= LLVM_VERSION(3, 3)
   if (!sys::fs::exists(libraryName)) {
     klee_error("Link with library %s failed. No such file.",
@@ -87,34 +344,15 @@ Module *klee::linkWithLibrary(Module *module,
           ec.message().c_str());
 
     if (object::Archive *a = dyn_cast<object::Archive>(arch.get())) {
-      for (object::Archive::child_iterator i = a->begin_children(),
-          e = a->end_children(); i != e; ++i) {
-        OwningPtr<object::Binary> child;
-        if (i->getAsBinary(child)) {
-          // Try opening it as a bitcode file.
-          OwningPtr<MemoryBuffer> buff;
-          if (error_code ec = i->getMemoryBuffer(buff))
-            klee_error("Link with library %s failed: %s", libraryName.c_str(),
-                ec.message().c_str());
-          Module *Result = 0;
-          if (buff)
-            Result = ParseBitcodeFile(buff.get(), Context, &ErrorMessage);
-
-          if (!Result || Linker::LinkModules(module, Result,
-              Linker::DestroySource, &ErrorMessage))
-            klee_error("Link with library %s failed: %s", libraryName.c_str(),
-                ErrorMessage.c_str());
-          delete Result;
-
-          continue;
-        }
-        if (object::ObjectFile *o = dyn_cast<object::ObjectFile>(child.get())) {
-          klee_warning("Link with library: Object file %s in archive %s found. "
-              "Currently not supported.",
-              o->getFileName().data(), libraryName.c_str());
-        }
-      }
+      // Handle in helper
+      if (!linkBCA(a, module, ErrorMessage))
+        klee_error("Link with library %s failed: %s", libraryName.c_str(),
+            ErrorMessage.c_str());
+    }
+    else {
+    	klee_error("Link with library %s failed: Cast to archive failed", libraryName.c_str());
     }
+
   } else if (magic.is_object()) {
     OwningPtr<object::Binary> obj;
     if (object::ObjectFile *o = dyn_cast<object::ObjectFile>(obj.get())) {
@@ -142,6 +380,8 @@ Module *klee::linkWithLibrary(Module *module,
 #endif
 }
 
+
+
 Function *klee::getDirectCallTarget(CallSite cs) {
   Value *v = cs.getCalledValue();
   if (Function *f = dyn_cast<Function>(v)) {