diff options
-rw-r--r-- | lib/Core/SpecialFunctionHandler.cpp | 39 | ||||
-rw-r--r-- | lib/Core/SpecialFunctionHandler.h | 33 | ||||
-rw-r--r-- | lib/Module/ModuleUtil.cpp | 360 | ||||
-rw-r--r-- | test/Runtime/POSIX/Isatty.c | 4 | ||||
-rw-r--r-- | test/lit.cfg | 38 |
5 files changed, 447 insertions, 27 deletions
diff --git a/lib/Core/SpecialFunctionHandler.cpp b/lib/Core/SpecialFunctionHandler.cpp index 04f32780..ca9f7b63 100644 --- a/lib/Core/SpecialFunctionHandler.cpp +++ b/lib/Core/SpecialFunctionHandler.cpp @@ -38,20 +38,14 @@ using namespace klee; /// -struct HandlerInfo { - const char *name; - SpecialFunctionHandler::Handler handler; - bool doesNotReturn; /// Intrinsic terminates the process - bool hasReturnValue; /// Intrinsic has a return value - bool doNotOverride; /// Intrinsic should not be used if already defined -}; + // FIXME: We are more or less committed to requiring an intrinsic // library these days. We can move some of this stuff there, // especially things like realloc which have complicated semantics // w.r.t. forking. Among other things this makes delayed query // dispatch easier to implement. -HandlerInfo handlerInfo[] = { +static SpecialFunctionHandler::HandlerInfo handlerInfo[] = { #define add(name, handler, ret) { name, \ &SpecialFunctionHandler::handler, \ false, ret, false } @@ -117,12 +111,37 @@ HandlerInfo handlerInfo[] = { #undef add }; +SpecialFunctionHandler::const_iterator SpecialFunctionHandler::begin() { + return SpecialFunctionHandler::const_iterator(handlerInfo); +} + +SpecialFunctionHandler::const_iterator SpecialFunctionHandler::end() { + // NULL pointer is sentinel + return SpecialFunctionHandler::const_iterator(0); +} + +SpecialFunctionHandler::const_iterator& SpecialFunctionHandler::const_iterator::operator++() { + ++index; + if ( index >= SpecialFunctionHandler::size()) + { + // Out of range, return .end() + base=0; // Sentinel + index=0; + } + + return *this; +} + +int SpecialFunctionHandler::size() { + return sizeof(handlerInfo)/sizeof(handlerInfo[0]); +} + SpecialFunctionHandler::SpecialFunctionHandler(Executor &_executor) : executor(_executor) {} void SpecialFunctionHandler::prepare() { - unsigned N = sizeof(handlerInfo)/sizeof(handlerInfo[0]); + unsigned N = size(); for (unsigned i=0; i<N; ++i) { HandlerInfo &hi = 
handlerInfo[i]; @@ -715,3 +734,5 @@ void SpecialFunctionHandler::handleMarkGlobal(ExecutionState &state, mo->isGlobal = true; } } + + diff --git a/lib/Core/SpecialFunctionHandler.h b/lib/Core/SpecialFunctionHandler.h index 02e70ed4..f68c6edb 100644 --- a/lib/Core/SpecialFunctionHandler.h +++ b/lib/Core/SpecialFunctionHandler.h @@ -10,6 +10,7 @@ #ifndef KLEE_SPECIALFUNCTIONHANDLER_H #define KLEE_SPECIALFUNCTIONHANDLER_H +#include <iterator> #include <map> #include <vector> #include <string> @@ -37,6 +38,38 @@ namespace klee { handlers_ty handlers; class Executor &executor; + struct HandlerInfo { + const char *name; + SpecialFunctionHandler::Handler handler; + bool doesNotReturn; /// Intrinsic terminates the process + bool hasReturnValue; /// Intrinsic has a return value + bool doNotOverride; /// Intrinsic should not be used if already defined + }; + + // const_iterator to iterate over stored HandlerInfo + // FIXME: Implement >, >=, <=, < operators + class const_iterator : public std::iterator<std::random_access_iterator_tag, HandlerInfo> + { + private: + value_type* base; + int index; + public: + const_iterator(value_type* hi) : base(hi), index(0) {}; + const_iterator& operator++(); // pre-fix + const_iterator operator++(int); // post-fix + const value_type& operator*() { return base[index];} + const value_type* operator->() { return &(base[index]);} + const value_type& operator[](int i) { return base[i];} + bool operator==(const_iterator& rhs) { return (rhs.base + rhs.index) == (this->base + this->index);} + bool operator!=(const_iterator& rhs) { return !(*this == rhs);} + }; + + static const_iterator begin(); + static const_iterator end(); + static int size(); + + + public: SpecialFunctionHandler(Executor &_executor); diff --git a/lib/Module/ModuleUtil.cpp b/lib/Module/ModuleUtil.cpp index 9ae72936..58096de4 100644 --- a/lib/Module/ModuleUtil.cpp +++ b/lib/Module/ModuleUtil.cpp @@ -11,6 +11,7 @@ #include "klee/Config/Version.h" // FIXME: This does not belong here. 
#include "../Core/Common.h" +#include "../Core/SpecialFunctionHandler.h" #if LLVM_VERSION_CODE >= LLVM_VERSION(3, 3) #include "llvm/Bitcode/ReaderWriter.h" @@ -19,6 +20,12 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IRReader/IRReader.h" #include "llvm/IR/Module.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Object/Error.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/raw_os_ostream.h" +#include "llvm/IR/ValueSymbolTable.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/DataStream.h" #else @@ -32,12 +39,14 @@ #include "llvm/Assembly/AssemblyAnnotationWriter.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CallSite.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/InstIterator.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Support/Path.h" #include <map> +#include <set> #include <iostream> #include <fstream> #include <sstream> @@ -46,18 +55,349 @@ using namespace llvm; using namespace klee; +#if LLVM_VERSION_CODE >= LLVM_VERSION(3, 3) +/// Based on GetAllUndefinedSymbols() from LLVM3.2 +/// +/// GetAllUndefinedSymbols - calculates the set of undefined symbols that still +/// exist in an LLVM module. This is a bit tricky because there may be two +/// symbols with the same name but different LLVM types that will be resolved to +/// each other but aren't currently (thus we need to treat it as resolved). +/// +/// Inputs: +/// M - The module in which to find undefined symbols. +/// +/// Outputs: +/// UndefinedSymbols - A set of C++ strings containing the name of all +/// undefined symbols. 
+/// +static void +GetAllUndefinedSymbols(Module *M, std::set<std::string> &UndefinedSymbols) { + static const std::string llvmIntrinsicPrefix="llvm."; + std::set<std::string> DefinedSymbols; + UndefinedSymbols.clear(); + DEBUG_WITH_TYPE("klee_linker", dbgs() << "*** Computing undefined symbols ***\n"); + + for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I) + if (I->hasName()) { + if (I->isDeclaration()) + UndefinedSymbols.insert(I->getName()); + else if (!I->hasLocalLinkage()) { + assert(!I->hasDLLImportLinkage() + && "Found dllimported non-external symbol!"); + DefinedSymbols.insert(I->getName()); + } + } + + for (Module::global_iterator I = M->global_begin(), E = M->global_end(); + I != E; ++I) + if (I->hasName()) { + if (I->isDeclaration()) + UndefinedSymbols.insert(I->getName()); + else if (!I->hasLocalLinkage()) { + assert(!I->hasDLLImportLinkage() + && "Found dllimported non-external symbol!"); + DefinedSymbols.insert(I->getName()); + } + } + + for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end(); + I != E; ++I) + if (I->hasName()) + DefinedSymbols.insert(I->getName()); + + + // Prune out any defined symbols from the undefined symbols set + // and other symbols we don't want to treat as an undefined symbol + std::vector<std::string> SymbolsToRemove; + for (std::set<std::string>::iterator I = UndefinedSymbols.begin(); + I != UndefinedSymbols.end(); ++I ) + { + if (DefinedSymbols.count(*I)) + { + SymbolsToRemove.push_back(*I); + continue; + } + + // Strip out llvm intrinsics + if ( (I->size() >= llvmIntrinsicPrefix.size() ) && + (I->compare(0, llvmIntrinsicPrefix.size(), llvmIntrinsicPrefix) == 0) ) + { + DEBUG_WITH_TYPE("klee_linker", dbgs() << "LLVM intrinsic " << *I << + " has will be removed from undefined symbols"<< "\n"); + SymbolsToRemove.push_back(*I); + continue; + } + + // Symbol really is undefined + DEBUG_WITH_TYPE("klee_linker", dbgs() << "Symbol " << *I << " is undefined.\n"); + } + + // Remove KLEE intrinsics from 
set of undefined symbols + for (SpecialFunctionHandler::const_iterator sf = SpecialFunctionHandler::begin(), + se = SpecialFunctionHandler::end(); sf != se; ++sf) + { + if (UndefinedSymbols.find(sf->name) == UndefinedSymbols.end()) + continue; + + SymbolsToRemove.push_back(sf->name); + DEBUG_WITH_TYPE("klee_linker", dbgs() << "KLEE intrinsic " << sf->name << + " has will be removed from undefined symbols"<< "\n"); + } + + // Now remove the symbols from undefined set. + for (size_t i = 0, j = SymbolsToRemove.size(); i < j; ++i ) + UndefinedSymbols.erase(SymbolsToRemove[i]); + + DEBUG_WITH_TYPE("klee_linker", dbgs() << "*** Finished computing undefined symbols ***\n"); +} + + +/*! A helper function for linkBCA() which cleans up + * memory allocated by that function. + */ +static void CleanUpLinkBCA(std::vector<Module*> &archiveModules) +{ + for (std::vector<Module*>::iterator I = archiveModules.begin(), E = archiveModules.end(); + I != E; ++I) + { + delete (*I); + } +} + +/*! A helper function for klee::linkWithLibrary() that links in an archive of bitcode + * modules into a composite bitcode module + * + * \param[in] archive Archive of bitcode modules + * \param[in,out] composite The bitcode module to link against the archive + * \param[out] errorMessage Set to an error message if linking fails + * + * \return True if linking succeeds otherwise false + */ +static bool linkBCA(object::Archive* archive, Module* composite, std::string& errorMessage) +{ + llvm::raw_string_ostream SS(errorMessage); + std::vector<Module*> archiveModules; + + // Is this efficient? Could we use StringRef instead? + std::set<std::string> undefinedSymbols; + GetAllUndefinedSymbols(composite, undefinedSymbols); + + if (undefinedSymbols.size() == 0) + { + // Nothing to do + DEBUG_WITH_TYPE("klee_linker", dbgs() << "No undefined symbols. 
Not linking anything in!\n"); + return true; + } + + DEBUG_WITH_TYPE("klee_linker", dbgs() << "Loading modules\n"); + // Load all bitcode files into memory so we can examine their symbols + for (object::Archive::child_iterator AI = archive->begin_children(), + AE = archive->end_children(); AI != AE; ++AI) + { + + StringRef memberName; + error_code ec = AI->getName(memberName); + + if ( ec == errc::success ) + { + DEBUG_WITH_TYPE("klee_linker", dbgs() << "Loading archive member " << memberName << "\n"); + } + else + { + errorMessage="Archive member does not have a name!\n"; + return false; + } + + OwningPtr<object::Binary> child; + ec = AI->getAsBinary(child); + if (ec != object::object_error::success) + { + // If we can't open as a binary object file it's hopefully a bitcode file + + OwningPtr<MemoryBuffer> buff; // Once this is destroyed will Module still be valid?? + Module *Result = 0; + + if (error_code ec = AI->getMemoryBuffer(buff)) + { + SS << "Failed to get MemoryBuffer: " <<ec.message(); + SS.flush(); + return false; + } + + if (buff) + { + // FIXME: Maybe load bitcode file lazily? 
Then if we need to link, materialise the module + Result = ParseBitcodeFile(buff.get(), getGlobalContext(), &errorMessage); + + if(!Result) + { + SS << "Loading module failed : " << errorMessage << "\n"; + SS.flush(); + return false; + } + archiveModules.push_back(Result); + } + else + { + errorMessage="Buffer was NULL!"; + return false; + } + + } + else if (object::ObjectFile *o = dyn_cast<object::ObjectFile>(child.get())) + { + SS << "Object file " << o->getFileName().data() << + " in archive is not supported"; + SS.flush(); + return false; + } + else + { + SS << "Loading archive child with error "<< ec.message(); + SS.flush(); + return false; + } + + } + + DEBUG_WITH_TYPE("klee_linker", dbgs() << "Loaded " << archiveModules.size() << " modules\n"); + + + std::set<std::string> previouslyUndefinedSymbols; + + // Walk through the modules looking for definitions of undefined symbols + // if we find a match we should link that module in. + unsigned int passCounter=0; + do + { + unsigned int modulesLoadedOnPass=0; + previouslyUndefinedSymbols = undefinedSymbols; + + for (size_t i = 0, j = archiveModules.size(); i < j; ++i) + { + // skip empty archives + if (archiveModules[i] == 0) + continue; + Module * M = archiveModules[i]; + // Look for the undefined symbols in the composite module + for (std::set<std::string>::iterator S = undefinedSymbols.begin(), SE = undefinedSymbols.end(); + S != SE; ++S) + { + + // FIXME: We aren't handling weak symbols here! + // However the algorithm used in LLVM3.2 didn't seem to either + // so maybe it doesn't matter? 
+ + if ( GlobalValue* GV = dyn_cast_or_null<GlobalValue>(M->getValueSymbolTable().lookup(*S))) + { + if (GV->isDeclaration()) continue; // Not a definition + + DEBUG_WITH_TYPE("klee_linker", dbgs() << "Found " << GV->getName() << + " in " << M->getModuleIdentifier() << "\n"); + + + if (Linker::LinkModules(composite, M, Linker::DestroySource, &errorMessage)) + { + // Linking failed + SS << "Linking archive module with composite failed:" << errorMessage; + SS.flush(); + CleanUpLinkBCA(archiveModules); + return false; + } + else + { + // Link succeed, now clean up + modulesLoadedOnPass++; + DEBUG_WITH_TYPE("klee_linker", dbgs() << "Linking succeeded.\n"); + + delete M; + archiveModules[i] = 0; + + // We need to recompute the undefined symbols in the composite module + // after linking + GetAllUndefinedSymbols(composite, undefinedSymbols); + + break; // Look for symbols in next module + } + + } + } + } + + passCounter++; + DEBUG_WITH_TYPE("klee_linker", dbgs() << "Completed " << passCounter << + " linker passes.\n" << modulesLoadedOnPass << + " modules loaded on the last pass\n"); + } while (undefinedSymbols != previouslyUndefinedSymbols); // Iterate until we reach a fixed point + + + // What's left in archiveModules we don't want to link in so free it + CleanUpLinkBCA(archiveModules); + + return true; + +} +#endif + + Module *klee::linkWithLibrary(Module *module, const std::string &libraryName) { +DEBUG_WITH_TYPE("klee_linker", dbgs() << "Linking file " << libraryName << "\n"); #if LLVM_VERSION_CODE >= LLVM_VERSION(3, 3) - SMDiagnostic err; - std::string err_str; - sys::Path libraryPath(libraryName); - Module *new_mod = ParseIRFile(libraryPath.str(), err, -module->getContext()); - - if (Linker::LinkModules(module, new_mod, Linker::DestroySource, -&err_str)) { - klee_error("Linking library %s failed", libraryName.c_str()); + if (!sys::fs::exists(libraryName)) { + klee_error("Link with library %s failed. 
No such file.", + libraryName.c_str()); + } + + OwningPtr<MemoryBuffer> Buffer; + if (error_code ec = MemoryBuffer::getFile(libraryName,Buffer)) { + klee_error("Link with library %s failed: %s", libraryName.c_str(), + ec.message().c_str()); + } + + sys::fs::file_magic magic = sys::fs::identify_magic(Buffer->getBuffer()); + + LLVMContext &Context = getGlobalContext(); + std::string ErrorMessage; + + if (magic == sys::fs::file_magic::bitcode) { + Module *Result = 0; + Result = ParseBitcodeFile(Buffer.get(), Context, &ErrorMessage); + + + if (!Result || Linker::LinkModules(module, Result, Linker::DestroySource, + &ErrorMessage)) + klee_error("Link with library %s failed: %s", libraryName.c_str(), + ErrorMessage.c_str()); + + delete Result; + + } else if (magic == sys::fs::file_magic::archive) { + OwningPtr<object::Binary> arch; + if (error_code ec = object::createBinary(Buffer.take(), arch)) + klee_error("Link with library %s failed: %s", libraryName.c_str(), + ec.message().c_str()); + + if (object::Archive *a = dyn_cast<object::Archive>(arch.get())) { + // Handle in helper + if (!linkBCA(a, module, ErrorMessage)) + klee_error("Link with library %s failed: %s", libraryName.c_str(), + ErrorMessage.c_str()); + } + else { + klee_error("Link with library %s failed: Cast to archive failed", libraryName.c_str()); + } + + } else if (magic.is_object()) { + OwningPtr<object::Binary> obj; + if (object::ObjectFile *o = dyn_cast<object::ObjectFile>(obj.get())) { + klee_warning("Link with library: Object file %s in archive %s found. 
" + "Currently not supported.", + o->getFileName().data(), libraryName.c_str()); + } + } else { + klee_error("Link with library %s failed: Unrecognized file type.", + libraryName.c_str()); } return module; @@ -75,6 +415,8 @@ module->getContext()); #endif } + + Function *klee::getDirectCallTarget(CallSite cs) { Value *v = cs.getCalledValue(); if (Function *f = dyn_cast<Function>(v)) { diff --git a/test/Runtime/POSIX/Isatty.c b/test/Runtime/POSIX/Isatty.c index bd514d48..3054aadb 100644 --- a/test/Runtime/POSIX/Isatty.c +++ b/test/Runtime/POSIX/Isatty.c @@ -1,5 +1,5 @@ // RUN: %llvmgcc %s -emit-llvm -O0 -c -o %t.bc -// RUN: %klee --libc=uclibc --posix-runtime %t.bc --sym-files 0 10 --sym-stdout 2>%t.log +// RUN: %klee --libc=uclibc --posix-runtime %t.bc --sym-files 0 10 --sym-stdout > %t.log 2>&1 // RUN: test -f %T/klee-last/test000001.ktest // RUN: test -f %T/klee-last/test000002.ktest // RUN: test -f %T/klee-last/test000003.ktest @@ -10,7 +10,7 @@ // RUN: grep -q "stdout is NOT a tty" %t.log // Depending on how uClibc is compiled (i.e. without -DKLEE_SYM_PRINTF) -// fprintf prints out on stdout even stderr is provided. +// fprintf prints out on stdout even if stderr is provided. #include <unistd.h> #include <stdio.h> #include <assert.h> diff --git a/test/lit.cfg b/test/lit.cfg index 36a79b42..23696138 100644 --- a/test/lit.cfg +++ b/test/lit.cfg @@ -36,9 +36,20 @@ if klee_obj_root is not None: path = os.path.pathsep.join((llvm_tools_dir, klee_tools_dir, config.environment['PATH'] )) config.environment['PATH'] = path -# Propogate 'HOME' through the environment. -config.environment['HOME'] = os.environ['HOME'] -config.environment['PWD'] = os.environ['PWD'] + +# Propagate some environment variables to the test environment. +def addEnv(name): + if name in os.environ: + config.environment[name] = os.environ[name] + +addEnv('HOME') +addEnv('PWD') + +# llvm-gcc on Ubuntu needs to be told where to look +# for headers. 
If user has these in their environment +# we should propagate to test environment +addEnv('C_INCLUDE_PATH') +addEnv('CPLUS_INCLUDE_PATH') # Check that the object root is known. if config.test_exec_root is None: @@ -53,10 +64,23 @@ for name in subs: lit.fatal('{0} is not set'.format(name)) config.substitutions.append( ('%' + name, value)) -# Set absolute paths for KLEE's tools -subs = [ ('%kleaver', 'kleaver'), ('%klee','klee') ] -for s,basename in subs: - config.substitutions.append( (s, os.path.join(klee_tools_dir, basename) ) ) +# Get KLEE and Kleaver specific parameters passed on llvm-lit cmd line +# e.g. llvm-lit --param klee_opts=--help +klee_extra_params = lit.params.get('klee_opts',"") +kleaver_extra_params = lit.params.get('kleaver_opts',"") + +if len(klee_extra_params) != 0: + print("Passing extra KLEE command line args: {0}".format(klee_extra_params)) +if len(kleaver_extra_params) != 0: + print("Passing extra Kleaver command line args: {0}".format(kleaver_extra_params)) + +# Set absolute paths and extra cmdline args for KLEE's tools +subs = [ ('%kleaver', 'kleaver', kleaver_extra_params), ('%klee','klee', klee_extra_params) ] +for s,basename,extra_args in subs: + config.substitutions.append( ( s, + "{0} {1}".format( os.path.join(klee_tools_dir, basename), extra_args ) + ) + ) # LLVM < 3.0 doesn't Support %T directive |