//===-- KModule.cpp -------------------------------------------------------===// // // The KLEE Symbolic Virtual Machine // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "KModule" #include "Passes.h" #include "klee/Config/Version.h" #include "klee/Internal/Module/Cell.h" #include "klee/Internal/Module/InstructionInfoTable.h" #include "klee/Internal/Module/KInstruction.h" #include "klee/Internal/Module/KModule.h" #include "klee/Internal/Support/Debug.h" #include "klee/Internal/Support/ErrorHandling.h" #include "klee/Internal/Support/ModuleUtil.h" #include "klee/Interpreter.h" #include "klee/OptionCategories.h" #if LLVM_VERSION_CODE >= LLVM_VERSION(4, 0) #include "llvm/Bitcode/BitcodeWriter.h" #else #include "llvm/Bitcode/ReaderWriter.h" #endif #include "llvm/IR/DataLayout.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/ValueSymbolTable.h" #if LLVM_VERSION_CODE < LLVM_VERSION(3, 5) #include "llvm/Analysis/Verifier.h" #include "llvm/Linker.h" #include "llvm/Support/CallSite.h" #else #include "llvm/IR/CallSite.h" #include "llvm/IR/Verifier.h" #include "llvm/Linker/Linker.h" #endif #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/raw_os_ostream.h" #include "llvm/Support/Path.h" #include "llvm/Transforms/Scalar.h" #if LLVM_VERSION_CODE >= LLVM_VERSION(8, 0) #include "llvm/Transforms/Scalar/Scalarizer.h" #endif #include "llvm/Transforms/Utils/Cloning.h" #if LLVM_VERSION_CODE >= LLVM_VERSION(7, 0) #include "llvm/Transforms/Utils.h" #endif #include using namespace llvm; using namespace klee; namespace klee { cl::OptionCategory ModuleCat("Module-related options", "These options affect the compile-time processing of the code."); } namespace { enum SwitchImplType { eSwitchTypeSimple, eSwitchTypeLLVM, eSwitchTypeInternal }; cl::opt OutputSource("output-source", cl::desc("Write the assembly for the final transformed source (default=true)"), cl::init(true), cl::cat(ModuleCat)); cl::opt OutputModule("output-module", cl::desc("Write the bitcode for the final transformed module"), cl::init(false), cl::cat(ModuleCat)); cl::opt SwitchType("switch-type", cl::desc("Select the implementation of switch (default=internal)"), cl::values(clEnumValN(eSwitchTypeSimple, "simple", "lower to ordered branches"), clEnumValN(eSwitchTypeLLVM, "llvm", "lower using LLVM"), clEnumValN(eSwitchTypeInternal, "internal", "execute switch internally") KLEE_LLVM_CL_VAL_END), cl::init(eSwitchTypeInternal), cl::cat(ModuleCat)); cl::opt DebugPrintEscapingFunctions("debug-print-escaping-functions", cl::desc("Print functions whose address is taken (default=false)"), cl::cat(ModuleCat)); // Don't run VerifierPass when checking module cl::opt DontVerify("disable-verify", cl::desc("Do not verify the module integrity (default=false)"), cl::init(false), cl::cat(klee::ModuleCat)); cl::opt OptimiseKLEECall("klee-call-optimisation", cl::desc("Allow optimization of functions that " "contain KLEE calls (default=true)"), cl::init(true), cl::cat(ModuleCat)); } /***/ namespace llvm { extern void Optimize(Module *, llvm::ArrayRef preservedFunctions); } // what a hack static Function *getStubFunctionForCtorList(Module *m, GlobalVariable *gv, std::string name) { assert(!gv->isDeclaration() && !gv->hasInternalLinkage() && "do not support old LLVM style constructor/destructor lists"); std::vector nullary; Function *fn = Function::Create(FunctionType::get(Type::getVoidTy(m->getContext()), nullary, false), GlobalVariable::InternalLinkage, name, m); BasicBlock *bb = BasicBlock::Create(m->getContext(), "entry", fn); llvm::IRBuilder<> Builder(bb); // From lli: // Should be an array of '{ int, void ()* }' structs. The first value is // the init priority, which we ignore. auto arr = dyn_cast(gv->getInitializer()); if (arr) { for (unsigned i=0; igetNumOperands(); i++) { auto cs = cast(arr->getOperand(i)); #if LLVM_VERSION_CODE >= LLVM_VERSION(3, 5) // There is a third *optional* element in global_ctor elements (``i8 // @data``). assert((cs->getNumOperands() == 2 || cs->getNumOperands() == 3) && "unexpected element in ctor initializer list"); #else assert(cs->getNumOperands()==2 && "unexpected element in ctor initializer list"); #endif auto fp = cs->getOperand(1); if (!fp->isNullValue()) { if (auto ce = dyn_cast(fp)) fp = ce->getOperand(0); if (auto f = dyn_cast(fp)) { Builder.CreateCall(f); } else { assert(0 && "unable to get function pointer from ctor initializer list"); } } } } Builder.CreateRetVoid(); return fn; } static void injectStaticConstructorsAndDestructors(Module *m, llvm::StringRef entryFunction) { GlobalVariable *ctors = m->getNamedGlobal("llvm.global_ctors"); GlobalVariable *dtors = m->getNamedGlobal("llvm.global_dtors"); if (!ctors && !dtors) return; Function *mainFn = m->getFunction(entryFunction); if (!mainFn) klee_error("Entry function '%s' not found in module.", entryFunction.str().c_str()); if (ctors) { #if LLVM_VERSION_CODE >= LLVM_VERSION(3, 8) llvm::IRBuilder<> Builder(&*mainFn->begin()->begin()); #else llvm::IRBuilder<> Builder(mainFn->begin()->begin()); #endif Builder.CreateCall(getStubFunctionForCtorList(m, ctors, "klee.ctor_stub")); } if (dtors) { Function *dtorStub = getStubFunctionForCtorList(m, dtors, "klee.dtor_stub"); for (Function::iterator it = mainFn->begin(), ie = mainFn->end(); it != ie; ++it) { if (isa(it->getTerminator())) { llvm::IRBuilder<> Builder(it->getTerminator()); Builder.CreateCall(dtorStub); } } } } void KModule::addInternalFunction(const char* functionName){ Function* internalFunction = module->getFunction(functionName); if (!internalFunction) { KLEE_DEBUG(klee_warning( "Failed to add internal function %s. Not found.", functionName)); return ; } KLEE_DEBUG(klee_message("Added function %s.",functionName)); internalFunctions.insert(internalFunction); } bool KModule::link(std::vector> &modules, const std::string &entryPoint) { auto numRemainingModules = modules.size(); // Add the currently active module to the list of linkables modules.push_back(std::move(module)); std::string error; module = std::unique_ptr( klee::linkModules(modules, entryPoint, error)); if (!module) klee_error("Could not link KLEE files %s", error.c_str()); targetData = std::unique_ptr(new DataLayout(module.get())); // Check if we linked anything return modules.size() != numRemainingModules; } void KModule::instrument(const Interpreter::ModuleOptions &opts) { // Inject checks prior to optimization... we also perform the // invariant transformations that we will end up doing later so that // optimize is seeing what is as close as possible to the final // module. legacy::PassManager pm; pm.add(new RaiseAsmPass()); // This pass will scalarize as much code as possible so that the Executor // does not need to handle operands of vector type for most instructions // other than InsertElementInst and ExtractElementInst. // // NOTE: Must come before division/overshift checks because those passes // don't know how to handle vector instructions. pm.add(createScalarizerPass()); // This pass will replace atomic instructions with non-atomic operations pm.add(createLowerAtomicPass()); if (opts.CheckDivZero) pm.add(new DivCheckPass()); if (opts.CheckOvershift) pm.add(new OvershiftCheckPass()); pm.add(new IntrinsicCleanerPass(*targetData)); pm.run(*module); } void KModule::optimiseAndPrepare( const Interpreter::ModuleOptions &opts, llvm::ArrayRef preservedFunctions) { // Preserve all functions containing klee-related function calls from being // optimised around if (!OptimiseKLEECall) { legacy::PassManager pm; pm.add(new OptNonePass()); pm.run(*module); } if (opts.Optimize) Optimize(module.get(), preservedFunctions); // Add internal functions which are not used to check if instructions // have been already visited if (opts.CheckDivZero) addInternalFunction("klee_div_zero_check"); if (opts.CheckOvershift) addInternalFunction("klee_overshift_check"); // Needs to happen after linking (since ctors/dtors can be modified) // and optimization (since global optimization can rewrite lists). injectStaticConstructorsAndDestructors(module.get(), opts.EntryPoint); // Finally, run the passes that maintain invariants we expect during // interpretation. We run the intrinsic cleaner just in case we // linked in something with intrinsics but any external calls are // going to be unresolved. We really need to handle the intrinsics // directly I think? legacy::PassManager pm3; pm3.add(createCFGSimplificationPass()); switch(SwitchType) { case eSwitchTypeInternal: break; case eSwitchTypeSimple: pm3.add(new LowerSwitchPass()); break; case eSwitchTypeLLVM: pm3.add(createLowerSwitchPass()); break; default: klee_error("invalid --switch-type"); } pm3.add(new IntrinsicCleanerPass(*targetData)); pm3.add(new PhiCleanerPass()); pm3.run(*module); } void KModule::manifest(InterpreterHandler *ih, bool forceSourceOutput) { if (OutputSource || forceSourceOutput) { std::unique_ptr os(ih->openOutputFile("assembly.ll")); assert(os && !os->has_error() && "unable to open source output"); *os << *module; } if (OutputModule) { std::unique_ptr f(ih->openOutputFile("final.bc")); #if LLVM_VERSION_CODE >= LLVM_VERSION(7, 0) WriteBitcodeToFile(*module, *f); #else WriteBitcodeToFile(module.get(), *f); #endif } /* Build shadow structures */ infos = std::unique_ptr( new InstructionInfoTable(*module.get())); std::vector declarations; for (auto &Function : *module) { if (Function.isDeclaration()) { declarations.push_back(&Function); continue; } auto kf = std::unique_ptr(new KFunction(&Function, this)); for (unsigned i=0; inumInstructions; ++i) { KInstruction *ki = kf->instructions[i]; ki->info = &infos->getInfo(*ki->inst); } functionMap.insert(std::make_pair(&Function, kf.get())); functions.push_back(std::move(kf)); } /* Compute various interesting properties */ for (auto &kf : functions) { if (functionEscapes(kf->function)) escapingFunctions.insert(kf->function); } for (auto &declaration : declarations) { if (functionEscapes(declaration)) escapingFunctions.insert(declaration); } if (DebugPrintEscapingFunctions && !escapingFunctions.empty()) { llvm::errs() << "KLEE: escaping functions: ["; std::string delimiter = ""; for (auto &Function : escapingFunctions) { llvm::errs() << delimiter << Function->getName(); delimiter = ", "; } llvm::errs() << "]\n"; } } void KModule::checkModule() { InstructionOperandTypeCheckPass *operandTypeCheckPass = new InstructionOperandTypeCheckPass(); legacy::PassManager pm; if (!DontVerify) pm.add(createVerifierPass()); pm.add(operandTypeCheckPass); pm.run(*module); // Enforce the operand type invariants that the Executor expects. This // implicitly depends on the "Scalarizer" pass to be run in order to succeed // in the presence of vector instructions. if (!operandTypeCheckPass->checkPassed()) { klee_error("Unexpected instruction operand types detected"); } } KConstant* KModule::getKConstant(const Constant *c) { auto it = constantMap.find(c); if (it != constantMap.end()) return it->second.get(); return NULL; } unsigned KModule::getConstantID(Constant *c, KInstruction* ki) { if (KConstant *kc = getKConstant(c)) return kc->id; unsigned id = constants.size(); auto kc = std::unique_ptr(new KConstant(c, id, ki)); constantMap.insert(std::make_pair(c, std::move(kc))); constants.push_back(c); return id; } /***/ KConstant::KConstant(llvm::Constant* _ct, unsigned _id, KInstruction* _ki) { ct = _ct; id = _id; ki = _ki; } /***/ static int getOperandNum(Value *v, std::map ®isterMap, KModule *km, KInstruction *ki) { if (Instruction *inst = dyn_cast(v)) { return registerMap[inst]; } else if (Argument *a = dyn_cast(v)) { return a->getArgNo(); #if LLVM_VERSION_CODE >= LLVM_VERSION(3, 6) // Metadata is no longer a Value } else if (isa(v) || isa(v) || isa(v)) { #else } else if (isa(v) || isa(v) || isa(v)) { #endif return -1; } else { assert(isa(v)); Constant *c = cast(v); return -(km->getConstantID(c, ki) + 2); } } KFunction::KFunction(llvm::Function *_function, KModule *km) : function(_function), numArgs(function->arg_size()), numInstructions(0), trackCoverage(true) { // Assign unique instruction IDs to each basic block for (auto &BasicBlock : *function) { basicBlockEntry[&BasicBlock] = numInstructions; numInstructions += BasicBlock.size(); } instructions = new KInstruction*[numInstructions]; std::map registerMap; // The first arg_size() registers are reserved for formals. unsigned rnum = numArgs; for (llvm::Function::iterator bbit = function->begin(), bbie = function->end(); bbit != bbie; ++bbit) { for (llvm::BasicBlock::iterator it = bbit->begin(), ie = bbit->end(); it != ie; ++it) registerMap[&*it] = rnum++; } numRegisters = rnum; unsigned i = 0; for (llvm::Function::iterator bbit = function->begin(), bbie = function->end(); bbit != bbie; ++bbit) { for (llvm::BasicBlock::iterator it = bbit->begin(), ie = bbit->end(); it != ie; ++it) { KInstruction *ki; switch(it->getOpcode()) { case Instruction::GetElementPtr: case Instruction::InsertValue: case Instruction::ExtractValue: ki = new KGEPInstruction(); break; default: ki = new KInstruction(); break; } Instruction *inst = &*it; ki->inst = inst; ki->dest = registerMap[inst]; if (isa(it) || isa(it)) { CallSite cs(inst); unsigned numArgs = cs.arg_size(); ki->operands = new int[numArgs+1]; ki->operands[0] = getOperandNum(cs.getCalledValue(), registerMap, km, ki); for (unsigned j=0; joperands[j+1] = getOperandNum(v, registerMap, km, ki); } } else { unsigned numOperands = it->getNumOperands(); ki->operands = new int[numOperands]; for (unsigned j=0; jgetOperand(j); ki->operands[j] = getOperandNum(v, registerMap, km, ki); } } instructions[i++] = ki; } } } KFunction::~KFunction() { for (unsigned i=0; i