From 6f290d8f9e9d7faac295cb51fc96884a18f4ded4 Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Thu, 21 May 2009 04:36:41 +0000 Subject: Initial KLEE checkin. - Lots more tweaks, documentation, and web page content is needed, but this should compile & work on OS X & Linux. git-svn-id: https://llvm.org/svn/llvm-project/klee/trunk@72205 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Module/KModule.cpp | 506 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 506 insertions(+) create mode 100644 lib/Module/KModule.cpp (limited to 'lib/Module/KModule.cpp') diff --git a/lib/Module/KModule.cpp b/lib/Module/KModule.cpp new file mode 100644 index 00000000..5d88fbda --- /dev/null +++ b/lib/Module/KModule.cpp @@ -0,0 +1,506 @@ +//===-- KModule.cpp -------------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// FIXME: This does not belong here. +#include "../Core/Common.h" + +#include "klee/Internal/Module/KModule.h" + +#include "Passes.h" + +#include "klee/Interpreter.h" +#include "klee/Internal/Module/Cell.h" +#include "klee/Internal/Module/KInstruction.h" +#include "klee/Internal/Module/InstructionInfoTable.h" +#include "klee/Internal/Support/ModuleUtil.h" + +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include "llvm/ValueSymbolTable.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/System/Path.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Transforms/Scalar.h" + +#include + +using namespace llvm; +using namespace klee; + +namespace { + enum SwitchImplType { + eSwitchTypeSimple, + eSwitchTypeLLVM, + eSwitchTypeInternal + }; + + cl::list + MergeAtExit("merge-at-exit"); + + cl::opt + NoTruncateSourceLines("no-truncate-source-lines", + cl::desc("Don't truncate long lines in the output source")); + + cl::opt + OutputSource("output-source", + cl::desc("Write the assembly for the final transformed source"), + cl::init(true)); + + cl::opt + OutputModule("output-module", + cl::desc("Write the bitcode for the final transformed module"), + cl::init(false)); + + cl::opt + SwitchType("switch-type", cl::desc("Select the implementation of switch"), + cl::values(clEnumValN(eSwitchTypeSimple, "simple", + "lower to ordered branches"), + clEnumValN(eSwitchTypeLLVM, "llvm", + "lower using LLVM"), + clEnumValN(eSwitchTypeInternal, "internal", + "execute switch internally"), + clEnumValEnd), + cl::init(eSwitchTypeInternal)); + + cl::opt + DebugPrintEscapingFunctions("debug-print-escaping-functions", + cl::desc("Print functions whose address is taken.")); +} + +KModule::KModule(Module *_module) + : module(_module), + targetData(new TargetData(module)), + dbgStopPointFn(0), + kleeMergeFn(0), + infos(0), + constantTable(0) { +} + +KModule::~KModule() { + delete[] constantTable; + delete infos; + + for (std::vector::iterator it = functions.begin(), + ie = functions.end(); it != ie; ++it) + delete *it; + + delete targetData; + delete module; +} + +/***/ + +namespace llvm { +extern void Optimize(Module*); +} + +// what a hack +static Function *getStubFunctionForCtorList(Module *m, + GlobalVariable *gv, + std::string name) { + assert(!gv->isDeclaration() && !gv->hasInternalLinkage() && + "do not support old LLVM style constructor/destructor lists"); + + std::vector nullary; + + Function *fn = Function::Create(FunctionType::get(Type::VoidTy, + nullary, false), + GlobalVariable::InternalLinkage, + name, + m); + BasicBlock *bb = BasicBlock::Create("entry", fn); + + // From lli: + // Should be an array of '{ int, void ()* }' structs. The first value is + // the init priority, which we ignore. + ConstantArray *arr = dyn_cast(gv->getInitializer()); + if (arr) { + for (unsigned i=0; igetNumOperands(); i++) { + ConstantStruct *cs = cast(arr->getOperand(i)); + assert(cs->getNumOperands()==2 && "unexpected element in ctor initializer list"); + + Constant *fp = cs->getOperand(1); + if (!fp->isNullValue()) { + if (llvm::ConstantExpr *ce = dyn_cast(fp)) + fp = ce->getOperand(0); + + if (Function *f = dyn_cast(fp)) { + CallInst::Create(f, "", bb); + } else { + assert(0 && "unable to get function pointer from ctor initializer list"); + } + } + } + } + + ReturnInst::Create(bb); + + return fn; +} + +static void injectStaticConstructorsAndDestructors(Module *m) { + GlobalVariable *ctors = m->getNamedGlobal("llvm.global_ctors"); + GlobalVariable *dtors = m->getNamedGlobal("llvm.global_dtors"); + + if (ctors || dtors) { + Function *mainFn = m->getFunction("main"); + assert(mainFn && "unable to find main function"); + + if (ctors) + CallInst::Create(getStubFunctionForCtorList(m, ctors, "klee.ctor_stub"), + "", mainFn->begin()->begin()); + if (dtors) { + Function *dtorStub = getStubFunctionForCtorList(m, dtors, "klee.dtor_stub"); + for (Function::iterator it = mainFn->begin(), ie = mainFn->end(); + it != ie; ++it) { + if (isa(it->getTerminator())) + CallInst::Create(dtorStub, "", it->getTerminator()); + } + } + } +} + +static void forceImport(Module *m, const char *name, const Type *retType, ...) { + // If module lacks an externally visible symbol for the name then we + // need to create one. We have to look in the symbol table because + // we want to check everything (global variables, functions, and + // aliases). + + Value *v = m->getValueSymbolTable().lookup(name); + GlobalValue *gv = dyn_cast_or_null(v); + + if (!gv || gv->hasInternalLinkage()) { + va_list ap; + + va_start(ap, retType); + std::vector argTypes; + while (const Type *t = va_arg(ap, const Type*)) + argTypes.push_back(t); + va_end(ap); + + m->getOrInsertFunction(name, FunctionType::get(retType, argTypes, false)); + } +} + +void KModule::prepare(const Interpreter::ModuleOptions &opts, + InterpreterHandler *ih) { + if (!MergeAtExit.empty()) { + Function *mergeFn = module->getFunction("klee_merge"); + if (!mergeFn) { + const llvm::FunctionType *Ty = + FunctionType::get(Type::VoidTy, std::vector(), false); + mergeFn = Function::Create(Ty, GlobalVariable::ExternalLinkage, + "klee_merge", + module); + } + + for (cl::list::iterator it = MergeAtExit.begin(), + ie = MergeAtExit.end(); it != ie; ++it) { + std::string &name = *it; + Function *f = module->getFunction(name); + if (!f) { + klee_error("cannot insert merge-at-exit for: %s (cannot find)", + name.c_str()); + } else if (f->isDeclaration()) { + klee_error("cannot insert merge-at-exit for: %s (external)", + name.c_str()); + } + + BasicBlock *exit = BasicBlock::Create("exit", f); + PHINode *result = 0; + if (f->getReturnType() != Type::VoidTy) + result = PHINode::Create(f->getReturnType(), "retval", exit); + CallInst::Create(mergeFn, "", exit); + ReturnInst::Create(result, exit); + + llvm::cerr << "KLEE: adding klee_merge at exit of: " << name << "\n"; + for (llvm::Function::iterator bbit = f->begin(), bbie = f->end(); + bbit != bbie; ++bbit) { + if (&*bbit != exit) { + Instruction *i = bbit->getTerminator(); + if (i->getOpcode()==Instruction::Ret) { + if (result) { + result->addIncoming(i->getOperand(0), bbit); + } + i->eraseFromParent(); + BranchInst::Create(exit, bbit); + } + } + } + } + } + + // Inject checks prior to optimization... we also perform the + // invariant transformations that we will end up doing later so that + // optimize is seeing what is as close as possible to the final + // module. + PassManager pm; + pm.add(new RaiseAsmPass()); + if (opts.CheckDivZero) pm.add(new DivCheckPass()); + // FIXME: This false here is to work around a bug in + // IntrinsicLowering which caches values which may eventually be + // deleted (via RAUW). This can be removed once LLVM fixes this + // issue. + pm.add(new IntrinsicCleanerPass(*targetData, false)); + pm.run(*module); + + if (opts.Optimize) + Optimize(module); + + // Force importing functions required by intrinsic lowering. Kind of + // unfortunate clutter when we don't need them but we won't know + // that until after all linking and intrinsic lowering is + // done. After linking and passes we just try to manually trim these + // by name. We only add them if such a function doesn't exist to + // avoid creating stale uses. + + forceImport(module, "memcpy", PointerType::getUnqual(Type::Int8Ty), + PointerType::getUnqual(Type::Int8Ty), + PointerType::getUnqual(Type::Int8Ty), + targetData->getIntPtrType(), (Type*) 0); + forceImport(module, "memmove", PointerType::getUnqual(Type::Int8Ty), + PointerType::getUnqual(Type::Int8Ty), + PointerType::getUnqual(Type::Int8Ty), + targetData->getIntPtrType(), (Type*) 0); + forceImport(module, "memset", PointerType::getUnqual(Type::Int8Ty), + PointerType::getUnqual(Type::Int8Ty), + Type::Int32Ty, + targetData->getIntPtrType(), (Type*) 0); + + // FIXME: Missing force import for various math functions. + + // FIXME: Find a way that we can test programs without requiring + // this to be linked in, it makes low level debugging much more + // annoying. + llvm::sys::Path path(opts.LibraryDir); + path.appendComponent("libintrinsic.bca"); + module = linkWithLibrary(module, path.c_str()); + + // Needs to happen after linking (since ctors/dtors can be modified) + // and optimization (since global optimization can rewrite lists). + injectStaticConstructorsAndDestructors(module); + + // Finally, run the passes that maintain invariants we expect during + // interpretation. We run the intrinsic cleaner just in case we + // linked in something with intrinsics but any external calls are + // going to be unresolved. We really need to handle the intrinsics + // directly I think? + PassManager pm3; + pm3.add(createCFGSimplificationPass()); + switch(SwitchType) { + case eSwitchTypeInternal: break; + case eSwitchTypeSimple: pm3.add(new LowerSwitchPass()); break; + case eSwitchTypeLLVM: pm3.add(createLowerSwitchPass()); break; + default: klee_error("invalid --switch-type"); + } + pm3.add(new IntrinsicCleanerPass(*targetData)); + pm3.add(new PhiCleanerPass()); + pm3.run(*module); + + // For cleanliness see if we can discard any of the functions we + // forced to import. + Function *f; + f = module->getFunction("memcpy"); + if (f && f->use_empty()) f->eraseFromParent(); + f = module->getFunction("memmove"); + if (f && f->use_empty()) f->eraseFromParent(); + f = module->getFunction("memset"); + if (f && f->use_empty()) f->eraseFromParent(); + + + // Write out the .ll assembly file. We truncate long lines to work + // around a kcachegrind parsing bug (it puts them on new lines), so + // that source browsing works. + if (OutputSource) { + std::ostream *os = ih->openOutputFile("assembly.ll"); + assert(os && os->good() && "unable to open source output"); + + // We have an option for this in case the user wants a .ll they + // can compile. + if (NoTruncateSourceLines) { + *os << *module; + } else { + bool truncated = false; + std::stringstream buffer; + buffer << *module; + std::string string = buffer.str(); + const char *position = string.c_str(); + + for (;;) { + const char *end = index(position, '\n'); + if (!end) { + *os << position; + break; + } else { + unsigned count = (end - position) + 1; + if (count<255) { + os->write(position, count); + } else { + os->write(position, 254); + *os << "\n"; + truncated = true; + } + position = end+1; + } + } + } + + delete os; + } + + if (OutputModule) { + std::ostream *f = ih->openOutputFile("final.bc"); + WriteBitcodeToFile(module, *f); + delete f; + } + + dbgStopPointFn = module->getFunction("llvm.dbg.stoppoint"); + kleeMergeFn = module->getFunction("klee_merge"); + + /* Build shadow structures */ + + infos = new InstructionInfoTable(module); + + for (Module::iterator it = module->begin(), ie = module->end(); + it != ie; ++it) { + if (it->isDeclaration()) + continue; + + KFunction *kf = new KFunction(it, this); + + for (unsigned i=0; inumInstructions; ++i) { + KInstruction *ki = kf->instructions[i]; + ki->info = &infos->getInfo(ki->inst); + } + + functions.push_back(kf); + functionMap.insert(std::make_pair(it, kf)); + } + + /* Compute various interesting properties */ + + for (std::vector::iterator it = functions.begin(), + ie = functions.end(); it != ie; ++it) { + KFunction *kf = *it; + if (functionEscapes(kf->function)) + escapingFunctions.insert(kf->function); + } + + if (DebugPrintEscapingFunctions && !escapingFunctions.empty()) { + llvm::cerr << "KLEE: escaping functions: ["; + for (std::set::iterator it = escapingFunctions.begin(), + ie = escapingFunctions.end(); it != ie; ++it) { + llvm::cerr << (*it)->getName() << ", "; + } + llvm::cerr << "]\n"; + } +} + +KConstant* KModule::getKConstant(Constant *c) { + std::map::iterator it = constantMap.find(c); + if (it != constantMap.end()) + return it->second; + return NULL; +} + +unsigned KModule::getConstantID(Constant *c, KInstruction* ki) { + KConstant *kc = getKConstant(c); + if (kc) + return kc->id; + + unsigned id = constants.size(); + kc = new KConstant(c, id, ki); + constantMap.insert(std::make_pair(c, kc)); + constants.push_back(c); + return id; +} + +/***/ + +KConstant::KConstant(llvm::Constant* _ct, unsigned _id, KInstruction* _ki) { + ct = _ct; + id = _id; + ki = _ki; +} + +/***/ + +KFunction::KFunction(llvm::Function *_function, + KModule *km) + : function(_function), + numArgs(function->arg_size()), + numInstructions(0), + trackCoverage(true) { + for (llvm::Function::iterator bbit = function->begin(), + bbie = function->end(); bbit != bbie; ++bbit) { + BasicBlock *bb = bbit; + basicBlockEntry[bb] = numInstructions; + numInstructions += bb->size(); + } + + instructions = new KInstruction*[numInstructions]; + + std::map registerMap; + + // The first arg_size() registers are reserved for formals. + unsigned rnum = numArgs; + for (llvm::Function::iterator bbit = function->begin(), + bbie = function->end(); bbit != bbie; ++bbit) { + for (llvm::BasicBlock::iterator it = bbit->begin(), ie = bbit->end(); + it != ie; ++it) + registerMap[it] = rnum++; + } + numRegisters = rnum; + + unsigned i = 0; + for (llvm::Function::iterator bbit = function->begin(), + bbie = function->end(); bbit != bbie; ++bbit) { + for (llvm::BasicBlock::iterator it = bbit->begin(), ie = bbit->end(); + it != ie; ++it) { + KInstruction *ki; + + switch(it->getOpcode()) { + case Instruction::GetElementPtr: + ki = new KGEPInstruction(); break; + default: + ki = new KInstruction(); break; + } + + unsigned numOperands = it->getNumOperands(); + ki->inst = it; + ki->operands = new int[numOperands]; + ki->dest = registerMap[it]; + for (unsigned j=0; jgetOperand(j); + + if (Instruction *inst = dyn_cast(v)) { + ki->operands[j] = registerMap[inst]; + } else if (Argument *a = dyn_cast(v)) { + ki->operands[j] = a->getArgNo(); + } else if (isa(v) || isa(v)) { + ki->operands[j] = -1; + } else { + assert(isa(v)); + Constant *c = cast(v); + ki->operands[j] = -(km->getConstantID(c, ki) + 2); + } + } + + instructions[i++] = ki; + } + } +} + +KFunction::~KFunction() { + for (unsigned i=0; i