diff options
author | Daniel Dunbar <daniel@zuster.org> | 2009-05-21 04:36:41 +0000 |
---|---|---|
committer | Daniel Dunbar <daniel@zuster.org> | 2009-05-21 04:36:41 +0000 |
commit | 6f290d8f9e9d7faac295cb51fc96884a18f4ded4 (patch) | |
tree | 46e7d426abc0c9f06ac472ac6f7f9e661b5d78cb /lib/Module | |
parent | a55960edd4dcd7535526de8d2277642522aa0209 (diff) | |
download | klee-6f290d8f9e9d7faac295cb51fc96884a18f4ded4.tar.gz |
Initial KLEE checkin.
- Lots more tweaks, documentation, and web page content is needed, but this should compile & work on OS X & Linux. git-svn-id: https://llvm.org/svn/llvm-project/klee/trunk@72205 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Module')
-rw-r--r-- | lib/Module/Checks.cpp | 68 | ||||
-rw-r--r-- | lib/Module/InstructionInfoTable.cpp | 196 | ||||
-rw-r--r-- | lib/Module/IntrinsicCleaner.cpp | 119 | ||||
-rw-r--r-- | lib/Module/KInstruction.cpp | 19 | ||||
-rw-r--r-- | lib/Module/KModule.cpp | 506 | ||||
-rw-r--r-- | lib/Module/LowerSwitch.cpp | 134 | ||||
-rwxr-xr-x | lib/Module/Makefile | 16 | ||||
-rw-r--r-- | lib/Module/ModuleUtil.cpp | 101 | ||||
-rw-r--r-- | lib/Module/Optimize.cpp | 272 | ||||
-rw-r--r-- | lib/Module/Passes.h | 132 | ||||
-rw-r--r-- | lib/Module/PhiCleaner.cpp | 83 | ||||
-rw-r--r-- | lib/Module/RaiseAsm.cpp | 69 |
12 files changed, 1715 insertions, 0 deletions
diff --git a/lib/Module/Checks.cpp b/lib/Module/Checks.cpp new file mode 100644 index 00000000..ca4eeb44 --- /dev/null +++ b/lib/Module/Checks.cpp @@ -0,0 +1,68 @@ +//===-- Checks.cpp --------------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Passes.h" + +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/InstrTypes.h" +#include "llvm/Instruction.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Type.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Target/TargetData.h" + +using namespace llvm; +using namespace klee; + +char DivCheckPass::ID; + +bool DivCheckPass::runOnModule(Module &M) { + Function *divZeroCheckFunction = 0; + + bool moduleChanged = false; + + for (Module::iterator f = M.begin(), fe = M.end(); f != fe; ++f) { + for (Function::iterator b = f->begin(), be = f->end(); b != be; ++b) { + for (BasicBlock::iterator i = b->begin(), ie = b->end(); i != ie; ++i) { + if (BinaryOperator* binOp = dyn_cast<BinaryOperator>(i)) { + // find all [s|u][div|mod] instructions + Instruction::BinaryOps opcode = binOp->getOpcode(); + if (opcode == Instruction::SDiv || opcode == Instruction::UDiv || + opcode == Instruction::SRem || opcode == Instruction::URem) { + + CastInst *denominator = + CastInst::CreateIntegerCast(i->getOperand(1), + (Type*)Type::Int64Ty, + false, /* sign doesn't matter */ + "int_cast_to_i64", + i); + + // Lazily bind the function to avoid always importing it. + if (!divZeroCheckFunction) { + Constant *fc = M.getOrInsertFunction("klee_div_zero_check", + Type::VoidTy, + Type::Int64Ty, NULL); + divZeroCheckFunction = cast<Function>(fc); + } + + CallInst::Create(divZeroCheckFunction, denominator, "", &*i); + moduleChanged = true; + } + } + } + } + } + return moduleChanged; +} diff --git a/lib/Module/InstructionInfoTable.cpp b/lib/Module/InstructionInfoTable.cpp new file mode 100644 index 00000000..82874406 --- /dev/null +++ b/lib/Module/InstructionInfoTable.cpp @@ -0,0 +1,196 @@ +//===-- InstructionInfoTable.cpp ------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "klee/Internal/Module/InstructionInfoTable.h" + +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Linker.h" +#include "llvm/Module.h" +#include "llvm/Assembly/AsmAnnotationWriter.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Analysis/ValueTracking.h" + +#include <map> +#include <iostream> +#include <fstream> +#include <sstream> +#include <string> + +using namespace llvm; +using namespace klee; + +class InstructionToLineAnnotator : public llvm::AssemblyAnnotationWriter { +public: + void emitInstructionAnnot(const Instruction *i, llvm::raw_ostream &os) { + os << "%%%" << (uintptr_t) i; + } +}; + +static void buildInstructionToLineMap(Module *m, + std::map<const Instruction*, unsigned> &out) { + InstructionToLineAnnotator a; + std::ostringstream buffer; + m->print(buffer, &a); + std::string str = buffer.str(); + const char *s; + + unsigned line = 1; + for (s=str.c_str(); *s; s++) { + if (*s=='\n') { + line++; + if (s[1]=='%' && s[2]=='%' && s[3]=='%') { + s += 4; + char *end; + unsigned long long value = strtoull(s, &end, 10); + if (end!=s) { + out.insert(std::make_pair((const Instruction*) value, line)); + } + s = end; + } + } + } +} + +static std::string getDSPIPath(DbgStopPointInst *dspi) { + std::string dir, file; + bool res = GetConstantStringInfo(dspi->getDirectory(), dir); + assert(res && "GetConstantStringInfo failed"); + res = GetConstantStringInfo(dspi->getFileName(), file); + assert(res && "GetConstantStringInfo failed"); + if (dir.empty()) { + return file; + } else if (*dir.rbegin() == '/') { + return dir + file; + } else { + return dir + "/" + file; + } +} + +InstructionInfoTable::InstructionInfoTable(Module *m) + : dummyString(""), dummyInfo(0, dummyString, 0, 0) { + unsigned id = 0; + std::map<const Instruction*, unsigned> lineTable; + buildInstructionToLineMap(m, lineTable); + + for (Module::iterator fnIt = m->begin(), fn_ie = m->end(); + fnIt != fn_ie; ++fnIt) { + const std::string *initialFile = &dummyString; + unsigned initialLine = 0; + + // It may be better to look for the closest stoppoint to the entry + // following the CFG, but it is not clear that it ever matters in + // practice. + for (inst_iterator it = inst_begin(fnIt), ie = inst_end(fnIt); + it != ie; ++it) { + if (DbgStopPointInst *dspi = dyn_cast<DbgStopPointInst>(&*it)) { + initialFile = internString(getDSPIPath(dspi)); + initialLine = dspi->getLine(); + break; + } + } + + typedef std::map<BasicBlock*, std::pair<const std::string*,unsigned> > + sourceinfo_ty; + sourceinfo_ty sourceInfo; + for (llvm::Function::iterator bbIt = fnIt->begin(), bbie = fnIt->end(); + bbIt != bbie; ++bbIt) { + std::pair<sourceinfo_ty::iterator, bool> + res = sourceInfo.insert(std::make_pair(bbIt, + std::make_pair(initialFile, + initialLine))); + if (!res.second) + continue; + + std::vector<BasicBlock*> worklist; + worklist.push_back(bbIt); + + do { + BasicBlock *bb = worklist.back(); + worklist.pop_back(); + + sourceinfo_ty::iterator si = sourceInfo.find(bb); + assert(si != sourceInfo.end()); + const std::string *file = si->second.first; + unsigned line = si->second.second; + + for (BasicBlock::iterator it = bb->begin(), ie = bb->end(); + it != ie; ++it) { + Instruction *instr = it; + unsigned assemblyLine = 0; + std::map<const Instruction*, unsigned>::const_iterator ltit = + lineTable.find(instr); + if (ltit!=lineTable.end()) + assemblyLine = ltit->second; + if (DbgStopPointInst *dspi = dyn_cast<DbgStopPointInst>(instr)) { + file = internString(getDSPIPath(dspi)); + line = dspi->getLine(); + } + infos.insert(std::make_pair(instr, + InstructionInfo(id++, + *file, + line, + assemblyLine))); + } + + for (succ_iterator it = succ_begin(bb), ie = succ_end(bb); + it != ie; ++it) { + if (sourceInfo.insert(std::make_pair(*it, + std::make_pair(file, line))).second) + worklist.push_back(*it); + } + } while (!worklist.empty()); + } + } +} + +InstructionInfoTable::~InstructionInfoTable() { + for (std::set<const std::string *, ltstr>::iterator + it = internedStrings.begin(), ie = internedStrings.end(); + it != ie; ++it) + delete *it; +} + +const std::string *InstructionInfoTable::internString(std::string s) { + std::set<const std::string *, ltstr>::iterator it = internedStrings.find(&s); + if (it==internedStrings.end()) { + std::string *interned = new std::string(s); + internedStrings.insert(interned); + return interned; + } else { + return *it; + } +} + +unsigned InstructionInfoTable::getMaxID() const { + return infos.size(); +} + +const InstructionInfo & +InstructionInfoTable::getInfo(const Instruction *inst) const { + std::map<const llvm::Instruction*, InstructionInfo>::const_iterator it = + infos.find(inst); + if (it==infos.end()) { + return dummyInfo; + } else { + return it->second; + } +} + +const InstructionInfo & +InstructionInfoTable::getFunctionInfo(const Function *f) const { + if (f->isDeclaration()) { + return dummyInfo; + } else { + return getInfo(f->begin()->begin()); + } +} diff --git a/lib/Module/IntrinsicCleaner.cpp b/lib/Module/IntrinsicCleaner.cpp new file mode 100644 index 00000000..e59b7ff6 --- /dev/null +++ b/lib/Module/IntrinsicCleaner.cpp @@ -0,0 +1,119 @@ +//===-- IntrinsicCleaner.cpp ----------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Passes.h" + +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/InstrTypes.h" +#include "llvm/Instruction.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Type.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Target/TargetData.h" + +using namespace llvm; + +namespace klee { + +char IntrinsicCleanerPass::ID; + +bool IntrinsicCleanerPass::runOnModule(Module &M) { + bool dirty = false; + for (Module::iterator f = M.begin(), fe = M.end(); f != fe; ++f) + for (Function::iterator b = f->begin(), be = f->end(); b != be; ++b) + dirty |= runOnBasicBlock(*b); + return dirty; +} + +bool IntrinsicCleanerPass::runOnBasicBlock(BasicBlock &b) { + bool dirty = false; + + for (BasicBlock::iterator i = b.begin(), ie = b.end(); i != ie;) { + IntrinsicInst *ii = dyn_cast<IntrinsicInst>(&*i); + // increment now since LowerIntrinsic deletion makes iterator invalid. + ++i; + if(ii) { + switch (ii->getIntrinsicID()) { + case Intrinsic::vastart: + case Intrinsic::vaend: + break; + + // Lower vacopy so that object resolution etc is handled by + // normal instructions. FIXME: This is broken for non-x86_32. + case Intrinsic::vacopy: { // (dst, src) -> *((i8**) dst) = *((i8**) src) + Value *dst = ii->getOperand(1); + Value *src = ii->getOperand(2); + Type *i8pp = PointerType::getUnqual(PointerType::getUnqual(Type::Int8Ty)); + Value *castedDst = CastInst::CreatePointerCast(dst, i8pp, "vacopy.cast.dst", ii); + Value *castedSrc = CastInst::CreatePointerCast(src, i8pp, "vacopy.cast.src", ii); + Value *load = new LoadInst(castedSrc, "vacopy.read", ii); + new StoreInst(load, castedDst, false, ii); + ii->removeFromParent(); + delete ii; + break; + } + + case Intrinsic::dbg_stoppoint: { + // We can remove this stoppoint if the next instruction is + // sure to be another stoppoint. This is nice for cleanliness + // but also important for switch statements where it can allow + // the targets to be joined. + bool erase = false; + if (isa<DbgStopPointInst>(i) || + isa<UnreachableInst>(i)) { + erase = true; + } else if (isa<BranchInst>(i) || + isa<SwitchInst>(i)) { + BasicBlock *bb = i->getParent(); + erase = true; + for (succ_iterator it=succ_begin(bb), ie=succ_end(bb); + it!=ie; ++it) { + if (!isa<DbgStopPointInst>(it->getFirstNonPHI())) { + erase = false; + break; + } + } + } + + if (erase) { + ii->eraseFromParent(); + dirty = true; + } + break; + } + + case Intrinsic::dbg_region_start: + case Intrinsic::dbg_region_end: + case Intrinsic::dbg_func_start: + case Intrinsic::dbg_declare: + // Remove these regardless of lower intrinsics flag. This can + // be removed once IntrinsicLowering is fixed to not have bad + // caches. + ii->eraseFromParent(); + dirty = true; + break; + + default: + if (LowerIntrinsics) + IL->LowerIntrinsicCall(ii); + dirty = true; + break; + } + } + } + + return dirty; +} +} diff --git a/lib/Module/KInstruction.cpp b/lib/Module/KInstruction.cpp new file mode 100644 index 00000000..799620c6 --- /dev/null +++ b/lib/Module/KInstruction.cpp @@ -0,0 +1,19 @@ +//===-- KInstruction.cpp --------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "klee/Internal/Module/KInstruction.h" + +using namespace llvm; +using namespace klee; + +/***/ + +KInstruction::~KInstruction() { + delete[] operands; +} diff --git a/lib/Module/KModule.cpp b/lib/Module/KModule.cpp new file mode 100644 index 00000000..5d88fbda --- /dev/null +++ b/lib/Module/KModule.cpp @@ -0,0 +1,506 @@ +//===-- KModule.cpp -------------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// FIXME: This does not belong here. +#include "../Core/Common.h" + +#include "klee/Internal/Module/KModule.h" + +#include "Passes.h" + +#include "klee/Interpreter.h" +#include "klee/Internal/Module/Cell.h" +#include "klee/Internal/Module/KInstruction.h" +#include "klee/Internal/Module/InstructionInfoTable.h" +#include "klee/Internal/Support/ModuleUtil.h" + +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include "llvm/ValueSymbolTable.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/System/Path.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Transforms/Scalar.h" + +#include <sstream> + +using namespace llvm; +using namespace klee; + +namespace { + enum SwitchImplType { + eSwitchTypeSimple, + eSwitchTypeLLVM, + eSwitchTypeInternal + }; + + cl::list<std::string> + MergeAtExit("merge-at-exit"); + + cl::opt<bool> + NoTruncateSourceLines("no-truncate-source-lines", + cl::desc("Don't truncate long lines in the output source")); + + cl::opt<bool> + OutputSource("output-source", + cl::desc("Write the assembly for the final transformed source"), + cl::init(true)); + + cl::opt<bool> + OutputModule("output-module", + cl::desc("Write the bitcode for the final transformed module"), + cl::init(false)); + + cl::opt<SwitchImplType> + SwitchType("switch-type", cl::desc("Select the implementation of switch"), + cl::values(clEnumValN(eSwitchTypeSimple, "simple", + "lower to ordered branches"), + clEnumValN(eSwitchTypeLLVM, "llvm", + "lower using LLVM"), + clEnumValN(eSwitchTypeInternal, "internal", + "execute switch internally"), + clEnumValEnd), + cl::init(eSwitchTypeInternal)); + + cl::opt<bool> + DebugPrintEscapingFunctions("debug-print-escaping-functions", + cl::desc("Print functions whose address is taken.")); +} + +KModule::KModule(Module *_module) + : module(_module), + targetData(new TargetData(module)), + dbgStopPointFn(0), + kleeMergeFn(0), + infos(0), + constantTable(0) { +} + +KModule::~KModule() { + delete[] constantTable; + delete infos; + + for (std::vector<KFunction*>::iterator it = functions.begin(), + ie = functions.end(); it != ie; ++it) + delete *it; + + delete targetData; + delete module; +} + +/***/ + +namespace llvm { +extern void Optimize(Module*); +} + +// what a hack +static Function *getStubFunctionForCtorList(Module *m, + GlobalVariable *gv, + std::string name) { + assert(!gv->isDeclaration() && !gv->hasInternalLinkage() && + "do not support old LLVM style constructor/destructor lists"); + + std::vector<const Type*> nullary; + + Function *fn = Function::Create(FunctionType::get(Type::VoidTy, + nullary, false), + GlobalVariable::InternalLinkage, + name, + m); + BasicBlock *bb = BasicBlock::Create("entry", fn); + + // From lli: + // Should be an array of '{ int, void ()* }' structs. The first value is + // the init priority, which we ignore. + ConstantArray *arr = dyn_cast<ConstantArray>(gv->getInitializer()); + if (arr) { + for (unsigned i=0; i<arr->getNumOperands(); i++) { + ConstantStruct *cs = cast<ConstantStruct>(arr->getOperand(i)); + assert(cs->getNumOperands()==2 && "unexpected element in ctor initializer list"); + + Constant *fp = cs->getOperand(1); + if (!fp->isNullValue()) { + if (llvm::ConstantExpr *ce = dyn_cast<llvm::ConstantExpr>(fp)) + fp = ce->getOperand(0); + + if (Function *f = dyn_cast<Function>(fp)) { + CallInst::Create(f, "", bb); + } else { + assert(0 && "unable to get function pointer from ctor initializer list"); + } + } + } + } + + ReturnInst::Create(bb); + + return fn; +} + +static void injectStaticConstructorsAndDestructors(Module *m) { + GlobalVariable *ctors = m->getNamedGlobal("llvm.global_ctors"); + GlobalVariable *dtors = m->getNamedGlobal("llvm.global_dtors"); + + if (ctors || dtors) { + Function *mainFn = m->getFunction("main"); + assert(mainFn && "unable to find main function"); + + if (ctors) + CallInst::Create(getStubFunctionForCtorList(m, ctors, "klee.ctor_stub"), + "", mainFn->begin()->begin()); + if (dtors) { + Function *dtorStub = getStubFunctionForCtorList(m, dtors, "klee.dtor_stub"); + for (Function::iterator it = mainFn->begin(), ie = mainFn->end(); + it != ie; ++it) { + if (isa<ReturnInst>(it->getTerminator())) + CallInst::Create(dtorStub, "", it->getTerminator()); + } + } + } +} + +static void forceImport(Module *m, const char *name, const Type *retType, ...) { + // If module lacks an externally visible symbol for the name then we + // need to create one. We have to look in the symbol table because + // we want to check everything (global variables, functions, and + // aliases). + + Value *v = m->getValueSymbolTable().lookup(name); + GlobalValue *gv = dyn_cast_or_null<GlobalValue>(v); + + if (!gv || gv->hasInternalLinkage()) { + va_list ap; + + va_start(ap, retType); + std::vector<const Type *> argTypes; + while (const Type *t = va_arg(ap, const Type*)) + argTypes.push_back(t); + va_end(ap); + + m->getOrInsertFunction(name, FunctionType::get(retType, argTypes, false)); + } +} + +void KModule::prepare(const Interpreter::ModuleOptions &opts, + InterpreterHandler *ih) { + if (!MergeAtExit.empty()) { + Function *mergeFn = module->getFunction("klee_merge"); + if (!mergeFn) { + const llvm::FunctionType *Ty = + FunctionType::get(Type::VoidTy, std::vector<const Type*>(), false); + mergeFn = Function::Create(Ty, GlobalVariable::ExternalLinkage, + "klee_merge", + module); + } + + for (cl::list<std::string>::iterator it = MergeAtExit.begin(), + ie = MergeAtExit.end(); it != ie; ++it) { + std::string &name = *it; + Function *f = module->getFunction(name); + if (!f) { + klee_error("cannot insert merge-at-exit for: %s (cannot find)", + name.c_str()); + } else if (f->isDeclaration()) { + klee_error("cannot insert merge-at-exit for: %s (external)", + name.c_str()); + } + + BasicBlock *exit = BasicBlock::Create("exit", f); + PHINode *result = 0; + if (f->getReturnType() != Type::VoidTy) + result = PHINode::Create(f->getReturnType(), "retval", exit); + CallInst::Create(mergeFn, "", exit); + ReturnInst::Create(result, exit); + + llvm::cerr << "KLEE: adding klee_merge at exit of: " << name << "\n"; + for (llvm::Function::iterator bbit = f->begin(), bbie = f->end(); + bbit != bbie; ++bbit) { + if (&*bbit != exit) { + Instruction *i = bbit->getTerminator(); + if (i->getOpcode()==Instruction::Ret) { + if (result) { + result->addIncoming(i->getOperand(0), bbit); + } + i->eraseFromParent(); + BranchInst::Create(exit, bbit); + } + } + } + } + } + + // Inject checks prior to optimization... we also perform the + // invariant transformations that we will end up doing later so that + // optimize is seeing what is as close as possible to the final + // module. + PassManager pm; + pm.add(new RaiseAsmPass()); + if (opts.CheckDivZero) pm.add(new DivCheckPass()); + // FIXME: This false here is to work around a bug in + // IntrinsicLowering which caches values which may eventually be + // deleted (via RAUW). This can be removed once LLVM fixes this + // issue. + pm.add(new IntrinsicCleanerPass(*targetData, false)); + pm.run(*module); + + if (opts.Optimize) + Optimize(module); + + // Force importing functions required by intrinsic lowering. Kind of + // unfortunate clutter when we don't need them but we won't know + // that until after all linking and intrinsic lowering is + // done. After linking and passes we just try to manually trim these + // by name. We only add them if such a function doesn't exist to + // avoid creating stale uses. + + forceImport(module, "memcpy", PointerType::getUnqual(Type::Int8Ty), + PointerType::getUnqual(Type::Int8Ty), + PointerType::getUnqual(Type::Int8Ty), + targetData->getIntPtrType(), (Type*) 0); + forceImport(module, "memmove", PointerType::getUnqual(Type::Int8Ty), + PointerType::getUnqual(Type::Int8Ty), + PointerType::getUnqual(Type::Int8Ty), + targetData->getIntPtrType(), (Type*) 0); + forceImport(module, "memset", PointerType::getUnqual(Type::Int8Ty), + PointerType::getUnqual(Type::Int8Ty), + Type::Int32Ty, + targetData->getIntPtrType(), (Type*) 0); + + // FIXME: Missing force import for various math functions. + + // FIXME: Find a way that we can test programs without requiring + // this to be linked in, it makes low level debugging much more + // annoying. + llvm::sys::Path path(opts.LibraryDir); + path.appendComponent("libintrinsic.bca"); + module = linkWithLibrary(module, path.c_str()); + + // Needs to happen after linking (since ctors/dtors can be modified) + // and optimization (since global optimization can rewrite lists). + injectStaticConstructorsAndDestructors(module); + + // Finally, run the passes that maintain invariants we expect during + // interpretation. We run the intrinsic cleaner just in case we + // linked in something with intrinsics but any external calls are + // going to be unresolved. We really need to handle the intrinsics + // directly I think? + PassManager pm3; + pm3.add(createCFGSimplificationPass()); + switch(SwitchType) { + case eSwitchTypeInternal: break; + case eSwitchTypeSimple: pm3.add(new LowerSwitchPass()); break; + case eSwitchTypeLLVM: pm3.add(createLowerSwitchPass()); break; + default: klee_error("invalid --switch-type"); + } + pm3.add(new IntrinsicCleanerPass(*targetData)); + pm3.add(new PhiCleanerPass()); + pm3.run(*module); + + // For cleanliness see if we can discard any of the functions we + // forced to import. + Function *f; + f = module->getFunction("memcpy"); + if (f && f->use_empty()) f->eraseFromParent(); + f = module->getFunction("memmove"); + if (f && f->use_empty()) f->eraseFromParent(); + f = module->getFunction("memset"); + if (f && f->use_empty()) f->eraseFromParent(); + + + // Write out the .ll assembly file. We truncate long lines to work + // around a kcachegrind parsing bug (it puts them on new lines), so + // that source browsing works. + if (OutputSource) { + std::ostream *os = ih->openOutputFile("assembly.ll"); + assert(os && os->good() && "unable to open source output"); + + // We have an option for this in case the user wants a .ll they + // can compile. + if (NoTruncateSourceLines) { + *os << *module; + } else { + bool truncated = false; + std::stringstream buffer; + buffer << *module; + std::string string = buffer.str(); + const char *position = string.c_str(); + + for (;;) { + const char *end = index(position, '\n'); + if (!end) { + *os << position; + break; + } else { + unsigned count = (end - position) + 1; + if (count<255) { + os->write(position, count); + } else { + os->write(position, 254); + *os << "\n"; + truncated = true; + } + position = end+1; + } + } + } + + delete os; + } + + if (OutputModule) { + std::ostream *f = ih->openOutputFile("final.bc"); + WriteBitcodeToFile(module, *f); + delete f; + } + + dbgStopPointFn = module->getFunction("llvm.dbg.stoppoint"); + kleeMergeFn = module->getFunction("klee_merge"); + + /* Build shadow structures */ + + infos = new InstructionInfoTable(module); + + for (Module::iterator it = module->begin(), ie = module->end(); + it != ie; ++it) { + if (it->isDeclaration()) + continue; + + KFunction *kf = new KFunction(it, this); + + for (unsigned i=0; i<kf->numInstructions; ++i) { + KInstruction *ki = kf->instructions[i]; + ki->info = &infos->getInfo(ki->inst); + } + + functions.push_back(kf); + functionMap.insert(std::make_pair(it, kf)); + } + + /* Compute various interesting properties */ + + for (std::vector<KFunction*>::iterator it = functions.begin(), + ie = functions.end(); it != ie; ++it) { + KFunction *kf = *it; + if (functionEscapes(kf->function)) + escapingFunctions.insert(kf->function); + } + + if (DebugPrintEscapingFunctions && !escapingFunctions.empty()) { + llvm::cerr << "KLEE: escaping functions: ["; + for (std::set<Function*>::iterator it = escapingFunctions.begin(), + ie = escapingFunctions.end(); it != ie; ++it) { + llvm::cerr << (*it)->getName() << ", "; + } + llvm::cerr << "]\n"; + } +} + +KConstant* KModule::getKConstant(Constant *c) { + std::map<llvm::Constant*, KConstant*>::iterator it = constantMap.find(c); + if (it != constantMap.end()) + return it->second; + return NULL; +} + +unsigned KModule::getConstantID(Constant *c, KInstruction* ki) { + KConstant *kc = getKConstant(c); + if (kc) + return kc->id; + + unsigned id = constants.size(); + kc = new KConstant(c, id, ki); + constantMap.insert(std::make_pair(c, kc)); + constants.push_back(c); + return id; +} + +/***/ + +KConstant::KConstant(llvm::Constant* _ct, unsigned _id, KInstruction* _ki) { + ct = _ct; + id = _id; + ki = _ki; +} + +/***/ + +KFunction::KFunction(llvm::Function *_function, + KModule *km) + : function(_function), + numArgs(function->arg_size()), + numInstructions(0), + trackCoverage(true) { + for (llvm::Function::iterator bbit = function->begin(), + bbie = function->end(); bbit != bbie; ++bbit) { + BasicBlock *bb = bbit; + basicBlockEntry[bb] = numInstructions; + numInstructions += bb->size(); + } + + instructions = new KInstruction*[numInstructions]; + + std::map<Instruction*, unsigned> registerMap; + + // The first arg_size() registers are reserved for formals. + unsigned rnum = numArgs; + for (llvm::Function::iterator bbit = function->begin(), + bbie = function->end(); bbit != bbie; ++bbit) { + for (llvm::BasicBlock::iterator it = bbit->begin(), ie = bbit->end(); + it != ie; ++it) + registerMap[it] = rnum++; + } + numRegisters = rnum; + + unsigned i = 0; + for (llvm::Function::iterator bbit = function->begin(), + bbie = function->end(); bbit != bbie; ++bbit) { + for (llvm::BasicBlock::iterator it = bbit->begin(), ie = bbit->end(); + it != ie; ++it) { + KInstruction *ki; + + switch(it->getOpcode()) { + case Instruction::GetElementPtr: + ki = new KGEPInstruction(); break; + default: + ki = new KInstruction(); break; + } + + unsigned numOperands = it->getNumOperands(); + ki->inst = it; + ki->operands = new int[numOperands]; + ki->dest = registerMap[it]; + for (unsigned j=0; j<numOperands; j++) { + Value *v = it->getOperand(j); + + if (Instruction *inst = dyn_cast<Instruction>(v)) { + ki->operands[j] = registerMap[inst]; + } else if (Argument *a = dyn_cast<Argument>(v)) { + ki->operands[j] = a->getArgNo(); + } else if (isa<BasicBlock>(v) || isa<InlineAsm>(v)) { + ki->operands[j] = -1; + } else { + assert(isa<Constant>(v)); + Constant *c = cast<Constant>(v); + ki->operands[j] = -(km->getConstantID(c, ki) + 2); + } + } + + instructions[i++] = ki; + } + } +} + +KFunction::~KFunction() { + for (unsigned i=0; i<numInstructions; ++i) + delete instructions[i]; + delete[] instructions; +} diff --git a/lib/Module/LowerSwitch.cpp b/lib/Module/LowerSwitch.cpp new file mode 100644 index 00000000..a1b887f3 --- /dev/null +++ b/lib/Module/LowerSwitch.cpp @@ -0,0 +1,134 @@ +//===-- LowerSwitch.cpp - Eliminate Switch instructions -------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Derived from LowerSwitch.cpp in LLVM, heavily modified by piotrek +// to get rid of the binary search transform, as it was creating +// multiple paths through the program (i.e., extra paths that didn't +// exist in the original program). +// +//===----------------------------------------------------------------------===// + +#include "Passes.h" +#include <algorithm> + +using namespace llvm; + +namespace klee { + +char LowerSwitchPass::ID = 0; + +// The comparison function for sorting the switch case values in the vector. +struct SwitchCaseCmp { + bool operator () (const LowerSwitchPass::SwitchCase& C1, + const LowerSwitchPass::SwitchCase& C2) { + + const ConstantInt* CI1 = cast<const ConstantInt>(C1.value); + const ConstantInt* CI2 = cast<const ConstantInt>(C2.value); + return CI1->getValue().slt(CI2->getValue()); + } +}; + +bool LowerSwitchPass::runOnFunction(Function &F) { + bool changed = false; + + for (Function::iterator I = F.begin(), E = F.end(); I != E; ) { + BasicBlock *cur = I++; // Advance over block so we don't traverse new blocks + + if (SwitchInst *SI = dyn_cast<SwitchInst>(cur->getTerminator())) { + changed = true; + processSwitchInst(SI); + } + } + + return changed; +} + +// switchConvert - Convert the switch statement into a linear scan +// through all the case values +void LowerSwitchPass::switchConvert(CaseItr begin, CaseItr end, + Value* value, BasicBlock* origBlock, + BasicBlock* defaultBlock) +{ + BasicBlock *curHead = defaultBlock; + Function *F = origBlock->getParent(); + + // iterate through all the cases, creating a new BasicBlock for each + for (CaseItr it = begin; it < end; ++it) { + BasicBlock *newBlock = BasicBlock::Create("NodeBlock"); + Function::iterator FI = origBlock; + F->getBasicBlockList().insert(++FI, newBlock); + + ICmpInst *cmpInst = new ICmpInst(ICmpInst::ICMP_EQ, + value, + it->value, + "Case Comparison"); + + newBlock->getInstList().push_back(cmpInst); + BranchInst::Create(it->block, curHead, cmpInst, newBlock); + + // If there were any PHI nodes in this successor, rewrite one entry + // from origBlock to come from newBlock. + for (BasicBlock::iterator bi = it->block->begin(); isa<PHINode>(bi); ++bi) { + PHINode* PN = cast<PHINode>(bi); + + int blockIndex = PN->getBasicBlockIndex(origBlock); + assert(blockIndex != -1 && "Switch didn't go to this successor??"); + PN->setIncomingBlock((unsigned)blockIndex, newBlock); + } + + curHead = newBlock; + } + + // Branch to our shiny new if-then stuff... + BranchInst::Create(curHead, origBlock); +} + +// processSwitchInst - Replace the specified switch instruction with a sequence +// of chained if-then instructions. +// +void LowerSwitchPass::processSwitchInst(SwitchInst *SI) { + BasicBlock *origBlock = SI->getParent(); + BasicBlock *defaultBlock = SI->getDefaultDest(); + Function *F = origBlock->getParent(); + Value *switchValue = SI->getOperand(0); + + // Create a new, empty default block so that the new hierarchy of + // if-then statements go to this and the PHI nodes are happy. + BasicBlock* newDefault = BasicBlock::Create("newDefault"); + + F->getBasicBlockList().insert(defaultBlock, newDefault); + BranchInst::Create(defaultBlock, newDefault); + + // If there is an entry in any PHI nodes for the default edge, make sure + // to update them as well. + for (BasicBlock::iterator I = defaultBlock->begin(); isa<PHINode>(I); ++I) { + PHINode *PN = cast<PHINode>(I); + int BlockIdx = PN->getBasicBlockIndex(origBlock); + assert(BlockIdx != -1 && "Switch didn't go to this successor??"); + PN->setIncomingBlock((unsigned)BlockIdx, newDefault); + } + + CaseVector cases; + for (unsigned i = 1; i < SI->getNumSuccessors(); ++i) + cases.push_back(SwitchCase(SI->getSuccessorValue(i), + SI->getSuccessor(i))); + + // reverse cases, as switchConvert constructs a chain of + // basic blocks by appending to the front. if we reverse, + // the if comparisons will happen in the same order + // as the cases appear in the switch + std::reverse(cases.begin(), cases.end()); + + switchConvert(cases.begin(), cases.end(), switchValue, origBlock, newDefault); + + // We are now done with the switch instruction, so delete it + origBlock->getInstList().erase(SI); +} + +} diff --git a/lib/Module/Makefile b/lib/Module/Makefile new file mode 100755 index 00000000..bfd7c469 --- /dev/null +++ b/lib/Module/Makefile @@ -0,0 +1,16 @@ +#===-- lib/Module/Makefile ---------------------------------*- Makefile -*--===# +# +# The KLEE Symbolic Virtual Machine +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# + +LEVEL=../.. + +LIBRARYNAME=kleeModule +DONT_BUILD_RELINKED=1 +BUILD_ARCHIVE=1 + +include $(LEVEL)/Makefile.common diff --git a/lib/Module/ModuleUtil.cpp b/lib/Module/ModuleUtil.cpp new file mode 100644 index 00000000..d86b9d48 --- /dev/null +++ b/lib/Module/ModuleUtil.cpp @@ -0,0 +1,101 @@ +//===-- ModuleUtil.cpp ----------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "klee/Internal/Support/ModuleUtil.h" + +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Linker.h" +#include "llvm/Module.h" +#include "llvm/Assembly/AsmAnnotationWriter.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Analysis/ValueTracking.h" + +#include <map> +#include <iostream> +#include <fstream> +#include <sstream> +#include <string> + +using namespace llvm; +using namespace klee; + +Module *klee::linkWithLibrary(Module *module, + const std::string &libraryName) { + try { + Linker linker("klee", module, false); + + llvm::sys::Path libraryPath(libraryName); + bool native = false; + + if (linker.LinkInFile(libraryPath, native)) { + assert(0 && "linking in library failed!"); + } + + return linker.releaseModule(); + } catch (...) { + assert(0 && "error during linking"); + } +} + +Function *klee::getDirectCallTarget(const Instruction *i) { + assert(isa<CallInst>(i) || isa<InvokeInst>(i)); + + Value *v = i->getOperand(0); + if (Function *f = dyn_cast<Function>(v)) { + return f; + } else if (llvm::ConstantExpr *ce = dyn_cast<llvm::ConstantExpr>(v)) { + if (ce->getOpcode()==Instruction::BitCast) + if (Function *f = dyn_cast<Function>(ce->getOperand(0))) + return f; + + // NOTE: This assert may fire, it isn't necessarily a problem and + // can be disabled, I just wanted to know when and if it happened. + assert(0 && "FIXME: Unresolved direct target for a constant expression."); + } + + return 0; +} + +static bool valueIsOnlyCalled(const Value *v) { + for (Value::use_const_iterator it = v->use_begin(), ie = v->use_end(); + it != ie; ++it) { + if (const Instruction *instr = dyn_cast<Instruction>(*it)) { + if (instr->getOpcode()==0) continue; // XXX function numbering inst + if (!isa<CallInst>(instr) && !isa<InvokeInst>(instr)) return false; + + // Make sure that the value is only the target of this call and + // not an argument. + for (unsigned i=1,e=instr->getNumOperands(); i!=e; ++i) + if (instr->getOperand(i)==v) + return false; + } else if (const llvm::ConstantExpr *ce = + dyn_cast<llvm::ConstantExpr>(*it)) { + if (ce->getOpcode()==Instruction::BitCast) + if (valueIsOnlyCalled(ce)) + continue; + return false; + } else if (const GlobalAlias *ga = dyn_cast<GlobalAlias>(*it)) { + // XXX what about v is bitcast of aliasee? + if (v==ga->getAliasee() && !valueIsOnlyCalled(ga)) + return false; + } else { + return false; + } + } + + return true; +} + +bool klee::functionEscapes(const Function *f) { + return !valueIsOnlyCalled(f); +} diff --git a/lib/Module/Optimize.cpp b/lib/Module/Optimize.cpp new file mode 100644 index 00000000..83e67292 --- /dev/null +++ b/lib/Module/Optimize.cpp @@ -0,0 +1,272 @@ +// FIXME: This file is a bastard child of opt.cpp and llvm-ld's +// Optimize.cpp. This stuff should live in common code. + + +//===- Optimize.cpp - Optimize a complete program -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements all optimization of the linked module for llvm-ld. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/System/DynamicLibrary.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Support/PassNameParser.h" +#include "llvm/Support/PluginLoader.h" +#include <iostream> +using namespace llvm; + +#if 0 +// Pass Name Options as generated by the PassNameParser +static cl::list<const PassInfo*, bool, PassNameParser> + OptimizationList(cl::desc("Optimizations available:")); +#endif + +// Don't verify at the end +static cl::opt<bool> DontVerify("disable-verify", cl::ReallyHidden); + +static cl::opt<bool> DisableInline("disable-inlining", + cl::desc("Do not run the inliner pass")); + +static cl::opt<bool> +DisableOptimizations("disable-opt", + cl::desc("Do not run any optimization passes")); + +static cl::opt<bool> DisableInternalize("disable-internalize", + cl::desc("Do not mark all symbols as internal")); + +static cl::opt<bool> VerifyEach("verify-each", + cl::desc("Verify intermediate results of all passes")); + +static cl::alias ExportDynamic("export-dynamic", + cl::aliasopt(DisableInternalize), + cl::desc("Alias for -disable-internalize")); + +static cl::opt<bool> Strip("strip-all", + cl::desc("Strip all symbol info from executable")); + +static cl::alias A0("s", cl::desc("Alias for --strip-all"), + cl::aliasopt(Strip)); + +static cl::opt<bool> StripDebug("strip-debug", + cl::desc("Strip debugger symbol info from executable")); + +static cl::alias A1("S", cl::desc("Alias for --strip-debug"), + cl::aliasopt(StripDebug)); + +// A utility function that adds a pass to the pass manager but will also add +// a verifier pass after if we're supposed to verify. +static inline void addPass(PassManager &PM, Pass *P) { + // Add the pass to the pass manager... + PM.add(P); + + // If we are verifying all of the intermediate steps, add the verifier... + if (VerifyEach) + PM.add(createVerifierPass()); +} + +namespace llvm { + + +static void AddStandardCompilePasses(PassManager &PM) { + PM.add(createVerifierPass()); // Verify that input is correct + + addPass(PM, createLowerSetJmpPass()); // Lower llvm.setjmp/.longjmp + + // If the -strip-debug command line option was specified, do it. + if (StripDebug) + addPass(PM, createStripSymbolsPass(true)); + + if (DisableOptimizations) return; + + addPass(PM, createRaiseAllocationsPass()); // call %malloc -> malloc inst + addPass(PM, createCFGSimplificationPass()); // Clean up disgusting code + addPass(PM, createPromoteMemoryToRegisterPass());// Kill useless allocas + addPass(PM, createGlobalOptimizerPass()); // Optimize out global vars + addPass(PM, createGlobalDCEPass()); // Remove unused fns and globs + addPass(PM, createIPConstantPropagationPass());// IP Constant Propagation + addPass(PM, createDeadArgEliminationPass()); // Dead argument elimination + addPass(PM, createInstructionCombiningPass()); // Clean up after IPCP & DAE + addPass(PM, createCFGSimplificationPass()); // Clean up after IPCP & DAE + + addPass(PM, createPruneEHPass()); // Remove dead EH info + addPass(PM, createFunctionAttrsPass()); // Deduce function attrs + + if (!DisableInline) + addPass(PM, createFunctionInliningPass()); // Inline small functions + addPass(PM, createArgumentPromotionPass()); // Scalarize uninlined fn args + + addPass(PM, createSimplifyLibCallsPass()); // Library Call Optimizations + addPass(PM, createInstructionCombiningPass()); // Cleanup for scalarrepl. + addPass(PM, createJumpThreadingPass()); // Thread jumps. + addPass(PM, createCFGSimplificationPass()); // Merge & remove BBs + addPass(PM, createScalarReplAggregatesPass()); // Break up aggregate allocas + addPass(PM, createInstructionCombiningPass()); // Combine silly seq's + addPass(PM, createCondPropagationPass()); // Propagate conditionals + + addPass(PM, createTailCallEliminationPass()); // Eliminate tail calls + addPass(PM, createCFGSimplificationPass()); // Merge & remove BBs + addPass(PM, createReassociatePass()); // Reassociate expressions + addPass(PM, createLoopRotatePass()); + addPass(PM, createLICMPass()); // Hoist loop invariants + addPass(PM, createLoopUnswitchPass()); // Unswitch loops. + addPass(PM, createLoopIndexSplitPass()); // Index split loops. + // FIXME : Removing instcombine causes nestedloop regression. + addPass(PM, createInstructionCombiningPass()); + addPass(PM, createIndVarSimplifyPass()); // Canonicalize indvars + addPass(PM, createLoopDeletionPass()); // Delete dead loops + addPass(PM, createLoopUnrollPass()); // Unroll small loops + addPass(PM, createInstructionCombiningPass()); // Clean up after the unroller + addPass(PM, createGVNPass()); // Remove redundancies + addPass(PM, createMemCpyOptPass()); // Remove memcpy / form memset + addPass(PM, createSCCPPass()); // Constant prop with SCCP + + // Run instcombine after redundancy elimination to exploit opportunities + // opened up by them. + addPass(PM, createInstructionCombiningPass()); + addPass(PM, createCondPropagationPass()); // Propagate conditionals + + addPass(PM, createDeadStoreEliminationPass()); // Delete dead stores + addPass(PM, createAggressiveDCEPass()); // Delete dead instructions + addPass(PM, createCFGSimplificationPass()); // Merge & remove BBs + addPass(PM, createStripDeadPrototypesPass()); // Get rid of dead prototypes + addPass(PM, createDeadTypeEliminationPass()); // Eliminate dead types + addPass(PM, createConstantMergePass()); // Merge dup global constants +} + +/// Optimize - Perform link time optimizations. This will run the scalar +/// optimizations, any loaded plugin-optimization modules, and then the +/// inter-procedural optimizations if applicable. +void Optimize(Module* M) { + + // Instantiate the pass manager to organize the passes. + PassManager Passes; + + // If we're verifying, start off with a verification pass. + if (VerifyEach) + Passes.add(createVerifierPass()); + + // Add an appropriate TargetData instance for this module... + addPass(Passes, new TargetData(M)); + + // DWD - Run the opt standard pass list as well. + AddStandardCompilePasses(Passes); + + if (!DisableOptimizations) { + // Now that composite has been compiled, scan through the module, looking + // for a main function. If main is defined, mark all other functions + // internal. + if (!DisableInternalize) + addPass(Passes, createInternalizePass(true)); + + // Propagate constants at call sites into the functions they call. This + // opens opportunities for globalopt (and inlining) by substituting function + // pointers passed as arguments to direct uses of functions. + addPass(Passes, createIPSCCPPass()); + + // Now that we internalized some globals, see if we can hack on them! + addPass(Passes, createGlobalOptimizerPass()); + + // Linking modules together can lead to duplicated global constants, only + // keep one copy of each constant... + addPass(Passes, createConstantMergePass()); + + // Remove unused arguments from functions... + addPass(Passes, createDeadArgEliminationPass()); + + // Reduce the code after globalopt and ipsccp. Both can open up significant + // simplification opportunities, and both can propagate functions through + // function pointers. When this happens, we often have to resolve varargs + // calls, etc, so let instcombine do this. + addPass(Passes, createInstructionCombiningPass()); + + if (!DisableInline) + addPass(Passes, createFunctionInliningPass()); // Inline small functions + + addPass(Passes, createPruneEHPass()); // Remove dead EH info + addPass(Passes, createGlobalOptimizerPass()); // Optimize globals again. + addPass(Passes, createGlobalDCEPass()); // Remove dead functions + + // If we didn't decide to inline a function, check to see if we can + // transform it to pass arguments by value instead of by reference. + addPass(Passes, createArgumentPromotionPass()); + + // The IPO passes may leave cruft around. Clean up after them. + addPass(Passes, createInstructionCombiningPass()); + addPass(Passes, createJumpThreadingPass()); // Thread jumps. + addPass(Passes, createScalarReplAggregatesPass()); // Break up allocas + + // Run a few AA driven optimizations here and now, to cleanup the code. + addPass(Passes, createFunctionAttrsPass()); // Add nocapture + addPass(Passes, createGlobalsModRefPass()); // IP alias analysis + + addPass(Passes, createLICMPass()); // Hoist loop invariants + addPass(Passes, createGVNPass()); // Remove redundancies + addPass(Passes, createMemCpyOptPass()); // Remove dead memcpy's + addPass(Passes, createDeadStoreEliminationPass()); // Nuke dead stores + + // Cleanup and simplify the code after the scalar optimizations. + addPass(Passes, createInstructionCombiningPass()); + + addPass(Passes, createJumpThreadingPass()); // Thread jumps. + addPass(Passes, createPromoteMemoryToRegisterPass()); // Cleanup jumpthread. + + // Delete basic blocks, which optimization passes may have killed... + addPass(Passes, createCFGSimplificationPass()); + + // Now that we have optimized the program, discard unreachable functions... + addPass(Passes, createGlobalDCEPass()); + } + + // If the -s or -S command line options were specified, strip the symbols out + // of the resulting program to make it smaller. -s and -S are GNU ld options + // that we are supporting; they alias -strip-all and -strip-debug. + if (Strip || StripDebug) + addPass(Passes, createStripSymbolsPass(StripDebug && !Strip)); + +#if 0 + // Create a new optimization pass for each one specified on the command line + std::auto_ptr<TargetMachine> target; + for (unsigned i = 0; i < OptimizationList.size(); ++i) { + const PassInfo *Opt = OptimizationList[i]; + if (Opt->getNormalCtor()) + addPass(Passes, Opt->getNormalCtor()()); + else + std::cerr << "llvm-ld: cannot create pass: " << Opt->getPassName() + << "\n"; + } +#endif + + // The user's passes may leave cruft around. Clean up after them them but + // only if we haven't got DisableOptimizations set + if (!DisableOptimizations) { + addPass(Passes, createInstructionCombiningPass()); + addPass(Passes, createCFGSimplificationPass()); + addPass(Passes, createAggressiveDCEPass()); + addPass(Passes, createGlobalDCEPass()); + } + + // Make sure everything is still good. + if (!DontVerify) + Passes.add(createVerifierPass()); + + // Run our queue of passes all at once now, efficiently. + Passes.run(*M); +} + +} diff --git a/lib/Module/Passes.h b/lib/Module/Passes.h new file mode 100644 index 00000000..23205f75 --- /dev/null +++ b/lib/Module/Passes.h @@ -0,0 +1,132 @@ +//===-- Passes.h ------------------------------------------------*- C++ -*-===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef KLEE_PASSES_H +#define KLEE_PASSES_H + +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/CodeGen/IntrinsicLowering.h" + +namespace llvm { + class Function; + class Instruction; + class Module; + class TargetData; + class Type; +} + +namespace klee { + + /// RaiseAsmPass - This pass raises some common occurences of inline + /// asm which are used by glibc into normal LLVM IR. +class RaiseAsmPass : public llvm::ModulePass { + static char ID; + + llvm::Function *getIntrinsic(llvm::Module &M, + unsigned IID, + const llvm::Type **Tys, + unsigned NumTys); + llvm::Function *getIntrinsic(llvm::Module &M, + unsigned IID, + const llvm::Type *Ty0) { + return getIntrinsic(M, IID, &Ty0, 1); + } + + bool runOnInstruction(llvm::Module &M, llvm::Instruction *I); + +public: + RaiseAsmPass() : llvm::ModulePass((intptr_t) &ID) {} + + virtual bool runOnModule(llvm::Module &M); +}; + + // This is a module pass because it can add and delete module + // variables (via intrinsic lowering). +class IntrinsicCleanerPass : public llvm::ModulePass { + static char ID; + llvm::IntrinsicLowering *IL; + bool LowerIntrinsics; + + bool runOnBasicBlock(llvm::BasicBlock &b); +public: + IntrinsicCleanerPass(const llvm::TargetData &TD, + bool LI=true) + : llvm::ModulePass((intptr_t) &ID), + IL(new llvm::IntrinsicLowering(TD)), + LowerIntrinsics(LI) {} + ~IntrinsicCleanerPass() { delete IL; } + + virtual bool runOnModule(llvm::Module &M); +}; + + // performs two transformations which make interpretation + // easier and faster. + // + // 1) Ensure that all the PHI nodes in a basic block have + // the incoming block list in the same order. Thus the + // incoming block index only needs to be computed once + // for each transfer. + // + // 2) Ensure that no PHI node result is used as an argument to + // a subsequent PHI node in the same basic block. This allows + // the transfer to execute the instructions in order instead + // of in two passes. +class PhiCleanerPass : public llvm::FunctionPass { + static char ID; + +public: + PhiCleanerPass() : llvm::FunctionPass((intptr_t) &ID) {} + + virtual bool runOnFunction(llvm::Function &f); +}; + +class DivCheckPass : public llvm::ModulePass { + static char ID; +public: + DivCheckPass(): ModulePass((intptr_t) &ID) {} + virtual bool runOnModule(llvm::Module &M); +}; + +/// LowerSwitchPass - Replace all SwitchInst instructions with chained branch +/// instructions. Note that this cannot be a BasicBlock pass because it +/// modifies the CFG! +class LowerSwitchPass : public llvm::FunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + LowerSwitchPass() : FunctionPass((intptr_t) &ID) {} + + virtual bool runOnFunction(llvm::Function &F); + + struct SwitchCase { + llvm ::Constant *value; + llvm::BasicBlock *block; + + SwitchCase() : value(0), block(0) { } + SwitchCase(llvm::Constant *v, llvm::BasicBlock *b) : + value(v), block(b) { } + }; + + typedef std::vector<SwitchCase> CaseVector; + typedef std::vector<SwitchCase>::iterator CaseItr; + +private: + void processSwitchInst(llvm::SwitchInst *SI); + void switchConvert(CaseItr begin, + CaseItr end, + llvm::Value *value, + llvm::BasicBlock *origBlock, + llvm::BasicBlock *defaultBlock); +}; + +} + +#endif diff --git a/lib/Module/PhiCleaner.cpp b/lib/Module/PhiCleaner.cpp new file mode 100644 index 00000000..3d8d7867 --- /dev/null +++ b/lib/Module/PhiCleaner.cpp @@ -0,0 +1,83 @@ +//===-- PhiCleaner.cpp ----------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Passes.h" + +#include <set> + +using namespace llvm; + +char klee::PhiCleanerPass::ID = 0; + +bool klee::PhiCleanerPass::runOnFunction(Function &f) { + bool changed = false; + + for (Function::iterator b = f.begin(), be = f.end(); b != be; ++b) { + BasicBlock::iterator it = b->begin(); + + if (it->getOpcode() == Instruction::PHI) { + PHINode *reference = cast<PHINode>(it); + + std::set<Value*> phis; + phis.insert(reference); + + unsigned numBlocks = reference->getNumIncomingValues(); + for (++it; isa<PHINode>(*it); ++it) { + PHINode *pi = cast<PHINode>(it); + + assert(numBlocks == pi->getNumIncomingValues()); + + // see if it is out of order + unsigned i; + for (i=0; i<numBlocks; i++) + if (pi->getIncomingBlock(i) != reference->getIncomingBlock(i)) + break; + + if (i!=numBlocks) { + std::vector<Value*> values; + values.reserve(numBlocks); + for (unsigned i=0; i<numBlocks; i++) + values[i] = pi->getIncomingValueForBlock(reference->getIncomingBlock(i)); + for (unsigned i=0; i<numBlocks; i++) { + pi->setIncomingBlock(i, reference->getIncomingBlock(i)); + pi->setIncomingValue(i, values[i]); + } + changed = true; + } + + // see if it uses any previously defined phi nodes + for (i=0; i<numBlocks; i++) { + Value *value = pi->getIncomingValue(i); + + if (phis.find(value) != phis.end()) { + // fix by making a "move" at the end of the incoming block + // to a new temporary, which is thus known not to be a phi + // result. we could be somewhat more efficient about this + // by sharing temps and by reordering phi instructions so + // this isn't completely necessary, but in the end this is + // just a pathological case which does not occur very + // often. + Instruction *tmp = + new BitCastInst(value, + value->getType(), + value->getName() + ".phiclean", + pi->getIncomingBlock(i)->getTerminator()); + pi->setIncomingValue(i, tmp); + } + + changed = true; + } + + phis.insert(pi); + } + } + } + + return changed; +} diff --git a/lib/Module/RaiseAsm.cpp b/lib/Module/RaiseAsm.cpp new file mode 100644 index 00000000..67fbf8ae --- /dev/null +++ b/lib/Module/RaiseAsm.cpp @@ -0,0 +1,69 @@ +//===-- RaiseAsm.cpp ------------------------------------------------------===// +// +// The KLEE Symbolic Virtual Machine +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Passes.h" + +#include "llvm/InlineAsm.h" + +using namespace llvm; +using namespace klee; + +char RaiseAsmPass::ID = 0; + +Function *RaiseAsmPass::getIntrinsic(llvm::Module &M, + unsigned IID, + const Type **Tys, + unsigned NumTys) { + return Intrinsic::getDeclaration(&M, (llvm::Intrinsic::ID) IID, Tys, NumTys); +} + +// FIXME: This should just be implemented as a patch to +// X86TargetAsmInfo.cpp, then everyone will benefit. +bool RaiseAsmPass::runOnInstruction(Module &M, Instruction *I) { + if (CallInst *ci = dyn_cast<CallInst>(I)) { + if (InlineAsm *ia = dyn_cast<InlineAsm>(ci->getCalledValue())) { + const std::string &as = ia->getAsmString(); + const std::string &cs = ia->getConstraintString(); + const llvm::Type *T = ci->getType(); + + // bswaps + if (ci->getNumOperands() == 2 && + T == ci->getOperand(1)->getType() && + ((T == llvm::Type::Int16Ty && + as == "rorw $$8, ${0:w}" && + cs == "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}") || + (T == llvm::Type::Int32Ty && + as == "rorw $$8, ${0:w};rorl $$16, $0;rorw $$8, ${0:w}" && + cs == "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}"))) { + llvm::Value *Arg0 = ci->getOperand(1); + Function *F = getIntrinsic(M, Intrinsic::bswap, Arg0->getType()); + ci->setOperand(0, F); + return true; + } + } + } + + return false; +} + +bool RaiseAsmPass::runOnModule(Module &M) { + bool changed = false; + + for (Module::iterator fi = M.begin(), fe = M.end(); fi != fe; ++fi) { + for (Function::iterator bi = fi->begin(), be = fi->end(); bi != be; ++bi) { + for (BasicBlock::iterator ii = bi->begin(), ie = bi->end(); ii != ie;) { + Instruction *i = ii; + ++ii; + changed |= runOnInstruction(M, i); + } + } + } + + return changed; +} |