From d5ce6b3b2c62badebc7534550f09f1b5592a7aa3 Mon Sep 17 00:00:00 2001 From: Martin Nowack Date: Tue, 7 Aug 2018 17:03:22 +0100 Subject: Refactor InstructionInfoTable Better debug information --- .../klee/Internal/Module/InstructionInfoTable.h | 63 +++--- lib/Core/CallPathManager.cpp | 62 +++--- lib/Core/CallPathManager.h | 43 ++-- lib/Core/Executor.cpp | 7 +- lib/Module/InstructionInfoTable.cpp | 237 +++++++++++---------- lib/Module/IntrinsicCleaner.cpp | 10 +- lib/Module/KInstruction.cpp | 3 +- lib/Module/KModule.cpp | 4 +- 8 files changed, 220 insertions(+), 209 deletions(-) diff --git a/include/klee/Internal/Module/InstructionInfoTable.h b/include/klee/Internal/Module/InstructionInfoTable.h index 98af6ac9..4c9c19b1 100644 --- a/include/klee/Internal/Module/InstructionInfoTable.h +++ b/include/klee/Internal/Module/InstructionInfoTable.h @@ -10,9 +10,10 @@ #ifndef KLEE_LIB_INSTRUCTIONINFOTABLE_H #define KLEE_LIB_INSTRUCTIONINFOTABLE_H -#include +#include #include -#include +#include +#include namespace llvm { class Function; @@ -27,44 +28,48 @@ namespace klee { unsigned id; const std::string &file; unsigned line; + unsigned column; unsigned assemblyLine; public: - InstructionInfo(unsigned _id, - const std::string &_file, - unsigned _line, - unsigned _assemblyLine) - : id(_id), - file(_file), - line(_line), - assemblyLine(_assemblyLine) { - } + InstructionInfo(unsigned _id, const std::string &_file, unsigned _line, + unsigned _column, unsigned _assemblyLine) + : id(_id), file(_file), line(_line), column(_column), + assemblyLine(_assemblyLine) {} }; - class InstructionInfoTable { - struct ltstr { - bool operator()(const std::string *a, const std::string *b) const { - return *a<*b; - } - }; + /* Stores debug information for a KInstruction */ + struct FunctionInfo { + unsigned id; + const std::string &file; + unsigned line; + uint64_t assemblyLine; - std::string dummyString; - InstructionInfo dummyInfo; - std::map infos; - std::set internedStrings; + public: + FunctionInfo(unsigned _id, const std::string &_file, unsigned _line, + uint64_t _assemblyLine) + : id(_id), file(_file), line(_line), assemblyLine(_assemblyLine) {} + + FunctionInfo(const FunctionInfo &) = delete; + FunctionInfo &operator=(FunctionInfo const &) = delete; - private: - const std::string *internString(std::string s); - bool getInstructionDebugInfo(const llvm::Instruction *I, - const std::string *&File, unsigned &Line); + FunctionInfo(FunctionInfo &&) = default; + }; + + class InstructionInfoTable { + std::unordered_map> + infos; + std::unordered_map> + functionInfos; + std::vector> internedStrings; public: - InstructionInfoTable(llvm::Module *m); - ~InstructionInfoTable(); + InstructionInfoTable(const llvm::Module &m); unsigned getMaxID() const; - const InstructionInfo &getInfo(const llvm::Instruction*) const; - const InstructionInfo &getFunctionInfo(const llvm::Function*) const; + const InstructionInfo &getInfo(const llvm::Instruction &) const; + const FunctionInfo &getFunctionInfo(const llvm::Function &) const; }; } diff --git a/lib/Core/CallPathManager.cpp b/lib/Core/CallPathManager.cpp index 42be3735..6d5ef1a8 100644 --- a/lib/Core/CallPathManager.cpp +++ b/lib/Core/CallPathManager.cpp @@ -17,19 +17,14 @@ #include "llvm/Support/raw_ostream.h" -using namespace llvm; using namespace klee; /// -CallPathNode::CallPathNode(CallPathNode *_parent, - Instruction *_callSite, - Function *_function) - : parent(_parent), - callSite(_callSite), - function(_function), - count(0) { -} +CallPathNode::CallPathNode(CallPathNode *_parent, + const llvm::Instruction *_callSite, + const llvm::Function *_function) + : parent(_parent), callSite(_callSite), function(_function), count(0) {} void CallPathNode::print() { llvm::errs() << " (Function: " << this->function->getName() << ", " @@ -44,26 +39,17 @@ void CallPathNode::print() { /// -CallPathManager::CallPathManager() : root(0, 0, 0) { -} - -CallPathManager::~CallPathManager() { - for (std::vector::iterator it = paths.begin(), - ie = paths.end(); it != ie; ++it) - delete *it; -} +CallPathManager::CallPathManager() : root(nullptr, nullptr, nullptr) {} void CallPathManager::getSummaryStatistics(CallSiteSummaryTable &results) { results.clear(); - for (std::vector::iterator it = paths.begin(), - ie = paths.end(); it != ie; ++it) - (*it)->summaryStatistics = (*it)->statistics; + for (auto &path : paths) + path->summaryStatistics = path->statistics; // compute summary bottom up, while building result table - for (std::vector::reverse_iterator it = paths.rbegin(), - ie = paths.rend(); it != ie; ++it) { - CallPathNode *cp = *it; + for (auto it = paths.rbegin(), ie = paths.rend(); it != ie; ++it) { + const auto &cp = (*it); cp->parent->summaryStatistics += cp->summaryStatistics; CallSiteInfo &csi = results[cp->callSite][cp->function]; @@ -72,29 +58,29 @@ void CallPathManager::getSummaryStatistics(CallSiteSummaryTable &results) { } } - -CallPathNode *CallPathManager::computeCallPath(CallPathNode *parent, - Instruction *cs, - Function *f) { +CallPathNode *CallPathManager::computeCallPath(CallPathNode *parent, + const llvm::Instruction *cs, + const llvm::Function *f) { for (CallPathNode *p=parent; p; p=p->parent) if (cs==p->callSite && f==p->function) return p; - - CallPathNode *cp = new CallPathNode(parent, cs, f); - paths.push_back(cp); - return cp; + + auto cp = std::unique_ptr(new CallPathNode(parent, cs, f)); + auto newCP = cp.get(); + paths.emplace_back(std::move(cp)); + return newCP; } -CallPathNode *CallPathManager::getCallPath(CallPathNode *parent, - Instruction *cs, - Function *f) { - std::pair key(cs, f); +CallPathNode *CallPathManager::getCallPath(CallPathNode *parent, + const llvm::Instruction *cs, + const llvm::Function *f) { + std::pair key(cs, f); if (!parent) parent = &root; - - CallPathNode::children_ty::iterator it = parent->children.find(key); + + auto it = parent->children.find(key); if (it==parent->children.end()) { - CallPathNode *cp = computeCallPath(parent, cs, f); + auto cp = computeCallPath(parent, cs, f); parent->children.insert(std::make_pair(key, cp)); return cp; } else { diff --git a/lib/Core/CallPathManager.h b/lib/Core/CallPathManager.h index 2e16d72b..0a648777 100644 --- a/lib/Core/CallPathManager.h +++ b/lib/Core/CallPathManager.h @@ -13,6 +13,7 @@ #include "klee/Statistics.h" #include +#include #include namespace llvm { @@ -31,20 +32,23 @@ namespace klee { CallSiteInfo() : count(0) {} }; - typedef std::map > CallSiteSummaryTable; - + typedef std::map> + CallSiteSummaryTable; + class CallPathNode { friend class CallPathManager; public: - typedef std::map, CallPathNode*> children_ty; + typedef std::map< + std::pair, + CallPathNode *> + children_ty; // form list of (callSite,function) path CallPathNode *parent; - llvm::Instruction *callSite; - llvm::Function *function; + const llvm::Instruction *callSite; + const llvm::Function *function; children_ty children; StatisticRecord statistics; @@ -52,31 +56,30 @@ namespace klee { unsigned count; public: - CallPathNode(CallPathNode *parent, - llvm::Instruction *callSite, - llvm::Function *function); + CallPathNode(CallPathNode *parent, const llvm::Instruction *callSite, + const llvm::Function *function); void print(); }; class CallPathManager { CallPathNode root; - std::vector paths; + std::vector> paths; private: - CallPathNode *computeCallPath(CallPathNode *parent, - llvm::Instruction *callSite, - llvm::Function *f); - + CallPathNode *computeCallPath(CallPathNode *parent, + const llvm::Instruction *callSite, + const llvm::Function *f); + public: CallPathManager(); - ~CallPathManager(); + ~CallPathManager() = default; void getSummaryStatistics(CallSiteSummaryTable &result); - - CallPathNode *getCallPath(CallPathNode *parent, - llvm::Instruction *callSite, - llvm::Function *f); + + CallPathNode *getCallPath(CallPathNode *parent, + const llvm::Instruction *callSite, + const llvm::Function *f); }; } diff --git a/lib/Core/Executor.cpp b/lib/Core/Executor.cpp index 70561216..cd0f6078 100644 --- a/lib/Core/Executor.cpp +++ b/lib/Core/Executor.cpp @@ -1587,11 +1587,6 @@ Function* Executor::getTargetFunction(Value *calledVal, ExecutionState &state) { } } -/// TODO remove? -static bool isDebugIntrinsic(const Function *f, KModule *KM) { - return false; -} - static inline const llvm::fltSemantics * fpWidthToSemantics(unsigned width) { switch(width) { #if LLVM_VERSION_CODE >= LLVM_VERSION(4, 0) @@ -1921,7 +1916,7 @@ void Executor::executeInstruction(ExecutionState &state, KInstruction *ki) { Function *f = getTargetFunction(fp, state); // Skip debug intrinsics, we can't evaluate their metadata arguments. - if (f && isDebugIntrinsic(f, kmodule.get())) + if (isa(i)) break; if (isa(fp)) { diff --git a/lib/Module/InstructionInfoTable.cpp b/lib/Module/InstructionInfoTable.cpp index 3d9bf5ae..b67335e3 100644 --- a/lib/Module/InstructionInfoTable.cpp +++ b/lib/Module/InstructionInfoTable.cpp @@ -13,6 +13,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" # if LLVM_VERSION_CODE < LLVM_VERSION(3,5) @@ -36,160 +37,180 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Path.h" +#include #include #include -using namespace llvm; using namespace klee; class InstructionToLineAnnotator : public llvm::AssemblyAnnotationWriter { public: - void emitInstructionAnnot(const Instruction *i, + void emitInstructionAnnot(const llvm::Instruction *i, llvm::formatted_raw_ostream &os) { os << "%%%"; - os << (uintptr_t) i; + os << reinterpret_cast(i); + } + + void emitFunctionAnnot(const llvm::Function *f, + llvm::formatted_raw_ostream &os) { + os << "%%%"; + os << reinterpret_cast(f); } }; - -static void buildInstructionToLineMap(Module *m, - std::map &out) { + +static std::map +buildInstructionToLineMap(const llvm::Module &m) { + + std::map mapping; InstructionToLineAnnotator a; std::string str; + llvm::raw_string_ostream os(str); - m->print(os, &a); + m.print(os, &a); os.flush(); + const char *s; unsigned line = 1; for (s=str.c_str(); *s; s++) { - if (*s=='\n') { - line++; - if (s[1]=='%' && s[2]=='%' && s[3]=='%') { - s += 4; - char *end; - unsigned long long value = strtoull(s, &end, 10); - if (end!=s) { - out.insert(std::make_pair((const Instruction*) value, line)); - } - s = end; - } + if (*s != '\n') + continue; + + line++; + if (s[1] != '%' || s[2] != '%' || s[3] != '%') + continue; + + s += 4; + char *end; + uint64_t value = strtoull(s, &end, 10); + if (end != s) { + mapping.insert(std::make_pair(value, line)); } + s = end; } + + return mapping; } -static std::string getDSPIPath(const DILocation &Loc) { - std::string dir = Loc.getDirectory(); - std::string file = Loc.getFilename(); - if (dir.empty() || file[0] == '/') { - return file; - } else if (*dir.rbegin() == '/') { - return dir + file; - } else { - return dir + "/" + file; - } +static std::string getFullPath(llvm::StringRef Directory, + llvm::StringRef FileName) { + llvm::SmallString<128> file_pathname(Directory); + llvm::sys::path::append(file_pathname, FileName); + + return file_pathname.str(); } -bool InstructionInfoTable::getInstructionDebugInfo(const llvm::Instruction *I, - const std::string *&File, - unsigned &Line) { - if (MDNode *N = I->getMetadata("dbg")) { -#if LLVM_VERSION_CODE >= LLVM_VERSION(3, 7) - DILocation *Loc = cast(N); - File = internString(getDSPIPath(*Loc)); - Line = Loc->getLine(); -#else - DILocation Loc(N); - File = internString(getDSPIPath(Loc)); - Line = Loc.getLineNumber(); -#endif - return true; +class DebugInfoExtractor { + std::vector> &internedStrings; + llvm::DebugInfoFinder DIF; + + uint64_t counter; + + std::map lineTable; + + const llvm::Module &module; + +public: + DebugInfoExtractor( + std::vector> &_internedStrings, + const llvm::Module &_module) + : internedStrings(_internedStrings), counter(0), module(_module) { + DIF.processModule(module); + lineTable = buildInstructionToLineMap(module); } - return false; -} + std::string &getInternedString(const std::string &s) { + auto found = std::find_if(internedStrings.begin(), internedStrings.end(), + [&s](const std::unique_ptr &item) { + return *item.get() == s; + }); + if (found != internedStrings.end()) + return *found->get(); -InstructionInfoTable::InstructionInfoTable(Module *m) - : dummyString(""), dummyInfo(0, dummyString, 0, 0) { - unsigned id = 0; - std::map lineTable; - buildInstructionToLineMap(m, lineTable); - - for (Module::iterator fnIt = m->begin(), fn_ie = m->end(); - fnIt != fn_ie; ++fnIt) { - Function *fn = &*fnIt; - - // We want to ensure that as all instructions have source information, if - // available. Clang sometimes will not write out debug information on the - // initial instructions in a function (correspond to the formal parameters), - // so we first search forward to find the first instruction with debug info, - // if any. - const std::string *initialFile = &dummyString; - unsigned initialLine = 0; - for (inst_iterator it = inst_begin(fn), ie = inst_end(fn); it != ie; ++it) { - if (getInstructionDebugInfo(&*it, initialFile, initialLine)) - break; - } + auto newItem = std::unique_ptr(new std::string(s)); + auto result = newItem.get(); + + internedStrings.emplace_back(std::move(newItem)); + return *result; + } - const std::string *file = initialFile; - unsigned line = initialLine; - for (inst_iterator it = inst_begin(fn), ie = inst_end(fn); it != ie; - ++it) { - Instruction *instr = &*it; - unsigned assemblyLine = lineTable[instr]; + std::unique_ptr getFunctionInfo(const llvm::Function &Func) { + auto asmLine = lineTable.at(reinterpret_cast(&Func)); - // Update our source level debug information. - getInstructionDebugInfo(instr, file, line); + // Acquire function debug information + for (auto subIt = DIF.subprogram_begin(), subItE = DIF.subprogram_end(); + subIt != subItE; ++subIt) { + llvm::DISubprogram SubProgram(*subIt); + if (SubProgram.getFunction() != &Func) + continue; - infos.insert(std::make_pair(instr, - InstructionInfo(id++, *file, line, - assemblyLine))); + auto path = + getFullPath(SubProgram.getDirectory(), SubProgram.getFilename()); + + return std::unique_ptr( + new FunctionInfo(counter++, getInternedString(path), + SubProgram.getLineNumber(), asmLine)); } + + return std::unique_ptr( + new FunctionInfo(counter++, getInternedString(""), 0, asmLine)); } -} -InstructionInfoTable::~InstructionInfoTable() { - for (std::set::iterator - it = internedStrings.begin(), ie = internedStrings.end(); - it != ie; ++it) - delete *it; -} + std::unique_ptr + getInstructionInfo(const llvm::Instruction &Inst, const FunctionInfo &f) { + auto asmLine = lineTable.at(reinterpret_cast(&Inst)); + + llvm::DebugLoc Loc(Inst.getDebugLoc()); + if (!Loc.isUnknown()) { + llvm::DIScope Scope(Loc.getScope(module.getContext())); + auto full_path = getFullPath(Scope.getDirectory(), Scope.getFilename()); + return std::unique_ptr( + new InstructionInfo(counter++, getInternedString(full_path), + Loc.getLine(), Loc.getCol(), asmLine)); + } -const std::string *InstructionInfoTable::internString(std::string s) { - std::set::iterator it = internedStrings.find(&s); - if (it==internedStrings.end()) { - std::string *interned = new std::string(s); - internedStrings.insert(interned); - return interned; - } else { - return *it; + // If nothing found, use the surrounding function + return std::unique_ptr( + new InstructionInfo(counter++, f.file, f.line, 0, asmLine)); + } +}; + +InstructionInfoTable::InstructionInfoTable(const llvm::Module &m) { + DebugInfoExtractor DI(internedStrings, m); + for (const auto &Func : m) { + auto F = DI.getFunctionInfo(Func); + auto FD = F.get(); + functionInfos.insert(std::make_pair(&Func, std::move(F))); + + for (auto it = llvm::inst_begin(Func), ie = llvm::inst_end(Func); it != ie; + ++it) { + auto instr = &*it; + infos.insert(std::make_pair(instr, DI.getInstructionInfo(*instr, *FD))); + } } } unsigned InstructionInfoTable::getMaxID() const { - return infos.size(); + return infos.size() + functionInfos.size(); } const InstructionInfo & -InstructionInfoTable::getInfo(const Instruction *inst) const { - std::map::const_iterator it = - infos.find(inst); +InstructionInfoTable::getInfo(const llvm::Instruction &inst) const { + auto it = infos.find(&inst); if (it == infos.end()) llvm::report_fatal_error("invalid instruction, not present in " "initial module!"); - return it->second; + return *it->second.get(); } -const InstructionInfo & -InstructionInfoTable::getFunctionInfo(const Function *f) const { - if (f->isDeclaration()) { - // FIXME: We should probably eliminate this dummyInfo object, and instead - // allocate a per-function object to track the stats for that function - // (otherwise, anyone actually trying to use those stats is getting ones - // shared across all functions). I'd like to see if this matters in practice - // and construct a test case for it if it does, though. - return dummyInfo; - } else { - return getInfo(&*(f->begin()->begin())); - } +const FunctionInfo & +InstructionInfoTable::getFunctionInfo(const llvm::Function &f) const { + auto found = functionInfos.find(&f); + if (found == functionInfos.end()) + llvm::report_fatal_error("invalid instruction, not present in " + "initial module!"); + + return *found->second.get(); } diff --git a/lib/Module/IntrinsicCleaner.cpp b/lib/Module/IntrinsicCleaner.cpp index ee65be69..ba8ebcc0 100644 --- a/lib/Module/IntrinsicCleaner.cpp +++ b/lib/Module/IntrinsicCleaner.cpp @@ -198,11 +198,11 @@ bool IntrinsicCleanerPass::runOnBasicBlock(BasicBlock &b, Module &M) { case Intrinsic::dbg_value: case Intrinsic::dbg_declare: { - // Remove these regardless of lower intrinsics flag. This can - // be removed once IntrinsicLowering is fixed to not have bad - // caches. - ii->eraseFromParent(); - dirty = true; + // // Remove these regardless of lower intrinsics flag. This can + // // be removed once IntrinsicLowering is fixed to not have bad + // // caches. + // ii->eraseFromParent(); + // dirty = true; break; } diff --git a/lib/Module/KInstruction.cpp b/lib/Module/KInstruction.cpp index c7c841a4..ee54b67c 100644 --- a/lib/Module/KInstruction.cpp +++ b/lib/Module/KInstruction.cpp @@ -21,6 +21,7 @@ KInstruction::~KInstruction() { std::string KInstruction::getSourceLocation() const { if (!info->file.empty()) - return info->file + ":" + std::to_string(info->line); + return info->file + ":" + std::to_string(info->line) + " " + + std::to_string(info->column); else return "[no debug info]"; } diff --git a/lib/Module/KModule.cpp b/lib/Module/KModule.cpp index 2a15b02f..9cd46798 100644 --- a/lib/Module/KModule.cpp +++ b/lib/Module/KModule.cpp @@ -327,7 +327,7 @@ void KModule::manifest(InterpreterHandler *ih, bool forceSourceOutput) { /* Build shadow structures */ infos = std::unique_ptr( - new InstructionInfoTable(module.get())); + new InstructionInfoTable(*module.get())); std::vector declarations; @@ -341,7 +341,7 @@ void KModule::manifest(InterpreterHandler *ih, bool forceSourceOutput) { for (unsigned i=0; inumInstructions; ++i) { KInstruction *ki = kf->instructions[i]; - ki->info = &infos->getInfo(ki->inst); + ki->info = &infos->getInfo(*ki->inst); } functionMap.insert(std::make_pair(&Function, kf.get())); -- cgit 1.4.1