From d5ce6b3b2c62badebc7534550f09f1b5592a7aa3 Mon Sep 17 00:00:00 2001 From: Martin Nowack Date: Tue, 7 Aug 2018 17:03:22 +0100 Subject: Refactor InstructionInfoTable Better debug information --- lib/Module/InstructionInfoTable.cpp | 237 ++++++++++++++++++++---------------- lib/Module/IntrinsicCleaner.cpp | 10 +- lib/Module/KInstruction.cpp | 3 +- lib/Module/KModule.cpp | 4 +- 4 files changed, 138 insertions(+), 116 deletions(-) (limited to 'lib/Module') diff --git a/lib/Module/InstructionInfoTable.cpp b/lib/Module/InstructionInfoTable.cpp index 3d9bf5ae..b67335e3 100644 --- a/lib/Module/InstructionInfoTable.cpp +++ b/lib/Module/InstructionInfoTable.cpp @@ -13,6 +13,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" # if LLVM_VERSION_CODE < LLVM_VERSION(3,5) @@ -36,160 +37,180 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Path.h" +#include #include #include -using namespace llvm; using namespace klee; class InstructionToLineAnnotator : public llvm::AssemblyAnnotationWriter { public: - void emitInstructionAnnot(const Instruction *i, + void emitInstructionAnnot(const llvm::Instruction *i, llvm::formatted_raw_ostream &os) { os << "%%%"; - os << (uintptr_t) i; + os << reinterpret_cast(i); + } + + void emitFunctionAnnot(const llvm::Function *f, + llvm::formatted_raw_ostream &os) { + os << "%%%"; + os << reinterpret_cast(f); } }; - -static void buildInstructionToLineMap(Module *m, - std::map &out) { + +static std::map +buildInstructionToLineMap(const llvm::Module &m) { + + std::map mapping; InstructionToLineAnnotator a; std::string str; + llvm::raw_string_ostream os(str); - m->print(os, &a); + m.print(os, &a); os.flush(); + const char *s; unsigned line = 1; for (s=str.c_str(); *s; s++) { - if (*s=='\n') { - line++; - if (s[1]=='%' && s[2]=='%' && s[3]=='%') { - s += 4; - char *end; - unsigned long long value = strtoull(s, &end, 10); - if (end!=s) { - out.insert(std::make_pair((const Instruction*) value, line)); - } - s = end; - } + if (*s != '\n') + continue; + + line++; + if (s[1] != '%' || s[2] != '%' || s[3] != '%') + continue; + + s += 4; + char *end; + uint64_t value = strtoull(s, &end, 10); + if (end != s) { + mapping.insert(std::make_pair(value, line)); } + s = end; } + + return mapping; } -static std::string getDSPIPath(const DILocation &Loc) { - std::string dir = Loc.getDirectory(); - std::string file = Loc.getFilename(); - if (dir.empty() || file[0] == '/') { - return file; - } else if (*dir.rbegin() == '/') { - return dir + file; - } else { - return dir + "/" + file; - } +static std::string getFullPath(llvm::StringRef Directory, + llvm::StringRef FileName) { + llvm::SmallString<128> file_pathname(Directory); + llvm::sys::path::append(file_pathname, FileName); + + return file_pathname.str(); } -bool InstructionInfoTable::getInstructionDebugInfo(const llvm::Instruction *I, - const std::string *&File, - unsigned &Line) { - if (MDNode *N = I->getMetadata("dbg")) { -#if LLVM_VERSION_CODE >= LLVM_VERSION(3, 7) - DILocation *Loc = cast(N); - File = internString(getDSPIPath(*Loc)); - Line = Loc->getLine(); -#else - DILocation Loc(N); - File = internString(getDSPIPath(Loc)); - Line = Loc.getLineNumber(); -#endif - return true; +class DebugInfoExtractor { + std::vector> &internedStrings; + llvm::DebugInfoFinder DIF; + + uint64_t counter; + + std::map lineTable; + + const llvm::Module &module; + +public: + DebugInfoExtractor( + std::vector> &_internedStrings, + const llvm::Module &_module) + : internedStrings(_internedStrings), counter(0), module(_module) { + DIF.processModule(module); + lineTable = buildInstructionToLineMap(module); } - return false; -} + std::string &getInternedString(const std::string &s) { + auto found = std::find_if(internedStrings.begin(), internedStrings.end(), + [&s](const std::unique_ptr &item) { + return *item.get() == s; + }); + if (found != internedStrings.end()) + return *found->get(); -InstructionInfoTable::InstructionInfoTable(Module *m) - : dummyString(""), dummyInfo(0, dummyString, 0, 0) { - unsigned id = 0; - std::map lineTable; - buildInstructionToLineMap(m, lineTable); - - for (Module::iterator fnIt = m->begin(), fn_ie = m->end(); - fnIt != fn_ie; ++fnIt) { - Function *fn = &*fnIt; - - // We want to ensure that as all instructions have source information, if - // available. Clang sometimes will not write out debug information on the - // initial instructions in a function (correspond to the formal parameters), - // so we first search forward to find the first instruction with debug info, - // if any. - const std::string *initialFile = &dummyString; - unsigned initialLine = 0; - for (inst_iterator it = inst_begin(fn), ie = inst_end(fn); it != ie; ++it) { - if (getInstructionDebugInfo(&*it, initialFile, initialLine)) - break; - } + auto newItem = std::unique_ptr(new std::string(s)); + auto result = newItem.get(); + + internedStrings.emplace_back(std::move(newItem)); + return *result; + } - const std::string *file = initialFile; - unsigned line = initialLine; - for (inst_iterator it = inst_begin(fn), ie = inst_end(fn); it != ie; - ++it) { - Instruction *instr = &*it; - unsigned assemblyLine = lineTable[instr]; + std::unique_ptr getFunctionInfo(const llvm::Function &Func) { + auto asmLine = lineTable.at(reinterpret_cast(&Func)); - // Update our source level debug information. - getInstructionDebugInfo(instr, file, line); + // Acquire function debug information + for (auto subIt = DIF.subprogram_begin(), subItE = DIF.subprogram_end(); + subIt != subItE; ++subIt) { + llvm::DISubprogram SubProgram(*subIt); + if (SubProgram.getFunction() != &Func) + continue; - infos.insert(std::make_pair(instr, - InstructionInfo(id++, *file, line, - assemblyLine))); + auto path = + getFullPath(SubProgram.getDirectory(), SubProgram.getFilename()); + + return std::unique_ptr( + new FunctionInfo(counter++, getInternedString(path), + SubProgram.getLineNumber(), asmLine)); } + + return std::unique_ptr( + new FunctionInfo(counter++, getInternedString(""), 0, asmLine)); } -} -InstructionInfoTable::~InstructionInfoTable() { - for (std::set::iterator - it = internedStrings.begin(), ie = internedStrings.end(); - it != ie; ++it) - delete *it; -} + std::unique_ptr + getInstructionInfo(const llvm::Instruction &Inst, const FunctionInfo &f) { + auto asmLine = lineTable.at(reinterpret_cast(&Inst)); + + llvm::DebugLoc Loc(Inst.getDebugLoc()); + if (!Loc.isUnknown()) { + llvm::DIScope Scope(Loc.getScope(module.getContext())); + auto full_path = getFullPath(Scope.getDirectory(), Scope.getFilename()); + return std::unique_ptr( + new InstructionInfo(counter++, getInternedString(full_path), + Loc.getLine(), Loc.getCol(), asmLine)); + } -const std::string *InstructionInfoTable::internString(std::string s) { - std::set::iterator it = internedStrings.find(&s); - if (it==internedStrings.end()) { - std::string *interned = new std::string(s); - internedStrings.insert(interned); - return interned; - } else { - return *it; + // If nothing found, use the surrounding function + return std::unique_ptr( + new InstructionInfo(counter++, f.file, f.line, 0, asmLine)); + } +}; + +InstructionInfoTable::InstructionInfoTable(const llvm::Module &m) { + DebugInfoExtractor DI(internedStrings, m); + for (const auto &Func : m) { + auto F = DI.getFunctionInfo(Func); + auto FD = F.get(); + functionInfos.insert(std::make_pair(&Func, std::move(F))); + + for (auto it = llvm::inst_begin(Func), ie = llvm::inst_end(Func); it != ie; + ++it) { + auto instr = &*it; + infos.insert(std::make_pair(instr, DI.getInstructionInfo(*instr, *FD))); + } } } unsigned InstructionInfoTable::getMaxID() const { - return infos.size(); + return infos.size() + functionInfos.size(); } const InstructionInfo & -InstructionInfoTable::getInfo(const Instruction *inst) const { - std::map::const_iterator it = - infos.find(inst); +InstructionInfoTable::getInfo(const llvm::Instruction &inst) const { + auto it = infos.find(&inst); if (it == infos.end()) llvm::report_fatal_error("invalid instruction, not present in " "initial module!"); - return it->second; + return *it->second.get(); } -const InstructionInfo & -InstructionInfoTable::getFunctionInfo(const Function *f) const { - if (f->isDeclaration()) { - // FIXME: We should probably eliminate this dummyInfo object, and instead - // allocate a per-function object to track the stats for that function - // (otherwise, anyone actually trying to use those stats is getting ones - // shared across all functions). I'd like to see if this matters in practice - // and construct a test case for it if it does, though. - return dummyInfo; - } else { - return getInfo(&*(f->begin()->begin())); - } +const FunctionInfo & +InstructionInfoTable::getFunctionInfo(const llvm::Function &f) const { + auto found = functionInfos.find(&f); + if (found == functionInfos.end()) + llvm::report_fatal_error("invalid instruction, not present in " + "initial module!"); + + return *found->second.get(); } diff --git a/lib/Module/IntrinsicCleaner.cpp b/lib/Module/IntrinsicCleaner.cpp index ee65be69..ba8ebcc0 100644 --- a/lib/Module/IntrinsicCleaner.cpp +++ b/lib/Module/IntrinsicCleaner.cpp @@ -198,11 +198,11 @@ bool IntrinsicCleanerPass::runOnBasicBlock(BasicBlock &b, Module &M) { case Intrinsic::dbg_value: case Intrinsic::dbg_declare: { - // Remove these regardless of lower intrinsics flag. This can - // be removed once IntrinsicLowering is fixed to not have bad - // caches. - ii->eraseFromParent(); - dirty = true; + // // Remove these regardless of lower intrinsics flag. This can + // // be removed once IntrinsicLowering is fixed to not have bad + // // caches. + // ii->eraseFromParent(); + // dirty = true; break; } diff --git a/lib/Module/KInstruction.cpp b/lib/Module/KInstruction.cpp index c7c841a4..ee54b67c 100644 --- a/lib/Module/KInstruction.cpp +++ b/lib/Module/KInstruction.cpp @@ -21,6 +21,7 @@ KInstruction::~KInstruction() { std::string KInstruction::getSourceLocation() const { if (!info->file.empty()) - return info->file + ":" + std::to_string(info->line); + return info->file + ":" + std::to_string(info->line) + " " + + std::to_string(info->column); else return "[no debug info]"; } diff --git a/lib/Module/KModule.cpp b/lib/Module/KModule.cpp index 2a15b02f..9cd46798 100644 --- a/lib/Module/KModule.cpp +++ b/lib/Module/KModule.cpp @@ -327,7 +327,7 @@ void KModule::manifest(InterpreterHandler *ih, bool forceSourceOutput) { /* Build shadow structures */ infos = std::unique_ptr( - new InstructionInfoTable(module.get())); + new InstructionInfoTable(*module.get())); std::vector declarations; @@ -341,7 +341,7 @@ void KModule::manifest(InterpreterHandler *ih, bool forceSourceOutput) { for (unsigned i=0; inumInstructions; ++i) { KInstruction *ki = kf->instructions[i]; - ki->info = &infos->getInfo(ki->inst); + ki->info = &infos->getInfo(*ki->inst); } functionMap.insert(std::make_pair(&Function, kf.get())); -- cgit 1.4.1