Diffstat (limited to 'instrumentation')
31 files changed, 11762 insertions, 0 deletions
diff --git a/instrumentation/LLVMInsTrim.so.cc b/instrumentation/LLVMInsTrim.so.cc new file mode 100644 index 00000000..61a420ba --- /dev/null +++ b/instrumentation/LLVMInsTrim.so.cc @@ -0,0 +1,598 @@ +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <unistd.h> + +#include "llvm/Config/llvm-config.h" +#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 5 +typedef long double max_align_t; +#endif + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) + #include "llvm/IR/CFG.h" + #include "llvm/IR/Dominators.h" + #include "llvm/IR/DebugInfo.h" +#else + #include "llvm/Support/CFG.h" + #include "llvm/Analysis/Dominators.h" + #include "llvm/DebugInfo.h" +#endif +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/IR/BasicBlock.h" +#include <unordered_set> +#include <random> +#include <list> +#include <string> +#include <fstream> + +#include "MarkNodes.h" +#include "afl-llvm-common.h" +#include "llvm-ngram-coverage.h" + +#include "config.h" +#include "debug.h" + +using namespace llvm; + +static cl::opt<bool> MarkSetOpt("markset", cl::desc("MarkSet"), + cl::init(false)); +static cl::opt<bool> LoopHeadOpt("loophead", cl::desc("LoopHead"), + cl::init(false)); + +namespace { + +struct InsTrim : public ModulePass { + + protected: + uint32_t function_minimum_size = 1; + char * skip_nozero = NULL; + + private: + std::mt19937 generator; + int total_instr = 0; + + unsigned int genLabel() { + + return generator() & (MAP_SIZE - 1); + + } + + public: + static char ID; + + InsTrim() : ModulePass(ID), generator(0) { + + initInstrumentList(); + + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + + AU.addRequired<DominatorTreeWrapperPass>(); + + } + +#if LLVM_VERSION_MAJOR < 4 + const char * +#else + StringRef +#endif + getPassName() const override { + + return "InstTrim Instrumentation"; + + } + +#if LLVM_VERSION_MAJOR > 4 || \ + (LLVM_VERSION_MAJOR == 4 && LLVM_VERSION_PATCH >= 1) + #define AFL_HAVE_VECTOR_INTRINSICS 1 +#endif + + bool runOnModule(Module &M) override { + + setvbuf(stdout, NULL, _IONBF, 0); + + if ((isatty(2) && !getenv("AFL_QUIET")) || getenv("AFL_DEBUG") != NULL) { + + SAYF(cCYA "LLVMInsTrim" VERSION cRST " by csienslab\n"); + + } else + + be_quiet = 1; + + if (getenv("AFL_DEBUG") != NULL) debug = 1; + + LLVMContext &C = M.getContext(); + + IntegerType *Int8Ty = IntegerType::getInt8Ty(C); + IntegerType *Int32Ty = IntegerType::getInt32Ty(C); + +#if LLVM_VERSION_MAJOR < 9 + char *neverZero_counters_str; + if ((neverZero_counters_str = getenv("AFL_LLVM_NOT_ZERO")) != NULL) + if (!be_quiet) OKF("LLVM neverZero activated (by hexcoder)\n"); +#endif + skip_nozero = getenv("AFL_LLVM_SKIP_NEVERZERO"); + + if (getenv("AFL_LLVM_INSTRIM_LOOPHEAD") != NULL || + getenv("LOOPHEAD") != NULL) { + + LoopHeadOpt = true; + + } + + unsigned int PrevLocSize = 0; + char * ngram_size_str = getenv("AFL_LLVM_NGRAM_SIZE"); + if (!ngram_size_str) ngram_size_str = getenv("AFL_NGRAM_SIZE"); + char *ctx_str = getenv("AFL_LLVM_CTX"); + +#ifdef AFL_HAVE_VECTOR_INTRINSICS + unsigned int ngram_size = 0; + /* Decide previous location vector size (must be a power of two) */ + VectorType 
*PrevLocTy = NULL; + + if (ngram_size_str) + if (sscanf(ngram_size_str, "%u", &ngram_size) != 1 || ngram_size < 2 || + ngram_size > NGRAM_SIZE_MAX) + FATAL( + "Bad value of AFL_NGRAM_SIZE (must be between 2 and NGRAM_SIZE_MAX " + "(%u))", + NGRAM_SIZE_MAX); + + if (ngram_size) + PrevLocSize = ngram_size - 1; + else +#else + if (ngram_size_str) + #ifdef LLVM_VERSION_STRING + FATAL( + "Sorry, NGRAM branch coverage is not supported with llvm version %s!", + LLVM_VERSION_STRING); + #else + #ifndef LLVM_VERSION_PATCH + FATAL( + "Sorry, NGRAM branch coverage is not supported with llvm version " + "%d.%d.%d!", + LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, 0); + #else + FATAL( + "Sorry, NGRAM branch coverage is not supported with llvm version " + "%d.%d.%d!", + LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, LLVM_VERISON_PATCH); + #endif + #endif +#endif + PrevLocSize = 1; + +#ifdef AFL_HAVE_VECTOR_INTRINSICS + // IntegerType *Int64Ty = IntegerType::getInt64Ty(C); + int PrevLocVecSize = PowerOf2Ceil(PrevLocSize); + IntegerType *IntLocTy = + IntegerType::getIntNTy(C, sizeof(PREV_LOC_T) * CHAR_BIT); + if (ngram_size) + PrevLocTy = VectorType::get(IntLocTy, PrevLocVecSize + #if LLVM_VERSION_MAJOR >= 12 + , + false + #endif + ); +#endif + + /* Get globals for the SHM region and the previous location. Note that + __afl_prev_loc is thread-local. */ + + GlobalVariable *AFLMapPtr = + new GlobalVariable(M, PointerType::get(Int8Ty, 0), false, + GlobalValue::ExternalLinkage, 0, "__afl_area_ptr"); + GlobalVariable *AFLPrevLoc; + GlobalVariable *AFLContext = NULL; + LoadInst * PrevCtx = NULL; // for CTX sensitive coverage + + if (ctx_str) +#ifdef __ANDROID__ + AFLContext = new GlobalVariable( + M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_ctx"); +#else + AFLContext = new GlobalVariable( + M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_ctx", + 0, GlobalVariable::GeneralDynamicTLSModel, 0, false); +#endif + +#ifdef AFL_HAVE_VECTOR_INTRINSICS + if (ngram_size) + #ifdef __ANDROID__ + AFLPrevLoc = new GlobalVariable( + M, PrevLocTy, /* isConstant */ false, GlobalValue::ExternalLinkage, + /* Initializer */ nullptr, "__afl_prev_loc"); + #else + AFLPrevLoc = new GlobalVariable( + M, PrevLocTy, /* isConstant */ false, GlobalValue::ExternalLinkage, + /* Initializer */ nullptr, "__afl_prev_loc", + /* InsertBefore */ nullptr, GlobalVariable::GeneralDynamicTLSModel, + /* AddressSpace */ 0, /* IsExternallyInitialized */ false); + #endif + else +#endif +#ifdef __ANDROID__ + AFLPrevLoc = new GlobalVariable( + M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_loc"); +#else + AFLPrevLoc = new GlobalVariable( + M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_loc", 0, + GlobalVariable::GeneralDynamicTLSModel, 0, false); +#endif + +#ifdef AFL_HAVE_VECTOR_INTRINSICS + /* Create the vector shuffle mask for updating the previous block history. + Note that the first element of the vector will store cur_loc, so just set + it to undef to allow the optimizer to do its thing. 
*/ + + SmallVector<Constant *, 32> PrevLocShuffle = {UndefValue::get(Int32Ty)}; + + for (unsigned I = 0; I < PrevLocSize - 1; ++I) + PrevLocShuffle.push_back(ConstantInt::get(Int32Ty, I)); + + for (int I = PrevLocSize; I < PrevLocVecSize; ++I) + PrevLocShuffle.push_back(ConstantInt::get(Int32Ty, PrevLocSize)); + + Constant *PrevLocShuffleMask = ConstantVector::get(PrevLocShuffle); +#endif + + // this is our default + MarkSetOpt = true; + + ConstantInt *Zero = ConstantInt::get(Int8Ty, 0); + ConstantInt *One = ConstantInt::get(Int8Ty, 1); + + u64 total_rs = 0; + u64 total_hs = 0; + + scanForDangerousFunctions(&M); + + for (Function &F : M) { + + if (debug) { + + uint32_t bb_cnt = 0; + + for (auto &BB : F) + if (BB.size() > 0) ++bb_cnt; + SAYF(cMGN "[D] " cRST "Function %s size %zu %u\n", + F.getName().str().c_str(), F.size(), bb_cnt); + + } + + if (!isInInstrumentList(&F)) continue; + + // if the function below our minimum size skip it (1 or 2) + if (F.size() < function_minimum_size) { continue; } + + std::unordered_set<BasicBlock *> MS; + if (!MarkSetOpt) { + + for (auto &BB : F) { + + MS.insert(&BB); + + } + + total_rs += F.size(); + + } else { + + auto Result = markNodes(&F); + auto RS = Result.first; + auto HS = Result.second; + + MS.insert(RS.begin(), RS.end()); + if (!LoopHeadOpt) { + + MS.insert(HS.begin(), HS.end()); + total_rs += MS.size(); + + } else { + + DenseSet<std::pair<BasicBlock *, BasicBlock *>> EdgeSet; + DominatorTreeWrapperPass * DTWP = + &getAnalysis<DominatorTreeWrapperPass>(F); + auto DT = &DTWP->getDomTree(); + + total_rs += RS.size(); + total_hs += HS.size(); + + for (BasicBlock *BB : HS) { + + bool Inserted = false; + for (auto BI = pred_begin(BB), BE = pred_end(BB); BI != BE; ++BI) { + + auto Edge = BasicBlockEdge(*BI, BB); + if (Edge.isSingleEdge() && DT->dominates(Edge, BB)) { + + EdgeSet.insert({*BI, BB}); + Inserted = true; + break; + + } + + } + + if (!Inserted) { + + MS.insert(BB); + total_rs += 1; + total_hs -= 1; + + } + + } + + for (auto I = EdgeSet.begin(), E = EdgeSet.end(); I != E; ++I) { + + auto PredBB = I->first; + auto SuccBB = I->second; + auto NewBB = + SplitBlockPredecessors(SuccBB, {PredBB}, ".split", DT, nullptr, +#if LLVM_VERSION_MAJOR >= 8 + nullptr, +#endif + false); + MS.insert(NewBB); + + } + + } + + for (BasicBlock &BB : F) { + + if (MS.find(&BB) == MS.end()) { continue; } + IRBuilder<> IRB(&*BB.getFirstInsertionPt()); + +#ifdef AFL_HAVE_VECTOR_INTRINSICS + if (ngram_size) { + + LoadInst *PrevLoc = IRB.CreateLoad(AFLPrevLoc); + PrevLoc->setMetadata(M.getMDKindID("nosanitize"), + MDNode::get(C, None)); + + Value *ShuffledPrevLoc = IRB.CreateShuffleVector( + PrevLoc, UndefValue::get(PrevLocTy), PrevLocShuffleMask); + Value *UpdatedPrevLoc = IRB.CreateInsertElement( + ShuffledPrevLoc, ConstantInt::get(Int32Ty, genLabel()), + (uint64_t)0); + + IRB.CreateStore(UpdatedPrevLoc, AFLPrevLoc) + ->setMetadata(M.getMDKindID("nosanitize"), + MDNode::get(C, None)); + + } else + +#endif + { + + IRB.CreateStore(ConstantInt::get(Int32Ty, genLabel()), AFLPrevLoc); + + } + + } + + } + + int has_calls = 0; + for (BasicBlock &BB : F) { + + auto PI = pred_begin(&BB); + auto PE = pred_end(&BB); + IRBuilder<> IRB(&*BB.getFirstInsertionPt()); + Value * L = NULL; + unsigned int cur_loc; + + // Context sensitive coverage + if (ctx_str && &BB == &F.getEntryBlock()) { + + PrevCtx = IRB.CreateLoad(AFLContext); + PrevCtx->setMetadata(M.getMDKindID("nosanitize"), + MDNode::get(C, None)); + + // does the function have calls? 
and is any of the calls larger than + // one basic block? + has_calls = 0; + for (auto &BB : F) { + + if (has_calls) break; + for (auto &IN : BB) { + + CallInst *callInst = nullptr; + if ((callInst = dyn_cast<CallInst>(&IN))) { + + Function *Callee = callInst->getCalledFunction(); + if (!Callee || Callee->size() < function_minimum_size) + continue; + else { + + has_calls = 1; + break; + + } + + } + + } + + } + + // if yes we store a context ID for this function in the global var + if (has_calls) { + + ConstantInt *NewCtx = ConstantInt::get(Int32Ty, genLabel()); + StoreInst * StoreCtx = IRB.CreateStore(NewCtx, AFLContext); + StoreCtx->setMetadata(M.getMDKindID("nosanitize"), + MDNode::get(C, None)); + + } + + } // END of ctx_str + + if (MarkSetOpt && MS.find(&BB) == MS.end()) { continue; } + + if (PI == PE) { + + cur_loc = genLabel(); + L = ConstantInt::get(Int32Ty, cur_loc); + + } else { + + auto *PN = PHINode::Create(Int32Ty, 0, "", &*BB.begin()); + DenseMap<BasicBlock *, unsigned> PredMap; + for (auto PI = pred_begin(&BB), PE = pred_end(&BB); PI != PE; ++PI) { + + BasicBlock *PBB = *PI; + auto It = PredMap.insert({PBB, genLabel()}); + unsigned Label = It.first->second; + cur_loc = Label; + PN->addIncoming(ConstantInt::get(Int32Ty, Label), PBB); + + } + + L = PN; + + } + + /* Load prev_loc */ + LoadInst *PrevLoc = IRB.CreateLoad(AFLPrevLoc); + PrevLoc->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + Value *PrevLocTrans; + +#ifdef AFL_HAVE_VECTOR_INTRINSICS + /* "For efficiency, we propose to hash the tuple as a key into the + hit_count map as (prev_block_trans << 1) ^ curr_block_trans, where + prev_block_trans = (block_trans_1 ^ ... ^ block_trans_(n-1)" */ + + if (ngram_size) + PrevLocTrans = + IRB.CreateZExt(IRB.CreateXorReduce(PrevLoc), IRB.getInt32Ty()); + else +#endif + PrevLocTrans = IRB.CreateZExt(PrevLoc, IRB.getInt32Ty()); + + if (ctx_str) + PrevLocTrans = + IRB.CreateZExt(IRB.CreateXor(PrevLocTrans, PrevCtx), Int32Ty); + + /* Load SHM pointer */ + LoadInst *MapPtr = IRB.CreateLoad(AFLMapPtr); + MapPtr->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + Value *MapPtrIdx; +#ifdef AFL_HAVE_VECTOR_INTRINSICS + if (ngram_size) + MapPtrIdx = IRB.CreateGEP( + MapPtr, IRB.CreateZExt(IRB.CreateXor(PrevLocTrans, L), Int32Ty)); + else +#endif + MapPtrIdx = IRB.CreateGEP(MapPtr, IRB.CreateXor(PrevLocTrans, L)); + + /* Update bitmap */ + LoadInst *Counter = IRB.CreateLoad(MapPtrIdx); + Counter->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + + Value *Incr = IRB.CreateAdd(Counter, One); + +#if LLVM_VERSION_MAJOR < 9 + if (neverZero_counters_str != + NULL) // with llvm 9 we make this the default as the bug in llvm is + // then fixed +#else + if (!skip_nozero) +#endif + { + + /* hexcoder: Realize a counter that skips zero during overflow. 
+ * Once this counter reaches its maximum value, it next increments to + * 1 + * + * Instead of + * Counter + 1 -> Counter + * we inject now this + * Counter + 1 -> {Counter, OverflowFlag} + * Counter + OverflowFlag -> Counter + */ + auto cf = IRB.CreateICmpEQ(Incr, Zero); + auto carry = IRB.CreateZExt(cf, Int8Ty); + Incr = IRB.CreateAdd(Incr, carry); + + } + + IRB.CreateStore(Incr, MapPtrIdx) + ->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + + if (ctx_str && has_calls) { + + // in CTX mode we have to restore the original context for the + // caller - she might be calling other functions which need the + // correct CTX + Instruction *Inst = BB.getTerminator(); + if (isa<ReturnInst>(Inst) || isa<ResumeInst>(Inst)) { + + IRBuilder<> Post_IRB(Inst); + StoreInst * RestoreCtx = Post_IRB.CreateStore(PrevCtx, AFLContext); + RestoreCtx->setMetadata(M.getMDKindID("nosanitize"), + MDNode::get(C, None)); + + } + + } + + total_instr++; + + } + + } + + if (!be_quiet) { + + char modeline[100]; + snprintf(modeline, sizeof(modeline), "%s%s%s%s%s", + getenv("AFL_HARDEN") ? "hardened" : "non-hardened", + getenv("AFL_USE_ASAN") ? ", ASAN" : "", + getenv("AFL_USE_MSAN") ? ", MSAN" : "", + getenv("AFL_USE_CFISAN") ? ", CFISAN" : "", + getenv("AFL_USE_UBSAN") ? ", UBSAN" : ""); + + OKF("Instrumented %u locations (%llu, %llu) (%s mode)\n", total_instr, + total_rs, total_hs, modeline); + + } + + return false; + + } + +}; // end of struct InsTrim + +} // end of anonymous namespace + +char InsTrim::ID = 0; + +static void registerAFLPass(const PassManagerBuilder &, + legacy::PassManagerBase &PM) { + + PM.add(new InsTrim()); + +} + +static RegisterStandardPasses RegisterAFLPass( + PassManagerBuilder::EP_OptimizerLast, registerAFLPass); + +static RegisterStandardPasses RegisterAFLPass0( + PassManagerBuilder::EP_EnabledOnOptLevel0, registerAFLPass); + diff --git a/instrumentation/MarkNodes.cc b/instrumentation/MarkNodes.cc new file mode 100644 index 00000000..20a7df35 --- /dev/null +++ b/instrumentation/MarkNodes.cc @@ -0,0 +1,481 @@ +#include <algorithm> +#include <map> +#include <queue> +#include <set> +#include <vector> + +#include "llvm/Config/llvm-config.h" +#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 5 +typedef long double max_align_t; +#endif + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/BasicBlock.h" +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) + #include "llvm/IR/CFG.h" +#else + #include "llvm/Support/CFG.h" +#endif +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +DenseMap<BasicBlock *, uint32_t> LMap; +std::vector<BasicBlock *> Blocks; +std::set<uint32_t> Marked, Markabove; +std::vector<std::vector<uint32_t> > Succs, Preds; + +void reset() { + + LMap.clear(); + Blocks.clear(); + Marked.clear(); + Markabove.clear(); + +} + +uint32_t start_point; + +void labelEachBlock(Function *F) { + + // Fake single endpoint; + LMap[NULL] = Blocks.size(); + Blocks.push_back(NULL); + + // Assign the unique LabelID to each block; + for (auto I = F->begin(), E = F->end(); I != E; ++I) { + + BasicBlock *BB = &*I; + LMap[BB] = Blocks.size(); + Blocks.push_back(BB); + + } + + start_point = LMap[&F->getEntryBlock()]; + +} + +void buildCFG(Function *F) { + + 
Succs.resize(Blocks.size()); + Preds.resize(Blocks.size()); + for (size_t i = 0; i < Succs.size(); i++) { + + Succs[i].clear(); + Preds[i].clear(); + + } + + for (auto S = F->begin(), E = F->end(); S != E; ++S) { + + BasicBlock *BB = &*S; + uint32_t MyID = LMap[BB]; + + for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { + + Succs[MyID].push_back(LMap[*I]); + + } + + } + +} + +std::vector<std::vector<uint32_t> > tSuccs; +std::vector<bool> tag, indfs; + +void DFStree(size_t now_id) { + + if (tag[now_id]) return; + tag[now_id] = true; + indfs[now_id] = true; + for (auto succ : tSuccs[now_id]) { + + if (tag[succ] and indfs[succ]) { + + Marked.insert(succ); + Markabove.insert(succ); + continue; + + } + + Succs[now_id].push_back(succ); + Preds[succ].push_back(now_id); + DFStree(succ); + + } + + indfs[now_id] = false; + +} + +void turnCFGintoDAG() { + + tSuccs = Succs; + tag.resize(Blocks.size()); + indfs.resize(Blocks.size()); + for (size_t i = 0; i < Blocks.size(); ++i) { + + Succs[i].clear(); + tag[i] = false; + indfs[i] = false; + + } + + DFStree(start_point); + for (size_t i = 0; i < Blocks.size(); ++i) + if (Succs[i].empty()) { + + Succs[i].push_back(0); + Preds[0].push_back(i); + + } + +} + +uint32_t timeStamp; +namespace DominatorTree { + +std::vector<std::vector<uint32_t> > cov; +std::vector<uint32_t> dfn, nfd, par, sdom, idom, mom, mn; + +bool Compare(uint32_t u, uint32_t v) { + + return dfn[u] < dfn[v]; + +} + +uint32_t eval(uint32_t u) { + + if (mom[u] == u) return u; + uint32_t res = eval(mom[u]); + if (Compare(sdom[mn[mom[u]]], sdom[mn[u]])) { mn[u] = mn[mom[u]]; } + return mom[u] = res; + +} + +void DFS(uint32_t now) { + + timeStamp += 1; + dfn[now] = timeStamp; + nfd[timeStamp - 1] = now; + for (auto succ : Succs[now]) { + + if (dfn[succ] == 0) { + + par[succ] = now; + DFS(succ); + + } + + } + +} + +void DominatorTree() { + + if (Blocks.empty()) return; + uint32_t s = start_point; + + // Initialization + mn.resize(Blocks.size()); + cov.resize(Blocks.size()); + dfn.resize(Blocks.size()); + nfd.resize(Blocks.size()); + par.resize(Blocks.size()); + mom.resize(Blocks.size()); + sdom.resize(Blocks.size()); + idom.resize(Blocks.size()); + + for (uint32_t i = 0; i < Blocks.size(); i++) { + + dfn[i] = 0; + nfd[i] = Blocks.size(); + cov[i].clear(); + idom[i] = mom[i] = mn[i] = sdom[i] = i; + + } + + timeStamp = 0; + DFS(s); + + for (uint32_t i = Blocks.size() - 1; i >= 1u; i--) { + + uint32_t now = nfd[i]; + if (now == Blocks.size()) { continue; } + for (uint32_t pre : Preds[now]) { + + if (dfn[pre]) { + + eval(pre); + if (Compare(sdom[mn[pre]], sdom[now])) { sdom[now] = sdom[mn[pre]]; } + + } + + } + + cov[sdom[now]].push_back(now); + mom[now] = par[now]; + for (uint32_t x : cov[par[now]]) { + + eval(x); + if (Compare(sdom[mn[x]], par[now])) { + + idom[x] = mn[x]; + + } else { + + idom[x] = par[now]; + + } + + } + + } + + for (uint32_t i = 1; i < Blocks.size(); i += 1) { + + uint32_t now = nfd[i]; + if (now == Blocks.size()) { continue; } + if (idom[now] != sdom[now]) idom[now] = idom[idom[now]]; + + } + +} + +} // namespace DominatorTree + +std::vector<uint32_t> Visited, InStack; +std::vector<uint32_t> TopoOrder, InDeg; +std::vector<std::vector<uint32_t> > t_Succ, t_Pred; + +void Go(uint32_t now, uint32_t tt) { + + if (now == tt) return; + Visited[now] = InStack[now] = timeStamp; + + for (uint32_t nxt : Succs[now]) { + + if (Visited[nxt] == timeStamp and InStack[nxt] == timeStamp) { + + Marked.insert(nxt); + + } + + t_Succ[now].push_back(nxt); + t_Pred[nxt].push_back(now); + 
InDeg[nxt] += 1; + if (Visited[nxt] == timeStamp) { continue; } + Go(nxt, tt); + + } + + InStack[now] = 0; + +} + +void TopologicalSort(uint32_t ss, uint32_t tt) { + + timeStamp += 1; + + Go(ss, tt); + + TopoOrder.clear(); + std::queue<uint32_t> wait; + wait.push(ss); + while (not wait.empty()) { + + uint32_t now = wait.front(); + wait.pop(); + TopoOrder.push_back(now); + for (uint32_t nxt : t_Succ[now]) { + + InDeg[nxt] -= 1; + if (InDeg[nxt] == 0u) { wait.push(nxt); } + + } + + } + +} + +std::vector<std::set<uint32_t> > NextMarked; +bool Indistinguish(uint32_t node1, uint32_t node2) { + + if (NextMarked[node1].size() > NextMarked[node2].size()) { + + uint32_t _swap = node1; + node1 = node2; + node2 = _swap; + + } + + for (uint32_t x : NextMarked[node1]) { + + if (NextMarked[node2].find(x) != NextMarked[node2].end()) { return true; } + + } + + return false; + +} + +void MakeUniq(uint32_t now) { + + bool StopFlag = false; + if (Marked.find(now) == Marked.end()) { + + for (uint32_t pred1 : t_Pred[now]) { + + for (uint32_t pred2 : t_Pred[now]) { + + if (pred1 == pred2) continue; + if (Indistinguish(pred1, pred2)) { + + Marked.insert(now); + StopFlag = true; + break; + + } + + } + + if (StopFlag) { break; } + + } + + } + + if (Marked.find(now) != Marked.end()) { + + NextMarked[now].insert(now); + + } else { + + for (uint32_t pred : t_Pred[now]) { + + for (uint32_t x : NextMarked[pred]) { + + NextMarked[now].insert(x); + + } + + } + + } + +} + +bool MarkSubGraph(uint32_t ss, uint32_t tt) { + + TopologicalSort(ss, tt); + if (TopoOrder.empty()) return false; + + for (uint32_t i : TopoOrder) { + + NextMarked[i].clear(); + + } + + NextMarked[TopoOrder[0]].insert(TopoOrder[0]); + for (uint32_t i = 1; i < TopoOrder.size(); i += 1) { + + MakeUniq(TopoOrder[i]); + + } + + // Check if there is an empty path. + if (NextMarked[tt].count(TopoOrder[0]) > 0) return true; + return false; + +} + +void MarkVertice() { + + uint32_t s = start_point; + + InDeg.resize(Blocks.size()); + Visited.resize(Blocks.size()); + InStack.resize(Blocks.size()); + t_Succ.resize(Blocks.size()); + t_Pred.resize(Blocks.size()); + NextMarked.resize(Blocks.size()); + + for (uint32_t i = 0; i < Blocks.size(); i += 1) { + + Visited[i] = InStack[i] = InDeg[i] = 0; + t_Succ[i].clear(); + t_Pred[i].clear(); + + } + + timeStamp = 0; + uint32_t t = 0; + bool emptyPathExists = true; + + while (s != t) { + + emptyPathExists &= MarkSubGraph(DominatorTree::idom[t], t); + t = DominatorTree::idom[t]; + + } + + if (emptyPathExists) { + + // Mark all exit blocks to catch the empty path. 
+ Marked.insert(t_Pred[0].begin(), t_Pred[0].end()); + + } + +} + +// return {marked nodes} +std::pair<std::vector<BasicBlock *>, std::vector<BasicBlock *> > markNodes( + Function *F) { + + assert(F->size() > 0 && "Function can not be empty"); + + reset(); + labelEachBlock(F); + buildCFG(F); + turnCFGintoDAG(); + DominatorTree::DominatorTree(); + MarkVertice(); + + std::vector<BasicBlock *> Result, ResultAbove; + for (uint32_t x : Markabove) { + + auto it = Marked.find(x); + if (it != Marked.end()) Marked.erase(it); + if (x) ResultAbove.push_back(Blocks[x]); + + } + + for (uint32_t x : Marked) { + + if (x == 0) { + + continue; + + } else { + + Result.push_back(Blocks[x]); + + } + + } + + return {Result, ResultAbove}; + +} + diff --git a/instrumentation/MarkNodes.h b/instrumentation/MarkNodes.h new file mode 100644 index 00000000..8ddc978d --- /dev/null +++ b/instrumentation/MarkNodes.h @@ -0,0 +1,12 @@ +#ifndef __MARK_NODES__ +#define __MARK_NODES__ + +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Function.h" +#include <vector> + +std::pair<std::vector<llvm::BasicBlock *>, std::vector<llvm::BasicBlock *>> +markNodes(llvm::Function *F); + +#endif + diff --git a/instrumentation/README.cmplog.md b/instrumentation/README.cmplog.md new file mode 100644 index 00000000..5f855e1f --- /dev/null +++ b/instrumentation/README.cmplog.md @@ -0,0 +1,42 @@ +# CmpLog instrumentation + +The CmpLog instrumentation enables the logging of the comparisons operands in a +shared memory. + +These values can be used by various mutators built on top of it. +At the moment we support the RedQueen mutator (input-2-state instructions only). + +## Build + +To use CmpLog, you have to build two versions of the instrumented target +program. + +The first version is built using the regular AFL++ instrumentation. + +The second one, the CmpLog binary, with setting AFL_LLVM_CMPLOG during the compilation. + +For example: + +``` +./configure --cc=~/path/to/afl-clang-fast +make +cp ./program ./program.afl +make clean +export AFL_LLVM_CMPLOG=1 +./configure --cc=~/path/to/afl-clang-fast +make +cp ./program ./program.cmplog +``` + +## Use + +AFL++ has the new -c option that needs to be used to specify the CmpLog binary (the second +build). + +For example: + +``` +afl-fuzz -i input -o output -c ./program.cmplog -m none -- ./program.afl @@ +``` + +Be sure to use `-m none` because CmpLog can map a lot of pages. diff --git a/instrumentation/README.ctx.md b/instrumentation/README.ctx.md new file mode 100644 index 00000000..caf2c09a --- /dev/null +++ b/instrumentation/README.ctx.md @@ -0,0 +1,22 @@ +# AFL Context Sensitive Branch Coverage + +## What is this? + +This is an LLVM-based implementation of the context sensitive branch coverage. + +Basically every function gets its own ID and that ID is combined with the +edges of the called functions. + +So if both function A and function B call a function C, the coverage +collected in C will be different. + +In math the coverage is collected as follows: +`map[current_location_ID ^ previous_location_ID >> 1 ^ previous_callee_ID] += 1` + +## Usage + +Set the `AFL_LLVM_INSTRUMENT=CTX` or `AFL_LLVM_CTX=1` environment variable. + +It is highly recommended to increase the MAP_SIZE_POW2 definition in +config.h to at least 18 and maybe up to 20 for this as otherwise too +many map collisions occur. 
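To make the formula above concrete, here is a minimal C sketch of what the update amounts to at runtime. It is illustrative only and not the literal IR the pass emits; the global names mirror the ones referenced by the pass (`__afl_area_ptr`, `__afl_prev_loc`, `__afl_prev_ctx`), and `map_size` stands in for the compile-time map size.

```c
/* Illustrative sketch only - not the code the LLVM pass actually emits. */
extern unsigned char *__afl_area_ptr;          /* shared-memory coverage map */
extern __thread unsigned int __afl_prev_loc;   /* ID of the previous block   */
extern __thread unsigned int __afl_prev_ctx;   /* ID of the calling function */

static inline void ctx_edge_hit(unsigned int cur_loc, unsigned int map_size) {

  unsigned int idx =
      (cur_loc ^ (__afl_prev_loc >> 1) ^ __afl_prev_ctx) & (map_size - 1);
  __afl_area_ptr[idx]++;                       /* counter may wrap, see NeverZero */
  __afl_prev_loc = cur_loc;

}
```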
diff --git a/instrumentation/README.gcc_plugin.md b/instrumentation/README.gcc_plugin.md new file mode 100644 index 00000000..9d6bc200 --- /dev/null +++ b/instrumentation/README.gcc_plugin.md @@ -0,0 +1,158 @@ +# GCC-based instrumentation for afl-fuzz + + (See [../README.md](../README.md) for the general instruction manual.) + (See [README.llvm.md](README.llvm.md) for the LLVM-based instrumentation.) + +!!! TODO items are: +!!! => inline instrumentation has to work! +!!! + + +## 1) Introduction + +The code in this directory allows you to instrument programs for AFL using +true compiler-level instrumentation, instead of the more crude +assembly-level rewriting approach taken by afl-gcc and afl-clang. This has +several interesting properties: + + - The compiler can make many optimizations that are hard to pull off when + manually inserting assembly. As a result, some slow, CPU-bound programs will + run up to around faster. + + The gains are less pronounced for fast binaries, where the speed is limited + chiefly by the cost of creating new processes. In such cases, the gain will + probably stay within 10%. + + - The instrumentation is CPU-independent. At least in principle, you should + be able to rely on it to fuzz programs on non-x86 architectures (after + building afl-fuzz with AFL_NOX86=1). + + - Because the feature relies on the internals of GCC, it is gcc-specific + and will *not* work with LLVM (see ../llvm_mode for an alternative). + +Once this implementation is shown to be sufficiently robust and portable, it +will probably replace afl-gcc. For now, it can be built separately and +co-exists with the original code. + +The idea and much of the implementation comes from Laszlo Szekeres. + +## 2) How to use + +In order to leverage this mechanism, you need to have modern enough GCC +(>= version 4.5.0) and the plugin headers installed on your system. That +should be all you need. On Debian machines, these headers can be acquired by +installing the `gcc-<VERSION>-plugin-dev` packages. + +To build the instrumentation itself, type 'make'. This will generate binaries +called afl-gcc-fast and afl-g++-fast in the parent directory. +If the CC/CXX have been overridden, those compilers will be used from +those wrappers without using AFL_CXX/AFL_CC settings. +Once this is done, you can instrument third-party code in a way similar to the +standard operating mode of AFL, e.g.: + + CC=/path/to/afl/afl-gcc-fast ./configure [...options...] + make + +Be sure to also include CXX set to afl-g++-fast for C++ code. + +The tool honors roughly the same environmental variables as afl-gcc (see +[env_variables.md](../docs/env_variables.md). This includes AFL_INST_RATIO, AFL_USE_ASAN, +AFL_HARDEN, and AFL_DONT_OPTIMIZE. + +Note: if you want the GCC plugin to be installed on your system for all +users, you need to build it before issuing 'make install' in the parent +directory. + +## 3) Gotchas, feedback, bugs + +This is an early-stage mechanism, so field reports are welcome. You can send bug +reports to <hexcoder-@github.com>. + +## 4) Bonus feature #1: deferred initialization + +AFL tries to optimize performance by executing the targeted binary just once, +stopping it just before main(), and then cloning this "main" process to get +a steady supply of targets to fuzz. 
+ +Although this approach eliminates much of the OS-, linker- and libc-level +costs of executing the program, it does not always help with binaries that +perform other time-consuming initialization steps - say, parsing a large config +file before getting to the fuzzed data. + +In such cases, it's beneficial to initialize the forkserver a bit later, once +most of the initialization work is already done, but before the binary attempts +to read the fuzzed input and parse it; in some cases, this can offer a 10x+ +performance gain. You can implement delayed initialization in LLVM mode in a +fairly simple way. + +First, locate a suitable location in the code where the delayed cloning can +take place. This needs to be done with *extreme* care to avoid breaking the +binary. In particular, the program will probably malfunction if you select +a location after: + + - The creation of any vital threads or child processes - since the forkserver + can't clone them easily. + + - The initialization of timers via setitimer() or equivalent calls. + + - The creation of temporary files, network sockets, offset-sensitive file + descriptors, and similar shared-state resources - but only provided that + their state meaningfully influences the behavior of the program later on. + + - Any access to the fuzzed input, including reading the metadata about its + size. + +With the location selected, add this code in the appropriate spot: + +``` +#ifdef __AFL_HAVE_MANUAL_CONTROL + __AFL_INIT(); +#endif +``` + +You don't need the #ifdef guards, but they will make the program still work as +usual when compiled with a tool other than afl-gcc-fast/afl-clang-fast. + +Finally, recompile the program with afl-gcc-fast (afl-gcc or afl-clang will +*not* generate a deferred-initialization binary) - and you should be all set! + +## 5) Bonus feature #2: persistent mode + +Some libraries provide APIs that are stateless, or whose state can be reset in +between processing different input files. When such a reset is performed, a +single long-lived process can be reused to try out multiple test cases, +eliminating the need for repeated fork() calls and the associated OS overhead. + +The basic structure of the program that does this would be: + +``` + while (__AFL_LOOP(1000)) { + + /* Read input data. */ + /* Call library code to be fuzzed. */ + /* Reset state. */ + + } + + /* Exit normally */ +``` + +The numerical value specified within the loop controls the maximum number +of iterations before AFL will restart the process from scratch. This minimizes +the impact of memory leaks and similar glitches; 1000 is a good starting point. + +A more detailed template is shown in ../examples/persistent_demo/. +Similarly to the previous mode, the feature works only with afl-gcc-fast or +afl-clang-fast; #ifdef guards can be used to suppress it when using other +compilers. + +Note that as with the previous mode, the feature is easy to misuse; if you +do not reset the critical state fully, you may end up with false positives or +waste a whole lot of CPU power doing nothing useful at all. Be particularly +wary of memory leaks and the state of file descriptors. + +When running in this mode, the execution paths will inherently vary a bit +depending on whether the input loop is being entered for the first time or +executed again. To avoid spurious warnings, the feature implies +AFL_NO_VAR_CHECK and hides the "variable path" warnings in the UI. 
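Putting deferred initialization and persistent mode together, a complete minimal harness for afl-gcc-fast could look like the sketch below. This is an illustration rather than part of the plugin; `process_data()` is a placeholder for the real code under test, and input arrives on stdin as in the standard AFL workflow. When building with a non-AFL compiler you would additionally need #ifdef guards around `__AFL_LOOP`, as noted above.

```c
#include <unistd.h>

/* placeholder for the real code under test */
static void process_data(const unsigned char *buf, ssize_t len) {

  (void)buf;
  (void)len;

}

int main(void) {

#ifdef __AFL_HAVE_MANUAL_CONTROL
  __AFL_INIT();                        /* start the forkserver late */
#endif

  static unsigned char buf[65536];

  while (__AFL_LOOP(1000)) {           /* up to 1000 runs per process */

    ssize_t len = read(0, buf, sizeof(buf));
    if (len < 1) continue;

    process_data(buf, len);            /* call the code to be fuzzed */
    /* reset any global or library state here before the next iteration */

  }

  return 0;

}
```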
+ diff --git a/instrumentation/README.instrim.md b/instrumentation/README.instrim.md new file mode 100644 index 00000000..99f6477a --- /dev/null +++ b/instrumentation/README.instrim.md @@ -0,0 +1,30 @@ +# InsTrim + +InsTrim: Lightweight Instrumentation for Coverage-guided Fuzzing + +## Introduction + +InsTrim is the work of Chin-Chia Hsu, Che-Yu Wu, Hsu-Chun Hsiao and Shih-Kun Huang. + +It uses a CFG (call flow graph) and markers to instrument just what +is necessary in the binary (ie less than llvm_mode). As a result the binary is +about 10-15% faster compared to normal llvm_mode however with some coverage loss. +It requires at least llvm version 3.8.0 to build. +If you have LLVM 7+ we recommend PCGUARD instead. + +## Usage + +Set the environment variable `AFL_LLVM_INSTRUMENT=CFG` or `AFL_LLVM_INSTRIM=1` +during compilation of the target. + +There is also special mode which instruments loops in a way so that +afl-fuzz can see which loop path has been selected but not being able to +see how often the loop has been rerun. +This again is a tradeoff for speed for less path information. +To enable this mode set `AFL_LLVM_INSTRIM_LOOPHEAD=1`. + +## Background + +The paper from Chin-Chia Hsu, Che-Yu Wu, Hsu-Chun Hsiao and Shih-Kun Huang: +[InsTrim: Lightweight Instrumentation for Coverage-guided Fuzzing] +(https://www.ndss-symposium.org/wp-content/uploads/2018/07/bar2018_14_Hsu_paper.pdf) diff --git a/instrumentation/README.instrument_list.md b/instrumentation/README.instrument_list.md new file mode 100644 index 00000000..60474ec6 --- /dev/null +++ b/instrumentation/README.instrument_list.md @@ -0,0 +1,87 @@ +# Using afl++ with partial instrumentation + + This file describes how to selectively instrument only source files + or functions that are of interest to you using the LLVM instrumentation + provided by afl++. + +## 1) Description and purpose + +When building and testing complex programs where only a part of the program is +the fuzzing target, it often helps to only instrument the necessary parts of +the program, leaving the rest uninstrumented. This helps to focus the fuzzer +on the important parts of the program, avoiding undesired noise and +disturbance by uninteresting code being exercised. + +For this purpose, a "partial instrumentation" support en par with llvm sancov +is provided by afl++ that allows to specify on a source file and function +level which function should be compiled with or without instrumentation. + +Note: When using PCGUARD mode - and llvm 12+ - you can use this instead: +https://clang.llvm.org/docs/SanitizerCoverage.html#partially-disabling-instrumentation + +The llvm sancov list format is fully supported by afl++, however afl++ has +more flexibility. + +## 2) Building the LLVM module + +The new code is part of the existing afl++ LLVM module in the instrumentation/ +subdirectory. There is nothing specifically to do for the build :) + +## 3) How to use the partial instrumentation mode + +In order to build with partial instrumentation, you need to build with +afl-clang-fast/afl-clang-fast++ or afl-clang-lto/afl-clang-lto++. +The only required change is that you need to set either the environment variable +AFL_LLVM_ALLOWLIST or AFL_LLVM_DENYLIST set with a filename. + +That file should contain the file names or functions that are to be instrumented +(AFL_LLVM_ALLOWLIST) or are specifically NOT to be instrumented (AFL_LLVM_DENYLIST). 
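For example (the file path below is a placeholder), the list is passed to the compiler wrapper purely via the environment at compile time:

```
export AFL_LLVM_ALLOWLIST=/path/to/instrument_file_list.txt
CC=afl-clang-fast CXX=afl-clang-fast++ ./configure
make
```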
+ +For matching to succeed, the function/file name that is being compiled must end in the +function/file name entry contained in this instrument file list. That is to avoid +breaking the match when absolute paths are used during compilation. + +**NOTE:** In builds with optimization enabled, functions might be inlined and would not match! + +For example if your source tree looks like this: +``` +project/ +project/feature_a/a1.cpp +project/feature_a/a2.cpp +project/feature_b/b1.cpp +project/feature_b/b2.cpp +``` + +and you only want to test feature_a, then create an "instrument file list" file containing: +``` +feature_a/a1.cpp +feature_a/a2.cpp +``` + +However if the "instrument file list" file contains only this, it works as well: +``` +a1.cpp +a2.cpp +``` +but it might lead to files being unwantedly instrumented if the same filename +exists somewhere else in the project directories. + +You can also specify function names. Note that for C++ the function names +must be mangled to match! `nm` can print these names. + +afl++ is able to identify whether an entry is a filename or a function. +However if you want to be sure (and compliant to the sancov allow/blocklist +format), you can specify source file entries like this: +``` +src: *malloc.c +``` +and function entries like this: +``` +fun: MallocFoo +``` +Note that whitespace is ignored and comments (`# foo`) are supported. + +## 4) UNIX-style pattern matching + +You can add UNIX-style pattern matching in the "instrument file list" entries. +See `man fnmatch` for the syntax. We do not set any of the `fnmatch` flags. diff --git a/instrumentation/README.laf-intel.md b/instrumentation/README.laf-intel.md new file mode 100644 index 00000000..c50a6979 --- /dev/null +++ b/instrumentation/README.laf-intel.md @@ -0,0 +1,56 @@ +# laf-intel instrumentation + +## Introduction + +This originally is the work of an individual nicknamed laf-intel. +His blog [Circumventing Fuzzing Roadblocks with Compiler Transformations] +(https://lafintel.wordpress.com/) and gitlab repo [laf-llvm-pass] +(https://gitlab.com/laf-intel/laf-llvm-pass/) +describe some code transformations that +help afl++ to enter conditional blocks, where conditions consist of +comparisons of large values. + +## Usage + +By default these passes will not run when you compile programs using +afl-clang-fast. Hence, you can use AFL as usual. +To enable the passes you must set environment variables before you +compile the target project. + +The following options exist: + +`export AFL_LLVM_LAF_SPLIT_SWITCHES=1` + +Enables the split-switches pass. + +`export AFL_LLVM_LAF_TRANSFORM_COMPARES=1` + +Enables the transform-compares pass (strcmp, memcmp, strncmp, +strcasecmp, strncasecmp). + +`export AFL_LLVM_LAF_SPLIT_COMPARES=1` + +Enables the split-compares pass. +By default it will +1. simplify operators >= (and <=) into chains of > (<) and == comparisons +2. change signed integer comparisons to a chain of sign-only comparison +and unsigned integer comparisons +3. split all unsigned integer comparisons with bit widths of +64, 32 or 16 bits to chains of 8 bits comparisons. + +You can change the behaviour of the last step by setting +`export AFL_LLVM_LAF_SPLIT_COMPARES_BITW=<bit_width>`, where +bit_width may be 64, 32 or 16. For example, a bit_width of 16 +would split larger comparisons down to 16 bit comparisons. 
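As a rough source-level illustration of the split-compares idea (the real pass transforms LLVM IR, not C source, and the constant here is made up), a 32-bit equality check becomes a chain of 8-bit checks that the fuzzer can satisfy one byte at a time:

```c
#include <stdint.h>

static int do_something(void) { return 1; }  /* placeholder */

int check(uint32_t x) {

  /* before: a single 32-bit comparison, hard for the fuzzer to guess */
  /* if (x == 0xDEADBEEF) return do_something(); */

  /* conceptually after splitting into 8-bit comparisons: */
  if ((x >> 24) == 0xDE)
    if (((x >> 16) & 0xff) == 0xAD)
      if (((x >> 8) & 0xff) == 0xBE)
        if ((x & 0xff) == 0xEF) return do_something();

  return 0;

}
```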
+ +A new experimental feature is splitting floating point comparisons into a +series of sign, exponent and mantissa comparisons followed by splitting each +of them into 8 bit comparisons when necessary. +It is activated with the `AFL_LLVM_LAF_SPLIT_FLOATS` setting. +Please note that full IEEE 754 functionality is not preserved, that is +values of nan and infinity will probably behave differently. + +Note that setting this automatically activates `AFL_LLVM_LAF_SPLIT_COMPARES` + +You can also set `AFL_LLVM_LAF_ALL` and have all of the above enabled :-) + diff --git a/instrumentation/README.llvm.md b/instrumentation/README.llvm.md new file mode 100644 index 00000000..51e9995b --- /dev/null +++ b/instrumentation/README.llvm.md @@ -0,0 +1,194 @@ +# Fast LLVM-based instrumentation for afl-fuzz + + (See [../README.md](../README.md) for the general instruction manual.) + + (See [README.gcc_plugon.md](../README.gcc_plugin.md) for the GCC-based instrumentation.) + +## 1) Introduction + +! llvm_mode works with llvm versions 3.4 up to 12 ! + +The code in this directory allows you to instrument programs for AFL using +true compiler-level instrumentation, instead of the more crude +assembly-level rewriting approach taken by afl-gcc and afl-clang. This has +several interesting properties: + + - The compiler can make many optimizations that are hard to pull off when + manually inserting assembly. As a result, some slow, CPU-bound programs will + run up to around 2x faster. + + The gains are less pronounced for fast binaries, where the speed is limited + chiefly by the cost of creating new processes. In such cases, the gain will + probably stay within 10%. + + - The instrumentation is CPU-independent. At least in principle, you should + be able to rely on it to fuzz programs on non-x86 architectures (after + building afl-fuzz with AFL_NO_X86=1). + + - The instrumentation can cope a bit better with multi-threaded targets. + + - Because the feature relies on the internals of LLVM, it is clang-specific + and will *not* work with GCC (see ../gcc_plugin/ for an alternative once + it is available). + +Once this implementation is shown to be sufficiently robust and portable, it +will probably replace afl-clang. For now, it can be built separately and +co-exists with the original code. + +The idea and much of the intial implementation came from Laszlo Szekeres. + +## 2a) How to use this - short + +Set the `LLVM_CONFIG` variable to the clang version you want to use, e.g. +``` +LLVM_CONFIG=llvm-config-9 make +``` +In case you have your own compiled llvm version specify the full path: +``` +LLVM_CONFIG=~/llvm-project/build/bin/llvm-config make +``` +If you try to use a new llvm version on an old Linux this can fail because of +old c++ libraries. In this case usually switching to gcc/g++ to compile +llvm_mode will work: +``` +LLVM_CONFIG=llvm-config-7 REAL_CC=gcc REAL_CXX=g++ make +``` +It is highly recommended to use the newest clang version you can put your +hands on :) + +Then look at [README.persistent_mode.md](README.persistent_mode.md). + +## 2b) How to use this - long + +In order to leverage this mechanism, you need to have clang installed on your +system. You should also make sure that the llvm-config tool is in your path +(or pointed to via LLVM_CONFIG in the environment). 
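A quick sanity check that the expected toolchain will be picked up (the version suffix is just an example):

```
llvm-config --version     # or e.g.: llvm-config-11 --version
clang --version
```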
+ +Note that if you have several LLVM versions installed, pointing LLVM_CONFIG +to the version you want to use will switch compiling to this specific +version - if you installation is set up correctly :-) + +Unfortunately, some systems that do have clang come without llvm-config or the +LLVM development headers; one example of this is FreeBSD. FreeBSD users will +also run into problems with clang being built statically and not being able to +load modules (you'll see "Service unavailable" when loading afl-llvm-pass.so). + +To solve all your problems, you can grab pre-built binaries for your OS from: + + http://llvm.org/releases/download.html + +...and then put the bin/ directory from the tarball at the beginning of your +$PATH when compiling the feature and building packages later on. You don't need +to be root for that. + +To build the instrumentation itself, type 'make'. This will generate binaries +called afl-clang-fast and afl-clang-fast++ in the parent directory. Once this +is done, you can instrument third-party code in a way similar to the standard +operating mode of AFL, e.g.: + +``` + CC=/path/to/afl/afl-clang-fast ./configure [...options...] + make +``` + +Be sure to also include CXX set to afl-clang-fast++ for C++ code. + +Note that afl-clang-fast/afl-clang-fast++ are just pointers to afl-cc. +You can also use afl-cc/afl-c++ and instead direct it to use LLVM +instrumentation by either setting `AFL_CC_COMPILER=LLVM` or pass the parameter +`--afl-llvm` via CFLAGS/CXXFLAGS/CPPFLAGS. + +The tool honors roughly the same environmental variables as afl-gcc (see +[docs/env_variables.md](../docs/env_variables.md)). This includes AFL_USE_ASAN, +AFL_HARDEN, and AFL_DONT_OPTIMIZE. However AFL_INST_RATIO is not honored +as it does not serve a good purpose with the more effective PCGUARD, LTO and + instrim CFG analysis. + +## 3) Options + +Several options are present to make llvm_mode faster or help it rearrange +the code to make afl-fuzz path discovery easier. + +If you need just to instrument specific parts of the code, you can the instrument file list +which C/C++ files to actually instrument. See [README.instrument_list.md](README.instrument_list.md) + +For splitting memcmp, strncmp, etc. please see [README.laf-intel.md](README.laf-intel.md) + +Then there are different ways of instrumenting the target: + +1. There is an optimized instrumentation strategy that uses CFGs and +markers to just instrument what is needed. This increases speed by 10-15% +without any disadvantages +If you want to use this, set AFL_LLVM_INSTRUMENT=CFG or AFL_LLVM_INSTRIM=1 +See [README.instrim.md](README.instrim.md) + +2. An even better instrumentation strategy uses LTO and link time +instrumentation. Note that not all targets can compile in this mode, however +if it works it is the best option you can use. +Simply use afl-clang-lto/afl-clang-lto++ to use this option. +See [README.lto.md](README.lto.md) + +3. Alternativly you can choose a completely different coverage method: + +3a. N-GRAM coverage - which combines the previous visited edges with the +current one. This explodes the map but on the other hand has proven to be +effective for fuzzing. +See [README.ngram.md](README.ngram.md) + +3b. 
Context sensitive coverage - which combines the visited edges with an +individual caller ID (the function that called the current one) +[README.ctx.md](README.ctx.md) + +Then - additionally to one of the instrumentation options above - there is +a very effective new instrumentation option called CmpLog as an alternative to +laf-intel that allow AFL++ to apply mutations similar to Redqueen. +See [README.cmplog.md](README.cmplog.md) + +Finally if your llvm version is 8 or lower, you can activate a mode that +prevents that a counter overflow result in a 0 value. This is good for +path discovery, but the llvm implementation for x86 for this functionality +is not optimal and was only fixed in llvm 9. +You can set this with AFL_LLVM_NOT_ZERO=1 +See [README.neverzero.md](README.neverzero.md) + +## 4) Snapshot feature + +To speed up fuzzing you can use a linux loadable kernel module which enables +a snapshot feature. +See [README.snapshot.md](README.snapshot.md) + +## 5) Gotchas, feedback, bugs + +This is an early-stage mechanism, so field reports are welcome. You can send bug +reports to <afl-users@googlegroups.com>. + +## 6) deferred initialization, persistent mode, shared memory fuzzing + +This is the most powerful and effective fuzzing you can do. +Please see [README.persistent_mode.md](README.persistent_mode.md) for a +full explanation. + +## 7) Bonus feature: 'trace-pc-guard' mode + +LLVM is shipping with a built-in execution tracing feature +that provides AFL with the necessary tracing data without the need to +post-process the assembly or install any compiler plugins. See: + + http://clang.llvm.org/docs/SanitizerCoverage.html#tracing-pcs-with-guards + +If you have not an outdated compiler and want to give it a try, build +targets this way: + +``` +AFL_LLVM_INSTRUMENT=PCGUARD make +``` + +Note that this us currently the default if you use LLVM >= 7, as it is the best +mode. Recommended is LLVM >= 9. +If you have llvm 11+ and compiled afl-clang-lto - this is the only better mode. + +## 8) Bonus feature: 'dict2file' pass + +Just specify `AFL_LLVM_DICT2FILE=/absolute/path/file.txt` and during compilation +all constant string compare parameters will be written to this file to be +used with afl-fuzz' `-x` option. diff --git a/instrumentation/README.lto.md b/instrumentation/README.lto.md new file mode 100644 index 00000000..abdbd2ac --- /dev/null +++ b/instrumentation/README.lto.md @@ -0,0 +1,290 @@ +# afl-clang-lto - collision free instrumentation at link time + +## TLDR; + +This version requires a current llvm 11+ compiled from the github master. + +1. Use afl-clang-lto/afl-clang-lto++ because it is faster and gives better + coverage than anything else that is out there in the AFL world + +2. You can use it together with llvm_mode: laf-intel and the instrument file listing + features and can be combined with cmplog/Redqueen + +3. It only works with llvm 11+ + +4. AUTODICTIONARY feature! see below + +5. If any problems arise be sure to set `AR=llvm-ar RANLIB=llvm-ranlib`. + Some targets might need `LD=afl-clang-lto` and others `LD=afl-ld-lto`. + +## Introduction and problem description + +A big issue with how afl/afl++ works is that the basic block IDs that are +set during compilation are random - and hence naturally the larger the number +of instrumented locations, the higher the number of edge collisions are in the +map. This can result in not discovering new paths and therefore degrade the +efficiency of the fuzzing process. 
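For comparison, the classic non-LTO edge update looks roughly like the sketch below; because the `cur_loc` values are random compile-time constants, two distinct edges can XOR to the same index, which is exactly the collision problem described here.

```c
/* simplified sketch of the classic AFL-style edge update (not LTO mode) */
extern unsigned char *__afl_area_ptr;          /* 64 kB coverage map          */
extern __thread unsigned int __afl_prev_loc;   /* previous block's random ID  */

static inline void edge_hit(unsigned int cur_loc) {  /* cur_loc: random ID    */

  __afl_area_ptr[cur_loc ^ __afl_prev_loc]++;  /* different edges may collide */
  __afl_prev_loc = cur_loc >> 1;               /* shift so A->B != B->A       */

}
```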
+ +*This issue is underestimated in the fuzzing community!* +With a 2^16 = 64kb standard map at already 256 instrumented blocks there is +on average one collision. On average a target has 10.000 to 50.000 +instrumented blocks hence the real collisions are between 750-18.000! + +To reach a solution that prevents any collisions took several approaches +and many dead ends until we got to this: + + * We instrument at link time when we have all files pre-compiled + * To instrument at link time we compile in LTO (link time optimization) mode + * Our compiler (afl-clang-lto/afl-clang-lto++) takes care of setting the + correct LTO options and runs our own afl-ld linker instead of the system + linker + * The LLVM linker collects all LTO files to link and instruments them so that + we have non-colliding edge overage + * We use a new (for afl) edge coverage - which is the same as in llvm + -fsanitize=coverage edge coverage mode :) + +The result: + * 10-25% speed gain compared to llvm_mode + * guaranteed non-colliding edge coverage :-) + * The compile time especially for binaries to an instrumented library can be + much longer + +Example build output from a libtiff build: +``` +libtool: link: afl-clang-lto -g -O2 -Wall -W -o thumbnail thumbnail.o ../libtiff/.libs/libtiff.a ../port/.libs/libport.a -llzma -ljbig -ljpeg -lz -lm +afl-clang-lto++2.63d by Marc "vanHauser" Heuse <mh@mh-sec.de> in mode LTO +afl-llvm-lto++2.63d by Marc "vanHauser" Heuse <mh@mh-sec.de> +AUTODICTIONARY: 11 strings found +[+] Instrumented 12071 locations with no collisions (on average 1046 collisions would be in afl-gcc/afl-clang-fast) (non-hardened mode). +``` + +## Getting llvm 11+ + +### Installing llvm from the llvm repository (version 11) + +Installing the llvm snapshot builds is easy and mostly painless: + +In the follow line change `NAME` for your Debian or Ubuntu release name +(e.g. buster, focal, eon, etc.): +``` +echo deb http://apt.llvm.org/NAME/ llvm-toolchain-NAME NAME >> /etc/apt/sources.list +``` +then add the pgp key of llvm and install the packages: +``` +wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - +apt-get update && apt-get upgrade -y +apt-get install -y clang-11 clang-tools-11 libc++1-11 libc++-11-dev \ + libc++abi1-11 libc++abi-11-dev libclang1-11 libclang-11-dev \ + libclang-common-11-dev libclang-cpp11 libclang-cpp11-dev liblld-11 \ + liblld-11-dev liblldb-11 liblldb-11-dev libllvm11 libomp-11-dev \ + libomp5-11 lld-11 lldb-11 llvm-11 llvm-11-dev llvm-11-runtime llvm-11-tools +``` + +### Building llvm yourself (version 12) + +Building llvm from github takes quite some long time and is not painless: +``` +sudo apt install binutils-dev # this is *essential*! +git clone https://github.com/llvm/llvm-project +cd llvm-project +mkdir build +cd build +cmake -DLLVM_ENABLE_PROJECTS='clang;clang-tools-extra;compiler-rt;libclc;libcxx;libcxxabi;libunwind;lld' -DCMAKE_BUILD_TYPE=Release -DLLVM_BINUTILS_INCDIR=/usr/include/ ../llvm/ +make -j $(nproc) +export PATH=`pwd`/bin:$PATH +export LLVM_CONFIG=`pwd`/bin/llvm-config +cd /path/to/AFLplusplus/ +make +sudo make install +``` + +## How to use afl-clang-lto + +Just use afl-clang-lto like you did with afl-clang-fast or afl-gcc. + +Also the instrument file listing (AFL_LLVM_ALLOWLIST/AFL_LLVM_DENYLIST -> [README.instrument_list.md](README.instrument_list.md)) and +laf-intel/compcov (AFL_LLVM_LAF_* -> [README.laf-intel.md](README.laf-intel.md)) work. 
+ +Example: +``` +CC=afl-clang-lto CXX=afl-clang-lto++ RANLIB=llvm-ranlib AR=llvm-ar ./configure +make +``` + +NOTE: some targets also need to set the linker, try both `afl-clang-lto` and +`afl-ld-lto` for `LD=` before `configure`. + +## AUTODICTIONARY feature + +While compiling, a dictionary based on string comparisons is automatically +generated and put into the target binary. This dictionary is transfered to afl-fuzz +on start. This improves coverage statistically by 5-10% :) + +## Fixed memory map + +To speed up fuzzing, it is possible to set a fixed shared memory map. +Recommended is the value 0x10000. +In most cases this will work without any problems. However if a target uses +early constructors, ifuncs or a deferred forkserver this can crash the target. +On unusual operating systems/processors/kernels or weird libraries this might +fail so to change the fixed address at compile time set +AFL_LLVM_MAP_ADDR with a better value (a value of 0 or empty sets the map address +to be dynamic - the original afl way, which is slower). + +## Document edge IDs + +Setting `export AFL_LLVM_DOCUMENT_IDS=file` will document in a file which edge +ID was given to which function. This helps to identify functions with variable +bytes or which functions were touched by an input. + +## Solving difficult targets + +Some targets are difficult because the configure script does unusual stuff that +is unexpected for afl. See the next chapter `Potential issues` for how to solve +these. + +### Example: ffmpeg + +An example of a hard to solve target is ffmpeg. Here is how to successfully +instrument it: + +1. Get and extract the current ffmpeg and change to its directory + +2. Running configure with --cc=clang fails and various other items will fail + when compiling, so we have to trick configure: + +``` +./configure --enable-lto --disable-shared --disable-inline-asm +``` + +3. Now the configuration is done - and we edit the settings in `./ffbuild/config.mak` + (-: the original line, +: what to change it into): +``` +-CC=gcc ++CC=afl-clang-lto +-CXX=g++ ++CXX=afl-clang-lto++ +-AS=gcc ++AS=llvm-as +-LD=gcc ++LD=afl-clang-lto++ +-DEPCC=gcc ++DEPCC=afl-clang-lto +-DEPAS=gcc ++DEPAS=afl-clang-lto++ +-AR=ar ++AR=llvm-ar +-AR_CMD=ar ++AR_CMD=llvm-ar +-NM_CMD=nm -g ++NM_CMD=llvm-nm -g +-RANLIB=ranlib -D ++RANLIB=llvm-ranlib -D +``` + +4. Then type make, wait for a long time and you are done :) + +### Example: WebKit jsc + +Building jsc is difficult as the build script has bugs. + +1. checkout Webkit: +``` +svn checkout https://svn.webkit.org/repository/webkit/trunk WebKit +cd WebKit +``` + +2. Fix the build environment: +``` +mkdir -p WebKitBuild/Release +cd WebKitBuild/Release +ln -s ../../../../../usr/bin/llvm-ar-12 llvm-ar-12 +ln -s ../../../../../usr/bin/llvm-ranlib-12 llvm-ranlib-12 +cd ../.. +``` + +3. Build :) + +``` +Tools/Scripts/build-jsc --jsc-only --cli --cmakeargs="-DCMAKE_AR='llvm-ar-12' -DCMAKE_RANLIB='llvm-ranlib-12' -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -DCMAKE_CC_FLAGS='-O3 -lrt' -DCMAKE_CXX_FLAGS='-O3 -lrt' -DIMPORTED_LOCATION='/lib/x86_64-linux-gnu/' -DCMAKE_CC=afl-clang-lto -DCMAKE_CXX=afl-clang-lto++ -DENABLE_STATIC_JSC=ON" +``` + +## Potential issues + +### compiling libraries fails + +If you see this message: +``` +/bin/ld: libfoo.a: error adding symbols: archive has no index; run ranlib to add one +``` +This is because usually gnu gcc ranlib is being called which cannot deal with clang LTO files. 
+The solution is simple: when you ./configure you also have to set RANLIB=llvm-ranlib and AR=llvm-ar + +Solution: +``` +AR=llvm-ar RANLIB=llvm-ranlib CC=afl-clang-lto CXX=afl-clang-lto++ ./configure --disable-shared +``` +and on some targets you have to set AR=/RANLIB= even for make as the configure script does not save it. +Other targets ignore environment variables and need the parameters set via +`./configure --cc=... --cxx= --ranlib= ...` etc. (I am looking at you ffmpeg!). + + +If you see this message +``` +assembler command failed ... +``` +then try setting `llvm-as` for configure: +``` +AS=llvm-as ... +``` + +### compiling programs still fail + +afl-clang-lto is still work in progress. + +Known issues: + * Anything that llvm 11+ cannot compile, afl-clang-lto cannot compile either - obviously + * Anything that does not compile with LTO, afl-clang-lto cannot compile either - obviously + +Hence if building a target with afl-clang-lto fails try to build it with llvm12 +and LTO enabled (`CC=clang-12` `CXX=clang++-12` `CFLAGS=-flto=full` and +`CXXFLAGS=-flto=full`). + +If this succeeeds then there is an issue with afl-clang-lto. Please report at +[https://github.com/AFLplusplus/AFLplusplus/issues/226](https://github.com/AFLplusplus/AFLplusplus/issues/226) + +Even some targets where clang-12 fails can be build if the fail is just in +`./configure`, see `Solving difficult targets` above. + +## History + +This was originally envisioned by hexcoder- in Summer 2019, however we saw no +way to create a pass that is run at link time - although there is a option +for this in the PassManager: EP_FullLinkTimeOptimizationLast +("Fun" info - nobody knows what this is doing. And the developer who +implemented this didn't respond to emails.) + +In December then came the idea to implement this as a pass that is run via +the llvm "opt" program, which is performed via an own linker that afterwards +calls the real linker. +This was first implemented in January and work ... kinda. +The LTO time instrumentation worked, however "how" the basic blocks were +instrumented was a problem, as reducing duplicates turned out to be very, +very difficult with a program that has so many paths and therefore so many +dependencies. A lot of strategies were implemented - and failed. +And then sat solvers were tried, but with over 10.000 variables that turned +out to be a dead-end too. + +The final idea to solve this came from domenukk who proposed to insert a block +into an edge and then just use incremental counters ... and this worked! +After some trials and errors to implement this vanhauser-thc found out that +there is actually an llvm function for this: SplitEdge() :-) + +Still more problems came up though as this only works without bugs from +llvm 9 onwards, and with high optimization the link optimization ruins +the instrumented control flow graph. + +This is all now fixed with llvm 11+. The llvm's own linker is now able to +load passes and this bypasses all problems we had. + +Happy end :) diff --git a/instrumentation/README.neverzero.md b/instrumentation/README.neverzero.md new file mode 100644 index 00000000..5c894d6e --- /dev/null +++ b/instrumentation/README.neverzero.md @@ -0,0 +1,35 @@ +# NeverZero counters for LLVM instrumentation + +## Usage + +In larger, complex or reiterative programs the byte sized counters that collect +the edge coverage can easily fill up and wrap around. +This is not that much of an issue - unless by chance it wraps just to a value +of zero when the program execution ends. 
+In this case afl-fuzz is not able to see that the edge has been accessed and +will ignore it. + +NeverZero prevents this behaviour. If a counter wraps, it jumps over the value +0 directly to a 1. This improves path discovery (by a very little amount) +at a very little cost (one instruction per edge). + +(The alternative of saturated counters has been tested also and proved to be +inferior in terms of path discovery.) + +This is implemented in afl-gcc, however for llvm_mode this is optional if +the llvm version is below 9 - as there is a perfomance bug that is only fixed +in version 9 and onwards. + +If you want to enable this for llvm versions below 9 then set + +``` +export AFL_LLVM_NOT_ZERO=1 +``` + +In case you are on llvm 9 or greater and you do not want this behaviour then +you can set: +``` +AFL_LLVM_SKIP_NEVERZERO=1 +``` +If the target does not have extensive loops or functions that are called +a lot then this can give a small performance boost. diff --git a/instrumentation/README.ngram.md b/instrumentation/README.ngram.md new file mode 100644 index 00000000..de3ba432 --- /dev/null +++ b/instrumentation/README.ngram.md @@ -0,0 +1,28 @@ +# AFL N-Gram Branch Coverage + +## Source + +This is an LLVM-based implementation of the n-gram branch coverage proposed in +the paper ["Be Sensitive and Collaborative: Analzying Impact of Coverage Metrics +in Greybox Fuzzing"](https://www.usenix.org/system/files/raid2019-wang-jinghan.pdf), +by Jinghan Wang, et. al. + +Note that the original implementation (available +[here](https://github.com/bitsecurerlab/afl-sensitive)) +is built on top of AFL's QEMU mode. +This is essentially a port that uses LLVM vectorized instructions to achieve +the same results when compiling source code. + +In math the branch coverage is performed as follows: +`map[current_location ^ prev_location[0] >> 1 ^ prev_location[1] >> 1 ^ ... up to n-1`] += 1` + +## Usage + +The size of `n` (i.e., the number of branches to remember) is an option +that is specified either in the `AFL_LLVM_INSTRUMENT=NGRAM-{value}` or the +`AFL_LLVM_NGRAM_SIZE` environment variable. +Good values are 2, 4 or 8, valid are 2-16. + +It is highly recommended to increase the MAP_SIZE_POW2 definition in +config.h to at least 18 and maybe up to 20 for this as otherwise too +many map collisions occur. diff --git a/instrumentation/README.persistent_mode.md b/instrumentation/README.persistent_mode.md new file mode 100644 index 00000000..e095f036 --- /dev/null +++ b/instrumentation/README.persistent_mode.md @@ -0,0 +1,209 @@ +# llvm_mode persistent mode + +## 1) Introduction + +The most effective way is to fuzz in persistent mode, as the speed can easily +be x10 or x20 times faster without any disadvanges. +*All professional fuzzing is using this mode.* + +This requires that the target can be called in a (or several) function(s), +and that its state can be resetted so that multiple calls can be performed +without resource leaks and former runs having no impact on following runs +(this can be seen by the `stability` indicator in the `afl-fuzz` UI). + +Examples can be found in [examples/persistent_mode](../examples/persistent_mode). 
+ +## 2) TLDR; + +Example `fuzz_target.c`: +``` +#include "what_you_need_for_your_target.h" + +__AFL_FUZZ_INIT(); + +main() { + +#ifdef __AFL_HAVE_MANUAL_CONTROL + __AFL_INIT(); +#endif + + unsigned char *buf = __AFL_FUZZ_TESTCASE_BUF; // must be after __AFL_INIT + + while (__AFL_LOOP(10000)) { + + int len = __AFL_FUZZ_TESTCASE_LEN; + if (len < 8) continue; // check for a required/useful minimum input length + + /* Setup function call, e.g. struct target *tmp = libtarget_init() */ + /* Call function to be fuzzed, e.g.: */ + target_function(buf, len); + /* Reset state. e.g. libtarget_free(tmp) */ + + } + + return 0; + +} +``` +And then compile: +``` +afl-clang-fast -o fuzz_target fuzz_target.c -lwhat_you_need_for_your_target +``` +And that is it! +The speed increase is usually x10 to x20. + +If you want to be able to compile the target without afl-clang-fast/lto then +add this just after the includes: + +``` +#ifndef __AFL_FUZZ_TESTCASE_LEN + ssize_t fuzz_len; + #define __AFL_FUZZ_TESTCASE_LEN fuzz_len + unsigned char fuzz_buf[1024000]; + #define __AFL_FUZZ_TESTCASE_BUF fuzz_buf + #define __AFL_FUZZ_INIT() void sync(void); + #define __AFL_LOOP(x) ((fuzz_len = read(0, fuzz_buf, sizeof(fuzz_buf))) > 0 ? + #define __AFL_INIT() sync() +#endif +``` + +## 3) Deferred initialization + +AFL tries to optimize performance by executing the targeted binary just once, +stopping it just before main(), and then cloning this "main" process to get +a steady supply of targets to fuzz. + +Although this approach eliminates much of the OS-, linker- and libc-level +costs of executing the program, it does not always help with binaries that +perform other time-consuming initialization steps - say, parsing a large config +file before getting to the fuzzed data. + +In such cases, it's beneficial to initialize the forkserver a bit later, once +most of the initialization work is already done, but before the binary attempts +to read the fuzzed input and parse it; in some cases, this can offer a 10x+ +performance gain. You can implement delayed initialization in LLVM mode in a +fairly simple way. + +First, find a suitable location in the code where the delayed cloning can +take place. This needs to be done with *extreme* care to avoid breaking the +binary. In particular, the program will probably malfunction if you select +a location after: + + - The creation of any vital threads or child processes - since the forkserver + can't clone them easily. + + - The initialization of timers via setitimer() or equivalent calls. + + - The creation of temporary files, network sockets, offset-sensitive file + descriptors, and similar shared-state resources - but only provided that + their state meaningfully influences the behavior of the program later on. + + - Any access to the fuzzed input, including reading the metadata about its + size. + +With the location selected, add this code in the appropriate spot: + +```c +#ifdef __AFL_HAVE_MANUAL_CONTROL + __AFL_INIT(); +#endif +``` + +You don't need the #ifdef guards, but including them ensures that the program +will keep working normally when compiled with a tool other than afl-clang-fast. + +Finally, recompile the program with afl-clang-fast/lto (afl-gcc or afl-clang will +*not* generate a deferred-initialization binary) - and you should be all set! + +*NOTE:* In the code between `main` and `__AFL_INIT()` should not be any code +run that is instrumented - otherwise a crash might occure. +In case this is useful (e.g. 
for expensive one time initialization) you can +try to do the following: + +Add after the includes: +``` +extern unsigned char *__afl_area_ptr; +#define MAX_DUMMY_SIZE 256000 + +__attribute__((constructor(1))) void __afl_protect(void) { +#ifdef MAP_FIXED_NOREPLACE + __afl_area_ptr = (unsigned char*) mmap((void *)0x10000, MAX_DUMMY_SIZE, PROT_READ | PROT_WRITE, MAP_FIXED_NOREPLACE | MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if ((uint64_t)__afl_area_ptr == -1) +#endif + __afl_area_ptr = (unsigned char*) mmap((void *)0x10000, MAX_DUMMY_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if ((uint64_t)__afl_area_ptr == -1) + __afl_area_ptr = (unsigned char*) mmap(NULL, MAX_DUMMY_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); +} + +``` +and just before `__AFL_INIT()`: +``` + munmap(__afl_area_ptr, MAX_DUMMY_SIZE); + __afl_area_ptr = NULL; +``` + +## 4) Persistent mode + +Some libraries provide APIs that are stateless, or whose state can be reset in +between processing different input files. When such a reset is performed, a +single long-lived process can be reused to try out multiple test cases, +eliminating the need for repeated fork() calls and the associated OS overhead. + +The basic structure of the program that does this would be: + +```c + while (__AFL_LOOP(1000)) { + + /* Read input data. */ + /* Call library code to be fuzzed. */ + /* Reset state. */ + + } + + /* Exit normally */ +``` + +The numerical value specified within the loop controls the maximum number +of iterations before AFL will restart the process from scratch. This minimizes +the impact of memory leaks and similar glitches; 1000 is a good starting point, +and going much higher increases the likelihood of hiccups without giving you +any real performance benefits. + +A more detailed template is shown in ../examples/persistent_demo/. +Similarly to the previous mode, the feature works only with afl-clang-fast; #ifdef +guards can be used to suppress it when using other compilers. + +Note that as with the previous mode, the feature is easy to misuse; if you +do not fully reset the critical state, you may end up with false positives or +waste a whole lot of CPU power doing nothing useful at all. Be particularly +wary of memory leaks and of the state of file descriptors. + +PS. Because there are task switches still involved, the mode isn't as fast as +"pure" in-process fuzzing offered, say, by LLVM's LibFuzzer; but it is a lot +faster than the normal fork() model, and compared to in-process fuzzing, +should be a lot more robust. + +## 5) Shared memory fuzzing + +You can speed up the fuzzing process even more by receiving the fuzzing data +via shared memory instead of stdin or files. +This is a further speed multiplier of about 2x. + +Setting this up is very easy: + +After the includes set the following macro: + +``` +__AFL_FUZZ_INIT(); +``` +Directly at the start of main - or if you are using the deferred forkserver +with `__AFL_INIT()` then *after* `__AFL_INIT? : +``` + unsigned char *buf = __AFL_FUZZ_TESTCASE_BUF; +``` + +Then as first line after the `__AFL_LOOP` while loop: +``` + int len = __AFL_FUZZ_TESTCASE_LEN; +``` +and that is all! diff --git a/instrumentation/README.snapshot.md b/instrumentation/README.snapshot.md new file mode 100644 index 00000000..c40a956a --- /dev/null +++ b/instrumentation/README.snapshot.md @@ -0,0 +1,16 @@ +# AFL++ snapshot feature + +Snapshotting is a feature that makes a snapshot from a process and then +restores its state, which is faster then forking it again. 
+ +All targets compiled with llvm_mode are automatically enabled for the +snapshot feature. + +To use the snapshot feature for fuzzing compile and load this kernel +module: [https://github.com/AFLplusplus/AFL-Snapshot-LKM](https://github.com/AFLplusplus/AFL-Snapshot-LKM) + +Note that is has little value for persistent (__AFL_LOOP) fuzzing. + +## Notes + +Snapshot does not work with multithreaded targets yet. Still in WIP, it is now usable only for single threaded applications. diff --git a/instrumentation/SanitizerCoverageLTO.so.cc b/instrumentation/SanitizerCoverageLTO.so.cc new file mode 100644 index 00000000..64162145 --- /dev/null +++ b/instrumentation/SanitizerCoverageLTO.so.cc @@ -0,0 +1,1613 @@ +/* SanitizeCoverage.cpp ported to afl++ LTO :-) */ + +#define AFL_LLVM_PASS + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <sys/time.h> + +#include <list> +#include <string> +#include <fstream> +#include <set> +#include <iostream> + +#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Mangler.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/SpecialCaseList.h" +#include "llvm/Support/VirtualFileSystem.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Instrumentation.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" + +#include "config.h" +#include "debug.h" +#include "afl-llvm-common.h" + +using namespace llvm; + +#define DEBUG_TYPE "sancov" + +static const char *const SanCovTracePCIndirName = + "__sanitizer_cov_trace_pc_indir"; +static const char *const SanCovTracePCName = "__sanitizer_cov_trace_pc"; +// static const char *const SanCovTracePCGuardName = +// "__sanitizer_cov_trace_pc_guard"; +static const char *const SanCovGuardsSectionName = "sancov_guards"; +static const char *const SanCovCountersSectionName = "sancov_cntrs"; +static const char *const SanCovBoolFlagSectionName = "sancov_bools"; +static const char *const SanCovPCsSectionName = "sancov_pcs"; + +static cl::opt<int> ClCoverageLevel( + "lto-coverage-level", + cl::desc("Sanitizer Coverage. 
0: none, 1: entry block, 2: all blocks, " + "3: all blocks and critical edges"), + cl::Hidden, cl::init(3)); + +static cl::opt<bool> ClTracePC("lto-coverage-trace-pc", + cl::desc("Experimental pc tracing"), cl::Hidden, + cl::init(false)); + +static cl::opt<bool> ClTracePCGuard("lto-coverage-trace-pc-guard", + cl::desc("pc tracing with a guard"), + cl::Hidden, cl::init(false)); + +// If true, we create a global variable that contains PCs of all instrumented +// BBs, put this global into a named section, and pass this section's bounds +// to __sanitizer_cov_pcs_init. +// This way the coverage instrumentation does not need to acquire the PCs +// at run-time. Works with trace-pc-guard, inline-8bit-counters, and +// inline-bool-flag. +static cl::opt<bool> ClCreatePCTable("lto-coverage-pc-table", + cl::desc("create a static PC table"), + cl::Hidden, cl::init(false)); + +static cl::opt<bool> ClInline8bitCounters( + "lto-coverage-inline-8bit-counters", + cl::desc("increments 8-bit counter for every edge"), cl::Hidden, + cl::init(false)); + +static cl::opt<bool> ClInlineBoolFlag( + "lto-coverage-inline-bool-flag", + cl::desc("sets a boolean flag for every edge"), cl::Hidden, + cl::init(false)); + +static cl::opt<bool> ClPruneBlocks( + "lto-coverage-prune-blocks", + cl::desc("Reduce the number of instrumented blocks"), cl::Hidden, + cl::init(true)); + +namespace { + +SanitizerCoverageOptions getOptions(int LegacyCoverageLevel) { + + SanitizerCoverageOptions Res; + switch (LegacyCoverageLevel) { + + case 0: + Res.CoverageType = SanitizerCoverageOptions::SCK_None; + break; + case 1: + Res.CoverageType = SanitizerCoverageOptions::SCK_Function; + break; + case 2: + Res.CoverageType = SanitizerCoverageOptions::SCK_BB; + break; + case 3: + Res.CoverageType = SanitizerCoverageOptions::SCK_Edge; + break; + case 4: + Res.CoverageType = SanitizerCoverageOptions::SCK_Edge; + Res.IndirectCalls = true; + break; + + } + + return Res; + +} + +SanitizerCoverageOptions OverrideFromCL(SanitizerCoverageOptions Options) { + + // Sets CoverageType and IndirectCalls. + SanitizerCoverageOptions CLOpts = getOptions(ClCoverageLevel); + Options.CoverageType = std::max(Options.CoverageType, CLOpts.CoverageType); + Options.IndirectCalls |= CLOpts.IndirectCalls; + Options.TracePC |= ClTracePC; + Options.TracePCGuard |= ClTracePCGuard; + Options.Inline8bitCounters |= ClInline8bitCounters; + Options.InlineBoolFlag |= ClInlineBoolFlag; + Options.PCTable |= ClCreatePCTable; + Options.NoPrune |= !ClPruneBlocks; + if (!Options.TracePCGuard && !Options.TracePC && + !Options.Inline8bitCounters && !Options.InlineBoolFlag) + Options.TracePCGuard = true; // TracePCGuard is default. 
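+  // Note for the afl++ LTO port: the TracePCGuard default matters, because the
+  // afl++ edge instrumentation in InjectCoverageAtBlock() is emitted on the
+  // TracePCGuard path (the original trace-pc-guard callback is disabled there).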
+ return Options; + +} + +using DomTreeCallback = function_ref<const DominatorTree *(Function &F)>; +using PostDomTreeCallback = + function_ref<const PostDominatorTree *(Function &F)>; + +class ModuleSanitizerCoverage { + + public: + ModuleSanitizerCoverage( + const SanitizerCoverageOptions &Options = SanitizerCoverageOptions()) + : Options(OverrideFromCL(Options)) { + + /* , + const SpecialCaseList * Allowlist = nullptr, + const SpecialCaseList * Blocklist = nullptr) + , + Allowlist(Allowlist), + Blocklist(Blocklist) { + + */ + + } + + bool instrumentModule(Module &M, DomTreeCallback DTCallback, + PostDomTreeCallback PDTCallback); + + private: + void instrumentFunction(Function &F, DomTreeCallback DTCallback, + PostDomTreeCallback PDTCallback); + void InjectCoverageForIndirectCalls(Function & F, + ArrayRef<Instruction *> IndirCalls); + bool InjectCoverage(Function &F, ArrayRef<BasicBlock *> AllBlocks, + bool IsLeafFunc = true); + GlobalVariable *CreateFunctionLocalArrayInSection(size_t NumElements, + Function &F, Type *Ty, + const char *Section); + GlobalVariable *CreatePCArray(Function &F, ArrayRef<BasicBlock *> AllBlocks); + void CreateFunctionLocalArrays(Function &F, ArrayRef<BasicBlock *> AllBlocks); + void InjectCoverageAtBlock(Function &F, BasicBlock &BB, size_t Idx, + bool IsLeafFunc = true); + // std::pair<Value *, Value *> CreateSecStartEnd(Module &M, const char + // *Section, + // Type *Ty); + + void SetNoSanitizeMetadata(Instruction *I) { + + I->setMetadata(I->getModule()->getMDKindID("nosanitize"), + MDNode::get(*C, None)); + + } + + std::string getSectionName(const std::string &Section) const; + // std::string getSectionStart(const std::string &Section) const; + // std::string getSectionEnd(const std::string &Section) const; + FunctionCallee SanCovTracePCIndir; + FunctionCallee SanCovTracePC /*, SanCovTracePCGuard*/; + Type *IntptrTy, *IntptrPtrTy, *Int64Ty, *Int64PtrTy, *Int32Ty, *Int32PtrTy, + *Int16Ty, *Int8Ty, *Int8PtrTy, *Int1Ty, *Int1PtrTy; + Module * CurModule; + std::string CurModuleUniqueId; + Triple TargetTriple; + LLVMContext * C; + const DataLayout *DL; + + GlobalVariable *FunctionGuardArray; // for trace-pc-guard. + GlobalVariable *Function8bitCounterArray; // for inline-8bit-counters. + GlobalVariable *FunctionBoolArray; // for inline-bool-flag. + GlobalVariable *FunctionPCsArray; // for pc-table. 
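+  // GlobalsToAppendToUsed / GlobalsToAppendToCompilerUsed collect the
+  // per-function coverage arrays created above; instrumentModule() later
+  // appends them to llvm.used / llvm.compiler.used so that they are not
+  // dead-stripped.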
+ SmallVector<GlobalValue *, 20> GlobalsToAppendToUsed; + SmallVector<GlobalValue *, 20> GlobalsToAppendToCompilerUsed; + + SanitizerCoverageOptions Options; + + // afl++ START + // const SpecialCaseList * Allowlist; + // const SpecialCaseList * Blocklist; + uint32_t autodictionary = 1; + uint32_t inst = 0; + uint32_t afl_global_id = 0; + uint64_t map_addr = 0; + char * skip_nozero = NULL; + std::vector<BasicBlock *> BlockList; + DenseMap<Value *, std::string *> valueMap; + std::vector<std::string> dictionary; + IntegerType * Int8Tyi = NULL; + IntegerType * Int32Tyi = NULL; + IntegerType * Int64Tyi = NULL; + ConstantInt * Zero = NULL; + ConstantInt * One = NULL; + LLVMContext * Ct = NULL; + Module * Mo = NULL; + GlobalVariable * AFLMapPtr = NULL; + Value * MapPtrFixed = NULL; + FILE * documentFile = NULL; + size_t found = 0; + // afl++ END + +}; + +class ModuleSanitizerCoverageLegacyPass : public ModulePass { + + public: + static char ID; + StringRef getPassName() const override { + + return "sancov"; + + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<PostDominatorTreeWrapperPass>(); + + } + + ModuleSanitizerCoverageLegacyPass( + const SanitizerCoverageOptions &Options = SanitizerCoverageOptions()) + : ModulePass(ID), Options(Options) { + + /* , + const std::vector<std::string> &AllowlistFiles = + std::vector<std::string>(), + const std::vector<std::string> &BlocklistFiles = + std::vector<std::string>()) + if (AllowlistFiles.size() > 0) + Allowlist = SpecialCaseList::createOrDie(AllowlistFiles, + *vfs::getRealFileSystem()); + if (BlocklistFiles.size() > 0) + Blocklist = SpecialCaseList::createOrDie(BlocklistFiles, + *vfs::getRealFileSystem()); + */ + initializeModuleSanitizerCoverageLegacyPassPass( + *PassRegistry::getPassRegistry()); + + } + + bool runOnModule(Module &M) override { + + ModuleSanitizerCoverage ModuleSancov(Options); + // , Allowlist.get(), Blocklist.get()); + auto DTCallback = [this](Function &F) -> const DominatorTree * { + + return &this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree(); + + }; + + auto PDTCallback = [this](Function &F) -> const PostDominatorTree * { + + return &this->getAnalysis<PostDominatorTreeWrapperPass>(F) + .getPostDomTree(); + + }; + + return ModuleSancov.instrumentModule(M, DTCallback, PDTCallback); + + } + + private: + SanitizerCoverageOptions Options; + + // std::unique_ptr<SpecialCaseList> Allowlist; + // std::unique_ptr<SpecialCaseList> Blocklist; + +}; + +} // namespace + +PreservedAnalyses ModuleSanitizerCoveragePass::run(Module & M, + ModuleAnalysisManager &MAM) { + + ModuleSanitizerCoverage ModuleSancov(Options); + // Allowlist.get(), Blocklist.get()); + auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + auto DTCallback = [&FAM](Function &F) -> const DominatorTree * { + + return &FAM.getResult<DominatorTreeAnalysis>(F); + + }; + + auto PDTCallback = [&FAM](Function &F) -> const PostDominatorTree * { + + return &FAM.getResult<PostDominatorTreeAnalysis>(F); + + }; + + if (ModuleSancov.instrumentModule(M, DTCallback, PDTCallback)) + return PreservedAnalyses::none(); + + return PreservedAnalyses::all(); + +} + +/* +std::pair<Value *, Value *> ModuleSanitizerCoverage::CreateSecStartEnd( + Module &M, const char *Section, Type *Ty) { + + GlobalVariable *SecStart = + new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage, nullptr, + getSectionStart(Section)); + SecStart->setVisibility(GlobalValue::HiddenVisibility); + 
GlobalVariable *SecEnd = + new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage, nullptr, + getSectionEnd(Section)); + SecEnd->setVisibility(GlobalValue::HiddenVisibility); + IRBuilder<> IRB(M.getContext()); + Value * SecEndPtr = IRB.CreatePointerCast(SecEnd, Ty); + if (!TargetTriple.isOSBinFormatCOFF()) + return std::make_pair(IRB.CreatePointerCast(SecStart, Ty), SecEndPtr); + + // Account for the fact that on windows-msvc __start_* symbols actually + // point to a uint64_t before the start of the array. + auto SecStartI8Ptr = IRB.CreatePointerCast(SecStart, Int8PtrTy); + auto GEP = IRB.CreateGEP(Int8Ty, SecStartI8Ptr, + ConstantInt::get(IntptrTy, sizeof(uint64_t))); + return std::make_pair(IRB.CreatePointerCast(GEP, Ty), SecEndPtr); + +} + +*/ + +bool ModuleSanitizerCoverage::instrumentModule( + Module &M, DomTreeCallback DTCallback, PostDomTreeCallback PDTCallback) { + + if (Options.CoverageType == SanitizerCoverageOptions::SCK_None) return false; + /* + if (Allowlist && + !Allowlist->inSection("coverage", "src", M.getSourceFileName())) + return false; + if (Blocklist && + Blocklist->inSection("coverage", "src", M.getSourceFileName())) + return false; + */ + BlockList.clear(); + valueMap.clear(); + dictionary.clear(); + C = &(M.getContext()); + DL = &M.getDataLayout(); + CurModule = &M; + CurModuleUniqueId = getUniqueModuleId(CurModule); + TargetTriple = Triple(M.getTargetTriple()); + FunctionGuardArray = nullptr; + Function8bitCounterArray = nullptr; + FunctionBoolArray = nullptr; + FunctionPCsArray = nullptr; + IntptrTy = Type::getIntNTy(*C, DL->getPointerSizeInBits()); + IntptrPtrTy = PointerType::getUnqual(IntptrTy); + Type * VoidTy = Type::getVoidTy(*C); + IRBuilder<> IRB(*C); + Int64PtrTy = PointerType::getUnqual(IRB.getInt64Ty()); + Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty()); + Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty()); + Int1PtrTy = PointerType::getUnqual(IRB.getInt1Ty()); + Int64Ty = IRB.getInt64Ty(); + Int32Ty = IRB.getInt32Ty(); + Int16Ty = IRB.getInt16Ty(); + Int8Ty = IRB.getInt8Ty(); + Int1Ty = IRB.getInt1Ty(); + + /* afl++ START */ + char * ptr; + LLVMContext &Ctx = M.getContext(); + Ct = &Ctx; + Int8Tyi = IntegerType::getInt8Ty(Ctx); + Int32Tyi = IntegerType::getInt32Ty(Ctx); + Int64Tyi = IntegerType::getInt64Ty(Ctx); + + /* Show a banner */ + setvbuf(stdout, NULL, _IONBF, 0); + if (getenv("AFL_DEBUG")) debug = 1; + + if ((isatty(2) && !getenv("AFL_QUIET")) || debug) { + + SAYF(cCYA "afl-llvm-lto" VERSION cRST + " by Marc \"vanHauser\" Heuse <mh@mh-sec.de>\n"); + + } else + + be_quiet = 1; + + skip_nozero = getenv("AFL_LLVM_SKIP_NEVERZERO"); + + if ((ptr = getenv("AFL_LLVM_LTO_STARTID")) != NULL) + if ((afl_global_id = atoi(ptr)) < 0) + FATAL("AFL_LLVM_LTO_STARTID value of \"%s\" is negative\n", ptr); + + if ((ptr = getenv("AFL_LLVM_DOCUMENT_IDS")) != NULL) { + + if ((documentFile = fopen(ptr, "a")) == NULL) + WARNF("Cannot access document file %s", ptr); + + } + + // we make this the default as the fixed map has problems with + // defered forkserver, early constructors, ifuncs and maybe more + /*if (getenv("AFL_LLVM_MAP_DYNAMIC"))*/ + map_addr = 0; + + if ((ptr = getenv("AFL_LLVM_MAP_ADDR"))) { + + uint64_t val; + if (!*ptr || !strcmp(ptr, "0") || !strcmp(ptr, "0x0")) { + + map_addr = 0; + + } else if (getenv("AFL_LLVM_MAP_DYNAMIC")) { + + FATAL( + "AFL_LLVM_MAP_ADDR and AFL_LLVM_MAP_DYNAMIC cannot be used together"); + + } else if (strncmp(ptr, "0x", 2) != 0) { + + map_addr = 0x10000; // the default + + } else { + + val = strtoull(ptr, 
NULL, 16); + if (val < 0x100 || val > 0xffffffff00000000) { + + FATAL( + "AFL_LLVM_MAP_ADDR must be a value between 0x100 and " + "0xffffffff00000000"); + + } + + map_addr = val; + + } + + } + + /* Get/set the globals for the SHM region. */ + + if (!map_addr) { + + AFLMapPtr = + new GlobalVariable(M, PointerType::get(Int8Tyi, 0), false, + GlobalValue::ExternalLinkage, 0, "__afl_area_ptr"); + + } else { + + ConstantInt *MapAddr = ConstantInt::get(Int64Tyi, map_addr); + MapPtrFixed = + ConstantExpr::getIntToPtr(MapAddr, PointerType::getUnqual(Int8Tyi)); + + } + + Zero = ConstantInt::get(Int8Tyi, 0); + One = ConstantInt::get(Int8Tyi, 1); + + scanForDangerousFunctions(&M); + Mo = &M; + + if (autodictionary) { + + for (auto &F : M) { + + for (auto &BB : F) { + + for (auto &IN : BB) { + + CallInst *callInst = nullptr; + CmpInst * cmpInst = nullptr; + + if ((cmpInst = dyn_cast<CmpInst>(&IN))) { + + Value * op = cmpInst->getOperand(1); + ConstantInt *ilen = dyn_cast<ConstantInt>(op); + + if (ilen) { + + u64 val2 = 0, val = ilen->getZExtValue(); + u32 len = 0; + if (val > 0x10000 && val < 0xffffffff) len = 4; + if (val > 0x100000001 && val < 0xffffffffffffffff) len = 8; + + if (len) { + + auto c = cmpInst->getPredicate(); + + switch (c) { + + case CmpInst::FCMP_OGT: // fall through + case CmpInst::FCMP_OLE: // fall through + case CmpInst::ICMP_SLE: // fall through + case CmpInst::ICMP_SGT: + + // signed comparison and it is a negative constant + if ((len == 4 && (val & 80000000)) || + (len == 8 && (val & 8000000000000000))) { + + if ((val & 0xffff) != 1) val2 = val - 1; + break; + + } + + // fall through + + case CmpInst::FCMP_UGT: // fall through + case CmpInst::FCMP_ULE: // fall through + case CmpInst::ICMP_UGT: // fall through + case CmpInst::ICMP_ULE: + if ((val & 0xffff) != 0xfffe) val2 = val + 1; + break; + + case CmpInst::FCMP_OLT: // fall through + case CmpInst::FCMP_OGE: // fall through + case CmpInst::ICMP_SLT: // fall through + case CmpInst::ICMP_SGE: + + // signed comparison and it is a negative constant + if ((len == 4 && (val & 80000000)) || + (len == 8 && (val & 8000000000000000))) { + + if ((val & 0xffff) != 1) val2 = val - 1; + break; + + } + + // fall through + + case CmpInst::FCMP_ULT: // fall through + case CmpInst::FCMP_UGE: // fall through + case CmpInst::ICMP_ULT: // fall through + case CmpInst::ICMP_UGE: + if ((val & 0xffff) != 1) val2 = val - 1; + break; + + default: + val2 = 0; + + } + + dictionary.push_back(std::string((char *)&val, len)); + found++; + + if (val2) { + + dictionary.push_back(std::string((char *)&val2, len)); + found++; + + } + + } + + } + + } + + if ((callInst = dyn_cast<CallInst>(&IN))) { + + bool isStrcmp = true; + bool isMemcmp = true; + bool isStrncmp = true; + bool isStrcasecmp = true; + bool isStrncasecmp = true; + bool isIntMemcpy = true; + bool isStdString = true; + bool addedNull = false; + size_t optLen = 0; + + Function *Callee = callInst->getCalledFunction(); + if (!Callee) continue; + if (callInst->getCallingConv() != llvm::CallingConv::C) continue; + std::string FuncName = Callee->getName().str(); + isStrcmp &= !FuncName.compare("strcmp"); + isMemcmp &= + (!FuncName.compare("memcmp") || !FuncName.compare("bcmp")); + isStrncmp &= !FuncName.compare("strncmp"); + isStrcasecmp &= !FuncName.compare("strcasecmp"); + isStrncasecmp &= !FuncName.compare("strncasecmp"); + isIntMemcpy &= !FuncName.compare("llvm.memcpy.p0i8.p0i8.i64"); + isStdString &= + ((FuncName.find("basic_string") != std::string::npos && + FuncName.find("compare") != 
std::string::npos) || + (FuncName.find("basic_string") != std::string::npos && + FuncName.find("find") != std::string::npos)); + + /* we do something different here, putting this BB and the + successors in a block map */ + if (!FuncName.compare("__afl_persistent_loop")) { + + BlockList.push_back(&BB); + for (succ_iterator SI = succ_begin(&BB), SE = succ_end(&BB); + SI != SE; ++SI) { + + BasicBlock *succ = *SI; + BlockList.push_back(succ); + + } + + } + + if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp && + !isStrncasecmp && !isIntMemcpy && !isStdString) + continue; + + /* Verify the strcmp/memcmp/strncmp/strcasecmp/strncasecmp function + * prototype */ + FunctionType *FT = Callee->getFunctionType(); + + isStrcmp &= FT->getNumParams() == 2 && + FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0) == FT->getParamType(1) && + FT->getParamType(0) == + IntegerType::getInt8PtrTy(M.getContext()); + isStrcasecmp &= FT->getNumParams() == 2 && + FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0) == FT->getParamType(1) && + FT->getParamType(0) == + IntegerType::getInt8PtrTy(M.getContext()); + isMemcmp &= FT->getNumParams() == 3 && + FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0)->isPointerTy() && + FT->getParamType(1)->isPointerTy() && + FT->getParamType(2)->isIntegerTy(); + isStrncmp &= FT->getNumParams() == 3 && + FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0) == FT->getParamType(1) && + FT->getParamType(0) == + IntegerType::getInt8PtrTy(M.getContext()) && + FT->getParamType(2)->isIntegerTy(); + isStrncasecmp &= FT->getNumParams() == 3 && + FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0) == FT->getParamType(1) && + FT->getParamType(0) == + IntegerType::getInt8PtrTy(M.getContext()) && + FT->getParamType(2)->isIntegerTy(); + isStdString &= FT->getNumParams() >= 2 && + FT->getParamType(0)->isPointerTy() && + FT->getParamType(1)->isPointerTy(); + + if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp && + !isStrncasecmp && !isIntMemcpy && !isStdString) + continue; + + /* is a str{n,}{case,}cmp/memcmp, check if we have + * str{case,}cmp(x, "const") or str{case,}cmp("const", x) + * strn{case,}cmp(x, "const", ..) or strn{case,}cmp("const", x, ..) + * memcmp(x, "const", ..) or memcmp("const", x, ..) */ + Value *Str1P = callInst->getArgOperand(0), + *Str2P = callInst->getArgOperand(1); + std::string Str1, Str2; + StringRef TmpStr; + bool HasStr1 = getConstantStringInfo(Str1P, TmpStr); + if (TmpStr.empty()) + HasStr1 = false; + else + Str1 = TmpStr.str(); + bool HasStr2 = getConstantStringInfo(Str2P, TmpStr); + if (TmpStr.empty()) + HasStr2 = false; + else + Str2 = TmpStr.str(); + + if (debug) + fprintf(stderr, "F:%s %p(%s)->\"%s\"(%s) %p(%s)->\"%s\"(%s)\n", + FuncName.c_str(), Str1P, Str1P->getName().str().c_str(), + Str1.c_str(), HasStr1 == true ? "true" : "false", Str2P, + Str2P->getName().str().c_str(), Str2.c_str(), + HasStr2 == true ? "true" : "false"); + + // we handle the 2nd parameter first because of llvm memcpy + if (!HasStr2) { + + auto *Ptr = dyn_cast<ConstantExpr>(Str2P); + if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) { + + if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) { + + if (Var->hasInitializer()) { + + if (auto *Array = dyn_cast<ConstantDataArray>( + Var->getInitializer())) { + + HasStr2 = true; + Str2 = Array->getAsString().str(); + + } + + } + + } + + } + + } + + // for the internal memcpy routine we only care for the second + // parameter and are not reporting anything. 
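+          // (the constant string is remembered in valueMap, keyed by the
+          //  destination pointer, so a later comparison against that local
+          //  buffer can still produce a dictionary entry)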
+ if (isIntMemcpy == true) { + + if (HasStr2 == true) { + + Value * op2 = callInst->getArgOperand(2); + ConstantInt *ilen = dyn_cast<ConstantInt>(op2); + if (ilen) { + + uint64_t literalLength = Str2.size(); + uint64_t optLength = ilen->getZExtValue(); + if (literalLength + 1 == optLength) { + + Str2.append("\0", 1); // add null byte + addedNull = true; + + } + + } + + valueMap[Str1P] = new std::string(Str2); + + if (debug) + fprintf(stderr, "Saved: %s for %p\n", Str2.c_str(), Str1P); + continue; + + } + + continue; + + } + + // Neither a literal nor a global variable? + // maybe it is a local variable that we saved + if (!HasStr2) { + + std::string *strng = valueMap[Str2P]; + if (strng && !strng->empty()) { + + Str2 = *strng; + HasStr2 = true; + if (debug) + fprintf(stderr, "Filled2: %s for %p\n", strng->c_str(), + Str2P); + + } + + } + + if (!HasStr1) { + + auto Ptr = dyn_cast<ConstantExpr>(Str1P); + + if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) { + + if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) { + + if (Var->hasInitializer()) { + + if (auto *Array = dyn_cast<ConstantDataArray>( + Var->getInitializer())) { + + HasStr1 = true; + Str1 = Array->getAsString().str(); + + } + + } + + } + + } + + } + + // Neither a literal nor a global variable? + // maybe it is a local variable that we saved + if (!HasStr1) { + + std::string *strng = valueMap[Str1P]; + if (strng && !strng->empty()) { + + Str1 = *strng; + HasStr1 = true; + if (debug) + fprintf(stderr, "Filled1: %s for %p\n", strng->c_str(), + Str1P); + + } + + } + + /* handle cases of one string is const, one string is variable */ + if (!(HasStr1 ^ HasStr2)) continue; + + std::string thestring; + + if (HasStr1) + thestring = Str1; + else + thestring = Str2; + + optLen = thestring.length(); + + if (isMemcmp || isStrncmp || isStrncasecmp) { + + Value * op2 = callInst->getArgOperand(2); + ConstantInt *ilen = dyn_cast<ConstantInt>(op2); + if (ilen) { + + uint64_t literalLength = optLen; + optLen = ilen->getZExtValue(); + if (literalLength + 1 == optLen) { // add null byte + thestring.append("\0", 1); + addedNull = true; + + } + + } + + } + + // add null byte if this is a string compare function and a null + // was not already added + if (!isMemcmp) { + + if (addedNull == false) { + + thestring.append("\0", 1); // add null byte + optLen++; + + } + + // ensure we do not have garbage + size_t offset = thestring.find('\0', 0); + if (offset + 1 < optLen) optLen = offset + 1; + thestring = thestring.substr(0, optLen); + + } + + if (!be_quiet) { + + std::string outstring; + fprintf(stderr, "%s: length %zu/%zu \"", FuncName.c_str(), optLen, + thestring.length()); + for (uint8_t i = 0; i < thestring.length(); i++) { + + uint8_t c = thestring[i]; + if (c <= 32 || c >= 127) + fprintf(stderr, "\\x%02x", c); + else + fprintf(stderr, "%c", c); + + } + + fprintf(stderr, "\"\n"); + + } + + // we take the longer string, even if the compare was to a + // shorter part. Note that depending on the optimizer of the + // compiler this can be wrong, but it is more likely that this + // is helping the fuzzer + if (optLen != thestring.length()) optLen = thestring.length(); + if (optLen > MAX_AUTO_EXTRA) optLen = MAX_AUTO_EXTRA; + if (optLen < MIN_AUTO_EXTRA) // too short? 
skip + continue; + + dictionary.push_back(thestring.substr(0, optLen)); + + } + + } + + } + + } + + } + + // afl++ END + + SanCovTracePCIndir = + M.getOrInsertFunction(SanCovTracePCIndirName, VoidTy, IntptrTy); + // Make sure smaller parameters are zero-extended to i64 as required by the + // x86_64 ABI. + AttributeList SanCovTraceCmpZeroExtAL; + if (TargetTriple.getArch() == Triple::x86_64) { + + SanCovTraceCmpZeroExtAL = + SanCovTraceCmpZeroExtAL.addParamAttribute(*C, 0, Attribute::ZExt); + SanCovTraceCmpZeroExtAL = + SanCovTraceCmpZeroExtAL.addParamAttribute(*C, 1, Attribute::ZExt); + + } + + SanCovTracePC = M.getOrInsertFunction(SanCovTracePCName, VoidTy); + + // SanCovTracePCGuard = + // M.getOrInsertFunction(SanCovTracePCGuardName, VoidTy, Int32PtrTy); + + for (auto &F : M) + instrumentFunction(F, DTCallback, PDTCallback); + + // afl++ START + if (documentFile) { + + fclose(documentFile); + documentFile = NULL; + + } + + if (!getenv("AFL_LLVM_LTO_DONTWRITEID") || dictionary.size() || map_addr) { + + // yes we could create our own function, insert it into ctors ... + // but this would be a pain in the butt ... so we use afl-llvm-rt-lto.o + + Function *f = M.getFunction("__afl_auto_init_globals"); + + if (!f) { + + fprintf(stderr, + "Error: init function could not be found (this should not " + "happen)\n"); + exit(-1); + + } + + BasicBlock *bb = &f->getEntryBlock(); + if (!bb) { + + fprintf(stderr, + "Error: init function does not have an EntryBlock (this should " + "not happen)\n"); + exit(-1); + + } + + BasicBlock::iterator IP = bb->getFirstInsertionPt(); + IRBuilder<> IRB(&(*IP)); + + if (map_addr) { + + GlobalVariable *AFLMapAddrFixed = new GlobalVariable( + M, Int64Tyi, true, GlobalValue::ExternalLinkage, 0, "__afl_map_addr"); + ConstantInt *MapAddr = ConstantInt::get(Int64Tyi, map_addr); + StoreInst * StoreMapAddr = IRB.CreateStore(MapAddr, AFLMapAddrFixed); + StoreMapAddr->setMetadata(M.getMDKindID("nosanitize"), + MDNode::get(Ctx, None)); + + } + + if (getenv("AFL_LLVM_LTO_DONTWRITEID") == NULL) { + + uint32_t write_loc = afl_global_id; + + if (afl_global_id % 8) write_loc = (((afl_global_id + 8) >> 3) << 3); + + GlobalVariable *AFLFinalLoc = + new GlobalVariable(M, Int32Tyi, true, GlobalValue::ExternalLinkage, 0, + "__afl_final_loc"); + ConstantInt *const_loc = ConstantInt::get(Int32Tyi, write_loc); + StoreInst * StoreFinalLoc = IRB.CreateStore(const_loc, AFLFinalLoc); + StoreFinalLoc->setMetadata(M.getMDKindID("nosanitize"), + MDNode::get(Ctx, None)); + + } + + if (dictionary.size()) { + + size_t memlen = 0, count = 0, offset = 0; + char * ptr; + + // sort and unique the dictionary + std::sort(dictionary.begin(), dictionary.end()); + auto last = std::unique(dictionary.begin(), dictionary.end()); + dictionary.erase(last, dictionary.end()); + + for (auto token : dictionary) { + + memlen += token.length(); + count++; + + } + + if (!be_quiet) + printf("AUTODICTIONARY: %lu string%s found\n", count, + count == 1 ? 
"" : "s"); + + if (count) { + + if ((ptr = (char *)malloc(memlen + count)) == NULL) { + + fprintf(stderr, "Error: malloc for %lu bytes failed!\n", + memlen + count); + exit(-1); + + } + + count = 0; + + for (auto token : dictionary) { + + if (offset + token.length() < 0xfffff0 && count < MAX_AUTO_EXTRAS) { + + ptr[offset++] = (uint8_t)token.length(); + memcpy(ptr + offset, token.c_str(), token.length()); + offset += token.length(); + count++; + + } + + } + + GlobalVariable *AFLDictionaryLen = + new GlobalVariable(M, Int32Tyi, false, GlobalValue::ExternalLinkage, + 0, "__afl_dictionary_len"); + ConstantInt *const_len = ConstantInt::get(Int32Tyi, offset); + StoreInst *StoreDictLen = IRB.CreateStore(const_len, AFLDictionaryLen); + StoreDictLen->setMetadata(M.getMDKindID("nosanitize"), + MDNode::get(Ctx, None)); + + ArrayType *ArrayTy = ArrayType::get(IntegerType::get(Ctx, 8), offset); + GlobalVariable *AFLInternalDictionary = new GlobalVariable( + M, ArrayTy, true, GlobalValue::ExternalLinkage, + ConstantDataArray::get(Ctx, + *(new ArrayRef<char>((char *)ptr, offset))), + "__afl_internal_dictionary"); + AFLInternalDictionary->setInitializer(ConstantDataArray::get( + Ctx, *(new ArrayRef<char>((char *)ptr, offset)))); + AFLInternalDictionary->setConstant(true); + + GlobalVariable *AFLDictionary = new GlobalVariable( + M, PointerType::get(Int8Tyi, 0), false, + GlobalValue::ExternalLinkage, 0, "__afl_dictionary"); + + Value *AFLDictOff = IRB.CreateGEP(AFLInternalDictionary, Zero); + Value *AFLDictPtr = + IRB.CreatePointerCast(AFLDictOff, PointerType::get(Int8Tyi, 0)); + StoreInst *StoreDict = IRB.CreateStore(AFLDictPtr, AFLDictionary); + StoreDict->setMetadata(M.getMDKindID("nosanitize"), + MDNode::get(Ctx, None)); + + } + + } + + } + + /* Say something nice. */ + + if (!be_quiet) { + + if (!inst) + WARNF("No instrumentation targets found."); + else { + + char modeline[100]; + snprintf(modeline, sizeof(modeline), "%s%s%s%s%s", + getenv("AFL_HARDEN") ? "hardened" : "non-hardened", + getenv("AFL_USE_ASAN") ? ", ASAN" : "", + getenv("AFL_USE_MSAN") ? ", MSAN" : "", + getenv("AFL_USE_CFISAN") ? ", CFISAN" : "", + getenv("AFL_USE_UBSAN") ? ", UBSAN" : ""); + OKF("Instrumented %u locations with no collisions (on average %llu " + "collisions would be in afl-gcc/afl-clang-fast) (%s mode).", + inst, calculateCollisions(inst), modeline); + + } + + } + + // afl++ END + + // We don't reference these arrays directly in any of our runtime functions, + // so we need to prevent them from being dead stripped. + if (TargetTriple.isOSBinFormatMachO()) appendToUsed(M, GlobalsToAppendToUsed); + appendToCompilerUsed(M, GlobalsToAppendToCompilerUsed); + return true; + +} + +// True if block has successors and it dominates all of them. +static bool isFullDominator(const BasicBlock *BB, const DominatorTree *DT) { + + if (succ_begin(BB) == succ_end(BB)) return false; + + for (const BasicBlock *SUCC : make_range(succ_begin(BB), succ_end(BB))) { + + if (!DT->dominates(BB, SUCC)) return false; + + } + + return true; + +} + +// True if block has predecessors and it postdominates all of them. 
+static bool isFullPostDominator(const BasicBlock * BB, + const PostDominatorTree *PDT) { + + if (pred_begin(BB) == pred_end(BB)) return false; + + for (const BasicBlock *PRED : make_range(pred_begin(BB), pred_end(BB))) { + + if (!PDT->dominates(BB, PRED)) return false; + + } + + return true; + +} + +static bool shouldInstrumentBlock(const Function &F, const BasicBlock *BB, + const DominatorTree * DT, + const PostDominatorTree * PDT, + const SanitizerCoverageOptions &Options) { + + // Don't insert coverage for blocks containing nothing but unreachable: we + // will never call __sanitizer_cov() for them, so counting them in + // NumberOfInstrumentedBlocks() might complicate calculation of code coverage + // percentage. Also, unreachable instructions frequently have no debug + // locations. + if (isa<UnreachableInst>(BB->getFirstNonPHIOrDbgOrLifetime())) return false; + + // Don't insert coverage into blocks without a valid insertion point + // (catchswitch blocks). + if (BB->getFirstInsertionPt() == BB->end()) return false; + + // afl++ START + if (!Options.NoPrune && &F.getEntryBlock() == BB && F.size() > 1) + return false; + // afl++ END + + if (Options.NoPrune || &F.getEntryBlock() == BB) return true; + + if (Options.CoverageType == SanitizerCoverageOptions::SCK_Function && + &F.getEntryBlock() != BB) + return false; + + // Do not instrument full dominators, or full post-dominators with multiple + // predecessors. + return !isFullDominator(BB, DT) && + !(isFullPostDominator(BB, PDT) && !BB->getSinglePredecessor()); + +} + +void ModuleSanitizerCoverage::instrumentFunction( + Function &F, DomTreeCallback DTCallback, PostDomTreeCallback PDTCallback) { + + if (F.empty()) return; + if (F.getName().find(".module_ctor") != std::string::npos) + return; // Should not instrument sanitizer init functions. + if (F.getName().startswith("__sanitizer_")) + return; // Don't instrument __sanitizer_* callbacks. + // Don't touch available_externally functions, their actual body is elewhere. + if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return; + // Don't instrument MSVC CRT configuration helpers. They may run before normal + // initialization. + if (F.getName() == "__local_stdio_printf_options" || + F.getName() == "__local_stdio_scanf_options") + return; + if (isa<UnreachableInst>(F.getEntryBlock().getTerminator())) return; + // Don't instrument functions using SEH for now. Splitting basic blocks like + // we do for coverage breaks WinEHPrepare. + // FIXME: Remove this when SEH no longer uses landingpad pattern matching. 
+ if (F.hasPersonalityFn() && + isAsynchronousEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) + return; + // if (Allowlist && !Allowlist->inSection("coverage", "fun", F.getName())) + // return; + // if (Blocklist && Blocklist->inSection("coverage", "fun", F.getName())) + // return; + + // afl++ START + if (!F.size()) return; + if (isIgnoreFunction(&F)) return; + // afl++ END + + if (Options.CoverageType >= SanitizerCoverageOptions::SCK_Edge) + SplitAllCriticalEdges( + F, CriticalEdgeSplittingOptions().setIgnoreUnreachableDests()); + SmallVector<Instruction *, 8> IndirCalls; + SmallVector<BasicBlock *, 16> BlocksToInstrument; + + const DominatorTree * DT = DTCallback(F); + const PostDominatorTree *PDT = PDTCallback(F); + bool IsLeafFunc = true; + + for (auto &BB : F) { + + if (shouldInstrumentBlock(F, &BB, DT, PDT, Options)) + BlocksToInstrument.push_back(&BB); + for (auto &Inst : BB) { + + if (Options.IndirectCalls) { + + CallBase *CB = dyn_cast<CallBase>(&Inst); + if (CB && !CB->getCalledFunction()) IndirCalls.push_back(&Inst); + + } + + } + + } + + InjectCoverage(F, BlocksToInstrument, IsLeafFunc); + InjectCoverageForIndirectCalls(F, IndirCalls); + +} + +GlobalVariable *ModuleSanitizerCoverage::CreateFunctionLocalArrayInSection( + size_t NumElements, Function &F, Type *Ty, const char *Section) { + + ArrayType *ArrayTy = ArrayType::get(Ty, NumElements); + auto Array = new GlobalVariable( + *CurModule, ArrayTy, false, GlobalVariable::PrivateLinkage, + Constant::getNullValue(ArrayTy), "__sancov_gen_"); + + if (TargetTriple.supportsCOMDAT() && !F.isInterposable()) + if (auto Comdat = + GetOrCreateFunctionComdat(F, TargetTriple, CurModuleUniqueId)) + Array->setComdat(Comdat); + Array->setSection(getSectionName(Section)); + Array->setAlignment(Align(DL->getTypeStoreSize(Ty).getFixedSize())); + GlobalsToAppendToUsed.push_back(Array); + GlobalsToAppendToCompilerUsed.push_back(Array); + MDNode *MD = MDNode::get(F.getContext(), ValueAsMetadata::get(&F)); + Array->addMetadata(LLVMContext::MD_associated, *MD); + + return Array; + +} + +GlobalVariable *ModuleSanitizerCoverage::CreatePCArray( + Function &F, ArrayRef<BasicBlock *> AllBlocks) { + + size_t N = AllBlocks.size(); + assert(N); + SmallVector<Constant *, 32> PCs; + IRBuilder<> IRB(&*F.getEntryBlock().getFirstInsertionPt()); + for (size_t i = 0; i < N; i++) { + + if (&F.getEntryBlock() == AllBlocks[i]) { + + PCs.push_back((Constant *)IRB.CreatePointerCast(&F, IntptrPtrTy)); + PCs.push_back((Constant *)IRB.CreateIntToPtr( + ConstantInt::get(IntptrTy, 1), IntptrPtrTy)); + + } else { + + PCs.push_back((Constant *)IRB.CreatePointerCast( + BlockAddress::get(AllBlocks[i]), IntptrPtrTy)); + PCs.push_back((Constant *)IRB.CreateIntToPtr( + ConstantInt::get(IntptrTy, 0), IntptrPtrTy)); + + } + + } + + auto *PCArray = CreateFunctionLocalArrayInSection(N * 2, F, IntptrPtrTy, + SanCovPCsSectionName); + PCArray->setInitializer( + ConstantArray::get(ArrayType::get(IntptrPtrTy, N * 2), PCs)); + PCArray->setConstant(true); + + return PCArray; + +} + +void ModuleSanitizerCoverage::CreateFunctionLocalArrays( + Function &F, ArrayRef<BasicBlock *> AllBlocks) { + + if (Options.TracePCGuard) + FunctionGuardArray = CreateFunctionLocalArrayInSection( + AllBlocks.size(), F, Int32Ty, SanCovGuardsSectionName); + if (Options.Inline8bitCounters) + Function8bitCounterArray = CreateFunctionLocalArrayInSection( + AllBlocks.size(), F, Int8Ty, SanCovCountersSectionName); + if (Options.InlineBoolFlag) + FunctionBoolArray = CreateFunctionLocalArrayInSection( + 
AllBlocks.size(), F, Int1Ty, SanCovBoolFlagSectionName); + if (Options.PCTable) FunctionPCsArray = CreatePCArray(F, AllBlocks); + +} + +bool ModuleSanitizerCoverage::InjectCoverage(Function & F, + ArrayRef<BasicBlock *> AllBlocks, + bool IsLeafFunc) { + + if (AllBlocks.empty()) return false; + CreateFunctionLocalArrays(F, AllBlocks); + for (size_t i = 0, N = AllBlocks.size(); i < N; i++) { + + // afl++ START + if (BlockList.size()) { + + int skip = 0; + for (uint32_t k = 0; k < BlockList.size(); k++) { + + if (AllBlocks[i] == BlockList[k]) { + + if (debug) + fprintf(stderr, + "DEBUG: Function %s skipping BB with/after __afl_loop\n", + F.getName().str().c_str()); + skip = 1; + + } + + } + + if (skip) continue; + + } + + // afl++ END + + InjectCoverageAtBlock(F, *AllBlocks[i], i, IsLeafFunc); + + } + + return true; + +} + +// On every indirect call we call a run-time function +// __sanitizer_cov_indir_call* with two parameters: +// - callee address, +// - global cache array that contains CacheSize pointers (zero-initialized). +// The cache is used to speed up recording the caller-callee pairs. +// The address of the caller is passed implicitly via caller PC. +// CacheSize is encoded in the name of the run-time function. +void ModuleSanitizerCoverage::InjectCoverageForIndirectCalls( + Function &F, ArrayRef<Instruction *> IndirCalls) { + + if (IndirCalls.empty()) return; + assert(Options.TracePC || Options.TracePCGuard || + Options.Inline8bitCounters || Options.InlineBoolFlag); + for (auto I : IndirCalls) { + + IRBuilder<> IRB(I); + CallBase & CB = cast<CallBase>(*I); + Value * Callee = CB.getCalledOperand(); + if (isa<InlineAsm>(Callee)) continue; + IRB.CreateCall(SanCovTracePCIndir, IRB.CreatePointerCast(Callee, IntptrTy)); + + } + +} + +void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB, + size_t Idx, + bool IsLeafFunc) { + + BasicBlock::iterator IP = BB.getFirstInsertionPt(); + bool IsEntryBB = &BB == &F.getEntryBlock(); + DebugLoc EntryLoc; + if (IsEntryBB) { + + if (auto SP = F.getSubprogram()) + EntryLoc = DebugLoc::get(SP->getScopeLine(), 0, SP); + // Keep static allocas and llvm.localescape calls in the entry block. Even + // if we aren't splitting the block, it's nice for allocas to be before + // calls. + IP = PrepareToSplitEntryBlock(BB, IP); + + } else { + + EntryLoc = IP->getDebugLoc(); + + } + + IRBuilder<> IRB(&*IP); + IRB.SetCurrentDebugLocation(EntryLoc); + if (Options.TracePC) { + + IRB.CreateCall(SanCovTracePC) +#if LLVM_VERSION_MAJOR < 12 + ->cannotMerge(); // gets the PC using GET_CALLER_PC. +#else + ->setCannotMerge(); // gets the PC using GET_CALLER_PC. 
+#endif + + } + + if (Options.TracePCGuard) { + + // afl++ START + ++afl_global_id; + + if (documentFile) { + + unsigned long long int moduleID = + (((unsigned long long int)(rand() & 0xffffffff)) << 32) | getpid(); + fprintf(documentFile, "ModuleID=%llu Function=%s edgeID=%u\n", moduleID, + F.getName().str().c_str(), afl_global_id); + + } + + /* Set the ID of the inserted basic block */ + + ConstantInt *CurLoc = ConstantInt::get(Int32Tyi, afl_global_id); + + /* Load SHM pointer */ + + Value *MapPtrIdx; + + if (map_addr) { + + MapPtrIdx = IRB.CreateGEP(MapPtrFixed, CurLoc); + + } else { + + LoadInst *MapPtr = IRB.CreateLoad(AFLMapPtr); + MapPtr->setMetadata(Mo->getMDKindID("nosanitize"), + MDNode::get(*Ct, None)); + MapPtrIdx = IRB.CreateGEP(MapPtr, CurLoc); + + } + + /* Update bitmap */ + + LoadInst *Counter = IRB.CreateLoad(MapPtrIdx); + Counter->setMetadata(Mo->getMDKindID("nosanitize"), MDNode::get(*Ct, None)); + + Value *Incr = IRB.CreateAdd(Counter, One); + + if (skip_nozero == NULL) { + + auto cf = IRB.CreateICmpEQ(Incr, Zero); + auto carry = IRB.CreateZExt(cf, Int8Tyi); + Incr = IRB.CreateAdd(Incr, carry); + + } + + IRB.CreateStore(Incr, MapPtrIdx) + ->setMetadata(Mo->getMDKindID("nosanitize"), MDNode::get(*Ct, None)); + + // done :) + + inst++; + // afl++ END + + /* + XXXXXXXXXXXXXXXXXXX + + auto GuardPtr = IRB.CreateIntToPtr( + IRB.CreateAdd(IRB.CreatePointerCast(FunctionGuardArray, IntptrTy), + ConstantInt::get(IntptrTy, Idx * 4)), + Int32PtrTy); + + IRB.CreateCall(SanCovTracePCGuard, GuardPtr)->setCannotMerge(); + */ + + } + + if (Options.Inline8bitCounters) { + + auto CounterPtr = IRB.CreateGEP( + Function8bitCounterArray->getValueType(), Function8bitCounterArray, + {ConstantInt::get(IntptrTy, 0), ConstantInt::get(IntptrTy, Idx)}); + auto Load = IRB.CreateLoad(Int8Ty, CounterPtr); + auto Inc = IRB.CreateAdd(Load, ConstantInt::get(Int8Ty, 1)); + auto Store = IRB.CreateStore(Inc, CounterPtr); + SetNoSanitizeMetadata(Load); + SetNoSanitizeMetadata(Store); + + } + + if (Options.InlineBoolFlag) { + + auto FlagPtr = IRB.CreateGEP( + FunctionBoolArray->getValueType(), FunctionBoolArray, + {ConstantInt::get(IntptrTy, 0), ConstantInt::get(IntptrTy, Idx)}); + auto Load = IRB.CreateLoad(Int1Ty, FlagPtr); + auto ThenTerm = + SplitBlockAndInsertIfThen(IRB.CreateIsNull(Load), &*IP, false); + IRBuilder<> ThenIRB(ThenTerm); + auto Store = ThenIRB.CreateStore(ConstantInt::getTrue(Int1Ty), FlagPtr); + SetNoSanitizeMetadata(Load); + SetNoSanitizeMetadata(Store); + + } + +} + +std::string ModuleSanitizerCoverage::getSectionName( + const std::string &Section) const { + + if (TargetTriple.isOSBinFormatCOFF()) { + + if (Section == SanCovCountersSectionName) return ".SCOV$CM"; + if (Section == SanCovBoolFlagSectionName) return ".SCOV$BM"; + if (Section == SanCovPCsSectionName) return ".SCOVP$M"; + return ".SCOV$GM"; // For SanCovGuardsSectionName. 
+ + } + + if (TargetTriple.isOSBinFormatMachO()) return "__DATA,__" + Section; + return "__" + Section; + +} + +/* +std::string ModuleSanitizerCoverage::getSectionStart( + const std::string &Section) const { + + if (TargetTriple.isOSBinFormatMachO()) + return "\1section$start$__DATA$__" + Section; + return "__start___" + Section; + +} + +std::string ModuleSanitizerCoverage::getSectionEnd( + const std::string &Section) const { + + if (TargetTriple.isOSBinFormatMachO()) + return "\1section$end$__DATA$__" + Section; + return "__stop___" + Section; + +} + +*/ + +char ModuleSanitizerCoverageLegacyPass::ID = 0; + +INITIALIZE_PASS_BEGIN(ModuleSanitizerCoverageLegacyPass, "sancov", + "Pass for instrumenting coverage on functions", false, + false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) +INITIALIZE_PASS_END(ModuleSanitizerCoverageLegacyPass, "sancov", + "Pass for instrumenting coverage on functions", false, + false) + +ModulePass *llvm::createModuleSanitizerCoverageLegacyPassPass( + const SanitizerCoverageOptions &Options, + const std::vector<std::string> &AllowlistFiles, + const std::vector<std::string> &BlocklistFiles) { + + return new ModuleSanitizerCoverageLegacyPass(Options); + //, AllowlistFiles, BlocklistFiles); + +} + +static void registerLTOPass(const PassManagerBuilder &, + legacy::PassManagerBase &PM) { + + auto p = new ModuleSanitizerCoverageLegacyPass(); + PM.add(p); + +} + +static RegisterStandardPasses RegisterCompTransPass( + PassManagerBuilder::EP_OptimizerLast, registerLTOPass); + +static RegisterStandardPasses RegisterCompTransPass0( + PassManagerBuilder::EP_EnabledOnOptLevel0, registerLTOPass); + +#if LLVM_VERSION_MAJOR >= 11 +static RegisterStandardPasses RegisterCompTransPassLTO( + PassManagerBuilder::EP_FullLinkTimeOptimizationLast, registerLTOPass); +#endif + diff --git a/instrumentation/afl-compiler-rt.o.c b/instrumentation/afl-compiler-rt.o.c new file mode 100644 index 00000000..a3d75b15 --- /dev/null +++ b/instrumentation/afl-compiler-rt.o.c @@ -0,0 +1,1254 @@ +/* + american fuzzy lop++ - instrumentation bootstrap + ------------------------------------------------ + + Copyright 2015, 2016 Google Inc. All rights reserved. + Copyright 2019-2020 AFLplusplus Project. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at: + + http://www.apache.org/licenses/LICENSE-2.0 + + +*/ + +#ifdef __ANDROID__ + #include "android-ashmem.h" +#endif +#include "config.h" +#include "types.h" +#include "cmplog.h" +#include "llvm-ngram-coverage.h" + +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <unistd.h> +#include <string.h> +#include <assert.h> +#include <stdint.h> +#include <stddef.h> +#include <limits.h> +#include <errno.h> + +#include <sys/mman.h> +#include <sys/shm.h> +#include <sys/wait.h> +#include <sys/types.h> + +#include "llvm/Config/llvm-config.h" + +#ifdef __linux__ + #include "snapshot-inl.h" +#endif + +/* This is a somewhat ugly hack for the experimental 'trace-pc-guard' mode. + Basically, we need to make sure that the forkserver is initialized after + the LLVM-generated runtime initialization pass, not before. 
*/ + +#ifndef MAP_FIXED_NOREPLACE + #ifdef MAP_EXCL + #define MAP_FIXED_NOREPLACE MAP_EXCL | MAP_FIXED + #else + #define MAP_FIXED_NOREPLACE MAP_FIXED + #endif +#endif + +#define CTOR_PRIO 3 + +#include <sys/mman.h> +#include <fcntl.h> + +/* Globals needed by the injected instrumentation. The __afl_area_initial region + is used for instrumentation output before __afl_map_shm() has a chance to + run. It will end up as .comm, so it shouldn't be too wasteful. */ + +#if MAP_SIZE <= 65536 + #define MAP_INITIAL_SIZE 256000 +#else + #define MAP_INITIAL_SIZE MAP_SIZE +#endif + +u8 __afl_area_initial[MAP_INITIAL_SIZE]; +u8 * __afl_area_ptr = __afl_area_initial; +u8 * __afl_dictionary; +u8 * __afl_fuzz_ptr; +u32 __afl_fuzz_len_dummy; +u32 *__afl_fuzz_len = &__afl_fuzz_len_dummy; + +u32 __afl_final_loc; +u32 __afl_map_size = MAP_SIZE; +u32 __afl_dictionary_len; +u64 __afl_map_addr; + +#ifdef __ANDROID__ +PREV_LOC_T __afl_prev_loc[NGRAM_SIZE_MAX]; +u32 __afl_prev_ctx; +u32 __afl_cmp_counter; +#else +__thread PREV_LOC_T __afl_prev_loc[NGRAM_SIZE_MAX]; +__thread u32 __afl_prev_ctx; +__thread u32 __afl_cmp_counter; +#endif + +int __afl_sharedmem_fuzzing __attribute__((weak)); + +struct cmp_map *__afl_cmp_map; + +/* Running in persistent mode? */ + +static u8 is_persistent; + +/* Are we in sancov mode? */ + +static u8 _is_sancov; + +/* Uninspired gcc plugin instrumentation */ + +void __afl_trace(const u32 x) { + +#if 1 /* enable for neverZero feature. */ + __afl_area_ptr[__afl_prev_loc[0] ^ x] += + 1 + ((u8)(1 + __afl_area_ptr[__afl_prev_loc[0] ^ x]) == 0); +#else + ++__afl_area_ptr[__afl_prev_loc[0] ^ x]; +#endif + + __afl_prev_loc[0] = (x >> 1); + return; + +} + +/* Error reporting to forkserver controller */ + +void send_forkserver_error(int error) { + + u32 status; + if (!error || error > 0xffff) return; + status = (FS_OPT_ERROR | FS_OPT_SET_ERROR(error)); + if (write(FORKSRV_FD + 1, (char *)&status, 4) != 4) return; + +} + +/* SHM fuzzing setup. */ + +static void __afl_map_shm_fuzz() { + + char *id_str = getenv(SHM_FUZZ_ENV_VAR); + + if (id_str) { + + u8 *map = NULL; + +#ifdef USEMMAP + const char * shm_file_path = id_str; + int shm_fd = -1; + unsigned char *shm_base = NULL; + + /* create the shared memory segment as if it was a file */ + shm_fd = shm_open(shm_file_path, O_RDWR, 0600); + if (shm_fd == -1) { + + fprintf(stderr, "shm_open() failed for fuzz\n"); + send_forkserver_error(FS_ERROR_SHM_OPEN); + exit(1); + + } + + map = + (u8 *)mmap(0, MAX_FILE + sizeof(u32), PROT_READ, MAP_SHARED, shm_fd, 0); + +#else + u32 shm_id = atoi(id_str); + map = (u8 *)shmat(shm_id, NULL, 0); + +#endif + + /* Whooooops. */ + + if (!map || map == (void *)-1) { + + perror("Could not access fuzzign shared memory"); + exit(1); + + } + + __afl_fuzz_len = (u32 *)map; + __afl_fuzz_ptr = map + sizeof(u32); + + if (getenv("AFL_DEBUG")) { + + fprintf(stderr, "DEBUG: successfully got fuzzing shared memory\n"); + + } + + } else { + + fprintf(stderr, "Error: variable for fuzzing shared memory is not set\n"); + exit(1); + + } + +} + +/* SHM setup. 
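+
+   Two regions can be attached below: the coverage map (via SHM_ENV_VAR) and,
+   if CMPLOG_SHM_ENV_VAR is set, the cmplog map. For reference, the writing
+   side (the fuzzer, not this file) sets up the SysV variant roughly like
+   this; a hedged sketch, the real code lives in the fuzzer's shared memory
+   handling:
+
+     int id = shmget(IPC_PRIVATE, map_size, IPC_CREAT | IPC_EXCL | 0600);
+     char buf[32];
+     snprintf(buf, sizeof(buf), "%d", id);
+     setenv(SHM_ENV_VAR, buf, 1);          /+ __afl_map_shm() reads this +/
+     u8 *trace_bits = shmat(id, NULL, 0);  /+ fuzzer-side view of the map +/
+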
*/ + +static void __afl_map_shm(void) { + + // we we are not running in afl ensure the map exists + if (!__afl_area_ptr) { __afl_area_ptr = __afl_area_initial; } + + char *id_str = getenv(SHM_ENV_VAR); + + if (__afl_final_loc) { + + if (__afl_final_loc % 8) + __afl_final_loc = (((__afl_final_loc + 7) >> 3) << 3); + __afl_map_size = __afl_final_loc; + + if (__afl_final_loc > MAP_SIZE) { + + char *ptr; + u32 val = 0; + if ((ptr = getenv("AFL_MAP_SIZE")) != NULL) val = atoi(ptr); + if (val < __afl_final_loc) { + + if (__afl_final_loc > FS_OPT_MAX_MAPSIZE) { + + if (!getenv("AFL_QUIET")) + fprintf(stderr, + "Error: AFL++ tools *require* to set AFL_MAP_SIZE to %u " + "to be able to run this instrumented program!\n", + __afl_final_loc); + + if (id_str) { + + send_forkserver_error(FS_ERROR_MAP_SIZE); + exit(-1); + + } + + } else { + + if (!getenv("AFL_QUIET")) + fprintf(stderr, + "Warning: AFL++ tools will need to set AFL_MAP_SIZE to %u " + "to be able to run this instrumented program!\n", + __afl_final_loc); + + } + + } + + } + + } + + /* If we're running under AFL, attach to the appropriate region, replacing the + early-stage __afl_area_initial region that is needed to allow some really + hacky .init code to work correctly in projects such as OpenSSL. */ + + if (getenv("AFL_DEBUG")) + fprintf(stderr, + "DEBUG: id_str %s, __afl_area_ptr %p, __afl_area_initial %p, " + "__afl_map_addr 0x%llx, MAP_SIZE %u, __afl_final_loc %u, " + "max_size_forkserver %u/0x%x\n", + id_str == NULL ? "<null>" : id_str, __afl_area_ptr, + __afl_area_initial, __afl_map_addr, MAP_SIZE, __afl_final_loc, + FS_OPT_MAX_MAPSIZE, FS_OPT_MAX_MAPSIZE); + + if (id_str) { + + if (__afl_area_ptr && __afl_area_ptr != __afl_area_initial) { + + if (__afl_map_addr) + munmap((void *)__afl_map_addr, __afl_final_loc); + else + free(__afl_area_ptr); + __afl_area_ptr = __afl_area_initial; + + } + +#ifdef USEMMAP + const char * shm_file_path = id_str; + int shm_fd = -1; + unsigned char *shm_base = NULL; + + /* create the shared memory segment as if it was a file */ + shm_fd = shm_open(shm_file_path, O_RDWR, 0600); + if (shm_fd == -1) { + + fprintf(stderr, "shm_open() failed\n"); + send_forkserver_error(FS_ERROR_SHM_OPEN); + exit(1); + + } + + /* map the shared memory segment to the address space of the process */ + if (__afl_map_addr) { + + shm_base = + mmap((void *)__afl_map_addr, __afl_map_size, PROT_READ | PROT_WRITE, + MAP_FIXED_NOREPLACE | MAP_SHARED, shm_fd, 0); + + } else { + + shm_base = mmap(0, __afl_map_size, PROT_READ | PROT_WRITE, MAP_SHARED, + shm_fd, 0); + + } + + if (shm_base == MAP_FAILED) { + + close(shm_fd); + shm_fd = -1; + + fprintf(stderr, "mmap() failed\n"); + if (__afl_map_addr) + send_forkserver_error(FS_ERROR_MAP_ADDR); + else + send_forkserver_error(FS_ERROR_MMAP); + exit(2); + + } + + __afl_area_ptr = shm_base; +#else + u32 shm_id = atoi(id_str); + + __afl_area_ptr = shmat(shm_id, (void *)__afl_map_addr, 0); + +#endif + + /* Whooooops. */ + + if (__afl_area_ptr == (void *)-1) { + + if (__afl_map_addr) + send_forkserver_error(FS_ERROR_MAP_ADDR); + else + send_forkserver_error(FS_ERROR_SHMAT); + _exit(1); + + } + + /* Write something into the bitmap so that even with low AFL_INST_RATIO, + our parent doesn't give up on us. 
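+     (The fuzzer's dry run treats an all-zero map as "no instrumentation
+     detected" and aborts, so byte 0 is set unconditionally here and again
+     after every persistent-mode iteration further down.)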
*/ + + __afl_area_ptr[0] = 1; + + } else if ((!__afl_area_ptr || __afl_area_ptr == __afl_area_initial) && + + __afl_map_addr) { + + __afl_area_ptr = + mmap((void *)__afl_map_addr, __afl_map_size, PROT_READ | PROT_WRITE, + MAP_FIXED_NOREPLACE | MAP_SHARED | MAP_ANONYMOUS, -1, 0); + + if (__afl_area_ptr == MAP_FAILED) { + + fprintf(stderr, "can not acquire mmap for address %p\n", + (void *)__afl_map_addr); + exit(1); + + } + + } else if (_is_sancov && __afl_area_ptr != __afl_area_initial) { + + free(__afl_area_ptr); + __afl_area_ptr = NULL; + if (__afl_final_loc > MAP_INITIAL_SIZE) + __afl_area_ptr = malloc(__afl_final_loc); + if (!__afl_area_ptr) __afl_area_ptr = __afl_area_initial; + + } + + id_str = getenv(CMPLOG_SHM_ENV_VAR); + + if (getenv("AFL_DEBUG")) { + + fprintf(stderr, "DEBUG: cmplog id_str %s\n", + id_str == NULL ? "<null>" : id_str); + + } + + if (id_str) { + +#ifdef USEMMAP + const char * shm_file_path = id_str; + int shm_fd = -1; + unsigned char *shm_base = NULL; + + /* create the shared memory segment as if it was a file */ + shm_fd = shm_open(shm_file_path, O_RDWR, 0600); + if (shm_fd == -1) { + + fprintf(stderr, "shm_open() failed\n"); + exit(1); + + } + + /* map the shared memory segment to the address space of the process */ + shm_base = mmap(0, sizeof(struct cmp_map), PROT_READ | PROT_WRITE, + MAP_SHARED, shm_fd, 0); + if (shm_base == MAP_FAILED) { + + close(shm_fd); + shm_fd = -1; + + fprintf(stderr, "mmap() failed\n"); + exit(2); + + } + + __afl_cmp_map = shm_base; +#else + u32 shm_id = atoi(id_str); + + __afl_cmp_map = shmat(shm_id, NULL, 0); +#endif + + if (__afl_cmp_map == (void *)-1) _exit(1); + + } + +} + +#ifdef __linux__ +static void __afl_start_snapshots(void) { + + static u8 tmp[4] = {0, 0, 0, 0}; + s32 child_pid; + u32 status = 0; + u32 already_read_first = 0; + u32 was_killed; + + u8 child_stopped = 0; + + void (*old_sigchld_handler)(int) = 0; // = signal(SIGCHLD, SIG_DFL); + + /* Phone home and tell the parent that we're OK. If parent isn't there, + assume we're not running in forkserver mode and just execute program. 
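+
+     The four status bytes written below form a bit field: FS_OPT_ENABLED
+     switches on the extended protocol, FS_OPT_SNAPSHOT advertises snapshot
+     support, FS_OPT_MAPSIZE together with FS_OPT_SET_MAPSIZE(n) encodes the
+     real map size, and FS_OPT_SHDMEM_FUZZ / FS_OPT_AUTODICT request the
+     shared-memory test case and dictionary hand-over negotiated right after.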
*/ + + status |= (FS_OPT_ENABLED | FS_OPT_SNAPSHOT); + if (__afl_sharedmem_fuzzing != 0) status |= FS_OPT_SHDMEM_FUZZ; + if (__afl_map_size <= FS_OPT_MAX_MAPSIZE) + status |= (FS_OPT_SET_MAPSIZE(__afl_map_size) | FS_OPT_MAPSIZE); + if (__afl_dictionary_len && __afl_dictionary) status |= FS_OPT_AUTODICT; + memcpy(tmp, &status, 4); + + if (write(FORKSRV_FD + 1, tmp, 4) != 4) return; + + if (__afl_sharedmem_fuzzing || (__afl_dictionary_len && __afl_dictionary)) { + + if (read(FORKSRV_FD, &was_killed, 4) != 4) _exit(1); + + if (getenv("AFL_DEBUG")) { + + fprintf(stderr, "target forkserver recv: %08x\n", was_killed); + + } + + if ((was_killed & (FS_OPT_ENABLED | FS_OPT_SHDMEM_FUZZ)) == + (FS_OPT_ENABLED | FS_OPT_SHDMEM_FUZZ)) { + + __afl_map_shm_fuzz(); + + } + + if ((was_killed & (FS_OPT_ENABLED | FS_OPT_AUTODICT)) == + (FS_OPT_ENABLED | FS_OPT_AUTODICT)) { + + // great lets pass the dictionary through the forkserver FD + u32 len = __afl_dictionary_len, offset = 0; + s32 ret; + + if (write(FORKSRV_FD + 1, &len, 4) != 4) { + + write(2, "Error: could not send dictionary len\n", + strlen("Error: could not send dictionary len\n")); + _exit(1); + + } + + while (len != 0) { + + ret = write(FORKSRV_FD + 1, __afl_dictionary + offset, len); + + if (ret < 1) { + + write(2, "Error: could not send dictionary\n", + strlen("Error: could not send dictionary\n")); + _exit(1); + + } + + len -= ret; + offset += ret; + + } + + } else { + + // uh this forkserver does not understand extended option passing + // or does not want the dictionary + if (!__afl_fuzz_ptr) already_read_first = 1; + + } + + } + + while (1) { + + int status; + + if (already_read_first) { + + already_read_first = 0; + + } else { + + /* Wait for parent by reading from the pipe. Abort if read fails. */ + if (read(FORKSRV_FD, &was_killed, 4) != 4) _exit(1); + + } + + #ifdef _AFL_DOCUMENT_MUTATIONS + if (__afl_fuzz_ptr) { + + static uint32_t counter = 0; + char fn[32]; + sprintf(fn, "%09u:forkserver", counter); + s32 fd_doc = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0600); + if (fd_doc >= 0) { + + if (write(fd_doc, __afl_fuzz_ptr, *__afl_fuzz_len) != *__afl_fuzz_len) { + + fprintf(stderr, "write of mutation file failed: %s\n", fn); + unlink(fn); + + } + + close(fd_doc); + + } + + counter++; + + } + + #endif + + /* If we stopped the child in persistent mode, but there was a race + condition and afl-fuzz already issued SIGKILL, write off the old + process. */ + + if (child_stopped && was_killed) { + + child_stopped = 0; + if (waitpid(child_pid, &status, 0) < 0) _exit(1); + + } + + if (!child_stopped) { + + /* Once woken up, create a clone of our process. */ + + child_pid = fork(); + if (child_pid < 0) _exit(1); + + /* In child process: close fds, resume execution. */ + + if (!child_pid) { + + //(void)nice(-20); // does not seem to improve + + signal(SIGCHLD, old_sigchld_handler); + + close(FORKSRV_FD); + close(FORKSRV_FD + 1); + + if (!afl_snapshot_take(AFL_SNAPSHOT_MMAP | AFL_SNAPSHOT_FDS | + AFL_SNAPSHOT_REGS | AFL_SNAPSHOT_EXIT)) { + + raise(SIGSTOP); + + } + + __afl_area_ptr[0] = 1; + memset(__afl_prev_loc, 0, NGRAM_SIZE_MAX * sizeof(PREV_LOC_T)); + + return; + + } + + } else { + + /* Special handling for persistent mode: if the child is alive but + currently stopped, simply restart it with SIGCONT. */ + + kill(child_pid, SIGCONT); + child_stopped = 0; + + } + + /* In parent process: write PID to pipe, then wait for child. 
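+
+       waitpid() runs with WUNTRACED so that a child which stopped itself via
+       SIGSTOP (the "run finished" signal in snapshot and persistent operation)
+       is reported as stopped rather than reaped; the WIFSTOPPED() check below
+       then keeps it alive for the next iteration instead of forking again.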
*/ + + if (write(FORKSRV_FD + 1, &child_pid, 4) != 4) _exit(1); + + if (waitpid(child_pid, &status, WUNTRACED) < 0) _exit(1); + + /* In persistent mode, the child stops itself with SIGSTOP to indicate + a successful run. In this case, we want to wake it up without forking + again. */ + + if (WIFSTOPPED(status)) child_stopped = 1; + + /* Relay wait status to pipe, then loop back. */ + + if (write(FORKSRV_FD + 1, &status, 4) != 4) _exit(1); + + } + +} + +#endif + +/* Fork server logic. */ + +static void __afl_start_forkserver(void) { + +#ifdef __linux__ + if (/*!is_persistent &&*/ !__afl_cmp_map && !getenv("AFL_NO_SNAPSHOT") && + afl_snapshot_init() >= 0) { + + __afl_start_snapshots(); + return; + + } + +#endif + + u8 tmp[4] = {0, 0, 0, 0}; + s32 child_pid; + u32 status = 0; + u32 already_read_first = 0; + u32 was_killed; + + u8 child_stopped = 0; + + void (*old_sigchld_handler)(int) = 0; // = signal(SIGCHLD, SIG_DFL); + + if (__afl_map_size <= FS_OPT_MAX_MAPSIZE) + status |= (FS_OPT_SET_MAPSIZE(__afl_map_size) | FS_OPT_MAPSIZE); + if (__afl_dictionary_len && __afl_dictionary) status |= FS_OPT_AUTODICT; + if (__afl_sharedmem_fuzzing != 0) status |= FS_OPT_SHDMEM_FUZZ; + if (status) status |= (FS_OPT_ENABLED); + memcpy(tmp, &status, 4); + + /* Phone home and tell the parent that we're OK. If parent isn't there, + assume we're not running in forkserver mode and just execute program. */ + + if (write(FORKSRV_FD + 1, tmp, 4) != 4) return; + + if (__afl_sharedmem_fuzzing || (__afl_dictionary_len && __afl_dictionary)) { + + if (read(FORKSRV_FD, &was_killed, 4) != 4) _exit(1); + + if (getenv("AFL_DEBUG")) { + + fprintf(stderr, "target forkserver recv: %08x\n", was_killed); + + } + + if ((was_killed & (FS_OPT_ENABLED | FS_OPT_SHDMEM_FUZZ)) == + (FS_OPT_ENABLED | FS_OPT_SHDMEM_FUZZ)) { + + __afl_map_shm_fuzz(); + + } + + if ((was_killed & (FS_OPT_ENABLED | FS_OPT_AUTODICT)) == + (FS_OPT_ENABLED | FS_OPT_AUTODICT)) { + + // great lets pass the dictionary through the forkserver FD + u32 len = __afl_dictionary_len, offset = 0; + s32 ret; + + if (write(FORKSRV_FD + 1, &len, 4) != 4) { + + write(2, "Error: could not send dictionary len\n", + strlen("Error: could not send dictionary len\n")); + _exit(1); + + } + + while (len != 0) { + + ret = write(FORKSRV_FD + 1, __afl_dictionary + offset, len); + + if (ret < 1) { + + write(2, "Error: could not send dictionary\n", + strlen("Error: could not send dictionary\n")); + _exit(1); + + } + + len -= ret; + offset += ret; + + } + + } else { + + // uh this forkserver does not understand extended option passing + // or does not want the dictionary + if (!__afl_fuzz_ptr) already_read_first = 1; + + } + + } + + while (1) { + + int status; + + /* Wait for parent by reading from the pipe. Abort if read fails. */ + + if (already_read_first) { + + already_read_first = 0; + + } else { + + if (read(FORKSRV_FD, &was_killed, 4) != 4) _exit(1); + + } + +#ifdef _AFL_DOCUMENT_MUTATIONS + if (__afl_fuzz_ptr) { + + static uint32_t counter = 0; + char fn[32]; + sprintf(fn, "%09u:forkserver", counter); + s32 fd_doc = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0600); + if (fd_doc >= 0) { + + if (write(fd_doc, __afl_fuzz_ptr, *__afl_fuzz_len) != *__afl_fuzz_len) { + + fprintf(stderr, "write of mutation file failed: %s\n", fn); + unlink(fn); + + } + + close(fd_doc); + + } + + counter++; + + } + +#endif + + /* If we stopped the child in persistent mode, but there was a race + condition and afl-fuzz already issued SIGKILL, write off the old + process. 
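+       (The waitpid() below reaps that killed child so no zombie is left
+       behind, and the !child_stopped branch then forks a fresh process.)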
*/ + + if (child_stopped && was_killed) { + + child_stopped = 0; + if (waitpid(child_pid, &status, 0) < 0) _exit(1); + + } + + if (!child_stopped) { + + /* Once woken up, create a clone of our process. */ + + child_pid = fork(); + if (child_pid < 0) _exit(1); + + /* In child process: close fds, resume execution. */ + + if (!child_pid) { + + //(void)nice(-20); + + signal(SIGCHLD, old_sigchld_handler); + + close(FORKSRV_FD); + close(FORKSRV_FD + 1); + return; + + } + + } else { + + /* Special handling for persistent mode: if the child is alive but + currently stopped, simply restart it with SIGCONT. */ + + kill(child_pid, SIGCONT); + child_stopped = 0; + + } + + /* In parent process: write PID to pipe, then wait for child. */ + + if (write(FORKSRV_FD + 1, &child_pid, 4) != 4) _exit(1); + + if (waitpid(child_pid, &status, is_persistent ? WUNTRACED : 0) < 0) + _exit(1); + + /* In persistent mode, the child stops itself with SIGSTOP to indicate + a successful run. In this case, we want to wake it up without forking + again. */ + + if (WIFSTOPPED(status)) child_stopped = 1; + + /* Relay wait status to pipe, then loop back. */ + + if (write(FORKSRV_FD + 1, &status, 4) != 4) _exit(1); + + } + +} + +/* A simplified persistent mode handler, used as explained in + * README.llvm.md. */ + +int __afl_persistent_loop(unsigned int max_cnt) { + + static u8 first_pass = 1; + static u32 cycle_cnt; + + if (first_pass) { + + /* Make sure that every iteration of __AFL_LOOP() starts with a clean slate. + On subsequent calls, the parent will take care of that, but on the first + iteration, it's our job to erase any trace of whatever happened + before the loop. */ + + if (is_persistent) { + + memset(__afl_area_ptr, 0, __afl_map_size); + __afl_area_ptr[0] = 1; + memset(__afl_prev_loc, 0, NGRAM_SIZE_MAX * sizeof(PREV_LOC_T)); + + } + + cycle_cnt = max_cnt; + first_pass = 0; + return 1; + + } + + if (is_persistent) { + + if (--cycle_cnt) { + + raise(SIGSTOP); + + __afl_area_ptr[0] = 1; + memset(__afl_prev_loc, 0, NGRAM_SIZE_MAX * sizeof(PREV_LOC_T)); + + return 1; + + } else { + + /* When exiting __AFL_LOOP(), make sure that the subsequent code that + follows the loop is not traced. We do that by pivoting back to the + dummy output region. */ + + __afl_area_ptr = __afl_area_initial; + + } + + } + + return 0; + +} + +/* This one can be called from user code when deferred forkserver mode + is enabled. */ + +void __afl_manual_init(void) { + + static u8 init_done; + + if (getenv("AFL_DISABLE_LLVM_INSTRUMENTATION")) { + + init_done = 1; + is_persistent = 0; + __afl_sharedmem_fuzzing = 0; + if (__afl_area_ptr == NULL) __afl_area_ptr = __afl_area_initial; + + if (getenv("AFL_DEBUG")) + fprintf(stderr, + "DEBUG: disabled instrumentation because of " + "AFL_DISABLE_LLVM_INSTRUMENTATION\n"); + + } + + if (!init_done) { + + __afl_start_forkserver(); + init_done = 1; + + } + +} + +/* Initialization of the forkserver - latest possible */ + +__attribute__((constructor())) void __afl_auto_init(void) { + + if (getenv("AFL_DISABLE_LLVM_INSTRUMENTATION")) return; + + if (getenv(DEFER_ENV_VAR)) return; + + __afl_manual_init(); + +} + +/* Initialization of the shmem - earliest possible because of LTO fixed mem. 
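+
+   Constructor priorities (lower runs first) tie the pieces together:
+   __afl_auto_first (0) installs a throwaway malloc'd buffer so that very
+   early constructors in the target can already record coverage somewhere,
+   __afl_auto_second (1) resizes or remaps that buffer once __afl_final_loc
+   is known, __afl_auto_early (CTOR_PRIO) attaches the real shared memory,
+   and __afl_auto_init above, having no explicit priority, runs after all of
+   them and starts the forkserver unless DEFER_ENV_VAR asks for deferred
+   start-up.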
*/ + +__attribute__((constructor(CTOR_PRIO))) void __afl_auto_early(void) { + + is_persistent = !!getenv(PERSIST_ENV_VAR); + + if (getenv("AFL_DISABLE_LLVM_INSTRUMENTATION")) return; + + __afl_map_shm(); + +} + +/* preset __afl_area_ptr #2 */ + +__attribute__((constructor(1))) void __afl_auto_second(void) { + + if (getenv("AFL_DISABLE_LLVM_INSTRUMENTATION")) return; + u8 *ptr; + + if (__afl_final_loc) { + + if (__afl_area_ptr && __afl_area_ptr != __afl_area_initial) + free(__afl_area_ptr); + + if (__afl_map_addr) + ptr = (u8 *)mmap((void *)__afl_map_addr, __afl_final_loc, + PROT_READ | PROT_WRITE, + MAP_FIXED_NOREPLACE | MAP_SHARED | MAP_ANONYMOUS, -1, 0); + else + ptr = (u8 *)malloc(__afl_final_loc); + + if (ptr && (ssize_t)ptr != -1) __afl_area_ptr = ptr; + + } + +} + +/* preset __afl_area_ptr #1 - at constructor level 0 global variables have + not been set */ + +__attribute__((constructor(0))) void __afl_auto_first(void) { + + if (getenv("AFL_DISABLE_LLVM_INSTRUMENTATION")) return; + u8 *ptr; + + ptr = (u8 *)malloc(1024000); + + if (ptr && (ssize_t)ptr != -1) __afl_area_ptr = ptr; + +} + +/* The following stuff deals with supporting -fsanitize-coverage=trace-pc-guard. + It remains non-operational in the traditional, plugin-backed LLVM mode. + For more info about 'trace-pc-guard', see README.llvm.md. + + The first function (__sanitizer_cov_trace_pc_guard) is called back on every + edge (as opposed to every basic block). */ + +void __sanitizer_cov_trace_pc_guard(uint32_t *guard) { + + // For stability analysis, if you want to know to which function unstable + // edge IDs belong - uncomment, recompile+install llvm_mode, recompile + // the target. libunwind and libbacktrace are better solutions. + // Set AFL_DEBUG_CHILD_OUTPUT=1 and run afl-fuzz with 2>file to capture + // the backtrace output + /* + uint32_t unstable[] = { ... unstable edge IDs }; + uint32_t idx; + char bt[1024]; + for (idx = 0; i < sizeof(unstable)/sizeof(uint32_t); i++) { + + if (unstable[idx] == __afl_area_ptr[*guard]) { + + int bt_size = backtrace(bt, 256); + if (bt_size > 0) { + + char **bt_syms = backtrace_symbols(bt, bt_size); + if (bt_syms) { + + fprintf(stderr, "DEBUG: edge=%u caller=%s\n", unstable[idx], + bt_syms[0]); + free(bt_syms); + + } + + } + + } + + } + + */ + +#if (LLVM_VERSION_MAJOR < 9) + + __afl_area_ptr[*guard]++; + +#else + + __afl_area_ptr[*guard] = + __afl_area_ptr[*guard] + 1 + (__afl_area_ptr[*guard] == 255 ? 1 : 0); + +#endif + +} + +/* Init callback. Populates instrumentation IDs. Note that we're using + ID of 0 as a special value to indicate non-instrumented bits. That may + still touch the bitmap, but in a fairly harmless way. */ + +void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) { + + u32 inst_ratio = 100; + char *x; + + _is_sancov = 1; + + if (getenv("AFL_DEBUG")) { + + fprintf(stderr, "Running __sanitizer_cov_trace_pc_guard_init: %p-%p\n", + start, stop); + + } + + if (start == stop || *start) return; + + x = getenv("AFL_INST_RATIO"); + if (x) inst_ratio = (u32)atoi(x); + + if (!inst_ratio || inst_ratio > 100) { + + fprintf(stderr, "[-] ERROR: Invalid AFL_INST_RATIO (must be 1-100).\n"); + abort(); + + } + + /* Make sure that the first element in the range is always set - we use that + to avoid duplicate calls (which can happen as an artifact of the underlying + implementation in LLVM). 
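+     With the default AFL_INST_RATIO of 100 and, say, four guards the range
+     therefore becomes { R(MAP_SIZE - 1) + 1, N + 1, N + 2, N + 3 }, where N is
+     the value of __afl_final_loc on entry; guards skipped by a lower ratio
+     stay 0, the "not instrumented" marker mentioned above.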
*/ + + *(start++) = R(MAP_SIZE - 1) + 1; + + while (start < stop) { + + if (R(100) < inst_ratio) + *start = ++__afl_final_loc; + else + *start = 0; + + start++; + + } + +} + +///// CmpLog instrumentation + +void __cmplog_ins_hook1(uint8_t arg1, uint8_t arg2) { + + if (unlikely(!__afl_cmp_map)) return; + + uintptr_t k = (uintptr_t)__builtin_return_address(0); + k = (k >> 4) ^ (k << 8); + k &= CMP_MAP_W - 1; + + __afl_cmp_map->headers[k].type = CMP_TYPE_INS; + + u32 hits = __afl_cmp_map->headers[k].hits; + __afl_cmp_map->headers[k].hits = hits + 1; + // if (!__afl_cmp_map->headers[k].cnt) + // __afl_cmp_map->headers[k].cnt = __afl_cmp_counter++; + + __afl_cmp_map->headers[k].shape = 0; + + hits &= CMP_MAP_H - 1; + __afl_cmp_map->log[k][hits].v0 = arg1; + __afl_cmp_map->log[k][hits].v1 = arg2; + +} + +void __cmplog_ins_hook2(uint16_t arg1, uint16_t arg2) { + + if (unlikely(!__afl_cmp_map)) return; + + uintptr_t k = (uintptr_t)__builtin_return_address(0); + k = (k >> 4) ^ (k << 8); + k &= CMP_MAP_W - 1; + + __afl_cmp_map->headers[k].type = CMP_TYPE_INS; + + u32 hits = __afl_cmp_map->headers[k].hits; + __afl_cmp_map->headers[k].hits = hits + 1; + + __afl_cmp_map->headers[k].shape = 1; + + hits &= CMP_MAP_H - 1; + __afl_cmp_map->log[k][hits].v0 = arg1; + __afl_cmp_map->log[k][hits].v1 = arg2; + +} + +void __cmplog_ins_hook4(uint32_t arg1, uint32_t arg2) { + + if (unlikely(!__afl_cmp_map)) return; + + uintptr_t k = (uintptr_t)__builtin_return_address(0); + k = (k >> 4) ^ (k << 8); + k &= CMP_MAP_W - 1; + + __afl_cmp_map->headers[k].type = CMP_TYPE_INS; + + u32 hits = __afl_cmp_map->headers[k].hits; + __afl_cmp_map->headers[k].hits = hits + 1; + + __afl_cmp_map->headers[k].shape = 3; + + hits &= CMP_MAP_H - 1; + __afl_cmp_map->log[k][hits].v0 = arg1; + __afl_cmp_map->log[k][hits].v1 = arg2; + +} + +void __cmplog_ins_hook8(uint64_t arg1, uint64_t arg2) { + + if (unlikely(!__afl_cmp_map)) return; + + uintptr_t k = (uintptr_t)__builtin_return_address(0); + k = (k >> 4) ^ (k << 8); + k &= CMP_MAP_W - 1; + + __afl_cmp_map->headers[k].type = CMP_TYPE_INS; + + u32 hits = __afl_cmp_map->headers[k].hits; + __afl_cmp_map->headers[k].hits = hits + 1; + + __afl_cmp_map->headers[k].shape = 7; + + hits &= CMP_MAP_H - 1; + __afl_cmp_map->log[k][hits].v0 = arg1; + __afl_cmp_map->log[k][hits].v1 = arg2; + +} + +#if defined(__APPLE__) + #pragma weak __sanitizer_cov_trace_const_cmp1 = __cmplog_ins_hook1 + #pragma weak __sanitizer_cov_trace_const_cmp2 = __cmplog_ins_hook2 + #pragma weak __sanitizer_cov_trace_const_cmp4 = __cmplog_ins_hook4 + #pragma weak __sanitizer_cov_trace_const_cmp8 = __cmplog_ins_hook8 + + #pragma weak __sanitizer_cov_trace_cmp1 = __cmplog_ins_hook1 + #pragma weak __sanitizer_cov_trace_cmp2 = __cmplog_ins_hook2 + #pragma weak __sanitizer_cov_trace_cmp4 = __cmplog_ins_hook4 + #pragma weak __sanitizer_cov_trace_cmp8 = __cmplog_ins_hook8 +#else +void __sanitizer_cov_trace_const_cmp1(uint8_t arg1, uint8_t arg2) + __attribute__((alias("__cmplog_ins_hook1"))); +void __sanitizer_cov_trace_const_cmp2(uint16_t arg1, uint16_t arg2) + __attribute__((alias("__cmplog_ins_hook2"))); +void __sanitizer_cov_trace_const_cmp4(uint32_t arg1, uint32_t arg2) + __attribute__((alias("__cmplog_ins_hook4"))); +void __sanitizer_cov_trace_const_cmp8(uint64_t arg1, uint64_t arg2) + __attribute__((alias("__cmplog_ins_hook8"))); + +void __sanitizer_cov_trace_cmp1(uint8_t arg1, uint8_t arg2) + __attribute__((alias("__cmplog_ins_hook1"))); +void __sanitizer_cov_trace_cmp2(uint16_t arg1, uint16_t arg2) + 
__attribute__((alias("__cmplog_ins_hook2"))); +void __sanitizer_cov_trace_cmp4(uint32_t arg1, uint32_t arg2) + __attribute__((alias("__cmplog_ins_hook4"))); +void __sanitizer_cov_trace_cmp8(uint64_t arg1, uint64_t arg2) + __attribute__((alias("__cmplog_ins_hook8"))); +#endif /* defined(__APPLE__) */ + +void __sanitizer_cov_trace_switch(uint64_t val, uint64_t *cases) { + + if (unlikely(!__afl_cmp_map)) return; + + for (uint64_t i = 0; i < cases[0]; i++) { + + uintptr_t k = (uintptr_t)__builtin_return_address(0) + i; + k = (k >> 4) ^ (k << 8); + k &= CMP_MAP_W - 1; + + __afl_cmp_map->headers[k].type = CMP_TYPE_INS; + + u32 hits = __afl_cmp_map->headers[k].hits; + __afl_cmp_map->headers[k].hits = hits + 1; + + __afl_cmp_map->headers[k].shape = 7; + + hits &= CMP_MAP_H - 1; + __afl_cmp_map->log[k][hits].v0 = val; + __afl_cmp_map->log[k][hits].v1 = cases[i + 2]; + + } + +} + +// POSIX shenanigan to see if an area is mapped. +// If it is mapped as X-only, we have a problem, so maybe we should add a check +// to avoid to call it on .text addresses +static int area_is_mapped(void *ptr, size_t len) { + + char *p = ptr; + char *page = (char *)((uintptr_t)p & ~(sysconf(_SC_PAGE_SIZE) - 1)); + + int r = msync(page, (p - page) + len, MS_ASYNC); + if (r < 0) return errno != ENOMEM; + return 1; + +} + +void __cmplog_rtn_hook(u8 *ptr1, u8 *ptr2) { + + if (unlikely(!__afl_cmp_map)) return; + + if (!area_is_mapped(ptr1, 32) || !area_is_mapped(ptr2, 32)) return; + + uintptr_t k = (uintptr_t)__builtin_return_address(0); + k = (k >> 4) ^ (k << 8); + k &= CMP_MAP_W - 1; + + __afl_cmp_map->headers[k].type = CMP_TYPE_RTN; + + u32 hits = __afl_cmp_map->headers[k].hits; + __afl_cmp_map->headers[k].hits = hits + 1; + + __afl_cmp_map->headers[k].shape = 31; + + hits &= CMP_MAP_RTN_H - 1; + __builtin_memcpy(((struct cmpfn_operands *)__afl_cmp_map->log[k])[hits].v0, + ptr1, 32); + __builtin_memcpy(((struct cmpfn_operands *)__afl_cmp_map->log[k])[hits].v1, + ptr2, 32); + +} + diff --git a/instrumentation/afl-gcc-pass.so.cc b/instrumentation/afl-gcc-pass.so.cc new file mode 100644 index 00000000..c5614aca --- /dev/null +++ b/instrumentation/afl-gcc-pass.so.cc @@ -0,0 +1,601 @@ +// +// There are some TODOs in this file: +// - fix instrumentation via external call +// - fix inline instrumentation +// - implement instrument list feature +// - dont instrument blocks that are uninteresting +// - implement neverZero +// + +/* + american fuzzy lop++ - GCC instrumentation pass + --------------------------------------------- + + Written by Austin Seipp <aseipp@pobox.com> with bits from + Emese Revfy <re.emese@gmail.com> + + Fixed by Heiko Eißfeldt 2019-2020 for AFL++ + + GCC integration design is based on the LLVM design, which comes + from Laszlo Szekeres. Some of the boilerplate code below for + afl_pass to adapt to different GCC versions was taken from Emese + Revfy's Size Overflow plugin for GCC, licensed under the GPLv2/v3. + + (NOTE: this plugin code is under GPLv3, in order to comply with the + GCC runtime library exception, which states that you may distribute + "Target Code" from the compiler under a license of your choice, as + long as the "Compilation Process" is "Eligible", and contains no + GPL-incompatible software in GCC "during the process of + transforming high level code to target code". In this case, the + plugin will be used to generate "Target Code" during the + "Compilation Process", and thus it must be GPLv3 to be "eligible".) 
+ + Copyright (C) 2015 Austin Seipp + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + + */ + +#define BUILD_INLINE_INST + +#include "../include/config.h" +#include "../include/debug.h" + +/* clear helper macros AFL types pull in, which intervene with gcc-plugin + * headers from GCC-8 */ +#ifdef likely + #undef likely +#endif +#ifdef unlikely + #undef unlikely +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <list> +#include <string> +#include <fstream> + +#include <gcc-plugin.h> +#include <plugin-version.h> +#include <diagnostic.h> +#include <tree.h> +#include <tree-ssa.h> +#include <tree-pass.h> +#include <tree-ssa-alias.h> +#include <basic-block.h> +#include <gimple-expr.h> +#include <gimple.h> +#include <gimple-iterator.h> +#include <gimple-ssa.h> +#include <version.h> +#include <toplev.h> +#include <intl.h> +#include <context.h> +#include <stringpool.h> +#include <cgraph.h> +#include <cfgloop.h> + +/* -------------------------------------------------------------------------- */ +/* -- AFL instrumentation pass ---------------------------------------------- */ + +static int be_quiet = 0; +static unsigned int inst_ratio = 100; +static bool inst_ext = true; +static std::list<std::string> myInstrumentList; + +static unsigned int ext_call_instrument(function *fun) { + + /* Instrument all the things! 
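+
+     Call-based variant: each selected basic block receives a plain call to
+     the runtime helper declared just below, so the effect is roughly
+     equivalent to inserting
+
+       __afl_trace(<random block id below MAP_SIZE>);
+
+     at the top of the block; the helper itself lives in afl-compiler-rt.o.c
+     and performs the prev_loc XOR and map update there.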
*/ + basic_block bb; + unsigned finst_blocks = 0; + unsigned fcnt_blocks = 0; + + tree fntype = build_function_type_list(void_type_node, /* return */ + uint32_type_node, /* args */ + NULL_TREE); /* done */ + tree fndecl = build_fn_decl("__afl_trace", fntype); + TREE_STATIC(fndecl) = 1; /* Defined elsewhere */ + TREE_PUBLIC(fndecl) = 1; /* Public */ + DECL_EXTERNAL(fndecl) = 1; /* External linkage */ + DECL_ARTIFICIAL(fndecl) = 1; /* Injected by compiler */ + + FOR_EACH_BB_FN(bb, fun) { + + gimple_seq fcall; + gimple_seq seq = NULL; + gimple_stmt_iterator bentry; + ++fcnt_blocks; + + // only instrument if this basic block is the destination of a previous + // basic block that has multiple successors + // this gets rid of ~5-10% of instrumentations that are unnecessary + // result: a little more speed and less map pollution + + int more_than_one = -1; + edge ep; + edge_iterator eip; + + FOR_EACH_EDGE(ep, eip, bb->preds) { + + int count = 0; + if (more_than_one == -1) more_than_one = 0; + + basic_block Pred = ep->src; + edge es; + edge_iterator eis; + FOR_EACH_EDGE(es, eis, Pred->succs) { + + basic_block Succ = es->dest; + if (Succ != NULL) count++; + + } + + if (count > 1) more_than_one = 1; + + } + + if (more_than_one != 1) continue; + + /* Bail on this block if we trip the specified ratio */ + if (R(100) >= inst_ratio) continue; + + /* Make up cur_loc */ + unsigned int rand_loc = R(MAP_SIZE); + tree cur_loc = build_int_cst(uint32_type_node, rand_loc); + + /* Update bitmap via external call */ + /* to quote: + * /+ Trace a basic block with some ID +/ + * void __afl_trace(u32 x); + */ + + fcall = gimple_build_call( + fndecl, 1, + cur_loc); /* generate the function _call_ to above built reference, with + *1* parameter -> the random const for the location */ + gimple_seq_add_stmt(&seq, fcall); /* and insert into a sequence */ + + /* Done - grab the entry to the block and insert sequence */ + bentry = gsi_after_labels(bb); + gsi_insert_seq_before(&bentry, seq, GSI_SAME_STMT); + + ++finst_blocks; + + } + + /* Say something nice. */ + if (!be_quiet) { + + if (!finst_blocks) + WARNF(G_("No instrumentation targets found in " cBRI "%s" cRST), + function_name(fun)); + else if (finst_blocks < fcnt_blocks) + OKF(G_("Instrumented %2u /%2u locations in " cBRI "%s" cRST), + finst_blocks, fcnt_blocks, function_name(fun)); + else + OKF(G_("Instrumented %2u locations in " cBRI "%s" cRST), finst_blocks, + function_name(fun)); + + } + + return 0; + +} + +static unsigned int inline_instrument(function *fun) { + + /* Instrument all the things! 
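+
+     Inline variant: the GIMPLE built below computes, expressed in C terms,
+     roughly
+
+       cur = <random block id below MAP_SIZE>;
+       __afl_area_ptr[__afl_prev_loc ^ cur]++;
+       __afl_prev_loc = cur >> 1;
+
+     i.e. the classic AFL edge shim, minus the neverZero wrap-around guard,
+     which is still marked as a TODO further down.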
*/ + basic_block bb; + unsigned finst_blocks = 0; + unsigned fcnt_blocks = 0; + tree one = build_int_cst(unsigned_char_type_node, 1); + // tree zero = build_int_cst(unsigned_char_type_node, 0); + + /* Set up global type declarations */ + tree map_type = build_pointer_type(unsigned_char_type_node); + tree map_ptr_g = + build_decl(UNKNOWN_LOCATION, VAR_DECL, + get_identifier_with_length("__afl_area_ptr", 14), map_type); + TREE_USED(map_ptr_g) = 1; + TREE_STATIC(map_ptr_g) = 1; /* Defined elsewhere */ + DECL_EXTERNAL(map_ptr_g) = 1; /* External linkage */ + DECL_PRESERVE_P(map_ptr_g) = 1; + DECL_ARTIFICIAL(map_ptr_g) = 1; /* Injected by compiler */ + rest_of_decl_compilation(map_ptr_g, 1, 0); + + tree prev_loc_g = build_decl(UNKNOWN_LOCATION, VAR_DECL, + get_identifier_with_length("__afl_prev_loc", 14), + uint32_type_node); + TREE_USED(prev_loc_g) = 1; + TREE_STATIC(prev_loc_g) = 1; /* Defined elsewhere */ + DECL_EXTERNAL(prev_loc_g) = 1; /* External linkage */ + DECL_PRESERVE_P(prev_loc_g) = 1; + DECL_ARTIFICIAL(prev_loc_g) = 1; /* Injected by compiler */ + set_decl_tls_model(prev_loc_g, TLS_MODEL_REAL); /* TLS attribute */ + rest_of_decl_compilation(prev_loc_g, 1, 0); + + FOR_EACH_BB_FN(bb, fun) { + + gimple_seq seq = NULL; + gimple_stmt_iterator bentry; + ++fcnt_blocks; + + // only instrument if this basic block is the destination of a previous + // basic block that has multiple successors + // this gets rid of ~5-10% of instrumentations that are unnecessary + // result: a little more speed and less map pollution + + int more_than_one = -1; + edge ep; + edge_iterator eip; + FOR_EACH_EDGE(ep, eip, bb->preds) { + + int count = 0; + if (more_than_one == -1) more_than_one = 0; + + basic_block Pred = ep->src; + edge es; + edge_iterator eis; + FOR_EACH_EDGE(es, eis, Pred->succs) { + + basic_block Succ = es->dest; + if (Succ != NULL) count++; + + } + + if (count > 1) more_than_one = 1; + + } + + if (more_than_one != 1) continue; + + /* Bail on this block if we trip the specified ratio */ + if (R(100) >= inst_ratio) continue; + + /* Make up cur_loc */ + + unsigned int rand_loc = R(MAP_SIZE); + tree cur_loc = build_int_cst(uint32_type_node, rand_loc); + + /* Load prev_loc, xor with cur_loc */ + // gimple_assign <var_decl, prev_loc.0_1, prev_loc, NULL, NULL> + tree prev_loc = create_tmp_var_raw(uint32_type_node, "prev_loc"); + gassign *g = gimple_build_assign(prev_loc, VAR_DECL, prev_loc_g); + gimple_seq_add_stmt(&seq, g); // load prev_loc + update_stmt(g); + + // gimple_assign <bit_xor_expr, _2, prev_loc.0_1, 47231, NULL> + tree area_off = create_tmp_var_raw(uint32_type_node, "area_off"); + g = gimple_build_assign(area_off, BIT_XOR_EXPR, prev_loc, cur_loc); + gimple_seq_add_stmt(&seq, g); // area_off = prev_loc ^ cur_loc + update_stmt(g); + + /* Update bitmap */ + + // gimple_assign <addr_expr, p_6, &map[_2], NULL, NULL> + tree map_ptr = create_tmp_var(map_type, "map_ptr"); + tree map_ptr2 = create_tmp_var(map_type, "map_ptr2"); + + g = gimple_build_assign(map_ptr, map_ptr_g); + gimple_seq_add_stmt(&seq, g); // map_ptr = __afl_area_ptr + update_stmt(g); + +#if 1 + #if 0 + tree addr = build2(ADDR_EXPR, map_type, map_ptr, area_off); + g = gimple_build_assign(map_ptr2, MODIFY_EXPR, addr); + gimple_seq_add_stmt(&seq, g); // map_ptr2 = map_ptr + area_off + update_stmt(g); + #else + g = gimple_build_assign(map_ptr2, PLUS_EXPR, map_ptr, area_off); + gimple_seq_add_stmt(&seq, g); // map_ptr2 = map_ptr + area_off + update_stmt(g); + #endif + + // gimple_assign <mem_ref, _3, *p_6, NULL, NULL> + tree tmp1 = 
create_tmp_var_raw(unsigned_char_type_node, "tmp1"); + g = gimple_build_assign(tmp1, MEM_REF, map_ptr2); + gimple_seq_add_stmt(&seq, g); // tmp1 = *map_ptr2 + update_stmt(g); +#else + tree atIndex = build2(PLUS_EXPR, uint32_type_node, map_ptr, area_off); + tree array_address = build1(ADDR_EXPR, map_type, atIndex); + tree array_access = build1(INDIRECT_REF, map_type, array_address); + tree tmp1 = create_tmp_var(unsigned_char_type_node, "tmp1"); + g = gimple_build_assign(tmp1, array_access); + gimple_seq_add_stmt(&seq, g); // tmp1 = *(map_ptr + area_off) + update_stmt(g); +#endif + // gimple_assign <plus_expr, _4, _3, 1, NULL> + tree tmp2 = create_tmp_var_raw(unsigned_char_type_node, "tmp2"); + g = gimple_build_assign(tmp2, PLUS_EXPR, tmp1, one); + gimple_seq_add_stmt(&seq, g); // tmp2 = tmp1 + 1 + update_stmt(g); + + // TODO: neverZero: here we have to check if tmp3 == 0 + // and add 1 if so + + // gimple_assign <ssa_name, *p_6, _4, NULL, NULL> + // tree map_ptr3 = create_tmp_var_raw(map_type, "map_ptr3"); + g = gimple_build_assign(map_ptr2, INDIRECT_REF, tmp2); + gimple_seq_add_stmt(&seq, g); // *map_ptr2 = tmp2 + update_stmt(g); + + /* Set prev_loc to cur_loc >> 1 */ + + // gimple_assign <integer_cst, prev_loc, 23615, NULL, NULL> + tree shifted_loc = build_int_cst(TREE_TYPE(prev_loc_g), rand_loc >> 1); + tree prev_loc2 = create_tmp_var_raw(uint32_type_node, "prev_loc2"); + g = gimple_build_assign(prev_loc2, shifted_loc); + gimple_seq_add_stmt(&seq, g); // __afl_prev_loc = cur_loc >> 1 + update_stmt(g); + g = gimple_build_assign(prev_loc_g, prev_loc2); + gimple_seq_add_stmt(&seq, g); // __afl_prev_loc = cur_loc >> 1 + update_stmt(g); + + /* Done - grab the entry to the block and insert sequence */ + + bentry = gsi_after_labels(bb); + gsi_insert_seq_before(&bentry, seq, GSI_NEW_STMT); + + ++finst_blocks; + + } + + /* Say something nice. */ + if (!be_quiet) { + + if (!finst_blocks) + WARNF(G_("No instrumentation targets found in " cBRI "%s" cRST), + function_name(fun)); + else if (finst_blocks < fcnt_blocks) + OKF(G_("Instrumented %2u /%2u locations in " cBRI "%s" cRST), + finst_blocks, fcnt_blocks, function_name(fun)); + else + OKF(G_("Instrumented %2u locations in " cBRI "%s" cRST), finst_blocks, + function_name(fun)); + + } + + return 0; + +} + +/* -------------------------------------------------------------------------- */ +/* -- Boilerplate and initialization ---------------------------------------- */ + +static const struct pass_data afl_pass_data = { + + .type = GIMPLE_PASS, + .name = "afl-inst", + .optinfo_flags = OPTGROUP_NONE, + + .tv_id = TV_NONE, + .properties_required = 0, + .properties_provided = 0, + .properties_destroyed = 0, + .todo_flags_start = 0, + // NOTE(aseipp): it's very, very important to include + // at least 'TODO_update_ssa' here so that GCC will + // properly update the resulting SSA form, e.g., to + // include new PHI nodes for newly added symbols or + // names. Do not remove this. Do not taunt Happy Fun + // Ball. 
+ .todo_flags_finish = TODO_update_ssa | TODO_verify_il | TODO_cleanup_cfg, + +}; + +namespace { + +class afl_pass : public gimple_opt_pass { + + private: + bool do_ext_call; + + public: + afl_pass(bool ext_call, gcc::context *g) + : gimple_opt_pass(afl_pass_data, g), do_ext_call(ext_call) { + + } + + unsigned int execute(function *fun) override { + + if (!myInstrumentList.empty()) { + + bool instrumentBlock = false; + std::string instFilename; + unsigned int instLine = 0; + + /* EXPR_FILENAME + This macro returns the name of the file in which the entity was declared, + as a char*. For an entity declared implicitly by the compiler (like + __builtin_ memcpy), this will be the string "<internal>". + */ + const char *fname = DECL_SOURCE_FILE(fun->decl); + + if (0 != strncmp("<internal>", fname, 10) && + 0 != strncmp("<built-in>", fname, 10)) { + + instFilename = fname; + instLine = DECL_SOURCE_LINE(fun->decl); + + /* Continue only if we know where we actually are */ + if (!instFilename.empty()) { + + for (std::list<std::string>::iterator it = myInstrumentList.begin(); + it != myInstrumentList.end(); ++it) { + + /* We don't check for filename equality here because + * filenames might actually be full paths. Instead we + * check that the actual filename ends in the filename + * specified in the list. */ + if (instFilename.length() >= it->length()) { + + if (instFilename.compare(instFilename.length() - it->length(), + it->length(), *it) == 0) { + + instrumentBlock = true; + break; + + } + + } + + } + + } + + } + + /* Either we couldn't figure out our location or the location is + * not in the instrument list, so we skip instrumentation. */ + if (!instrumentBlock) { + + if (!be_quiet) { + + if (!instFilename.empty()) + SAYF(cYEL "[!] " cBRI + "Not in instrument list, skipping %s line %u...\n", + instFilename.c_str(), instLine); + else + SAYF(cYEL "[!] " cBRI "No filename information found, skipping it"); + + } + + return 0; + + } + + } + + return do_ext_call ? ext_call_instrument(fun) : inline_instrument(fun); + + } + +}; /* class afl_pass */ + +} // namespace + +static struct opt_pass *make_afl_pass(bool ext_call, gcc::context *ctxt) { + + return new afl_pass(ext_call, ctxt); + +} + +/* -------------------------------------------------------------------------- */ +/* -- Initialization -------------------------------------------------------- */ + +int plugin_is_GPL_compatible = 1; + +static struct plugin_info afl_plugin_info = { + + .version = "20200519", + .help = "AFL++ gcc plugin\n", + +}; + +int plugin_init(struct plugin_name_args * plugin_info, + struct plugin_gcc_version *version) { + + struct register_pass_info afl_pass_info; + struct timeval tv; + struct timezone tz; + u32 rand_seed; + + /* Setup random() so we get Actually Random(TM) outputs from R() */ + gettimeofday(&tv, &tz); + rand_seed = tv.tv_sec ^ tv.tv_usec ^ getpid(); + SR(rand_seed); + + /* Pass information */ + afl_pass_info.pass = make_afl_pass(inst_ext, g); + afl_pass_info.reference_pass_name = "ssa"; + afl_pass_info.ref_pass_instance_number = 1; + afl_pass_info.pos_op = PASS_POS_INSERT_AFTER; + + if (!plugin_default_version_check(version, &gcc_version)) { + + FATAL(G_("Incompatible gcc/plugin versions! 
Expected GCC %d.%d"), + GCCPLUGIN_VERSION_MAJOR, GCCPLUGIN_VERSION_MINOR); + + } + + /* Show a banner */ + if ((isatty(2) && !getenv("AFL_QUIET")) || getenv("AFL_DEBUG") != NULL) { + + SAYF(G_(cCYA "afl-gcc-pass" VERSION cRST + " initially by <aseipp@pobox.com>, maintainer: hexcoder-\n")); + + } else + + be_quiet = 1; + + /* Decide instrumentation ratio */ + char *inst_ratio_str = getenv("AFL_INST_RATIO"); + + if (inst_ratio_str) { + + if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || !inst_ratio || + inst_ratio > 100) + FATAL(G_("Bad value of AFL_INST_RATIO (must be between 1 and 100)")); + else { + + if (!be_quiet) + ACTF(G_("%s instrumentation at ratio of %u%% in %s mode."), + inst_ext ? G_("Call-based") : G_("Inline"), inst_ratio, + getenv("AFL_HARDEN") ? G_("hardened") : G_("non-hardened")); + + } + + } + + char *instInstrumentListFilename = getenv("AFL_GCC_INSTRUMENT_FILE"); + if (!instInstrumentListFilename) + instInstrumentListFilename = getenv("AFL_GCC_WHITELIST"); + if (instInstrumentListFilename) { + + std::string line; + std::ifstream fileStream; + fileStream.open(instInstrumentListFilename); + if (!fileStream) PFATAL("Unable to open AFL_GCC_INSTRUMENT_FILE"); + getline(fileStream, line); + while (fileStream) { + + myInstrumentList.push_back(line); + getline(fileStream, line); + + } + + } else if (!be_quiet && (getenv("AFL_LLVM_WHITELIST") || + + getenv("AFL_LLVM_INSTRUMENT_FILE"))) { + + SAYF(cYEL "[-] " cRST + "AFL_LLVM_INSTRUMENT_FILE environment variable detected - did " + "you mean AFL_GCC_INSTRUMENT_FILE?\n"); + + } + + /* Go go gadget */ + register_callback(plugin_info->base_name, PLUGIN_INFO, NULL, + &afl_plugin_info); + register_callback(plugin_info->base_name, PLUGIN_PASS_MANAGER_SETUP, NULL, + &afl_pass_info); + return 0; + +} + diff --git a/instrumentation/afl-llvm-common.cc b/instrumentation/afl-llvm-common.cc new file mode 100644 index 00000000..189b4ec6 --- /dev/null +++ b/instrumentation/afl-llvm-common.cc @@ -0,0 +1,575 @@ +#define AFL_LLVM_PASS + +#include "config.h" +#include "debug.h" + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/time.h> +#include <fnmatch.h> + +#include <list> +#include <string> +#include <fstream> + +#include <llvm/Support/raw_ostream.h> + +#define IS_EXTERN extern +#include "afl-llvm-common.h" + +using namespace llvm; + +static std::list<std::string> allowListFiles; +static std::list<std::string> allowListFunctions; +static std::list<std::string> denyListFiles; +static std::list<std::string> denyListFunctions; + +char *getBBName(const llvm::BasicBlock *BB) { + + static char *name; + + if (!BB->getName().empty()) { + + name = strdup(BB->getName().str().c_str()); + return name; + + } + + std::string Str; + raw_string_ostream OS(Str); + +#if LLVM_VERSION_MAJOR >= 4 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7) + BB->printAsOperand(OS, false); +#endif + name = strdup(OS.str().c_str()); + return name; + +} + +/* Function that we never instrument or analyze */ +/* Note: this ignore check is also called in isInInstrumentList() */ +bool isIgnoreFunction(const llvm::Function *F) { + + // Starting from "LLVMFuzzer" these are functions used in libfuzzer based + // fuzzing campaign installations, e.g. 
oss-fuzz + + static const char *ignoreList[] = { + + "asan.", + "llvm.", + "sancov.", + "__ubsan_", + "ign.", + "__afl_", + "_fini", + "__libc_csu", + "__asan", + "__msan", + "__cmplog", + "__sancov", + "msan.", + "LLVMFuzzer", + "__decide_deferred", + "maybe_duplicate_stderr", + "discard_output", + "close_stdout", + "dup_and_close_stderr", + "maybe_close_fd_mask", + "ExecuteFilesOnyByOne" + + }; + + for (auto const &ignoreListFunc : ignoreList) { + + if (F->getName().startswith(ignoreListFunc)) { return true; } + + } + + return false; + +} + +void initInstrumentList() { + + char *allowlist = getenv("AFL_LLVM_ALLOWLIST"); + if (!allowlist) allowlist = getenv("AFL_LLVM_INSTRUMENT_FILE"); + if (!allowlist) allowlist = getenv("AFL_LLVM_WHITELIST"); + char *denylist = getenv("AFL_LLVM_DENYLIST"); + if (!denylist) denylist = getenv("AFL_LLVM_BLOCKLIST"); + + if (allowlist && denylist) + FATAL( + "You can only specify either AFL_LLVM_ALLOWLIST or AFL_LLVM_DENYLIST " + "but not both!"); + + if (allowlist) { + + std::string line; + std::ifstream fileStream; + fileStream.open(allowlist); + if (!fileStream) report_fatal_error("Unable to open AFL_LLVM_ALLOWLIST"); + getline(fileStream, line); + + while (fileStream) { + + int is_file = -1; + std::size_t npos; + std::string original_line = line; + + line.erase(std::remove_if(line.begin(), line.end(), ::isspace), + line.end()); + + // remove # and following + if ((npos = line.find("#")) != std::string::npos) + line = line.substr(0, npos); + + if (line.compare(0, 4, "fun:") == 0) { + + is_file = 0; + line = line.substr(4); + + } else if (line.compare(0, 9, "function:") == 0) { + + is_file = 0; + line = line.substr(9); + + } else if (line.compare(0, 4, "src:") == 0) { + + is_file = 1; + line = line.substr(4); + + } else if (line.compare(0, 7, "source:") == 0) { + + is_file = 1; + line = line.substr(7); + + } + + if (line.find(":") != std::string::npos) { + + FATAL("invalid line in AFL_LLVM_ALLOWLIST: %s", original_line.c_str()); + + } + + if (line.length() > 0) { + + // if the entry contains / or . 
it must be a file + if (is_file == -1) + if (line.find("/") != std::string::npos || + line.find(".") != std::string::npos) + is_file = 1; + // otherwise it is a function + + if (is_file == 1) + allowListFiles.push_back(line); + else + allowListFunctions.push_back(line); + getline(fileStream, line); + + } + + } + + if (debug) + SAYF(cMGN "[D] " cRST + "loaded allowlist with %zu file and %zu function entries\n", + allowListFiles.size(), allowListFunctions.size()); + + } + + if (denylist) { + + std::string line; + std::ifstream fileStream; + fileStream.open(denylist); + if (!fileStream) report_fatal_error("Unable to open AFL_LLVM_DENYLIST"); + getline(fileStream, line); + + while (fileStream) { + + int is_file = -1; + std::size_t npos; + std::string original_line = line; + + line.erase(std::remove_if(line.begin(), line.end(), ::isspace), + line.end()); + + // remove # and following + if ((npos = line.find("#")) != std::string::npos) + line = line.substr(0, npos); + + if (line.compare(0, 4, "fun:") == 0) { + + is_file = 0; + line = line.substr(4); + + } else if (line.compare(0, 9, "function:") == 0) { + + is_file = 0; + line = line.substr(9); + + } else if (line.compare(0, 4, "src:") == 0) { + + is_file = 1; + line = line.substr(4); + + } else if (line.compare(0, 7, "source:") == 0) { + + is_file = 1; + line = line.substr(7); + + } + + if (line.find(":") != std::string::npos) { + + FATAL("invalid line in AFL_LLVM_DENYLIST: %s", original_line.c_str()); + + } + + if (line.length() > 0) { + + // if the entry contains / or . it must be a file + if (is_file == -1) + if (line.find("/") != std::string::npos || + line.find(".") != std::string::npos) + is_file = 1; + // otherwise it is a function + + if (is_file == 1) + denyListFiles.push_back(line); + else + denyListFunctions.push_back(line); + getline(fileStream, line); + + } + + } + + if (debug) + SAYF(cMGN "[D] " cRST + "loaded denylist with %zu file and %zu function entries\n", + denyListFiles.size(), denyListFunctions.size()); + + } + +} + +void scanForDangerousFunctions(llvm::Module *M) { + + if (!M) return; + +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9) + + for (GlobalIFunc &IF : M->ifuncs()) { + + StringRef ifunc_name = IF.getName(); + Constant *r = IF.getResolver(); + StringRef r_name = cast<Function>(r->getOperand(0))->getName(); + if (!be_quiet) + fprintf(stderr, + "Info: Found an ifunc with name %s that points to resolver " + "function %s, we will not instrument this, putting it into the " + "block list.\n", + ifunc_name.str().c_str(), r_name.str().c_str()); + denyListFunctions.push_back(r_name.str()); + + } + + GlobalVariable *GV = M->getNamedGlobal("llvm.global_ctors"); + if (GV && !GV->isDeclaration() && !GV->hasLocalLinkage()) { + + ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); + + if (InitList) { + + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { + + if (ConstantStruct *CS = + dyn_cast<ConstantStruct>(InitList->getOperand(i))) { + + if (CS->getNumOperands() >= 2) { + + if (CS->getOperand(1)->isNullValue()) + break; // Found a null terminator, stop here. + + ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(0)); + int Priority = CI ? 
CI->getSExtValue() : 0; + + Constant *FP = CS->getOperand(1); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(FP)) + if (CE->isCast()) FP = CE->getOperand(0); + if (Function *F = dyn_cast<Function>(FP)) { + + if (!F->isDeclaration() && + strncmp(F->getName().str().c_str(), "__afl", 5) != 0) { + + if (!be_quiet) + fprintf(stderr, + "Info: Found constructor function %s with prio " + "%u, we will not instrument this, putting it into a " + "block list.\n", + F->getName().str().c_str(), Priority); + denyListFunctions.push_back(F->getName().str()); + + } + + } + + } + + } + + } + + } + + } + +#endif + +} + +static std::string getSourceName(llvm::Function *F) { + + // let's try to get the filename for the function + auto bb = &F->getEntryBlock(); + BasicBlock::iterator IP = bb->getFirstInsertionPt(); + IRBuilder<> IRB(&(*IP)); + DebugLoc Loc = IP->getDebugLoc(); + +#if LLVM_VERSION_MAJOR >= 4 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7) + if (Loc) { + + StringRef instFilename; + DILocation *cDILoc = dyn_cast<DILocation>(Loc.getAsMDNode()); + + if (cDILoc) { instFilename = cDILoc->getFilename(); } + + if (instFilename.str().empty()) { + + /* If the original location is empty, try using the inlined location + */ + DILocation *oDILoc = cDILoc->getInlinedAt(); + if (oDILoc) { instFilename = oDILoc->getFilename(); } + + } + + return instFilename.str(); + + } + +#else + if (!Loc.isUnknown()) { + + DILocation cDILoc(Loc.getAsMDNode(F->getContext())); + + StringRef instFilename = cDILoc.getFilename(); + + /* Continue only if we know where we actually are */ + return instFilename.str(); + + } + +#endif + + return std::string(""); + +} + +bool isInInstrumentList(llvm::Function *F) { + + bool return_default = true; + + // is this a function with code? If it is external we don't instrument it + // anyway and it can't be in the instrument file list. Or if it is it is + // ignored. + if (!F->size() || isIgnoreFunction(F)) return false; + + if (!denyListFiles.empty() || !denyListFunctions.empty()) { + + if (!denyListFunctions.empty()) { + + std::string instFunction = F->getName().str(); + + for (std::list<std::string>::iterator it = denyListFunctions.begin(); + it != denyListFunctions.end(); ++it) { + + /* We don't check for filename equality here because + * filenames might actually be full paths. Instead we + * check that the actual filename ends in the filename + * specified in the list. We also allow UNIX-style pattern + * matching */ + + if (instFunction.length() >= it->length()) { + + if (fnmatch(("*" + *it).c_str(), instFunction.c_str(), 0) == 0) { + + if (debug) + SAYF(cMGN "[D] " cRST + "Function %s is in the deny function list, " + "not instrumenting ... \n", + instFunction.c_str()); + return false; + + } + + } + + } + + } + + if (!denyListFiles.empty()) { + + std::string source_file = getSourceName(F); + + if (!source_file.empty()) { + + for (std::list<std::string>::iterator it = denyListFiles.begin(); + it != denyListFiles.end(); ++it) { + + /* We don't check for filename equality here because + * filenames might actually be full paths. Instead we + * check that the actual filename ends in the filename + * specified in the list. We also allow UNIX-style pattern + * matching */ + + if (source_file.length() >= it->length()) { + + if (fnmatch(("*" + *it).c_str(), source_file.c_str(), 0) == 0) { + + return false; + + } + + } + + } + + } else { + + // we could not find out the location. 
in this case we say it is not + // in the instrument file list + if (!be_quiet) + WARNF( + "No debug information found for function %s, will be " + "instrumented (recompile with -g -O[1-3]).", + F->getName().str().c_str()); + + } + + } + + } + + // if we do not have a instrument file list return true + if (!allowListFiles.empty() || !allowListFunctions.empty()) { + + return_default = false; + + if (!allowListFunctions.empty()) { + + std::string instFunction = F->getName().str(); + + for (std::list<std::string>::iterator it = allowListFunctions.begin(); + it != allowListFunctions.end(); ++it) { + + /* We don't check for filename equality here because + * filenames might actually be full paths. Instead we + * check that the actual filename ends in the filename + * specified in the list. We also allow UNIX-style pattern + * matching */ + + if (instFunction.length() >= it->length()) { + + if (fnmatch(("*" + *it).c_str(), instFunction.c_str(), 0) == 0) { + + if (debug) + SAYF(cMGN "[D] " cRST + "Function %s is in the allow function list, " + "instrumenting ... \n", + instFunction.c_str()); + return true; + + } + + } + + } + + } + + if (!allowListFiles.empty()) { + + std::string source_file = getSourceName(F); + + if (!source_file.empty()) { + + for (std::list<std::string>::iterator it = allowListFiles.begin(); + it != allowListFiles.end(); ++it) { + + /* We don't check for filename equality here because + * filenames might actually be full paths. Instead we + * check that the actual filename ends in the filename + * specified in the list. We also allow UNIX-style pattern + * matching */ + + if (source_file.length() >= it->length()) { + + if (fnmatch(("*" + *it).c_str(), source_file.c_str(), 0) == 0) { + + if (debug) + SAYF(cMGN "[D] " cRST + "Function %s is in the allowlist (%s), " + "instrumenting ... \n", + F->getName().str().c_str(), source_file.c_str()); + return true; + + } + + } + + } + + } else { + + // we could not find out the location. In this case we say it is not + // in the instrument file list + if (!be_quiet) + WARNF( + "No debug information found for function %s, will not be " + "instrumented (recompile with -g -O[1-3]).", + F->getName().str().c_str()); + return false; + + } + + } + + } + + return return_default; + +} + +// Calculate the number of average collisions that would occur if all +// location IDs would be assigned randomly (like normal afl/afl++). +// This uses the "balls in bins" algorithm. 
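+// In formula form: expected_empty = MAP_SIZE * (1 - 1/MAP_SIZE)^edges and
+// collisions = edges - (MAP_SIZE - expected_empty). Rough worked example,
+// assuming the default MAP_SIZE of 65536: 10000 random edge IDs leave about
+// 56260 bins empty, so roughly 730 edges (about 7%) are expected to collide.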
+unsigned long long int calculateCollisions(uint32_t edges) { + + double bins = MAP_SIZE; + double balls = edges; + double step1 = 1 - (1 / bins); + double step2 = pow(step1, balls); + double step3 = bins * step2; + double step4 = round(step3); + unsigned long long int empty = step4; + unsigned long long int collisions = edges - (MAP_SIZE - empty); + return collisions; + +} + diff --git a/instrumentation/afl-llvm-common.h b/instrumentation/afl-llvm-common.h new file mode 100644 index 00000000..a1561d9c --- /dev/null +++ b/instrumentation/afl-llvm-common.h @@ -0,0 +1,52 @@ +#ifndef __AFLLLVMCOMMON_H +#define __AFLLLVMCOMMON_H + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <list> +#include <string> +#include <fstream> +#include <sys/time.h> + +#include "llvm/Config/llvm-config.h" +#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 5 +typedef long double max_align_t; +#endif + +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" + +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) + #include "llvm/IR/DebugInfo.h" + #include "llvm/IR/CFG.h" +#else + #include "llvm/DebugInfo.h" + #include "llvm/Support/CFG.h" +#endif + +char * getBBName(const llvm::BasicBlock *BB); +bool isIgnoreFunction(const llvm::Function *F); +void initInstrumentList(); +bool isInInstrumentList(llvm::Function *F); +unsigned long long int calculateCollisions(uint32_t edges); +void scanForDangerousFunctions(llvm::Module *M); + +#ifndef IS_EXTERN + #define IS_EXTERN +#endif + +IS_EXTERN int debug; +IS_EXTERN int be_quiet; + +#undef IS_EXTERN + +#endif + diff --git a/instrumentation/afl-llvm-dict2file.so.cc b/instrumentation/afl-llvm-dict2file.so.cc new file mode 100644 index 00000000..2f9f44ca --- /dev/null +++ b/instrumentation/afl-llvm-dict2file.so.cc @@ -0,0 +1,608 @@ +/* + american fuzzy lop++ - LLVM LTO instrumentation pass + ---------------------------------------------------- + + Written by Marc Heuse <mh@mh-sec.de> + + Copyright 2019-2020 AFLplusplus Project. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at: + + http://www.apache.org/licenses/LICENSE-2.0 + + This library is plugged into LLVM when invoking clang through afl-clang-lto. 
+ + */ + +#define AFL_LLVM_PASS + +#include "config.h" +#include "debug.h" + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <ctype.h> + +#include <list> +#include <string> +#include <fstream> +#include <set> + +#include "llvm/Config/llvm-config.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemorySSAUpdater.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Pass.h" +#include "llvm/IR/Constants.h" + +#include "afl-llvm-common.h" + +#ifndef O_DSYNC + #define O_DSYNC O_SYNC +#endif + +using namespace llvm; + +namespace { + +class AFLdict2filePass : public ModulePass { + + public: + static char ID; + + AFLdict2filePass() : ModulePass(ID) { + + if (getenv("AFL_DEBUG")) debug = 1; + + } + + bool runOnModule(Module &M) override; + +}; + +} // namespace + +void dict2file(int fd, u8 *mem, u32 len) { + + u32 i, j, binary = 0; + char line[MAX_AUTO_EXTRA * 8], tmp[8]; + + strcpy(line, "\""); + j = 1; + for (i = 0; i < len; i++) { + + if (isprint(mem[i])) { + + line[j++] = mem[i]; + + } else { + + if (i + 1 != len || mem[i] != 0 || binary || len == 4 || len == 8) { + + line[j] = 0; + sprintf(tmp, "\\x%02x", (u8)mem[i]); + strcat(line, tmp); + j = strlen(line); + + } + + binary = 1; + + } + + } + + line[j] = 0; + strcat(line, "\"\n"); + if (write(fd, line, strlen(line)) <= 0) + PFATAL("Could not write to dictionary file"); + fsync(fd); + + if (!be_quiet) fprintf(stderr, "Found dictionary token: %s", line); + +} + +bool AFLdict2filePass::runOnModule(Module &M) { + + DenseMap<Value *, std::string *> valueMap; + char * ptr; + int fd, found = 0; + + /* Show a banner */ + setvbuf(stdout, NULL, _IONBF, 0); + + if ((isatty(2) && !getenv("AFL_QUIET")) || debug) { + + SAYF(cCYA "afl-llvm-dict2file" VERSION cRST + " by Marc \"vanHauser\" Heuse <mh@mh-sec.de>\n"); + + } else + + be_quiet = 1; + + scanForDangerousFunctions(&M); + + ptr = getenv("AFL_LLVM_DICT2FILE"); + + if (!ptr || *ptr != '/') + FATAL("AFL_LLVM_DICT2FILE is not set to an absolute path: %s", ptr); + + if ((fd = open(ptr, O_WRONLY | O_APPEND | O_CREAT | O_DSYNC, 0644)) < 0) + PFATAL("Could not open/create %s.", ptr); + + /* Instrument all the things! */ + + for (auto &F : M) { + + if (isIgnoreFunction(&F)) continue; + + /* Some implementation notes. + * + * We try to handle 3 cases: + * - memcmp("foo", arg, 3) <- literal string + * - static char globalvar[] = "foo"; + * memcmp(globalvar, arg, 3) <- global variable + * - char localvar[] = "foo"; + * memcmp(locallvar, arg, 3) <- local variable + * + * The local variable case is the hardest. We can only detect that + * case if there is no reassignment or change in the variable. + * And it might not work across llvm version. + * What we do is hooking the initializer function for local variables + * (llvm.memcpy.p0i8.p0i8.i64) and note the string and the assigned + * variable. And if that variable is then used in a compare function + * we use that noted string. 
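As an aside on the output format before the notes continue: dict2file() above emits one AFL-style dictionary line per token, printable bytes verbatim and everything else \xNN-escaped inside double quotes. A simplified sketch of that formatting (it skips the trailing-NUL special case, and unlike the pass it also escapes '"' and '\\'):

#include <ctype.h>
#include <stdint.h>
#include <stdio.h>
#include <string>

// Simplified sketch of the dictionary line format written above.
static std::string dict_line(const uint8_t *mem, uint32_t len) {

  std::string line = "\"";

  for (uint32_t i = 0; i < len; i++) {

    if (isprint(mem[i]) && mem[i] != '"' && mem[i] != '\\') {

      line += (char)mem[i];

    } else {

      char tmp[8];
      snprintf(tmp, sizeof(tmp), "\\x%02x", mem[i]);
      line += tmp;

    }

  }

  return line + "\"\n";

}

int main() {

  uint8_t token[] = {'P', 'N', 'G', 0x0d, 0x0a, 0x1a, 0x0a};
  fputs(dict_line(token, sizeof(token)).c_str(), stdout);  // "PNG\x0d\x0a\x1a\x0a"
  return 0;

}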
+ * This seems not to work for tokens that have a size <= 4 :-( + * + * - if the compared length is smaller than the string length we + * save the full string. This is likely better for fuzzing but + * might be wrong in a few cases depending on optimizers + * + * - not using StringRef because there is a bug in the llvm 11 + * checkout I am using which sometimes points to wrong strings + * + * Over and out. Took me a full day. damn. mh/vh + */ + + for (auto &BB : F) { + + for (auto &IN : BB) { + + CallInst *callInst = nullptr; + CmpInst * cmpInst = nullptr; + + if ((cmpInst = dyn_cast<CmpInst>(&IN))) { + + Value * op = cmpInst->getOperand(1); + ConstantInt *ilen = dyn_cast<ConstantInt>(op); + + if (ilen) { + + u64 val2 = 0, val = ilen->getZExtValue(); + u32 len = 0; + if (val > 0x10000 && val < 0xffffffff) len = 4; + if (val > 0x100000001 && val < 0xffffffffffffffff) len = 8; + + if (len) { + + auto c = cmpInst->getPredicate(); + + switch (c) { + + case CmpInst::FCMP_OGT: // fall through + case CmpInst::FCMP_OLE: // fall through + case CmpInst::ICMP_SLE: // fall through + case CmpInst::ICMP_SGT: + + // signed comparison and it is a negative constant + if ((len == 4 && (val & 80000000)) || + (len == 8 && (val & 8000000000000000))) { + + if ((val & 0xffff) != 1) val2 = val - 1; + break; + + } + + // fall through + + case CmpInst::FCMP_UGT: // fall through + case CmpInst::FCMP_ULE: // fall through + case CmpInst::ICMP_UGT: // fall through + case CmpInst::ICMP_ULE: + if ((val & 0xffff) != 0xfffe) val2 = val + 1; + break; + + case CmpInst::FCMP_OLT: // fall through + case CmpInst::FCMP_OGE: // fall through + case CmpInst::ICMP_SLT: // fall through + case CmpInst::ICMP_SGE: + + // signed comparison and it is a negative constant + if ((len == 4 && (val & 80000000)) || + (len == 8 && (val & 8000000000000000))) { + + if ((val & 0xffff) != 1) val2 = val - 1; + break; + + } + + // fall through + + case CmpInst::FCMP_ULT: // fall through + case CmpInst::FCMP_UGE: // fall through + case CmpInst::ICMP_ULT: // fall through + case CmpInst::ICMP_UGE: + if ((val & 0xffff) != 1) val2 = val - 1; + break; + + default: + val2 = 0; + + } + + dict2file(fd, (u8 *)&val, len); + found++; + if (val2) { + + dict2file(fd, (u8 *)&val2, len); + found++; + + } + + } + + } + + } + + if ((callInst = dyn_cast<CallInst>(&IN))) { + + bool isStrcmp = true; + bool isMemcmp = true; + bool isStrncmp = true; + bool isStrcasecmp = true; + bool isStrncasecmp = true; + bool isIntMemcpy = true; + bool isStdString = true; + bool addedNull = false; + size_t optLen = 0; + + Function *Callee = callInst->getCalledFunction(); + if (!Callee) continue; + if (callInst->getCallingConv() != llvm::CallingConv::C) continue; + std::string FuncName = Callee->getName().str(); + isStrcmp &= !FuncName.compare("strcmp"); + isMemcmp &= + (!FuncName.compare("memcmp") || !FuncName.compare("bcmp")); + isStrncmp &= !FuncName.compare("strncmp"); + isStrcasecmp &= !FuncName.compare("strcasecmp"); + isStrncasecmp &= !FuncName.compare("strncasecmp"); + isIntMemcpy &= !FuncName.compare("llvm.memcpy.p0i8.p0i8.i64"); + isStdString &= ((FuncName.find("basic_string") != std::string::npos && + FuncName.find("compare") != std::string::npos) || + (FuncName.find("basic_string") != std::string::npos && + FuncName.find("find") != std::string::npos)); + + if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp && + !isStrncasecmp && !isIntMemcpy && !isStdString) + continue; + + /* Verify the strcmp/memcmp/strncmp/strcasecmp/strncasecmp function + * prototype */ + 
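Before the prototype checks that follow, a hedged recap of the integer-compare handling above as a standalone sketch; the helper name is made up, the signed/unsigned predicate cases are collapsed into a single greater/less flag, and the magic-value ranges mirror the logic above:

#include <stdint.h>
#include <string>
#include <vector>

// Illustrative only: derive dictionary tokens from the constant operand of an
// integer compare -- the constant itself plus a +/-1 "boundary" neighbour, so
// the fuzzer can reach both sides of the comparison.
static void tokens_from_cmp(uint64_t val, bool greater_style,
                            std::vector<std::string> &dict) {

  uint32_t len = 0;
  if (val > 0x10000 && val < 0xffffffff) len = 4;              // 32-bit magic value
  if (val > 0x100000001 && val < 0xffffffffffffffff) len = 8;  // 64-bit magic value
  if (!len) return;

  uint64_t val2 = greater_style ? val + 1 : val - 1;

  dict.push_back(std::string((const char *)&val, len));        // host byte order
  dict.push_back(std::string((const char *)&val2, len));

}

int main() {

  std::vector<std::string> dict;

  // e.g. "if (x > 0x11223344)" contributes 0x11223344 and 0x11223345.
  tokens_from_cmp(0x11223344, /*greater_style=*/true, dict);
  return (int)dict.size();                                     // 2 tokens recorded

}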
FunctionType *FT = Callee->getFunctionType(); + + isStrcmp &= + FT->getNumParams() == 2 && FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0) == FT->getParamType(1) && + FT->getParamType(0) == IntegerType::getInt8PtrTy(M.getContext()); + isStrcasecmp &= + FT->getNumParams() == 2 && FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0) == FT->getParamType(1) && + FT->getParamType(0) == IntegerType::getInt8PtrTy(M.getContext()); + isMemcmp &= FT->getNumParams() == 3 && + FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0)->isPointerTy() && + FT->getParamType(1)->isPointerTy() && + FT->getParamType(2)->isIntegerTy(); + isStrncmp &= FT->getNumParams() == 3 && + FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0) == FT->getParamType(1) && + FT->getParamType(0) == + IntegerType::getInt8PtrTy(M.getContext()) && + FT->getParamType(2)->isIntegerTy(); + isStrncasecmp &= FT->getNumParams() == 3 && + FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0) == FT->getParamType(1) && + FT->getParamType(0) == + IntegerType::getInt8PtrTy(M.getContext()) && + FT->getParamType(2)->isIntegerTy(); + isStdString &= FT->getNumParams() >= 2 && + FT->getParamType(0)->isPointerTy() && + FT->getParamType(1)->isPointerTy(); + + if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp && + !isStrncasecmp && !isIntMemcpy && !isStdString) + continue; + + /* is a str{n,}{case,}cmp/memcmp, check if we have + * str{case,}cmp(x, "const") or str{case,}cmp("const", x) + * strn{case,}cmp(x, "const", ..) or strn{case,}cmp("const", x, ..) + * memcmp(x, "const", ..) or memcmp("const", x, ..) */ + Value *Str1P = callInst->getArgOperand(0), + *Str2P = callInst->getArgOperand(1); + std::string Str1, Str2; + StringRef TmpStr; + bool HasStr1 = getConstantStringInfo(Str1P, TmpStr); + if (TmpStr.empty()) { + + HasStr1 = false; + + } else { + + HasStr1 = true; + Str1 = TmpStr.str(); + + } + + bool HasStr2 = getConstantStringInfo(Str2P, TmpStr); + if (TmpStr.empty()) { + + HasStr2 = false; + + } else { + + HasStr2 = true; + Str2 = TmpStr.str(); + + } + + if (debug) + fprintf(stderr, "F:%s %p(%s)->\"%s\"(%s) %p(%s)->\"%s\"(%s)\n", + FuncName.c_str(), Str1P, Str1P->getName().str().c_str(), + Str1.c_str(), HasStr1 == true ? "true" : "false", Str2P, + Str2P->getName().str().c_str(), Str2.c_str(), + HasStr2 == true ? "true" : "false"); + + // we handle the 2nd parameter first because of llvm memcpy + if (!HasStr2) { + + auto *Ptr = dyn_cast<ConstantExpr>(Str2P); + if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) { + + if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) { + + if (Var->hasInitializer()) { + + if (auto *Array = + dyn_cast<ConstantDataArray>(Var->getInitializer())) { + + HasStr2 = true; + Str2 = Array->getAsString().str(); + + } + + } + + } + + } + + } + + // for the internal memcpy routine we only care for the second + // parameter and are not reporting anything. + if (isIntMemcpy == true) { + + if (HasStr2 == true) { + + Value * op2 = callInst->getArgOperand(2); + ConstantInt *ilen = dyn_cast<ConstantInt>(op2); + if (ilen) { + + uint64_t literalLength = Str2.size(); + uint64_t optLength = ilen->getZExtValue(); + if (literalLength + 1 == optLength) { + + Str2.append("\0", 1); // add null byte + addedNull = true; + + } + + } + + valueMap[Str1P] = new std::string(Str2); + + if (debug) + fprintf(stderr, "Saved: %s for %p\n", Str2.c_str(), Str1P); + continue; + + } + + continue; + + } + + // Neither a literal nor a global variable? 
+ // maybe it is a local variable that we saved + if (!HasStr2) { + + std::string *strng = valueMap[Str2P]; + if (strng && !strng->empty()) { + + Str2 = *strng; + HasStr2 = true; + if (debug) + fprintf(stderr, "Filled2: %s for %p\n", strng->c_str(), Str2P); + + } + + } + + if (!HasStr1) { + + auto Ptr = dyn_cast<ConstantExpr>(Str1P); + + if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) { + + if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) { + + if (Var->hasInitializer()) { + + if (auto *Array = + dyn_cast<ConstantDataArray>(Var->getInitializer())) { + + HasStr1 = true; + Str1 = Array->getAsString().str(); + + } + + } + + } + + } + + } + + // Neither a literal nor a global variable? + // maybe it is a local variable that we saved + if (!HasStr1) { + + std::string *strng = valueMap[Str1P]; + if (strng && !strng->empty()) { + + Str1 = *strng; + HasStr1 = true; + if (debug) + fprintf(stderr, "Filled1: %s for %p\n", strng->c_str(), Str1P); + + } + + } + + /* handle cases of one string is const, one string is variable */ + if (!(HasStr1 ^ HasStr2)) continue; + + std::string thestring; + + if (HasStr1) + thestring = Str1; + else + thestring = Str2; + + optLen = thestring.length(); + + if (isMemcmp || isStrncmp || isStrncasecmp) { + + Value * op2 = callInst->getArgOperand(2); + ConstantInt *ilen = dyn_cast<ConstantInt>(op2); + if (ilen) { + + uint64_t literalLength = optLen; + optLen = ilen->getZExtValue(); + if (literalLength + 1 == optLen) { // add null byte + thestring.append("\0", 1); + addedNull = true; + + } + + } + + } + + // add null byte if this is a string compare function and a null + // was not already added + if (!isMemcmp) { + + if (addedNull == false) { + + thestring.append("\0", 1); // add null byte + optLen++; + + } + + // ensure we do not have garbage + size_t offset = thestring.find('\0', 0); + if (offset + 1 < optLen) optLen = offset + 1; + thestring = thestring.substr(0, optLen); + + } + + // we take the longer string, even if the compare was to a + // shorter part. Note that depending on the optimizer of the + // compiler this can be wrong, but it is more likely that this + // is helping the fuzzer + if (optLen != thestring.length()) optLen = thestring.length(); + if (optLen > MAX_AUTO_EXTRA) optLen = MAX_AUTO_EXTRA; + if (optLen < 3) // too short? skip + continue; + + ptr = (char *)thestring.c_str(); + + dict2file(fd, (u8 *)ptr, optLen); + found++; + + } + + } + + } + + } + + close(fd); + + /* Say something nice. 
*/ + + if (!be_quiet) { + + if (!found) + OKF("No entries for a dictionary found."); + else + OKF("Wrote %d entries to the dictionary file.\n", found); + + } + + return true; + +} + +char AFLdict2filePass::ID = 0; + +static void registerAFLdict2filePass(const PassManagerBuilder &, + legacy::PassManagerBase &PM) { + + PM.add(new AFLdict2filePass()); + +} + +static RegisterPass<AFLdict2filePass> X("afl-dict2file", + "afl++ dict2file instrumentation pass", + false, false); + +static RegisterStandardPasses RegisterAFLdict2filePass( + PassManagerBuilder::EP_OptimizerLast, registerAFLdict2filePass); + +static RegisterStandardPasses RegisterAFLdict2filePass0( + PassManagerBuilder::EP_EnabledOnOptLevel0, registerAFLdict2filePass); + diff --git a/instrumentation/afl-llvm-lto-instrumentation.so.cc b/instrumentation/afl-llvm-lto-instrumentation.so.cc new file mode 100644 index 00000000..2f936c29 --- /dev/null +++ b/instrumentation/afl-llvm-lto-instrumentation.so.cc @@ -0,0 +1,1060 @@ +/* + american fuzzy lop++ - LLVM LTO instrumentation pass + ---------------------------------------------------- + + Written by Marc Heuse <mh@mh-sec.de> + + Copyright 2019-2020 AFLplusplus Project. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at: + + http://www.apache.org/licenses/LICENSE-2.0 + + This library is plugged into LLVM when invoking clang through afl-clang-lto. + + */ + +#define AFL_LLVM_PASS + +#include "config.h" +#include "debug.h" + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <sys/time.h> + +#include <list> +#include <string> +#include <fstream> +#include <set> +#include <iostream> + +#include "llvm/Config/llvm-config.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemorySSAUpdater.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Pass.h" +#include "llvm/IR/Constants.h" + +#include "afl-llvm-common.h" + +using namespace llvm; + +namespace { + +class AFLLTOPass : public ModulePass { + + public: + static char ID; + + AFLLTOPass() : ModulePass(ID) { + + char *ptr; + + if (getenv("AFL_DEBUG")) debug = 1; + if ((ptr = getenv("AFL_LLVM_LTO_STARTID")) != NULL) + if ((afl_global_id = atoi(ptr)) < 0 || afl_global_id >= MAP_SIZE) + FATAL("AFL_LLVM_LTO_STARTID value of \"%s\" is not between 0 and %d\n", + ptr, MAP_SIZE - 1); + + skip_nozero = getenv("AFL_LLVM_SKIP_NEVERZERO"); + + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + + ModulePass::getAnalysisUsage(AU); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<LoopInfoWrapperPass>(); + + } + + bool runOnModule(Module &M) override; + + protected: + int afl_global_id = 1, autodictionary = 1; + uint32_t function_minimum_size = 1; + uint32_t inst_blocks = 0, inst_funcs = 0, total_instr = 0; + uint64_t map_addr = 0x10000; + char * skip_nozero = NULL; + +}; + +} // namespace + +bool AFLLTOPass::runOnModule(Module &M) { + + LLVMContext & C = M.getContext(); + std::vector<std::string> 
dictionary; + std::vector<CallInst *> calls; + DenseMap<Value *, std::string *> valueMap; + std::vector<BasicBlock *> BlockList; + char * ptr; + FILE * documentFile = NULL; + size_t found = 0; + + srand((unsigned int)time(NULL)); + + unsigned long long int moduleID = + (((unsigned long long int)(rand() & 0xffffffff)) << 32) | getpid(); + + IntegerType *Int8Ty = IntegerType::getInt8Ty(C); + IntegerType *Int32Ty = IntegerType::getInt32Ty(C); + IntegerType *Int64Ty = IntegerType::getInt64Ty(C); + + /* Show a banner */ + setvbuf(stdout, NULL, _IONBF, 0); + + if ((isatty(2) && !getenv("AFL_QUIET")) || debug) { + + SAYF(cCYA "afl-llvm-lto" VERSION cRST + " by Marc \"vanHauser\" Heuse <mh@mh-sec.de>\n"); + + } else + + be_quiet = 1; + + if ((ptr = getenv("AFL_LLVM_DOCUMENT_IDS")) != NULL) { + + if ((documentFile = fopen(ptr, "a")) == NULL) + WARNF("Cannot access document file %s", ptr); + + } + + // we make this the default as the fixed map has problems with + // defered forkserver, early constructors, ifuncs and maybe more + /*if (getenv("AFL_LLVM_MAP_DYNAMIC"))*/ + map_addr = 0; + + if ((ptr = getenv("AFL_LLVM_MAP_ADDR"))) { + + uint64_t val; + if (!*ptr || !strcmp(ptr, "0") || !strcmp(ptr, "0x0")) { + + map_addr = 0; + + } else if (getenv("AFL_LLVM_MAP_DYNAMIC")) { + + FATAL( + "AFL_LLVM_MAP_ADDR and AFL_LLVM_MAP_DYNAMIC cannot be used together"); + + } else if (strncmp(ptr, "0x", 2) != 0) { + + map_addr = 0x10000; // the default + + } else { + + val = strtoull(ptr, NULL, 16); + if (val < 0x100 || val > 0xffffffff00000000) { + + FATAL( + "AFL_LLVM_MAP_ADDR must be a value between 0x100 and " + "0xffffffff00000000"); + + } + + map_addr = val; + + } + + } + + if (debug) { fprintf(stderr, "map address is 0x%lx\n", map_addr); } + + /* Get/set the globals for the SHM region. */ + + GlobalVariable *AFLMapPtr = NULL; + Value * MapPtrFixed = NULL; + + if (!map_addr) { + + AFLMapPtr = + new GlobalVariable(M, PointerType::get(Int8Ty, 0), false, + GlobalValue::ExternalLinkage, 0, "__afl_area_ptr"); + + } else { + + ConstantInt *MapAddr = ConstantInt::get(Int64Ty, map_addr); + MapPtrFixed = + ConstantExpr::getIntToPtr(MapAddr, PointerType::getUnqual(Int8Ty)); + + } + + ConstantInt *Zero = ConstantInt::get(Int8Ty, 0); + ConstantInt *One = ConstantInt::get(Int8Ty, 1); + + // This dumps all inialized global strings - might be useful in the future + /* + for (auto G=M.getGlobalList().begin(); G!=M.getGlobalList().end(); G++) { + + GlobalVariable &GV=*G; + if (!GV.getName().str().empty()) { + + fprintf(stderr, "Global Variable: %s", GV.getName().str().c_str()); + if (GV.hasInitializer()) + if (auto *Val = dyn_cast<ConstantDataArray>(GV.getInitializer())) + fprintf(stderr, " Value: \"%s\"", Val->getAsString().str().c_str()); + fprintf(stderr, "\n"); + + } + + } + + */ + + scanForDangerousFunctions(&M); + + /* Instrument all the things! 
*/ + + int inst_blocks = 0; + + for (auto &F : M) { + + /*For debugging + AttributeSet X = F.getAttributes().getFnAttributes(); + fprintf(stderr, "DEBUG: Module %s Function %s attributes %u\n", + M.getName().str().c_str(), F.getName().str().c_str(), + X.getNumAttributes()); + */ + + if (F.size() < function_minimum_size) continue; + if (isIgnoreFunction(&F)) continue; + + // the instrument file list check + AttributeList Attrs = F.getAttributes(); + if (Attrs.hasAttribute(-1, StringRef("skipinstrument"))) { + + if (debug) + fprintf(stderr, + "DEBUG: Function %s is not in a source file that was specified " + "in the instrument file list\n", + F.getName().str().c_str()); + continue; + + } + + std::vector<BasicBlock *> InsBlocks; + + if (autodictionary) { + + /* Some implementation notes. + * + * We try to handle 3 cases: + * - memcmp("foo", arg, 3) <- literal string + * - static char globalvar[] = "foo"; + * memcmp(globalvar, arg, 3) <- global variable + * - char localvar[] = "foo"; + * memcmp(locallvar, arg, 3) <- local variable + * + * The local variable case is the hardest. We can only detect that + * case if there is no reassignment or change in the variable. + * And it might not work across llvm version. + * What we do is hooking the initializer function for local variables + * (llvm.memcpy.p0i8.p0i8.i64) and note the string and the assigned + * variable. And if that variable is then used in a compare function + * we use that noted string. + * This seems not to work for tokens that have a size <= 4 :-( + * + * - if the compared length is smaller than the string length we + * save the full string. This is likely better for fuzzing but + * might be wrong in a few cases depending on optimizers + * + * - not using StringRef because there is a bug in the llvm 11 + * checkout I am using which sometimes points to wrong strings + * + * Over and out. Took me a full day. damn. 
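A hedged, LLVM-free sketch of the local-variable case described in the note above; the map keys stand in for llvm::Value pointers and all names are illustrative:

#include <map>
#include <stddef.h>
#include <stdio.h>
#include <string>

// Illustrative only: remember which constant an initializer copied into a
// stack buffer, so a later compare against that buffer can reuse the literal
// as a dictionary token (the real pass keys its valueMap on llvm::Value *).
static std::map<const void *, std::string> value_map;

// Seen: llvm.memcpy(dst, "foo\0", 4)  ->  note "foo\0" for dst.
static void note_memcpy(const void *dst, const char *literal, size_t n) {

  value_map[dst] = std::string(literal, n);

}

// Seen later: memcmp(dst, input, n)  ->  if dst was noted, emit that literal.
static std::string token_for(const void *dst) {

  auto it = value_map.find(dst);
  return it == value_map.end() ? std::string() : it->second;

}

int main() {

  char local[4];
  note_memcpy(local, "foo\0", 4);                          // the local initializer
  printf("token: %s\n", token_for(local).c_str());         // -> "foo"
  return 0;

}

In the pass itself this lookup is the fallback used when an operand is neither a literal nor a global variable, as shown a bit further down.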
mh/vh + */ + + for (auto &BB : F) { + + for (auto &IN : BB) { + + CallInst *callInst = nullptr; + CmpInst * cmpInst = nullptr; + + if ((cmpInst = dyn_cast<CmpInst>(&IN))) { + + Value * op = cmpInst->getOperand(1); + ConstantInt *ilen = dyn_cast<ConstantInt>(op); + + if (ilen) { + + u64 val2 = 0, val = ilen->getZExtValue(); + u32 len = 0; + if (val > 0x10000 && val < 0xffffffff) len = 4; + if (val > 0x100000001 && val < 0xffffffffffffffff) len = 8; + + if (len) { + + auto c = cmpInst->getPredicate(); + + switch (c) { + + case CmpInst::FCMP_OGT: // fall through + case CmpInst::FCMP_OLE: // fall through + case CmpInst::ICMP_SLE: // fall through + case CmpInst::ICMP_SGT: + + // signed comparison and it is a negative constant + if ((len == 4 && (val & 80000000)) || + (len == 8 && (val & 8000000000000000))) { + + if ((val & 0xffff) != 1) val2 = val - 1; + break; + + } + + // fall through + + case CmpInst::FCMP_UGT: // fall through + case CmpInst::FCMP_ULE: // fall through + case CmpInst::ICMP_UGT: // fall through + case CmpInst::ICMP_ULE: + if ((val & 0xffff) != 0xfffe) val2 = val + 1; + break; + + case CmpInst::FCMP_OLT: // fall through + case CmpInst::FCMP_OGE: // fall through + case CmpInst::ICMP_SLT: // fall through + case CmpInst::ICMP_SGE: + + // signed comparison and it is a negative constant + if ((len == 4 && (val & 80000000)) || + (len == 8 && (val & 8000000000000000))) { + + if ((val & 0xffff) != 1) val2 = val - 1; + break; + + } + + // fall through + + case CmpInst::FCMP_ULT: // fall through + case CmpInst::FCMP_UGE: // fall through + case CmpInst::ICMP_ULT: // fall through + case CmpInst::ICMP_UGE: + if ((val & 0xffff) != 1) val2 = val - 1; + break; + + default: + val2 = 0; + + } + + dictionary.push_back(std::string((char *)&val, len)); + found++; + + if (val2) { + + dictionary.push_back(std::string((char *)&val2, len)); + found++; + + } + + } + + } + + } + + if ((callInst = dyn_cast<CallInst>(&IN))) { + + bool isStrcmp = true; + bool isMemcmp = true; + bool isStrncmp = true; + bool isStrcasecmp = true; + bool isStrncasecmp = true; + bool isIntMemcpy = true; + bool isStdString = true; + bool addedNull = false; + size_t optLen = 0; + + Function *Callee = callInst->getCalledFunction(); + if (!Callee) continue; + if (callInst->getCallingConv() != llvm::CallingConv::C) continue; + std::string FuncName = Callee->getName().str(); + isStrcmp &= !FuncName.compare("strcmp"); + isMemcmp &= + (!FuncName.compare("memcmp") || !FuncName.compare("bcmp")); + isStrncmp &= !FuncName.compare("strncmp"); + isStrcasecmp &= !FuncName.compare("strcasecmp"); + isStrncasecmp &= !FuncName.compare("strncasecmp"); + isIntMemcpy &= !FuncName.compare("llvm.memcpy.p0i8.p0i8.i64"); + isStdString &= + ((FuncName.find("basic_string") != std::string::npos && + FuncName.find("compare") != std::string::npos) || + (FuncName.find("basic_string") != std::string::npos && + FuncName.find("find") != std::string::npos)); + + /* we do something different here, putting this BB and the + successors in a block map */ + if (!FuncName.compare("__afl_persistent_loop")) { + + BlockList.push_back(&BB); + /* + for (succ_iterator SI = succ_begin(&BB), SE = + succ_end(&BB); SI != SE; ++SI) { + + BasicBlock *succ = *SI; + BlockList.push_back(succ); + + } + + */ + + } + + if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp && + !isStrncasecmp && !isIntMemcpy && !isStdString) + continue; + + /* Verify the strcmp/memcmp/strncmp/strcasecmp/strncasecmp function + * prototype */ + FunctionType *FT = Callee->getFunctionType(); + + isStrcmp 
&= FT->getNumParams() == 2 && + FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0) == FT->getParamType(1) && + FT->getParamType(0) == + IntegerType::getInt8PtrTy(M.getContext()); + isStrcasecmp &= FT->getNumParams() == 2 && + FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0) == FT->getParamType(1) && + FT->getParamType(0) == + IntegerType::getInt8PtrTy(M.getContext()); + isMemcmp &= FT->getNumParams() == 3 && + FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0)->isPointerTy() && + FT->getParamType(1)->isPointerTy() && + FT->getParamType(2)->isIntegerTy(); + isStrncmp &= FT->getNumParams() == 3 && + FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0) == FT->getParamType(1) && + FT->getParamType(0) == + IntegerType::getInt8PtrTy(M.getContext()) && + FT->getParamType(2)->isIntegerTy(); + isStrncasecmp &= FT->getNumParams() == 3 && + FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0) == FT->getParamType(1) && + FT->getParamType(0) == + IntegerType::getInt8PtrTy(M.getContext()) && + FT->getParamType(2)->isIntegerTy(); + isStdString &= FT->getNumParams() >= 2 && + FT->getParamType(0)->isPointerTy() && + FT->getParamType(1)->isPointerTy(); + + if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp && + !isStrncasecmp && !isIntMemcpy && !isStdString) + continue; + + /* is a str{n,}{case,}cmp/memcmp, check if we have + * str{case,}cmp(x, "const") or str{case,}cmp("const", x) + * strn{case,}cmp(x, "const", ..) or strn{case,}cmp("const", x, ..) + * memcmp(x, "const", ..) or memcmp("const", x, ..) */ + Value *Str1P = callInst->getArgOperand(0), + *Str2P = callInst->getArgOperand(1); + std::string Str1, Str2; + StringRef TmpStr; + bool HasStr1 = getConstantStringInfo(Str1P, TmpStr); + if (TmpStr.empty()) { + + HasStr1 = false; + + } else { + + HasStr1 = true; + Str1 = TmpStr.str(); + + } + + bool HasStr2 = getConstantStringInfo(Str2P, TmpStr); + if (TmpStr.empty()) { + + HasStr2 = false; + + } else { + + HasStr2 = true; + Str2 = TmpStr.str(); + + } + + if (debug) + fprintf(stderr, "F:%s %p(%s)->\"%s\"(%s) %p(%s)->\"%s\"(%s)\n", + FuncName.c_str(), Str1P, Str1P->getName().str().c_str(), + Str1.c_str(), HasStr1 == true ? "true" : "false", Str2P, + Str2P->getName().str().c_str(), Str2.c_str(), + HasStr2 == true ? "true" : "false"); + + // we handle the 2nd parameter first because of llvm memcpy + if (!HasStr2) { + + auto *Ptr = dyn_cast<ConstantExpr>(Str2P); + if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) { + + if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) { + + if (Var->hasInitializer()) { + + if (auto *Array = dyn_cast<ConstantDataArray>( + Var->getInitializer())) { + + HasStr2 = true; + Str2 = Array->getAsString().str(); + + } + + } + + } + + } + + } + + // for the internal memcpy routine we only care for the second + // parameter and are not reporting anything. + if (isIntMemcpy == true) { + + if (HasStr2 == true) { + + Value * op2 = callInst->getArgOperand(2); + ConstantInt *ilen = dyn_cast<ConstantInt>(op2); + if (ilen) { + + uint64_t literalLength = Str2.size(); + uint64_t optLength = ilen->getZExtValue(); + if (literalLength + 1 == optLength) { + + Str2.append("\0", 1); // add null byte + addedNull = true; + + } + + } + + valueMap[Str1P] = new std::string(Str2); + + if (debug) + fprintf(stderr, "Saved: %s for %p\n", Str2.c_str(), Str1P); + continue; + + } + + continue; + + } + + // Neither a literal nor a global variable? 
+ // maybe it is a local variable that we saved + if (!HasStr2) { + + std::string *strng = valueMap[Str2P]; + if (strng && !strng->empty()) { + + Str2 = *strng; + HasStr2 = true; + if (debug) + fprintf(stderr, "Filled2: %s for %p\n", strng->c_str(), + Str2P); + + } + + } + + if (!HasStr1) { + + auto Ptr = dyn_cast<ConstantExpr>(Str1P); + + if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) { + + if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) { + + if (Var->hasInitializer()) { + + if (auto *Array = dyn_cast<ConstantDataArray>( + Var->getInitializer())) { + + HasStr1 = true; + Str1 = Array->getAsString().str(); + + } + + } + + } + + } + + } + + // Neither a literal nor a global variable? + // maybe it is a local variable that we saved + if (!HasStr1) { + + std::string *strng = valueMap[Str1P]; + if (strng && !strng->empty()) { + + Str1 = *strng; + HasStr1 = true; + if (debug) + fprintf(stderr, "Filled1: %s for %p\n", strng->c_str(), + Str1P); + + } + + } + + /* handle cases of one string is const, one string is variable */ + if (!(HasStr1 ^ HasStr2)) continue; + + std::string thestring; + + if (HasStr1) + thestring = Str1; + else + thestring = Str2; + + optLen = thestring.length(); + + if (isMemcmp || isStrncmp || isStrncasecmp) { + + Value * op2 = callInst->getArgOperand(2); + ConstantInt *ilen = dyn_cast<ConstantInt>(op2); + if (ilen) { + + uint64_t literalLength = optLen; + optLen = ilen->getZExtValue(); + if (literalLength + 1 == optLen) { // add null byte + thestring.append("\0", 1); + addedNull = true; + + } + + } + + } + + // add null byte if this is a string compare function and a null + // was not already added + if (!isMemcmp) { + + if (addedNull == false) { + + thestring.append("\0", 1); // add null byte + optLen++; + + } + + // ensure we do not have garbage + size_t offset = thestring.find('\0', 0); + if (offset + 1 < optLen) optLen = offset + 1; + thestring = thestring.substr(0, optLen); + + } + + if (!be_quiet) { + + std::string outstring; + fprintf(stderr, "%s: length %zu/%zu \"", FuncName.c_str(), optLen, + thestring.length()); + for (uint8_t i = 0; i < thestring.length(); i++) { + + uint8_t c = thestring[i]; + if (c <= 32 || c >= 127) + fprintf(stderr, "\\x%02x", c); + else + fprintf(stderr, "%c", c); + + } + + fprintf(stderr, "\"\n"); + + } + + // we take the longer string, even if the compare was to a + // shorter part. Note that depending on the optimizer of the + // compiler this can be wrong, but it is more likely that this + // is helping the fuzzer + if (optLen != thestring.length()) optLen = thestring.length(); + if (optLen > MAX_AUTO_EXTRA) optLen = MAX_AUTO_EXTRA; + if (optLen < MIN_AUTO_EXTRA) // too short? 
skip + continue; + + dictionary.push_back(thestring.substr(0, optLen)); + + } + + } + + } + + } + + for (auto &BB : F) { + + if (F.size() == 1) { + + InsBlocks.push_back(&BB); + continue; + + } + + uint32_t succ = 0; + for (succ_iterator SI = succ_begin(&BB), SE = succ_end(&BB); SI != SE; + ++SI) + if ((*SI)->size() > 0) succ++; + if (succ < 2) // no need to instrument + continue; + + if (BlockList.size()) { + + int skip = 0; + for (uint32_t k = 0; k < BlockList.size(); k++) { + + if (&BB == BlockList[k]) { + + if (debug) + fprintf(stderr, + "DEBUG: Function %s skipping BB with/after __afl_loop\n", + F.getName().str().c_str()); + skip = 1; + + } + + } + + if (skip) continue; + + } + + InsBlocks.push_back(&BB); + + } + + if (InsBlocks.size() > 0) { + + uint32_t i = InsBlocks.size(); + + do { + + --i; + BasicBlock * newBB = NULL; + BasicBlock * origBB = &(*InsBlocks[i]); + std::vector<BasicBlock *> Successors; + Instruction * TI = origBB->getTerminator(); + uint32_t fs = origBB->getParent()->size(); + uint32_t countto; + + for (succ_iterator SI = succ_begin(origBB), SE = succ_end(origBB); + SI != SE; ++SI) { + + BasicBlock *succ = *SI; + Successors.push_back(succ); + + } + + if (fs == 1) { + + newBB = origBB; + countto = 1; + + } else { + + if (TI == NULL || TI->getNumSuccessors() < 2) continue; + countto = Successors.size(); + + } + + // if (Successors.size() != TI->getNumSuccessors()) + // FATAL("Different successor numbers %lu <-> %u\n", Successors.size(), + // TI->getNumSuccessors()); + + for (uint32_t j = 0; j < countto; j++) { + + if (fs != 1) newBB = llvm::SplitEdge(origBB, Successors[j]); + + if (!newBB) { + + if (!be_quiet) WARNF("Split failed!"); + continue; + + } + + if (documentFile) { + + fprintf(documentFile, "ModuleID=%llu Function=%s edgeID=%u\n", + moduleID, F.getName().str().c_str(), afl_global_id); + + } + + BasicBlock::iterator IP = newBB->getFirstInsertionPt(); + IRBuilder<> IRB(&(*IP)); + + /* Set the ID of the inserted basic block */ + + ConstantInt *CurLoc = ConstantInt::get(Int32Ty, afl_global_id++); + + /* Load SHM pointer */ + + Value *MapPtrIdx; + + if (map_addr) { + + MapPtrIdx = IRB.CreateGEP(MapPtrFixed, CurLoc); + + } else { + + LoadInst *MapPtr = IRB.CreateLoad(AFLMapPtr); + MapPtr->setMetadata(M.getMDKindID("nosanitize"), + MDNode::get(C, None)); + MapPtrIdx = IRB.CreateGEP(MapPtr, CurLoc); + + } + + /* Update bitmap */ + + LoadInst *Counter = IRB.CreateLoad(MapPtrIdx); + Counter->setMetadata(M.getMDKindID("nosanitize"), + MDNode::get(C, None)); + + Value *Incr = IRB.CreateAdd(Counter, One); + + if (skip_nozero == NULL) { + + auto cf = IRB.CreateICmpEQ(Incr, Zero); + auto carry = IRB.CreateZExt(cf, Int8Ty); + Incr = IRB.CreateAdd(Incr, carry); + + } + + IRB.CreateStore(Incr, MapPtrIdx) + ->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + + // done :) + + inst_blocks++; + + } + + } while (i > 0); + + } + + } + + if (documentFile) fclose(documentFile); + documentFile = NULL; + + // save highest location ID to global variable + // do this after each function to fail faster + if (!be_quiet && afl_global_id > MAP_SIZE && + afl_global_id > FS_OPT_MAX_MAPSIZE) { + + uint32_t pow2map = 1, map = afl_global_id; + while ((map = map >> 1)) + pow2map++; + WARNF( + "We have %u blocks to instrument but the map size is only %u. 
Either " + "edit config.h and set MAP_SIZE_POW2 from %u to %u, then recompile " + "afl-fuzz and llvm_mode and then make this target - or set " + "AFL_MAP_SIZE with at least size %u when running afl-fuzz with this " + "target.", + afl_global_id, MAP_SIZE, MAP_SIZE_POW2, pow2map, afl_global_id); + + } + + if (!getenv("AFL_LLVM_LTO_DONTWRITEID") || dictionary.size() || map_addr) { + + // yes we could create our own function, insert it into ctors ... + // but this would be a pain in the butt ... so we use afl-llvm-rt-lto.o + + Function *f = M.getFunction("__afl_auto_init_globals"); + + if (!f) { + + fprintf(stderr, + "Error: init function could not be found (this should not " + "happen)\n"); + exit(-1); + + } + + BasicBlock *bb = &f->getEntryBlock(); + if (!bb) { + + fprintf(stderr, + "Error: init function does not have an EntryBlock (this should " + "not happen)\n"); + exit(-1); + + } + + BasicBlock::iterator IP = bb->getFirstInsertionPt(); + IRBuilder<> IRB(&(*IP)); + + if (map_addr) { + + GlobalVariable *AFLMapAddrFixed = new GlobalVariable( + M, Int64Ty, true, GlobalValue::ExternalLinkage, 0, "__afl_map_addr"); + ConstantInt *MapAddr = ConstantInt::get(Int64Ty, map_addr); + StoreInst * StoreMapAddr = IRB.CreateStore(MapAddr, AFLMapAddrFixed); + StoreMapAddr->setMetadata(M.getMDKindID("nosanitize"), + MDNode::get(C, None)); + + } + + if (getenv("AFL_LLVM_LTO_DONTWRITEID") == NULL) { + + uint32_t write_loc = afl_global_id; + + if (afl_global_id % 8) write_loc = (((afl_global_id + 8) >> 3) << 3); + + GlobalVariable *AFLFinalLoc = new GlobalVariable( + M, Int32Ty, true, GlobalValue::ExternalLinkage, 0, "__afl_final_loc"); + ConstantInt *const_loc = ConstantInt::get(Int32Ty, write_loc); + StoreInst * StoreFinalLoc = IRB.CreateStore(const_loc, AFLFinalLoc); + StoreFinalLoc->setMetadata(M.getMDKindID("nosanitize"), + MDNode::get(C, None)); + + } + + if (dictionary.size()) { + + size_t memlen = 0, count = 0, offset = 0; + char * ptr; + + // sort and unique the dictionary + std::sort(dictionary.begin(), dictionary.end()); + auto last = std::unique(dictionary.begin(), dictionary.end()); + dictionary.erase(last, dictionary.end()); + + for (auto token : dictionary) { + + memlen += token.length(); + count++; + + } + + if (!be_quiet) + printf("AUTODICTIONARY: %lu string%s found\n", count, + count == 1 ? 
"" : "s"); + + if (count) { + + if ((ptr = (char *)malloc(memlen + count)) == NULL) { + + fprintf(stderr, "Error: malloc for %lu bytes failed!\n", + memlen + count); + exit(-1); + + } + + count = 0; + + for (auto token : dictionary) { + + if (offset + token.length() < 0xfffff0 && count < MAX_AUTO_EXTRAS) { + + ptr[offset++] = (uint8_t)token.length(); + memcpy(ptr + offset, token.c_str(), token.length()); + offset += token.length(); + count++; + + } + + } + + GlobalVariable *AFLDictionaryLen = + new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage, + 0, "__afl_dictionary_len"); + ConstantInt *const_len = ConstantInt::get(Int32Ty, offset); + StoreInst *StoreDictLen = IRB.CreateStore(const_len, AFLDictionaryLen); + StoreDictLen->setMetadata(M.getMDKindID("nosanitize"), + MDNode::get(C, None)); + + ArrayType *ArrayTy = ArrayType::get(IntegerType::get(C, 8), offset); + GlobalVariable *AFLInternalDictionary = new GlobalVariable( + M, ArrayTy, true, GlobalValue::ExternalLinkage, + ConstantDataArray::get(C, + *(new ArrayRef<char>((char *)ptr, offset))), + "__afl_internal_dictionary"); + AFLInternalDictionary->setInitializer(ConstantDataArray::get( + C, *(new ArrayRef<char>((char *)ptr, offset)))); + AFLInternalDictionary->setConstant(true); + + GlobalVariable *AFLDictionary = new GlobalVariable( + M, PointerType::get(Int8Ty, 0), false, GlobalValue::ExternalLinkage, + 0, "__afl_dictionary"); + + Value *AFLDictOff = IRB.CreateGEP(AFLInternalDictionary, Zero); + Value *AFLDictPtr = + IRB.CreatePointerCast(AFLDictOff, PointerType::get(Int8Ty, 0)); + StoreInst *StoreDict = IRB.CreateStore(AFLDictPtr, AFLDictionary); + StoreDict->setMetadata(M.getMDKindID("nosanitize"), + MDNode::get(C, None)); + + } + + } + + } + + /* Say something nice. */ + + if (!be_quiet) { + + if (!inst_blocks) + WARNF("No instrumentation targets found."); + else { + + char modeline[100]; + snprintf(modeline, sizeof(modeline), "%s%s%s%s%s", + getenv("AFL_HARDEN") ? "hardened" : "non-hardened", + getenv("AFL_USE_ASAN") ? ", ASAN" : "", + getenv("AFL_USE_MSAN") ? ", MSAN" : "", + getenv("AFL_USE_CFISAN") ? ", CFISAN" : "", + getenv("AFL_USE_UBSAN") ? ", UBSAN" : ""); + OKF("Instrumented %u locations with no collisions (on average %llu " + "collisions would be in afl-gcc/afl-clang-fast) (%s mode).", + inst_blocks, calculateCollisions(inst_blocks), modeline); + + } + + } + + return true; + +} + +char AFLLTOPass::ID = 0; + +static void registerAFLLTOPass(const PassManagerBuilder &, + legacy::PassManagerBase &PM) { + + PM.add(new AFLLTOPass()); + +} + +static RegisterPass<AFLLTOPass> X("afl-lto", "afl++ LTO instrumentation pass", + false, false); + +static RegisterStandardPasses RegisterAFLLTOPass( + PassManagerBuilder::EP_FullLinkTimeOptimizationLast, registerAFLLTOPass); + diff --git a/instrumentation/afl-llvm-lto-instrumentlist.so.cc b/instrumentation/afl-llvm-lto-instrumentlist.so.cc new file mode 100644 index 00000000..a7331444 --- /dev/null +++ b/instrumentation/afl-llvm-lto-instrumentlist.so.cc @@ -0,0 +1,147 @@ +/* + american fuzzy lop++ - LLVM-mode instrumentation pass + --------------------------------------------------- + + Written by Laszlo Szekeres <lszekeres@google.com> and + Michal Zalewski + + LLVM integration design comes from Laszlo Szekeres. C bits copied-and-pasted + from afl-as.c are Michal's fault. + + Copyright 2015, 2016 Google Inc. All rights reserved. + Copyright 2019-2020 AFLplusplus Project. All rights reserved. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at: + + http://www.apache.org/licenses/LICENSE-2.0 + + This library is plugged into LLVM when invoking clang through afl-clang-fast. + It tells the compiler to add code roughly equivalent to the bits discussed + in ../afl-as.h. + + */ + +#define AFL_LLVM_PASS + +#include "config.h" +#include "debug.h" + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <list> +#include <string> +#include <fstream> +#include <sys/time.h> +#include <fnmatch.h> + +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/IR/CFG.h" + +#include "afl-llvm-common.h" + +using namespace llvm; + +namespace { + +class AFLcheckIfInstrument : public ModulePass { + + public: + static char ID; + AFLcheckIfInstrument() : ModulePass(ID) { + + if (getenv("AFL_DEBUG")) debug = 1; + + initInstrumentList(); + + } + + bool runOnModule(Module &M) override; + + // StringRef getPassName() const override { + + // return "American Fuzzy Lop Instrumentation"; + // } + + protected: + std::list<std::string> myInstrumentList; + +}; + +} // namespace + +char AFLcheckIfInstrument::ID = 0; + +bool AFLcheckIfInstrument::runOnModule(Module &M) { + + /* Show a banner */ + + setvbuf(stdout, NULL, _IONBF, 0); + + if ((isatty(2) && !getenv("AFL_QUIET")) || getenv("AFL_DEBUG") != NULL) { + + SAYF(cCYA "afl-llvm-lto-instrumentlist" VERSION cRST + " by Marc \"vanHauser\" Heuse <mh@mh-sec.de>\n"); + + } else if (getenv("AFL_QUIET")) + + be_quiet = 1; + + for (auto &F : M) { + + if (F.size() < 1) continue; + + // fprintf(stderr, "F:%s\n", F.getName().str().c_str()); + + if (isInInstrumentList(&F)) { + + if (debug) + SAYF(cMGN "[D] " cRST "function %s is in the instrument file list\n", + F.getName().str().c_str()); + + } else { + + if (debug) + SAYF(cMGN "[D] " cRST + "function %s is NOT in the instrument file list\n", + F.getName().str().c_str()); + + auto & Ctx = F.getContext(); + AttributeList Attrs = F.getAttributes(); + AttrBuilder NewAttrs; + NewAttrs.addAttribute("skipinstrument"); + F.setAttributes( + Attrs.addAttributes(Ctx, AttributeList::FunctionIndex, NewAttrs)); + + } + + } + + return true; + +} + +static void registerAFLcheckIfInstrumentpass(const PassManagerBuilder &, + legacy::PassManagerBase &PM) { + + PM.add(new AFLcheckIfInstrument()); + +} + +static RegisterStandardPasses RegisterAFLcheckIfInstrumentpass( + PassManagerBuilder::EP_ModuleOptimizerEarly, + registerAFLcheckIfInstrumentpass); + +static RegisterStandardPasses RegisterAFLcheckIfInstrumentpass0( + PassManagerBuilder::EP_EnabledOnOptLevel0, + registerAFLcheckIfInstrumentpass); + diff --git a/instrumentation/afl-llvm-pass.so.cc b/instrumentation/afl-llvm-pass.so.cc new file mode 100644 index 00000000..8c8c987a --- /dev/null +++ b/instrumentation/afl-llvm-pass.so.cc @@ -0,0 +1,654 @@ +/* + american fuzzy lop++ - LLVM-mode instrumentation pass + --------------------------------------------------- + + Written by Laszlo Szekeres <lszekeres@google.com>, + Adrian Herrera <adrian.herrera@anu.edu.au>, + Michal Zalewski + + LLVM integration design comes from Laszlo Szekeres. C bits copied-and-pasted + from afl-as.c are Michal's fault. 
+ + NGRAM previous location coverage comes from Adrian Herrera. + + Copyright 2015, 2016 Google Inc. All rights reserved. + Copyright 2019-2020 AFLplusplus Project. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at: + + http://www.apache.org/licenses/LICENSE-2.0 + + This library is plugged into LLVM when invoking clang through afl-clang-fast. + It tells the compiler to add code roughly equivalent to the bits discussed + in ../afl-as.h. + + */ + +#define AFL_LLVM_PASS + +#include "config.h" +#include "debug.h" +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <list> +#include <string> +#include <fstream> +#include <sys/time.h> + +#include "llvm/Config/llvm-config.h" +#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 5 +typedef long double max_align_t; +#endif + +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" + +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) + #include "llvm/IR/DebugInfo.h" + #include "llvm/IR/CFG.h" +#else + #include "llvm/DebugInfo.h" + #include "llvm/Support/CFG.h" +#endif + +#include "afl-llvm-common.h" +#include "llvm-ngram-coverage.h" + +using namespace llvm; + +namespace { + +class AFLCoverage : public ModulePass { + + public: + static char ID; + AFLCoverage() : ModulePass(ID) { + + initInstrumentList(); + + } + + bool runOnModule(Module &M) override; + + protected: + uint32_t ngram_size = 0; + uint32_t map_size = MAP_SIZE; + uint32_t function_minimum_size = 1; + char * ctx_str = NULL, *skip_nozero = NULL; + +}; + +} // namespace + +char AFLCoverage::ID = 0; + +/* needed up to 3.9.0 */ +#if LLVM_VERSION_MAJOR == 3 && \ + (LLVM_VERSION_MINOR < 9 || \ + (LLVM_VERSION_MINOR == 9 && LLVM_VERSION_PATCH < 1)) +uint64_t PowerOf2Ceil(unsigned in) { + + uint64_t in64 = in - 1; + in64 |= (in64 >> 1); + in64 |= (in64 >> 2); + in64 |= (in64 >> 4); + in64 |= (in64 >> 8); + in64 |= (in64 >> 16); + in64 |= (in64 >> 32); + return in64 + 1; + +} + +#endif + +/* #if LLVM_VERSION_STRING >= "4.0.1" */ +#if LLVM_VERSION_MAJOR > 4 || \ + (LLVM_VERSION_MAJOR == 4 && LLVM_VERSION_PATCH >= 1) + #define AFL_HAVE_VECTOR_INTRINSICS 1 +#endif +bool AFLCoverage::runOnModule(Module &M) { + + LLVMContext &C = M.getContext(); + + IntegerType *Int8Ty = IntegerType::getInt8Ty(C); + IntegerType *Int32Ty = IntegerType::getInt32Ty(C); +#ifdef AFL_HAVE_VECTOR_INTRINSICS + IntegerType *IntLocTy = + IntegerType::getIntNTy(C, sizeof(PREV_LOC_T) * CHAR_BIT); +#endif + struct timeval tv; + struct timezone tz; + u32 rand_seed; + unsigned int cur_loc = 0; + + /* Setup random() so we get Actually Random(TM) outputs from AFL_R() */ + gettimeofday(&tv, &tz); + rand_seed = tv.tv_sec ^ tv.tv_usec ^ getpid(); + AFL_SR(rand_seed); + + /* Show a banner */ + + setvbuf(stdout, NULL, _IONBF, 0); + + if (getenv("AFL_DEBUG")) debug = 1; + + if ((isatty(2) && !getenv("AFL_QUIET")) || getenv("AFL_DEBUG") != NULL) { + + SAYF(cCYA "afl-llvm-pass" VERSION cRST + " by <lszekeres@google.com> and <adrian.herrera@anu.edu.au>\n"); + + } else + + be_quiet = 1; + + /* + char *ptr; + if ((ptr = getenv("AFL_MAP_SIZE")) || (ptr = getenv("AFL_MAPSIZE"))) { + + map_size = atoi(ptr); + if (map_size < 8 || map_size > (1 << 29)) + 
FATAL("illegal AFL_MAP_SIZE %u, must be between 2^3 and 2^30", + map_size); if (map_size % 8) map_size = (((map_size >> 3) + 1) << 3); + + } + + */ + + /* Decide instrumentation ratio */ + + char * inst_ratio_str = getenv("AFL_INST_RATIO"); + unsigned int inst_ratio = 100; + + if (inst_ratio_str) { + + if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || !inst_ratio || + inst_ratio > 100) + FATAL("Bad value of AFL_INST_RATIO (must be between 1 and 100)"); + + } + +#if LLVM_VERSION_MAJOR < 9 + char *neverZero_counters_str = getenv("AFL_LLVM_NOT_ZERO"); +#endif + skip_nozero = getenv("AFL_LLVM_SKIP_NEVERZERO"); + + unsigned PrevLocSize = 0; + + char *ngram_size_str = getenv("AFL_LLVM_NGRAM_SIZE"); + if (!ngram_size_str) ngram_size_str = getenv("AFL_NGRAM_SIZE"); + ctx_str = getenv("AFL_LLVM_CTX"); + +#ifdef AFL_HAVE_VECTOR_INTRINSICS + /* Decide previous location vector size (must be a power of two) */ + VectorType *PrevLocTy = NULL; + + if (ngram_size_str) + if (sscanf(ngram_size_str, "%u", &ngram_size) != 1 || ngram_size < 2 || + ngram_size > NGRAM_SIZE_MAX) + FATAL( + "Bad value of AFL_NGRAM_SIZE (must be between 2 and NGRAM_SIZE_MAX " + "(%u))", + NGRAM_SIZE_MAX); + + if (ngram_size == 1) ngram_size = 0; + if (ngram_size) + PrevLocSize = ngram_size - 1; + else +#else + if (ngram_size_str) + #ifndef LLVM_VERSION_PATCH + FATAL( + "Sorry, NGRAM branch coverage is not supported with llvm version " + "%d.%d.%d!", + LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, 0); + #else + FATAL( + "Sorry, NGRAM branch coverage is not supported with llvm version " + "%d.%d.%d!", + LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, LLVM_VERSION_PATCH); + #endif +#endif + PrevLocSize = 1; + +#ifdef AFL_HAVE_VECTOR_INTRINSICS + int PrevLocVecSize = PowerOf2Ceil(PrevLocSize); + if (ngram_size) + PrevLocTy = VectorType::get(IntLocTy, PrevLocVecSize + #if LLVM_VERSION_MAJOR >= 12 + , + false + #endif + ); +#endif + + /* Get globals for the SHM region and the previous location. Note that + __afl_prev_loc is thread-local. */ + + GlobalVariable *AFLMapPtr = + new GlobalVariable(M, PointerType::get(Int8Ty, 0), false, + GlobalValue::ExternalLinkage, 0, "__afl_area_ptr"); + GlobalVariable *AFLPrevLoc; + GlobalVariable *AFLContext = NULL; + + if (ctx_str) +#ifdef __ANDROID__ + AFLContext = new GlobalVariable( + M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_ctx"); +#else + AFLContext = new GlobalVariable( + M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_ctx", 0, + GlobalVariable::GeneralDynamicTLSModel, 0, false); +#endif + +#ifdef AFL_HAVE_VECTOR_INTRINSICS + if (ngram_size) + #ifdef __ANDROID__ + AFLPrevLoc = new GlobalVariable( + M, PrevLocTy, /* isConstant */ false, GlobalValue::ExternalLinkage, + /* Initializer */ nullptr, "__afl_prev_loc"); + #else + AFLPrevLoc = new GlobalVariable( + M, PrevLocTy, /* isConstant */ false, GlobalValue::ExternalLinkage, + /* Initializer */ nullptr, "__afl_prev_loc", + /* InsertBefore */ nullptr, GlobalVariable::GeneralDynamicTLSModel, + /* AddressSpace */ 0, /* IsExternallyInitialized */ false); + #endif + else +#endif +#ifdef __ANDROID__ + AFLPrevLoc = new GlobalVariable( + M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_loc"); +#else + AFLPrevLoc = new GlobalVariable( + M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_loc", 0, + GlobalVariable::GeneralDynamicTLSModel, 0, false); +#endif + +#ifdef AFL_HAVE_VECTOR_INTRINSICS + /* Create the vector shuffle mask for updating the previous block history. 
+ Note that the first element of the vector will store cur_loc, so just set + it to undef to allow the optimizer to do its thing. */ + + SmallVector<Constant *, 32> PrevLocShuffle = {UndefValue::get(Int32Ty)}; + + for (unsigned I = 0; I < PrevLocSize - 1; ++I) + PrevLocShuffle.push_back(ConstantInt::get(Int32Ty, I)); + + for (int I = PrevLocSize; I < PrevLocVecSize; ++I) + PrevLocShuffle.push_back(ConstantInt::get(Int32Ty, PrevLocSize)); + + Constant *PrevLocShuffleMask = ConstantVector::get(PrevLocShuffle); +#endif + + // other constants we need + ConstantInt *Zero = ConstantInt::get(Int8Ty, 0); + ConstantInt *One = ConstantInt::get(Int8Ty, 1); + + LoadInst *PrevCtx = NULL; // CTX sensitive coverage + + /* Instrument all the things! */ + + int inst_blocks = 0; + scanForDangerousFunctions(&M); + + for (auto &F : M) { + + int has_calls = 0; + if (debug) + fprintf(stderr, "FUNCTION: %s (%zu)\n", F.getName().str().c_str(), + F.size()); + + if (!isInInstrumentList(&F)) continue; + + if (F.size() < function_minimum_size) continue; + + for (auto &BB : F) { + + BasicBlock::iterator IP = BB.getFirstInsertionPt(); + IRBuilder<> IRB(&(*IP)); + + // Context sensitive coverage + if (ctx_str && &BB == &F.getEntryBlock()) { + + // load the context ID of the previous function and write to to a local + // variable on the stack + PrevCtx = IRB.CreateLoad(AFLContext); + PrevCtx->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + + // does the function have calls? and is any of the calls larger than one + // basic block? + for (auto &BB : F) { + + if (has_calls) break; + for (auto &IN : BB) { + + CallInst *callInst = nullptr; + if ((callInst = dyn_cast<CallInst>(&IN))) { + + Function *Callee = callInst->getCalledFunction(); + if (!Callee || Callee->size() < function_minimum_size) + continue; + else { + + has_calls = 1; + break; + + } + + } + + } + + } + + // if yes we store a context ID for this function in the global var + if (has_calls) { + + ConstantInt *NewCtx = ConstantInt::get(Int32Ty, AFL_R(map_size)); + StoreInst * StoreCtx = IRB.CreateStore(NewCtx, AFLContext); + StoreCtx->setMetadata(M.getMDKindID("nosanitize"), + MDNode::get(C, None)); + + } + + } + + if (AFL_R(100) >= inst_ratio) continue; + + /* Make up cur_loc */ + + // cur_loc++; + cur_loc = AFL_R(map_size); + +/* There is a problem with Ubuntu 18.04 and llvm 6.0 (see issue #63). 
+ The inline function successors() is not inlined and also not found at runtime + :-( As I am unable to detect Ubuntu18.04 heree, the next best thing is to + disable this optional optimization for LLVM 6.0.0 and Linux */ +#if !(LLVM_VERSION_MAJOR == 6 && LLVM_VERSION_MINOR == 0) || !defined __linux__ + // only instrument if this basic block is the destination of a previous + // basic block that has multiple successors + // this gets rid of ~5-10% of instrumentations that are unnecessary + // result: a little more speed and less map pollution + int more_than_one = -1; + // fprintf(stderr, "BB %u: ", cur_loc); + for (pred_iterator PI = pred_begin(&BB), E = pred_end(&BB); PI != E; + ++PI) { + + BasicBlock *Pred = *PI; + + int count = 0; + if (more_than_one == -1) more_than_one = 0; + // fprintf(stderr, " %p=>", Pred); + + for (succ_iterator SI = succ_begin(Pred), E = succ_end(Pred); SI != E; + ++SI) { + + BasicBlock *Succ = *SI; + + // if (count > 0) + // fprintf(stderr, "|"); + if (Succ != NULL) count++; + // fprintf(stderr, "%p", Succ); + + } + + if (count > 1) more_than_one = 1; + + } + + // fprintf(stderr, " == %d\n", more_than_one); + if (F.size() > 1 && more_than_one != 1) { + + // in CTX mode we have to restore the original context for the caller - + // she might be calling other functions which need the correct CTX + if (ctx_str && has_calls) { + + Instruction *Inst = BB.getTerminator(); + if (isa<ReturnInst>(Inst) || isa<ResumeInst>(Inst)) { + + IRBuilder<> Post_IRB(Inst); + StoreInst * RestoreCtx = Post_IRB.CreateStore(PrevCtx, AFLContext); + RestoreCtx->setMetadata(M.getMDKindID("nosanitize"), + MDNode::get(C, None)); + + } + + } + + continue; + + } + +#endif + + ConstantInt *CurLoc; + +#ifdef AFL_HAVE_VECTOR_INTRINSICS + if (ngram_size) + CurLoc = ConstantInt::get(IntLocTy, cur_loc); + else +#endif + CurLoc = ConstantInt::get(Int32Ty, cur_loc); + + /* Load prev_loc */ + + LoadInst *PrevLoc = IRB.CreateLoad(AFLPrevLoc); + PrevLoc->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + Value *PrevLocTrans; + +#ifdef AFL_HAVE_VECTOR_INTRINSICS + /* "For efficiency, we propose to hash the tuple as a key into the + hit_count map as (prev_block_trans << 1) ^ curr_block_trans, where + prev_block_trans = (block_trans_1 ^ ... ^ block_trans_(n-1)" */ + + if (ngram_size) + PrevLocTrans = + IRB.CreateZExt(IRB.CreateXorReduce(PrevLoc), IRB.getInt32Ty()); + else +#endif + PrevLocTrans = PrevLoc; + + if (ctx_str) + PrevLocTrans = + IRB.CreateZExt(IRB.CreateXor(PrevLocTrans, PrevCtx), Int32Ty); + else + PrevLocTrans = IRB.CreateZExt(PrevLocTrans, IRB.getInt32Ty()); + + /* Load SHM pointer */ + + LoadInst *MapPtr = IRB.CreateLoad(AFLMapPtr); + MapPtr->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + + Value *MapPtrIdx; +#ifdef AFL_HAVE_VECTOR_INTRINSICS + if (ngram_size) + MapPtrIdx = IRB.CreateGEP( + MapPtr, + IRB.CreateZExt( + IRB.CreateXor(PrevLocTrans, IRB.CreateZExt(CurLoc, Int32Ty)), + Int32Ty)); + else +#endif + MapPtrIdx = IRB.CreateGEP(MapPtr, IRB.CreateXor(PrevLocTrans, CurLoc)); + + /* Update bitmap */ + + LoadInst *Counter = IRB.CreateLoad(MapPtrIdx); + Counter->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + + Value *Incr = IRB.CreateAdd(Counter, One); + +#if LLVM_VERSION_MAJOR < 9 + if (neverZero_counters_str != + NULL) { // with llvm 9 we make this the default as the bug in llvm is + // then fixed +#else + if (!skip_nozero) { + +#endif + /* hexcoder: Realize a counter that skips zero during overflow. 
+ * Once this counter reaches its maximum value, it next increments to 1 + * + * Instead of + * Counter + 1 -> Counter + * we inject now this + * Counter + 1 -> {Counter, OverflowFlag} + * Counter + OverflowFlag -> Counter + */ + + auto cf = IRB.CreateICmpEQ(Incr, Zero); + auto carry = IRB.CreateZExt(cf, Int8Ty); + Incr = IRB.CreateAdd(Incr, carry); + + } + + IRB.CreateStore(Incr, MapPtrIdx) + ->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + + /* Update prev_loc history vector (by placing cur_loc at the head of the + vector and shuffle the other elements back by one) */ + + StoreInst *Store; + +#ifdef AFL_HAVE_VECTOR_INTRINSICS + if (ngram_size) { + + Value *ShuffledPrevLoc = IRB.CreateShuffleVector( + PrevLoc, UndefValue::get(PrevLocTy), PrevLocShuffleMask); + Value *UpdatedPrevLoc = IRB.CreateInsertElement( + ShuffledPrevLoc, IRB.CreateLShr(CurLoc, (uint64_t)1), (uint64_t)0); + + Store = IRB.CreateStore(UpdatedPrevLoc, AFLPrevLoc); + Store->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + + } else + +#endif + { + + Store = IRB.CreateStore(ConstantInt::get(Int32Ty, cur_loc >> 1), + AFLPrevLoc); + + } + + // in CTX mode we have to restore the original context for the caller - + // she might be calling other functions which need the correct CTX. + // Currently this is only needed for the Ubuntu clang-6.0 bug + if (ctx_str && has_calls) { + + Instruction *Inst = BB.getTerminator(); + if (isa<ReturnInst>(Inst) || isa<ResumeInst>(Inst)) { + + IRBuilder<> Post_IRB(Inst); + StoreInst * RestoreCtx = Post_IRB.CreateStore(PrevCtx, AFLContext); + RestoreCtx->setMetadata(M.getMDKindID("nosanitize"), + MDNode::get(C, None)); + + } + + } + + inst_blocks++; + + } + + } + + /* + // This is currently disabled because we not only need to create/insert a + // function (easy), but also add it as a constructor with an ID < 5 + + if (getenv("AFL_LLVM_DONTWRITEID") == NULL) { + + // yes we could create our own function, insert it into ctors ... + // but this would be a pain in the butt ... so we use afl-llvm-rt.o + + Function *f = ... + + if (!f) { + + fprintf(stderr, + "Error: init function could not be created (this should not + happen)\n"); exit(-1); + + } + + ... constructor for f = 4 + + BasicBlock *bb = &f->getEntryBlock(); + if (!bb) { + + fprintf(stderr, + "Error: init function does not have an EntryBlock (this should + not happen)\n"); exit(-1); + + } + + BasicBlock::iterator IP = bb->getFirstInsertionPt(); + IRBuilder<> IRB(&(*IP)); + + if (map_size <= 0x800000) { + + GlobalVariable *AFLFinalLoc = new GlobalVariable( + M, Int32Ty, true, GlobalValue::ExternalLinkage, 0, + "__afl_final_loc"); + ConstantInt *const_loc = ConstantInt::get(Int32Ty, map_size); + StoreInst * StoreFinalLoc = IRB.CreateStore(const_loc, AFLFinalLoc); + StoreFinalLoc->setMetadata(M.getMDKindID("nosanitize"), + MDNode::get(C, None)); + + } + + } + + */ + + /* Say something nice. */ + + if (!be_quiet) { + + if (!inst_blocks) + WARNF("No instrumentation targets found."); + else { + + char modeline[100]; + snprintf(modeline, sizeof(modeline), "%s%s%s%s%s", + getenv("AFL_HARDEN") ? "hardened" : "non-hardened", + getenv("AFL_USE_ASAN") ? ", ASAN" : "", + getenv("AFL_USE_MSAN") ? ", MSAN" : "", + getenv("AFL_USE_CFISAN") ? ", CFISAN" : "", + getenv("AFL_USE_UBSAN") ? 
", UBSAN" : ""); + OKF("Instrumented %u locations (%s mode, ratio %u%%).", inst_blocks, + modeline, inst_ratio); + + } + + } + + return true; + +} + +static void registerAFLPass(const PassManagerBuilder &, + legacy::PassManagerBase &PM) { + + PM.add(new AFLCoverage()); + +} + +static RegisterStandardPasses RegisterAFLPass( + PassManagerBuilder::EP_OptimizerLast, registerAFLPass); + +static RegisterStandardPasses RegisterAFLPass0( + PassManagerBuilder::EP_EnabledOnOptLevel0, registerAFLPass); + diff --git a/instrumentation/afl-llvm-rt-lto.o.c b/instrumentation/afl-llvm-rt-lto.o.c new file mode 100644 index 00000000..e53785ff --- /dev/null +++ b/instrumentation/afl-llvm-rt-lto.o.c @@ -0,0 +1,27 @@ +/* + american fuzzy lop++ - LLVM instrumentation bootstrap + ----------------------------------------------------- + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at: + + http://www.apache.org/licenses/LICENSE-2.0 + +*/ + +#include <stdio.h> +#include <stdlib.h> + +// to prevent the function from being removed +unsigned char __afl_lto_mode = 0; + +/* Proper initialization routine. */ + +__attribute__((constructor(0))) void __afl_auto_init_globals(void) { + + if (getenv("AFL_DEBUG")) fprintf(stderr, "[__afl_auto_init_globals]\n"); + __afl_lto_mode = 1; + +} + diff --git a/instrumentation/cmplog-instructions-pass.cc b/instrumentation/cmplog-instructions-pass.cc new file mode 100644 index 00000000..d5de3dbb --- /dev/null +++ b/instrumentation/cmplog-instructions-pass.cc @@ -0,0 +1,292 @@ +/* + american fuzzy lop++ - LLVM CmpLog instrumentation + -------------------------------------------------- + + Written by Andrea Fioraldi <andreafioraldi@gmail.com> + + Copyright 2015, 2016 Google Inc. All rights reserved. + Copyright 2019-2020 AFLplusplus Project. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at: + + http://www.apache.org/licenses/LICENSE-2.0 + +*/ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <list> +#include <string> +#include <fstream> +#include <sys/time.h> +#include "llvm/Config/llvm-config.h" + +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/ValueTracking.h" + +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) + #include "llvm/IR/Verifier.h" + #include "llvm/IR/DebugInfo.h" +#else + #include "llvm/Analysis/Verifier.h" + #include "llvm/DebugInfo.h" + #define nullptr 0 +#endif + +#include <set> +#include "afl-llvm-common.h" + +using namespace llvm; + +namespace { + +class CmpLogInstructions : public ModulePass { + + public: + static char ID; + CmpLogInstructions() : ModulePass(ID) { + + initInstrumentList(); + + } + + bool runOnModule(Module &M) override; + +#if LLVM_VERSION_MAJOR < 4 + const char *getPassName() const override { + +#else + StringRef getPassName() const override { + +#endif + return "cmplog instructions"; + + } + + private: + bool hookInstrs(Module &M); + +}; + +} // namespace + +char CmpLogInstructions::ID = 0; + +bool CmpLogInstructions::hookInstrs(Module &M) { + + std::vector<Instruction *> icomps; + LLVMContext & C = M.getContext(); + + Type * VoidTy = Type::getVoidTy(C); + IntegerType *Int8Ty = IntegerType::getInt8Ty(C); + IntegerType *Int16Ty = IntegerType::getInt16Ty(C); + IntegerType *Int32Ty = IntegerType::getInt32Ty(C); + IntegerType *Int64Ty = IntegerType::getInt64Ty(C); + +#if LLVM_VERSION_MAJOR < 9 + Constant * +#else + FunctionCallee +#endif + c1 = M.getOrInsertFunction("__cmplog_ins_hook1", VoidTy, Int8Ty, Int8Ty +#if LLVM_VERSION_MAJOR < 5 + , + NULL +#endif + ); +#if LLVM_VERSION_MAJOR < 9 + Function *cmplogHookIns1 = cast<Function>(c1); +#else + FunctionCallee cmplogHookIns1 = c1; +#endif + +#if LLVM_VERSION_MAJOR < 9 + Constant * +#else + FunctionCallee +#endif + c2 = M.getOrInsertFunction("__cmplog_ins_hook2", VoidTy, Int16Ty, Int16Ty +#if LLVM_VERSION_MAJOR < 5 + , + NULL +#endif + ); +#if LLVM_VERSION_MAJOR < 9 + Function *cmplogHookIns2 = cast<Function>(c2); +#else + FunctionCallee cmplogHookIns2 = c2; +#endif + +#if LLVM_VERSION_MAJOR < 9 + Constant * +#else + FunctionCallee +#endif + c4 = M.getOrInsertFunction("__cmplog_ins_hook4", VoidTy, Int32Ty, Int32Ty +#if LLVM_VERSION_MAJOR < 5 + , + NULL +#endif + ); +#if LLVM_VERSION_MAJOR < 9 + Function *cmplogHookIns4 = cast<Function>(c4); +#else + FunctionCallee cmplogHookIns4 = c4; +#endif + +#if LLVM_VERSION_MAJOR < 9 + Constant * +#else + FunctionCallee +#endif + c8 = M.getOrInsertFunction("__cmplog_ins_hook8", VoidTy, Int64Ty, Int64Ty +#if LLVM_VERSION_MAJOR < 5 + , + NULL +#endif + ); +#if LLVM_VERSION_MAJOR < 9 + Function *cmplogHookIns8 = cast<Function>(c8); +#else + FunctionCallee cmplogHookIns8 = c8; +#endif + + /* iterate over all functions, bbs and instruction and add suitable calls */ + for (auto &F : M) { + + if (!isInInstrumentList(&F)) continue; + + for (auto &BB : F) { + + for (auto &IN : BB) { + + CmpInst *selectcmpInst = nullptr; + + if ((selectcmpInst = dyn_cast<CmpInst>(&IN))) { + + if (selectcmpInst->getPredicate() == CmpInst::ICMP_EQ || + 
selectcmpInst->getPredicate() == CmpInst::ICMP_NE || + selectcmpInst->getPredicate() == CmpInst::ICMP_UGT || + selectcmpInst->getPredicate() == CmpInst::ICMP_SGT || + selectcmpInst->getPredicate() == CmpInst::ICMP_ULT || + selectcmpInst->getPredicate() == CmpInst::ICMP_SLT || + selectcmpInst->getPredicate() == CmpInst::ICMP_UGE || + selectcmpInst->getPredicate() == CmpInst::ICMP_SGE || + selectcmpInst->getPredicate() == CmpInst::ICMP_ULE || + selectcmpInst->getPredicate() == CmpInst::ICMP_SLE) { + + auto op0 = selectcmpInst->getOperand(0); + auto op1 = selectcmpInst->getOperand(1); + + IntegerType *intTyOp0 = dyn_cast<IntegerType>(op0->getType()); + IntegerType *intTyOp1 = dyn_cast<IntegerType>(op1->getType()); + + /* this is probably not needed but we do it anyway */ + if (!intTyOp0 || !intTyOp1) { continue; } + + icomps.push_back(selectcmpInst); + + } + + } + + } + + } + + } + + if (!icomps.size()) return false; + if (!be_quiet) errs() << "Hooking " << icomps.size() << " cmp instructions\n"; + + for (auto &selectcmpInst : icomps) { + + IRBuilder<> IRB(selectcmpInst->getParent()); + IRB.SetInsertPoint(selectcmpInst); + + auto op0 = selectcmpInst->getOperand(0); + auto op1 = selectcmpInst->getOperand(1); + + IntegerType *intTyOp0 = dyn_cast<IntegerType>(op0->getType()); + IntegerType *intTyOp1 = dyn_cast<IntegerType>(op1->getType()); + + unsigned max_size = intTyOp0->getBitWidth() > intTyOp1->getBitWidth() + ? intTyOp0->getBitWidth() + : intTyOp1->getBitWidth(); + + std::vector<Value *> args; + args.push_back(op0); + args.push_back(op1); + + switch (max_size) { + + case 8: + IRB.CreateCall(cmplogHookIns1, args); + break; + case 16: + IRB.CreateCall(cmplogHookIns2, args); + break; + case 32: + IRB.CreateCall(cmplogHookIns4, args); + break; + case 64: + IRB.CreateCall(cmplogHookIns8, args); + break; + default: + break; + + } + + } + + return true; + +} + +bool CmpLogInstructions::runOnModule(Module &M) { + + if (getenv("AFL_QUIET") == NULL) + llvm::errs() + << "Running cmplog-instructions-pass by andreafioraldi@gmail.com\n"; + else + be_quiet = 1; + hookInstrs(M); + verifyModule(M); + + return true; + +} + +static void registerCmpLogInstructionsPass(const PassManagerBuilder &, + legacy::PassManagerBase &PM) { + + auto p = new CmpLogInstructions(); + PM.add(p); + +} + +static RegisterStandardPasses RegisterCmpLogInstructionsPass( + PassManagerBuilder::EP_OptimizerLast, registerCmpLogInstructionsPass); + +static RegisterStandardPasses RegisterCmpLogInstructionsPass0( + PassManagerBuilder::EP_EnabledOnOptLevel0, registerCmpLogInstructionsPass); + +#if LLVM_VERSION_MAJOR >= 11 +static RegisterStandardPasses RegisterCmpLogInstructionsPassLTO( + PassManagerBuilder::EP_FullLinkTimeOptimizationLast, + registerCmpLogInstructionsPass); +#endif + diff --git a/instrumentation/cmplog-routines-pass.cc b/instrumentation/cmplog-routines-pass.cc new file mode 100644 index 00000000..c44f38c4 --- /dev/null +++ b/instrumentation/cmplog-routines-pass.cc @@ -0,0 +1,212 @@ +/* + american fuzzy lop++ - LLVM CmpLog instrumentation + -------------------------------------------------- + + Written by Andrea Fioraldi <andreafioraldi@gmail.com> + + Copyright 2015, 2016 Google Inc. All rights reserved. + Copyright 2019-2020 AFLplusplus Project. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at: + + http://www.apache.org/licenses/LICENSE-2.0 + +*/ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <list> +#include <string> +#include <fstream> +#include <sys/time.h> +#include "llvm/Config/llvm-config.h" + +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/ValueTracking.h" + +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) + #include "llvm/IR/Verifier.h" + #include "llvm/IR/DebugInfo.h" +#else + #include "llvm/Analysis/Verifier.h" + #include "llvm/DebugInfo.h" + #define nullptr 0 +#endif + +#include <set> +#include "afl-llvm-common.h" + +using namespace llvm; + +namespace { + +class CmpLogRoutines : public ModulePass { + + public: + static char ID; + CmpLogRoutines() : ModulePass(ID) { + + initInstrumentList(); + + } + + bool runOnModule(Module &M) override; + +#if LLVM_VERSION_MAJOR < 4 + const char *getPassName() const override { + +#else + StringRef getPassName() const override { + +#endif + return "cmplog routines"; + + } + + private: + bool hookRtns(Module &M); + +}; + +} // namespace + +char CmpLogRoutines::ID = 0; + +bool CmpLogRoutines::hookRtns(Module &M) { + + std::vector<CallInst *> calls; + LLVMContext & C = M.getContext(); + + Type *VoidTy = Type::getVoidTy(C); + // PointerType *VoidPtrTy = PointerType::get(VoidTy, 0); + IntegerType *Int8Ty = IntegerType::getInt8Ty(C); + PointerType *i8PtrTy = PointerType::get(Int8Ty, 0); + +#if LLVM_VERSION_MAJOR < 9 + Constant * +#else + FunctionCallee +#endif + c = M.getOrInsertFunction("__cmplog_rtn_hook", VoidTy, i8PtrTy, i8PtrTy +#if LLVM_VERSION_MAJOR < 5 + , + NULL +#endif + ); +#if LLVM_VERSION_MAJOR < 9 + Function *cmplogHookFn = cast<Function>(c); +#else + FunctionCallee cmplogHookFn = c; +#endif + + /* iterate over all functions, bbs and instruction and add suitable calls */ + for (auto &F : M) { + + if (!isInInstrumentList(&F)) continue; + + for (auto &BB : F) { + + for (auto &IN : BB) { + + CallInst *callInst = nullptr; + + if ((callInst = dyn_cast<CallInst>(&IN))) { + + Function *Callee = callInst->getCalledFunction(); + if (!Callee) continue; + if (callInst->getCallingConv() != llvm::CallingConv::C) continue; + + FunctionType *FT = Callee->getFunctionType(); + + bool isPtrRtn = FT->getNumParams() >= 2 && + !FT->getReturnType()->isVoidTy() && + FT->getParamType(0) == FT->getParamType(1) && + FT->getParamType(0)->isPointerTy(); + + if (!isPtrRtn) continue; + + calls.push_back(callInst); + + } + + } + + } + + } + + if (!calls.size()) return false; + if (!be_quiet) + errs() << "Hooking " << calls.size() + << " calls with pointers as arguments\n"; + + for (auto &callInst : calls) { + + Value *v1P = callInst->getArgOperand(0), *v2P = callInst->getArgOperand(1); + + IRBuilder<> IRB(callInst->getParent()); + IRB.SetInsertPoint(callInst); + + std::vector<Value *> args; + Value * v1Pcasted = IRB.CreatePointerCast(v1P, i8PtrTy); + Value * v2Pcasted = IRB.CreatePointerCast(v2P, i8PtrTy); + args.push_back(v1Pcasted); + args.push_back(v2Pcasted); + + IRB.CreateCall(cmplogHookFn, args); + + // errs() << callInst->getCalledFunction()->getName() << "\n"; + + } + + return true; + +} + +bool CmpLogRoutines::runOnModule(Module &M) { 
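+  /* Summary (conceptual, placeholder names): hookRtns(M), invoked below,
+     collects every call whose callee's first two parameters are pointers of
+     the same type and whose return type is non-void, and plants a call to the
+     runtime helper __cmplog_rtn_hook() directly in front of it, roughly:
+
+         ret = some_cmp_like_fn(a, b);
+             becomes
+         __cmplog_rtn_hook(a, b);          // both arguments cast to i8*
+         ret = some_cmp_like_fn(a, b);
+
+     so the CmpLog runtime can record the two compared buffers.  "a", "b" and
+     "some_cmp_like_fn" are illustrative only, not symbols from this module. */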
+ + if (getenv("AFL_QUIET") == NULL) + llvm::errs() + << "Running cmplog-routines-pass by andreafioraldi@gmail.com\n"; + else + be_quiet = 1; + hookRtns(M); + verifyModule(M); + + return true; + +} + +static void registerCmpLogRoutinesPass(const PassManagerBuilder &, + legacy::PassManagerBase &PM) { + + auto p = new CmpLogRoutines(); + PM.add(p); + +} + +static RegisterStandardPasses RegisterCmpLogRoutinesPass( + PassManagerBuilder::EP_OptimizerLast, registerCmpLogRoutinesPass); + +static RegisterStandardPasses RegisterCmpLogRoutinesPass0( + PassManagerBuilder::EP_EnabledOnOptLevel0, registerCmpLogRoutinesPass); + +#if LLVM_VERSION_MAJOR >= 11 +static RegisterStandardPasses RegisterCmpLogRoutinesPassLTO( + PassManagerBuilder::EP_FullLinkTimeOptimizationLast, + registerCmpLogRoutinesPass); +#endif + diff --git a/instrumentation/compare-transform-pass.so.cc b/instrumentation/compare-transform-pass.so.cc new file mode 100644 index 00000000..9d2f4a92 --- /dev/null +++ b/instrumentation/compare-transform-pass.so.cc @@ -0,0 +1,588 @@ +/* + * Copyright 2016 laf-intel + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <list> +#include <string> +#include <fstream> +#include <sys/time.h> +#include "llvm/Config/llvm-config.h" + +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/ValueTracking.h" + +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) + #include "llvm/IR/Verifier.h" + #include "llvm/IR/DebugInfo.h" +#else + #include "llvm/Analysis/Verifier.h" + #include "llvm/DebugInfo.h" + #define nullptr 0 +#endif + +#include <set> +#include "afl-llvm-common.h" + +using namespace llvm; + +namespace { + +class CompareTransform : public ModulePass { + + public: + static char ID; + CompareTransform() : ModulePass(ID) { + + initInstrumentList(); + + } + + bool runOnModule(Module &M) override; + +#if LLVM_VERSION_MAJOR < 4 + const char *getPassName() const override { + +#else + StringRef getPassName() const override { + +#endif + return "transforms compare functions"; + + } + + private: + bool transformCmps(Module &M, const bool processStrcmp, + const bool processMemcmp, const bool processStrncmp, + const bool processStrcasecmp, + const bool processStrncasecmp); + +}; + +} // namespace + +char CompareTransform::ID = 0; + +bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, + const bool processMemcmp, + const bool processStrncmp, + const bool processStrcasecmp, + const bool processStrncasecmp) { + + DenseMap<Value *, std::string *> valueMap; + std::vector<CallInst *> calls; + LLVMContext & C = M.getContext(); + IntegerType * Int8Ty = IntegerType::getInt8Ty(C); + 
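/* Conceptual summary (illustrative C, "x" is a placeholder): when one
+     argument of a matched str{n,}{case,}cmp/memcmp call is a known constant
+     string, the call is replaced further below by an unrolled byte-by-byte
+     comparison with one new basic block per byte, e.g. for strcmp(x, "foo"):
+
+         r = x[0] - 'f';
+         if (!r) r = x[1] - 'o';
+         if (!r) r = x[2] - 'o';
+         if (!r) r = x[3] - '\0';
+
+     so a coverage-guided fuzzer gets feedback for every matched byte instead
+     of a single opaque library call. */
+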
IntegerType * Int32Ty = IntegerType::getInt32Ty(C); + IntegerType * Int64Ty = IntegerType::getInt64Ty(C); + +#if LLVM_VERSION_MAJOR < 9 + Constant * +#else + FunctionCallee +#endif + c = M.getOrInsertFunction("tolower", Int32Ty, Int32Ty +#if LLVM_VERSION_MAJOR < 5 + , + NULL +#endif + ); +#if LLVM_VERSION_MAJOR < 9 + Function *tolowerFn = cast<Function>(c); +#else + FunctionCallee tolowerFn = c; +#endif + + /* iterate over all functions, bbs and instruction and add suitable calls to + * strcmp/memcmp/strncmp/strcasecmp/strncasecmp */ + for (auto &F : M) { + + if (!isInInstrumentList(&F)) continue; + + for (auto &BB : F) { + + for (auto &IN : BB) { + + CallInst *callInst = nullptr; + + if ((callInst = dyn_cast<CallInst>(&IN))) { + + bool isStrcmp = processStrcmp; + bool isMemcmp = processMemcmp; + bool isStrncmp = processStrncmp; + bool isStrcasecmp = processStrcasecmp; + bool isStrncasecmp = processStrncasecmp; + bool isIntMemcpy = true; + + Function *Callee = callInst->getCalledFunction(); + if (!Callee) continue; + if (callInst->getCallingConv() != llvm::CallingConv::C) continue; + StringRef FuncName = Callee->getName(); + isStrcmp &= !FuncName.compare(StringRef("strcmp")); + isMemcmp &= (!FuncName.compare(StringRef("memcmp")) || + !FuncName.compare(StringRef("bcmp"))); + isStrncmp &= !FuncName.compare(StringRef("strncmp")); + isStrcasecmp &= !FuncName.compare(StringRef("strcasecmp")); + isStrncasecmp &= !FuncName.compare(StringRef("strncasecmp")); + isIntMemcpy &= !FuncName.compare("llvm.memcpy.p0i8.p0i8.i64"); + + if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp && + !isStrncasecmp && !isIntMemcpy) + continue; + + /* Verify the strcmp/memcmp/strncmp/strcasecmp/strncasecmp function + * prototype */ + FunctionType *FT = Callee->getFunctionType(); + + isStrcmp &= + FT->getNumParams() == 2 && FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0) == FT->getParamType(1) && + FT->getParamType(0) == IntegerType::getInt8PtrTy(M.getContext()); + isStrcasecmp &= + FT->getNumParams() == 2 && FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0) == FT->getParamType(1) && + FT->getParamType(0) == IntegerType::getInt8PtrTy(M.getContext()); + isMemcmp &= FT->getNumParams() == 3 && + FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0)->isPointerTy() && + FT->getParamType(1)->isPointerTy() && + FT->getParamType(2)->isIntegerTy(); + isStrncmp &= FT->getNumParams() == 3 && + FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0) == FT->getParamType(1) && + FT->getParamType(0) == + IntegerType::getInt8PtrTy(M.getContext()) && + FT->getParamType(2)->isIntegerTy(); + isStrncasecmp &= FT->getNumParams() == 3 && + FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0) == FT->getParamType(1) && + FT->getParamType(0) == + IntegerType::getInt8PtrTy(M.getContext()) && + FT->getParamType(2)->isIntegerTy(); + + if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp && + !isStrncasecmp && !isIntMemcpy) + continue; + + /* is a str{n,}{case,}cmp/memcmp, check if we have + * str{case,}cmp(x, "const") or str{case,}cmp("const", x) + * strn{case,}cmp(x, "const", ..) or strn{case,}cmp("const", x, ..) + * memcmp(x, "const", ..) or memcmp("const", x, ..) 
*/ + Value *Str1P = callInst->getArgOperand(0), + *Str2P = callInst->getArgOperand(1); + StringRef Str1, Str2; + bool HasStr1 = getConstantStringInfo(Str1P, Str1); + bool HasStr2 = getConstantStringInfo(Str2P, Str2); + + if (isIntMemcpy && HasStr2) { + + valueMap[Str1P] = new std::string(Str2.str()); + // fprintf(stderr, "saved %s for %p\n", Str2.str().c_str(), Str1P); + continue; + + } + + // not literal? maybe global or local variable + if (!(HasStr1 || HasStr2)) { + + auto *Ptr = dyn_cast<ConstantExpr>(Str2P); + if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) { + + if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) { + + if (Var->hasInitializer()) { + + if (auto *Array = + dyn_cast<ConstantDataArray>(Var->getInitializer())) { + + HasStr2 = true; + Str2 = Array->getAsString(); + valueMap[Str2P] = new std::string(Str2.str()); + fprintf(stderr, "glo2 %s\n", Str2.str().c_str()); + + } + + } + + } + + } + + if (!HasStr2) { + + auto *Ptr = dyn_cast<ConstantExpr>(Str1P); + if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) { + + if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) { + + if (Var->hasInitializer()) { + + if (auto *Array = dyn_cast<ConstantDataArray>( + Var->getInitializer())) { + + HasStr1 = true; + Str1 = Array->getAsString(); + valueMap[Str1P] = new std::string(Str1.str()); + // fprintf(stderr, "glo1 %s\n", Str1.str().c_str()); + + } + + } + + } + + } + + } else if (isIntMemcpy) { + + valueMap[Str1P] = new std::string(Str2.str()); + // fprintf(stderr, "saved\n"); + + } + + } + + if (isIntMemcpy) continue; + + if (!(HasStr1 || HasStr2)) { + + // do we have a saved local variable initialization? + std::string *val = valueMap[Str1P]; + if (val && !val->empty()) { + + Str1 = StringRef(*val); + HasStr1 = true; + // fprintf(stderr, "loaded1 %s\n", Str1.str().c_str()); + + } else { + + val = valueMap[Str2P]; + if (val && !val->empty()) { + + Str2 = StringRef(*val); + HasStr2 = true; + // fprintf(stderr, "loaded2 %s\n", Str2.str().c_str()); + + } + + } + + } + + /* handle cases of one string is const, one string is variable */ + if (!(HasStr1 || HasStr2)) continue; + + if (isMemcmp || isStrncmp || isStrncasecmp) { + + /* check if third operand is a constant integer + * strlen("constStr") and sizeof() are treated as constant */ + Value * op2 = callInst->getArgOperand(2); + ConstantInt *ilen = dyn_cast<ConstantInt>(op2); + if (ilen) { + + uint64_t len = ilen->getZExtValue(); + // if len is zero this is a pointless call but allow real + // implementation to worry about that + if (!len) continue; + + if (isMemcmp) { + + // if size of compare is larger than constant string this is + // likely a bug but allow real implementation to worry about + // that + uint64_t literalLength = HasStr1 ? 
Str1.size() : Str2.size(); + if (literalLength + 1 < ilen->getZExtValue()) continue; + + } + + } else if (isMemcmp) + + // this *may* supply a len greater than the constant string at + // runtime so similarly we don't want to have to handle that + continue; + + } + + calls.push_back(callInst); + + } + + } + + } + + } + + if (!calls.size()) return false; + if (!be_quiet) + errs() << "Replacing " << calls.size() + << " calls to strcmp/memcmp/strncmp/strcasecmp/strncasecmp\n"; + + for (auto &callInst : calls) { + + Value *Str1P = callInst->getArgOperand(0), + *Str2P = callInst->getArgOperand(1); + StringRef Str1, Str2, ConstStr; + std::string TmpConstStr; + Value * VarStr; + bool HasStr1 = getConstantStringInfo(Str1P, Str1); + bool HasStr2 = getConstantStringInfo(Str2P, Str2); + uint64_t constStrLen, unrollLen, constSizedLen = 0; + bool isMemcmp = + !callInst->getCalledFunction()->getName().compare(StringRef("memcmp")); + bool isSizedcmp = isMemcmp || + !callInst->getCalledFunction()->getName().compare( + StringRef("strncmp")) || + !callInst->getCalledFunction()->getName().compare( + StringRef("strncasecmp")); + Value *sizedValue = isSizedcmp ? callInst->getArgOperand(2) : NULL; + bool isConstSized = sizedValue && isa<ConstantInt>(sizedValue); + bool isCaseInsensitive = !callInst->getCalledFunction()->getName().compare( + StringRef("strcasecmp")) || + !callInst->getCalledFunction()->getName().compare( + StringRef("strncasecmp")); + + if (!(HasStr1 || HasStr2)) { + + // do we have a saved local or global variable initialization? + std::string *val = valueMap[Str1P]; + if (val && !val->empty()) { + + Str1 = StringRef(*val); + HasStr1 = true; + + } else { + + val = valueMap[Str2P]; + if (val && !val->empty()) { + + Str2 = StringRef(*val); + HasStr2 = true; + + } + + } + + } + + if (isConstSized) { + + constSizedLen = dyn_cast<ConstantInt>(sizedValue)->getZExtValue(); + + } + + if (HasStr1) { + + TmpConstStr = Str1.str(); + VarStr = Str2P; + + } else { + + TmpConstStr = Str2.str(); + VarStr = Str1P; + + } + + // add null termination character implicit in c strings + TmpConstStr.append("\0", 1); + + // in the unusual case the const str has embedded null + // characters, the string comparison functions should terminate + // at the first null + if (!isMemcmp) + TmpConstStr.assign(TmpConstStr, 0, TmpConstStr.find('\0') + 1); + + constStrLen = TmpConstStr.length(); + // prefer use of StringRef (in comparison to std::string a StringRef has + // built-in runtime bounds checking, which makes debugging easier) + ConstStr = StringRef(TmpConstStr); + + if (isConstSized) + unrollLen = constSizedLen < constStrLen ? constSizedLen : constStrLen; + else + unrollLen = constStrLen; + + if (!be_quiet) + errs() << callInst->getCalledFunction()->getName() << ": unroll len " + << unrollLen + << ((isSizedcmp && !isConstSized) ? ", variable n" : "") << ": " + << ConstStr << "\n"; + + /* split before the call instruction */ + BasicBlock *bb = callInst->getParent(); + BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(callInst)); + + BasicBlock *next_lenchk_bb = NULL; + if (isSizedcmp && !isConstSized) { + + next_lenchk_bb = + BasicBlock::Create(C, "len_check", end_bb->getParent(), end_bb); + BranchInst::Create(end_bb, next_lenchk_bb); + + } + + BasicBlock *next_cmp_bb = + BasicBlock::Create(C, "cmp_added", end_bb->getParent(), end_bb); + BranchInst::Create(end_bb, next_cmp_bb); + PHINode *PN = PHINode::Create( + Int32Ty, (next_lenchk_bb ? 
2 : 1) * unrollLen + 1, "cmp_phi"); + +#if LLVM_VERSION_MAJOR < 8 + TerminatorInst *term = bb->getTerminator(); +#else + Instruction *term = bb->getTerminator(); +#endif + BranchInst::Create(next_lenchk_bb ? next_lenchk_bb : next_cmp_bb, bb); + term->eraseFromParent(); + + for (uint64_t i = 0; i < unrollLen; i++) { + + BasicBlock * cur_cmp_bb = next_cmp_bb, *cur_lenchk_bb = next_lenchk_bb; + unsigned char c; + + if (cur_lenchk_bb) { + + IRBuilder<> cur_lenchk_IRB(&*(cur_lenchk_bb->getFirstInsertionPt())); + Value * icmp = cur_lenchk_IRB.CreateICmpEQ( + sizedValue, ConstantInt::get(sizedValue->getType(), i)); + cur_lenchk_IRB.CreateCondBr(icmp, end_bb, cur_cmp_bb); + cur_lenchk_bb->getTerminator()->eraseFromParent(); + + PN->addIncoming(ConstantInt::get(Int32Ty, 0), cur_lenchk_bb); + + } + + if (isCaseInsensitive) + c = (unsigned char)(tolower((int)ConstStr[i]) & 0xff); + else + c = (unsigned char)ConstStr[i]; + + IRBuilder<> cur_cmp_IRB(&*(cur_cmp_bb->getFirstInsertionPt())); + + Value *v = ConstantInt::get(Int64Ty, i); + Value *ele = cur_cmp_IRB.CreateInBoundsGEP(VarStr, v, "empty"); + Value *load = cur_cmp_IRB.CreateLoad(ele); + + if (isCaseInsensitive) { + + // load >= 'A' && load <= 'Z' ? load | 0x020 : load + load = cur_cmp_IRB.CreateZExt(load, Int32Ty); + std::vector<Value *> args; + args.push_back(load); + load = cur_cmp_IRB.CreateCall(tolowerFn, args); + load = cur_cmp_IRB.CreateTrunc(load, Int8Ty); + + } + + Value *isub; + if (HasStr1) + isub = cur_cmp_IRB.CreateSub(ConstantInt::get(Int8Ty, c), load); + else + isub = cur_cmp_IRB.CreateSub(load, ConstantInt::get(Int8Ty, c)); + + Value *sext = cur_cmp_IRB.CreateSExt(isub, Int32Ty); + PN->addIncoming(sext, cur_cmp_bb); + + if (i < unrollLen - 1) { + + if (cur_lenchk_bb) { + + next_lenchk_bb = + BasicBlock::Create(C, "len_check", end_bb->getParent(), end_bb); + BranchInst::Create(end_bb, next_lenchk_bb); + + } + + next_cmp_bb = + BasicBlock::Create(C, "cmp_added", end_bb->getParent(), end_bb); + BranchInst::Create(end_bb, next_cmp_bb); + + Value *icmp = + cur_cmp_IRB.CreateICmpEQ(isub, ConstantInt::get(Int8Ty, 0)); + cur_cmp_IRB.CreateCondBr( + icmp, next_lenchk_bb ? 
next_lenchk_bb : next_cmp_bb, end_bb); + cur_cmp_bb->getTerminator()->eraseFromParent(); + + } else { + + // IRB.CreateBr(end_bb); + + } + + // add offset to varstr + // create load + // create signed isub + // create icmp + // create jcc + // create next_bb + + } + + /* since the call is the first instruction of the bb it is safe to + * replace it with a phi instruction */ + BasicBlock::iterator ii(callInst); + ReplaceInstWithInst(callInst->getParent()->getInstList(), ii, PN); + + } + + return true; + +} + +bool CompareTransform::runOnModule(Module &M) { + + if ((isatty(2) && getenv("AFL_QUIET") == NULL) || getenv("AFL_DEBUG") != NULL) + llvm::errs() << "Running compare-transform-pass by laf.intel@gmail.com, " + "extended by heiko@hexco.de\n"; + else + be_quiet = 1; + transformCmps(M, true, true, true, true, true); + verifyModule(M); + + return true; + +} + +static void registerCompTransPass(const PassManagerBuilder &, + legacy::PassManagerBase &PM) { + + auto p = new CompareTransform(); + PM.add(p); + +} + +static RegisterStandardPasses RegisterCompTransPass( + PassManagerBuilder::EP_OptimizerLast, registerCompTransPass); + +static RegisterStandardPasses RegisterCompTransPass0( + PassManagerBuilder::EP_EnabledOnOptLevel0, registerCompTransPass); + +#if LLVM_VERSION_MAJOR >= 11 +static RegisterStandardPasses RegisterCompTransPassLTO( + PassManagerBuilder::EP_FullLinkTimeOptimizationLast, registerCompTransPass); +#endif + diff --git a/instrumentation/llvm-ngram-coverage.h b/instrumentation/llvm-ngram-coverage.h new file mode 100644 index 00000000..12b666e9 --- /dev/null +++ b/instrumentation/llvm-ngram-coverage.h @@ -0,0 +1,18 @@ +#ifndef AFL_NGRAM_CONFIG_H +#define AFL_NGRAM_CONFIG_H + +#include "../config.h" + +#if (MAP_SIZE_POW2 <= 16) +typedef u16 PREV_LOC_T; +#elif (MAP_SIZE_POW2 <= 32) +typedef u32 PREV_LOC_T; +#else +typedef u64 PREV_LOC_T; +#endif + +/* Maximum ngram size */ +#define NGRAM_SIZE_MAX 16U + +#endif + diff --git a/instrumentation/split-compares-pass.so.cc b/instrumentation/split-compares-pass.so.cc new file mode 100644 index 00000000..2fb90e5e --- /dev/null +++ b/instrumentation/split-compares-pass.so.cc @@ -0,0 +1,1356 @@ +/* + * Copyright 2016 laf-intel + * extended for floating point by Heiko Eißfeldt + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <list> +#include <string> +#include <fstream> +#include <sys/time.h> + +#include "llvm/Config/llvm-config.h" + +#include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/IR/Module.h" + +#include "llvm/IR/IRBuilder.h" +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) + #include "llvm/IR/Verifier.h" + #include "llvm/IR/DebugInfo.h" +#else + #include "llvm/Analysis/Verifier.h" + #include "llvm/DebugInfo.h" + #define nullptr 0 +#endif + +using namespace llvm; +#include "afl-llvm-common.h" + +namespace { + +class SplitComparesTransform : public ModulePass { + + public: + static char ID; + SplitComparesTransform() : ModulePass(ID) { + + initInstrumentList(); + + } + + bool runOnModule(Module &M) override; +#if LLVM_VERSION_MAJOR >= 4 + StringRef getPassName() const override { + +#else + const char *getPassName() const override { + +#endif + return "simplifies and splits ICMP instructions"; + + } + + private: + int enableFPSplit; + + size_t splitIntCompares(Module &M, unsigned bitw); + size_t splitFPCompares(Module &M); + bool simplifyCompares(Module &M); + bool simplifyFPCompares(Module &M); + bool simplifyIntSignedness(Module &M); + size_t nextPowerOfTwo(size_t in); + +}; + +} // namespace + +char SplitComparesTransform::ID = 0; + +/* This function splits FCMP instructions with xGE or xLE predicates into two + * FCMP instructions with predicate xGT or xLT and EQ */ +bool SplitComparesTransform::simplifyFPCompares(Module &M) { + + LLVMContext & C = M.getContext(); + std::vector<Instruction *> fcomps; + IntegerType * Int1Ty = IntegerType::getInt1Ty(C); + + /* iterate over all functions, bbs and instruction and add + * all integer comparisons with >= and <= predicates to the icomps vector */ + for (auto &F : M) { + + if (!isInInstrumentList(&F)) continue; + + for (auto &BB : F) { + + for (auto &IN : BB) { + + CmpInst *selectcmpInst = nullptr; + + if ((selectcmpInst = dyn_cast<CmpInst>(&IN))) { + + if (enableFPSplit && + (selectcmpInst->getPredicate() == CmpInst::FCMP_OGE || + selectcmpInst->getPredicate() == CmpInst::FCMP_UGE || + selectcmpInst->getPredicate() == CmpInst::FCMP_OLE || + selectcmpInst->getPredicate() == CmpInst::FCMP_ULE)) { + + auto op0 = selectcmpInst->getOperand(0); + auto op1 = selectcmpInst->getOperand(1); + + Type *TyOp0 = op0->getType(); + Type *TyOp1 = op1->getType(); + + /* this is probably not needed but we do it anyway */ + if (TyOp0 != TyOp1) { continue; } + + if (TyOp0->isArrayTy() || TyOp0->isVectorTy()) { continue; } + + fcomps.push_back(selectcmpInst); + + } + + } + + } + + } + + } + + if (!fcomps.size()) { return false; } + + /* transform for floating point */ + for (auto &FcmpInst : fcomps) { + + BasicBlock *bb = FcmpInst->getParent(); + + auto op0 = FcmpInst->getOperand(0); + auto op1 = FcmpInst->getOperand(1); + + /* find out what the new predicate is going to be */ + auto pred = dyn_cast<CmpInst>(FcmpInst)->getPredicate(); + CmpInst::Predicate new_pred; + switch (pred) { + + case CmpInst::FCMP_UGE: + new_pred = CmpInst::FCMP_UGT; + break; + case CmpInst::FCMP_OGE: + new_pred = CmpInst::FCMP_OGT; + break; + case CmpInst::FCMP_ULE: + new_pred = CmpInst::FCMP_ULT; + break; + case CmpInst::FCMP_OLE: + new_pred = CmpInst::FCMP_OLT; + break; + default: // keep the compiler happy + continue; 
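+        /* Net effect of the mapping above: the >= / <= predicates are reduced
+           to their strict forms (> / <); the dropped "equal" case is re-added
+           further down as a separate FCMP_OEQ placed in its own basic block,
+           so a >= b effectively becomes (a > b) || (a == b). */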
+ + } + + /* split before the fcmp instruction */ + BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(FcmpInst)); + + /* the old bb now contains a unconditional jump to the new one (end_bb) + * we need to delete it later */ + + /* create the FCMP instruction with new_pred and add it to the old basic + * block bb it is now at the position where the old FcmpInst was */ + Instruction *fcmp_np; + fcmp_np = CmpInst::Create(Instruction::FCmp, new_pred, op0, op1); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), + fcmp_np); + + /* create a new basic block which holds the new EQ fcmp */ + Instruction *fcmp_eq; + /* insert middle_bb before end_bb */ + BasicBlock *middle_bb = + BasicBlock::Create(C, "injected", end_bb->getParent(), end_bb); + fcmp_eq = CmpInst::Create(Instruction::FCmp, CmpInst::FCMP_OEQ, op0, op1); + middle_bb->getInstList().push_back(fcmp_eq); + /* add an unconditional branch to the end of middle_bb with destination + * end_bb */ + BranchInst::Create(end_bb, middle_bb); + + /* replace the uncond branch with a conditional one, which depends on the + * new_pred fcmp. True goes to end, false to the middle (injected) bb */ + auto term = bb->getTerminator(); + BranchInst::Create(end_bb, middle_bb, fcmp_np, bb); + term->eraseFromParent(); + + /* replace the old FcmpInst (which is the first inst in end_bb) with a PHI + * inst to wire up the loose ends */ + PHINode *PN = PHINode::Create(Int1Ty, 2, ""); + /* the first result depends on the outcome of fcmp_eq */ + PN->addIncoming(fcmp_eq, middle_bb); + /* if the source was the original bb we know that the fcmp_np yielded true + * hence we can hardcode this value */ + PN->addIncoming(ConstantInt::get(Int1Ty, 1), bb); + /* replace the old FcmpInst with our new and shiny PHI inst */ + BasicBlock::iterator ii(FcmpInst); + ReplaceInstWithInst(FcmpInst->getParent()->getInstList(), ii, PN); + + } + + return true; + +} + +/* This function splits ICMP instructions with xGE or xLE predicates into two + * ICMP instructions with predicate xGT or xLT and EQ */ +bool SplitComparesTransform::simplifyCompares(Module &M) { + + LLVMContext & C = M.getContext(); + std::vector<Instruction *> icomps; + IntegerType * Int1Ty = IntegerType::getInt1Ty(C); + + /* iterate over all functions, bbs and instruction and add + * all integer comparisons with >= and <= predicates to the icomps vector */ + for (auto &F : M) { + + if (!isInInstrumentList(&F)) continue; + + for (auto &BB : F) { + + for (auto &IN : BB) { + + CmpInst *selectcmpInst = nullptr; + + if ((selectcmpInst = dyn_cast<CmpInst>(&IN))) { + + if (selectcmpInst->getPredicate() == CmpInst::ICMP_UGE || + selectcmpInst->getPredicate() == CmpInst::ICMP_SGE || + selectcmpInst->getPredicate() == CmpInst::ICMP_ULE || + selectcmpInst->getPredicate() == CmpInst::ICMP_SLE) { + + auto op0 = selectcmpInst->getOperand(0); + auto op1 = selectcmpInst->getOperand(1); + + IntegerType *intTyOp0 = dyn_cast<IntegerType>(op0->getType()); + IntegerType *intTyOp1 = dyn_cast<IntegerType>(op1->getType()); + + /* this is probably not needed but we do it anyway */ + if (!intTyOp0 || !intTyOp1) { continue; } + + icomps.push_back(selectcmpInst); + + } + + } + + } + + } + + } + + if (!icomps.size()) { return false; } + + for (auto &IcmpInst : icomps) { + + BasicBlock *bb = IcmpInst->getParent(); + + auto op0 = IcmpInst->getOperand(0); + auto op1 = IcmpInst->getOperand(1); + + /* find out what the new predicate is going to be */ + auto pred = dyn_cast<CmpInst>(IcmpInst)->getPredicate(); + CmpInst::Predicate 
new_pred; + switch (pred) { + + case CmpInst::ICMP_UGE: + new_pred = CmpInst::ICMP_UGT; + break; + case CmpInst::ICMP_SGE: + new_pred = CmpInst::ICMP_SGT; + break; + case CmpInst::ICMP_ULE: + new_pred = CmpInst::ICMP_ULT; + break; + case CmpInst::ICMP_SLE: + new_pred = CmpInst::ICMP_SLT; + break; + default: // keep the compiler happy + continue; + + } + + /* split before the icmp instruction */ + BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(IcmpInst)); + + /* the old bb now contains a unconditional jump to the new one (end_bb) + * we need to delete it later */ + + /* create the ICMP instruction with new_pred and add it to the old basic + * block bb it is now at the position where the old IcmpInst was */ + Instruction *icmp_np; + icmp_np = CmpInst::Create(Instruction::ICmp, new_pred, op0, op1); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), + icmp_np); + + /* create a new basic block which holds the new EQ icmp */ + Instruction *icmp_eq; + /* insert middle_bb before end_bb */ + BasicBlock *middle_bb = + BasicBlock::Create(C, "injected", end_bb->getParent(), end_bb); + icmp_eq = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, op0, op1); + middle_bb->getInstList().push_back(icmp_eq); + /* add an unconditional branch to the end of middle_bb with destination + * end_bb */ + BranchInst::Create(end_bb, middle_bb); + + /* replace the uncond branch with a conditional one, which depends on the + * new_pred icmp. True goes to end, false to the middle (injected) bb */ + auto term = bb->getTerminator(); + BranchInst::Create(end_bb, middle_bb, icmp_np, bb); + term->eraseFromParent(); + + /* replace the old IcmpInst (which is the first inst in end_bb) with a PHI + * inst to wire up the loose ends */ + PHINode *PN = PHINode::Create(Int1Ty, 2, ""); + /* the first result depends on the outcome of icmp_eq */ + PN->addIncoming(icmp_eq, middle_bb); + /* if the source was the original bb we know that the icmp_np yielded true + * hence we can hardcode this value */ + PN->addIncoming(ConstantInt::get(Int1Ty, 1), bb); + /* replace the old IcmpInst with our new and shiny PHI inst */ + BasicBlock::iterator ii(IcmpInst); + ReplaceInstWithInst(IcmpInst->getParent()->getInstList(), ii, PN); + + } + + return true; + +} + +/* this function transforms signed compares to equivalent unsigned compares */ +bool SplitComparesTransform::simplifyIntSignedness(Module &M) { + + LLVMContext & C = M.getContext(); + std::vector<Instruction *> icomps; + IntegerType * Int1Ty = IntegerType::getInt1Ty(C); + + /* iterate over all functions, bbs and instructions and add + * all signed compares to icomps vector */ + for (auto &F : M) { + + if (!isInInstrumentList(&F)) continue; + + for (auto &BB : F) { + + for (auto &IN : BB) { + + CmpInst *selectcmpInst = nullptr; + + if ((selectcmpInst = dyn_cast<CmpInst>(&IN))) { + + if (selectcmpInst->getPredicate() == CmpInst::ICMP_SGT || + selectcmpInst->getPredicate() == CmpInst::ICMP_SLT) { + + auto op0 = selectcmpInst->getOperand(0); + auto op1 = selectcmpInst->getOperand(1); + + IntegerType *intTyOp0 = dyn_cast<IntegerType>(op0->getType()); + IntegerType *intTyOp1 = dyn_cast<IntegerType>(op1->getType()); + + /* see above */ + if (!intTyOp0 || !intTyOp1) { continue; } + + /* i think this is not possible but to lazy to look it up */ + if (intTyOp0->getBitWidth() != intTyOp1->getBitWidth()) { + + continue; + + } + + icomps.push_back(selectcmpInst); + + } + + } + + } + + } + + } + + if (!icomps.size()) { return false; } + + for (auto &IcmpInst : icomps) { + 
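+    /* How the signed compare is rewritten: the sign bits of both operands are
+       compared first.  If they differ, the operand whose sign bit is 0 is the
+       greater one (handled in the "sign" block); only when the sign bits are
+       equal does an unsigned compare of the unmodified operands give the
+       correct result (the "injected" block).  Worked example, i8 and ICMP_SGT:
+       a = -1 (0xFF, sign 1), b = 1 (0x01, sign 0): signs differ, so the result
+       is (sign(a) < sign(b)) = (1 < 0) = false, which matches -1 > 1. */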
+ BasicBlock *bb = IcmpInst->getParent(); + + auto op0 = IcmpInst->getOperand(0); + auto op1 = IcmpInst->getOperand(1); + + IntegerType *intTyOp0 = dyn_cast<IntegerType>(op0->getType()); + unsigned bitw = intTyOp0->getBitWidth(); + IntegerType *IntType = IntegerType::get(C, bitw); + + /* get the new predicate */ + auto pred = dyn_cast<CmpInst>(IcmpInst)->getPredicate(); + CmpInst::Predicate new_pred; + if (pred == CmpInst::ICMP_SGT) { + + new_pred = CmpInst::ICMP_UGT; + + } else { + + new_pred = CmpInst::ICMP_ULT; + + } + + BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(IcmpInst)); + + /* create a 1 bit compare for the sign bit. to do this shift and trunc + * the original operands so only the first bit remains.*/ + Instruction *s_op0, *t_op0, *s_op1, *t_op1, *icmp_sign_bit; + + s_op0 = BinaryOperator::Create(Instruction::LShr, op0, + ConstantInt::get(IntType, bitw - 1)); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op0); + t_op0 = new TruncInst(s_op0, Int1Ty); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), t_op0); + + s_op1 = BinaryOperator::Create(Instruction::LShr, op1, + ConstantInt::get(IntType, bitw - 1)); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op1); + t_op1 = new TruncInst(s_op1, Int1Ty); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), t_op1); + + /* compare of the sign bits */ + icmp_sign_bit = + CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, t_op0, t_op1); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), + icmp_sign_bit); + + /* create a new basic block which is executed if the signedness bit is + * different */ + Instruction *icmp_inv_sig_cmp; + BasicBlock * sign_bb = + BasicBlock::Create(C, "sign", end_bb->getParent(), end_bb); + if (pred == CmpInst::ICMP_SGT) { + + /* if we check for > and the op0 positive and op1 negative then the final + * result is true. 
if op0 negative and op1 pos, the cmp must result + * in false + */ + icmp_inv_sig_cmp = + CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, t_op0, t_op1); + + } else { + + /* just the inverse of the above statement */ + icmp_inv_sig_cmp = + CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_UGT, t_op0, t_op1); + + } + + sign_bb->getInstList().push_back(icmp_inv_sig_cmp); + BranchInst::Create(end_bb, sign_bb); + + /* create a new bb which is executed if signedness is equal */ + Instruction *icmp_usign_cmp; + BasicBlock * middle_bb = + BasicBlock::Create(C, "injected", end_bb->getParent(), end_bb); + /* we can do a normal unsigned compare now */ + icmp_usign_cmp = CmpInst::Create(Instruction::ICmp, new_pred, op0, op1); + middle_bb->getInstList().push_back(icmp_usign_cmp); + BranchInst::Create(end_bb, middle_bb); + + auto term = bb->getTerminator(); + /* if the sign is eq do a normal unsigned cmp, else we have to check the + * signedness bit */ + BranchInst::Create(middle_bb, sign_bb, icmp_sign_bit, bb); + term->eraseFromParent(); + + PHINode *PN = PHINode::Create(Int1Ty, 2, ""); + + PN->addIncoming(icmp_usign_cmp, middle_bb); + PN->addIncoming(icmp_inv_sig_cmp, sign_bb); + + BasicBlock::iterator ii(IcmpInst); + ReplaceInstWithInst(IcmpInst->getParent()->getInstList(), ii, PN); + + } + + return true; + +} + +size_t SplitComparesTransform::nextPowerOfTwo(size_t in) { + + --in; + in |= in >> 1; + in |= in >> 2; + in |= in >> 4; + // in |= in >> 8; + // in |= in >> 16; + return in + 1; + +} + +/* splits fcmps into two nested fcmps with sign compare and the rest */ +size_t SplitComparesTransform::splitFPCompares(Module &M) { + + size_t count = 0; + + LLVMContext &C = M.getContext(); + +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 7) + const DataLayout &dl = M.getDataLayout(); + + /* define unions with floating point and (sign, exponent, mantissa) triples + */ + if (dl.isLittleEndian()) { + + } else if (dl.isBigEndian()) { + + } else { + + return count; + + } + +#endif + + std::vector<CmpInst *> fcomps; + + /* get all EQ, NE, GT, and LT fcmps. 
if the other two + * functions were executed only these four predicates should exist */ + for (auto &F : M) { + + if (!isInInstrumentList(&F)) continue; + + for (auto &BB : F) { + + for (auto &IN : BB) { + + CmpInst *selectcmpInst = nullptr; + + if ((selectcmpInst = dyn_cast<CmpInst>(&IN))) { + + if (selectcmpInst->getPredicate() == CmpInst::FCMP_OEQ || + selectcmpInst->getPredicate() == CmpInst::FCMP_ONE || + selectcmpInst->getPredicate() == CmpInst::FCMP_UNE || + selectcmpInst->getPredicate() == CmpInst::FCMP_UGT || + selectcmpInst->getPredicate() == CmpInst::FCMP_OGT || + selectcmpInst->getPredicate() == CmpInst::FCMP_ULT || + selectcmpInst->getPredicate() == CmpInst::FCMP_OLT) { + + auto op0 = selectcmpInst->getOperand(0); + auto op1 = selectcmpInst->getOperand(1); + + Type *TyOp0 = op0->getType(); + Type *TyOp1 = op1->getType(); + + if (TyOp0 != TyOp1) { continue; } + + if (TyOp0->isArrayTy() || TyOp0->isVectorTy()) { continue; } + + fcomps.push_back(selectcmpInst); + + } + + } + + } + + } + + } + + if (!fcomps.size()) { return count; } + + IntegerType *Int1Ty = IntegerType::getInt1Ty(C); + + for (auto &FcmpInst : fcomps) { + + BasicBlock *bb = FcmpInst->getParent(); + + auto op0 = FcmpInst->getOperand(0); + auto op1 = FcmpInst->getOperand(1); + + unsigned op_size; + op_size = op0->getType()->getPrimitiveSizeInBits(); + + if (op_size != op1->getType()->getPrimitiveSizeInBits()) { continue; } + + const unsigned int sizeInBits = op0->getType()->getPrimitiveSizeInBits(); + const unsigned int precision = + sizeInBits == 32 + ? 24 + : sizeInBits == 64 + ? 53 + : sizeInBits == 128 ? 113 + : sizeInBits == 16 ? 11 + /* sizeInBits == 80 */ + : 65; + + const unsigned shiftR_exponent = precision - 1; + const unsigned long long mask_fraction = + (1ULL << (shiftR_exponent - 1)) | ((1ULL << (shiftR_exponent - 1)) - 1); + const unsigned long long mask_exponent = + (1ULL << (sizeInBits - precision)) - 1; + + // round up sizes to the next power of two + // this should help with integer compare splitting + size_t exTySizeBytes = ((sizeInBits - precision + 7) >> 3); + size_t frTySizeBytes = ((precision - 1ULL + 7) >> 3); + + IntegerType *IntExponentTy = + IntegerType::get(C, nextPowerOfTwo(exTySizeBytes) << 3); + IntegerType *IntFractionTy = + IntegerType::get(C, nextPowerOfTwo(frTySizeBytes) << 3); + + // errs() << "Fractions: IntFractionTy size " << + // IntFractionTy->getPrimitiveSizeInBits() << ", op_size " << op_size << + // ", mask " << mask_fraction << + // ", precision " << precision << "\n"; + + BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(FcmpInst)); + + /* create the integers from floats directly */ + Instruction *b_op0, *b_op1; + b_op0 = CastInst::Create(Instruction::BitCast, op0, + IntegerType::get(C, op_size)); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), b_op0); + + b_op1 = CastInst::Create(Instruction::BitCast, op1, + IntegerType::get(C, op_size)); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), b_op1); + + /* isolate signs of value of floating point type */ + + /* create a 1 bit compare for the sign bit. 
to do this shift and trunc + * the original operands so only the first bit remains.*/ + Instruction *s_s0, *t_s0, *s_s1, *t_s1, *icmp_sign_bit; + + s_s0 = + BinaryOperator::Create(Instruction::LShr, b_op0, + ConstantInt::get(b_op0->getType(), op_size - 1)); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_s0); + t_s0 = new TruncInst(s_s0, Int1Ty); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), t_s0); + + s_s1 = + BinaryOperator::Create(Instruction::LShr, b_op1, + ConstantInt::get(b_op1->getType(), op_size - 1)); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_s1); + t_s1 = new TruncInst(s_s1, Int1Ty); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), t_s1); + + /* compare of the sign bits */ + icmp_sign_bit = + CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, t_s0, t_s1); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), + icmp_sign_bit); + + /* create a new basic block which is executed if the signedness bits are + * equal */ + BasicBlock *signequal_bb = + BasicBlock::Create(C, "signequal", end_bb->getParent(), end_bb); + + BranchInst::Create(end_bb, signequal_bb); + + /* create a new bb which is executed if exponents are satisfying the compare + */ + BasicBlock *middle_bb = + BasicBlock::Create(C, "injected", end_bb->getParent(), end_bb); + + BranchInst::Create(end_bb, middle_bb); + + auto term = bb->getTerminator(); + /* if the signs are different goto end_bb else to signequal_bb */ + BranchInst::Create(signequal_bb, end_bb, icmp_sign_bit, bb); + term->eraseFromParent(); + + /* insert code for equal signs */ + + /* isolate the exponents */ + Instruction *s_e0, *m_e0, *t_e0, *s_e1, *m_e1, *t_e1; + + s_e0 = BinaryOperator::Create( + Instruction::LShr, b_op0, + ConstantInt::get(b_op0->getType(), shiftR_exponent)); + s_e1 = BinaryOperator::Create( + Instruction::LShr, b_op1, + ConstantInt::get(b_op1->getType(), shiftR_exponent)); + signequal_bb->getInstList().insert( + BasicBlock::iterator(signequal_bb->getTerminator()), s_e0); + signequal_bb->getInstList().insert( + BasicBlock::iterator(signequal_bb->getTerminator()), s_e1); + + t_e0 = new TruncInst(s_e0, IntExponentTy); + t_e1 = new TruncInst(s_e1, IntExponentTy); + signequal_bb->getInstList().insert( + BasicBlock::iterator(signequal_bb->getTerminator()), t_e0); + signequal_bb->getInstList().insert( + BasicBlock::iterator(signequal_bb->getTerminator()), t_e1); + + if (sizeInBits - precision < exTySizeBytes * 8) { + + m_e0 = BinaryOperator::Create( + Instruction::And, t_e0, + ConstantInt::get(t_e0->getType(), mask_exponent)); + m_e1 = BinaryOperator::Create( + Instruction::And, t_e1, + ConstantInt::get(t_e1->getType(), mask_exponent)); + signequal_bb->getInstList().insert( + BasicBlock::iterator(signequal_bb->getTerminator()), m_e0); + signequal_bb->getInstList().insert( + BasicBlock::iterator(signequal_bb->getTerminator()), m_e1); + + } else { + + m_e0 = t_e0; + m_e1 = t_e1; + + } + + /* compare the exponents of the operands */ + Instruction *icmp_exponents_equal; + Instruction *icmp_exponent_result; + BasicBlock * signequal2_bb = signequal_bb; + switch (FcmpInst->getPredicate()) { + + case CmpInst::FCMP_OEQ: + icmp_exponent_result = + CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, m_e0, m_e1); + break; + case CmpInst::FCMP_ONE: + case CmpInst::FCMP_UNE: + icmp_exponent_result = + CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_NE, m_e0, m_e1); + break; + /* compare the exponents of the operands (signs are equal) + * if 
exponents are equal -> proceed to mantissa comparison + * else get result depending on sign + */ + case CmpInst::FCMP_OGT: + case CmpInst::FCMP_UGT: + Instruction *icmp_exponent; + icmp_exponents_equal = + CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, m_e0, m_e1); + signequal_bb->getInstList().insert( + BasicBlock::iterator(signequal_bb->getTerminator()), + icmp_exponents_equal); + + // shortcut for unequal exponents + signequal2_bb = signequal_bb->splitBasicBlock( + BasicBlock::iterator(signequal_bb->getTerminator())); + + /* if the exponents are equal goto middle_bb else to signequal2_bb */ + term = signequal_bb->getTerminator(); + BranchInst::Create(middle_bb, signequal2_bb, icmp_exponents_equal, + signequal_bb); + term->eraseFromParent(); + + icmp_exponent = + CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_UGT, m_e0, m_e1); + signequal2_bb->getInstList().insert( + BasicBlock::iterator(signequal2_bb->getTerminator()), + icmp_exponent); + icmp_exponent_result = + BinaryOperator::Create(Instruction::Xor, icmp_exponent, t_s0); + break; + case CmpInst::FCMP_OLT: + case CmpInst::FCMP_ULT: + icmp_exponents_equal = + CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, m_e0, m_e1); + signequal_bb->getInstList().insert( + BasicBlock::iterator(signequal_bb->getTerminator()), + icmp_exponents_equal); + + // shortcut for unequal exponents + signequal2_bb = signequal_bb->splitBasicBlock( + BasicBlock::iterator(signequal_bb->getTerminator())); + + /* if the exponents are equal goto middle_bb else to signequal2_bb */ + term = signequal_bb->getTerminator(); + BranchInst::Create(middle_bb, signequal2_bb, icmp_exponents_equal, + signequal_bb); + term->eraseFromParent(); + + icmp_exponent = + CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, m_e0, m_e1); + signequal2_bb->getInstList().insert( + BasicBlock::iterator(signequal2_bb->getTerminator()), + icmp_exponent); + icmp_exponent_result = + BinaryOperator::Create(Instruction::Xor, icmp_exponent, t_s0); + break; + default: + continue; + + } + + signequal2_bb->getInstList().insert( + BasicBlock::iterator(signequal2_bb->getTerminator()), + icmp_exponent_result); + + { + + term = signequal2_bb->getTerminator(); + + switch (FcmpInst->getPredicate()) { + + case CmpInst::FCMP_OEQ: + /* if the exponents are satifying the compare do a fraction cmp in + * middle_bb */ + BranchInst::Create(middle_bb, end_bb, icmp_exponent_result, + signequal2_bb); + break; + case CmpInst::FCMP_ONE: + case CmpInst::FCMP_UNE: + /* if the exponents are satifying the compare do a fraction cmp in + * middle_bb */ + BranchInst::Create(end_bb, middle_bb, icmp_exponent_result, + signequal2_bb); + break; + case CmpInst::FCMP_OGT: + case CmpInst::FCMP_UGT: + case CmpInst::FCMP_OLT: + case CmpInst::FCMP_ULT: + BranchInst::Create(end_bb, signequal2_bb); + break; + default: + continue; + + } + + term->eraseFromParent(); + + } + + /* isolate the mantissa aka fraction */ + Instruction *t_f0, *t_f1; + bool needTrunc = IntFractionTy->getPrimitiveSizeInBits() < op_size; + + if (precision - 1 < frTySizeBytes * 8) { + + Instruction *m_f0, *m_f1; + m_f0 = BinaryOperator::Create( + Instruction::And, b_op0, + ConstantInt::get(b_op0->getType(), mask_fraction)); + m_f1 = BinaryOperator::Create( + Instruction::And, b_op1, + ConstantInt::get(b_op1->getType(), mask_fraction)); + middle_bb->getInstList().insert( + BasicBlock::iterator(middle_bb->getTerminator()), m_f0); + middle_bb->getInstList().insert( + BasicBlock::iterator(middle_bb->getTerminator()), m_f1); + + if (needTrunc) { + + t_f0 = 
new TruncInst(m_f0, IntFractionTy); + t_f1 = new TruncInst(m_f1, IntFractionTy); + middle_bb->getInstList().insert( + BasicBlock::iterator(middle_bb->getTerminator()), t_f0); + middle_bb->getInstList().insert( + BasicBlock::iterator(middle_bb->getTerminator()), t_f1); + + } else { + + t_f0 = m_f0; + t_f1 = m_f1; + + } + + } else { + + if (needTrunc) { + + t_f0 = new TruncInst(b_op0, IntFractionTy); + t_f1 = new TruncInst(b_op1, IntFractionTy); + middle_bb->getInstList().insert( + BasicBlock::iterator(middle_bb->getTerminator()), t_f0); + middle_bb->getInstList().insert( + BasicBlock::iterator(middle_bb->getTerminator()), t_f1); + + } else { + + t_f0 = b_op0; + t_f1 = b_op1; + + } + + } + + /* compare the fractions of the operands */ + Instruction *icmp_fraction_result; + Instruction *icmp_fraction_result2; + BasicBlock * middle2_bb = middle_bb; + PHINode * PN2 = nullptr; + switch (FcmpInst->getPredicate()) { + + case CmpInst::FCMP_OEQ: + icmp_fraction_result = + CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, t_f0, t_f1); + middle2_bb->getInstList().insert( + BasicBlock::iterator(middle2_bb->getTerminator()), + icmp_fraction_result); + + break; + case CmpInst::FCMP_UNE: + case CmpInst::FCMP_ONE: + icmp_fraction_result = + CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_NE, t_f0, t_f1); + middle2_bb->getInstList().insert( + BasicBlock::iterator(middle2_bb->getTerminator()), + icmp_fraction_result); + + break; + case CmpInst::FCMP_OGT: + case CmpInst::FCMP_UGT: + case CmpInst::FCMP_OLT: + case CmpInst::FCMP_ULT: { + + middle2_bb = middle_bb->splitBasicBlock( + BasicBlock::iterator(middle_bb->getTerminator())); + + BasicBlock *negative_bb = BasicBlock::Create( + C, "negative_value", middle2_bb->getParent(), middle2_bb); + BasicBlock *positive_bb = BasicBlock::Create( + C, "positive_value", negative_bb->getParent(), negative_bb); + + if (FcmpInst->getPredicate() == CmpInst::FCMP_OGT || + FcmpInst->getPredicate() == CmpInst::FCMP_UGT) { + + negative_bb->getInstList().push_back( + icmp_fraction_result = CmpInst::Create( + Instruction::ICmp, CmpInst::ICMP_ULT, t_f0, t_f1)); + positive_bb->getInstList().push_back( + icmp_fraction_result2 = CmpInst::Create( + Instruction::ICmp, CmpInst::ICMP_UGT, t_f0, t_f1)); + + } else { + + negative_bb->getInstList().push_back( + icmp_fraction_result = CmpInst::Create( + Instruction::ICmp, CmpInst::ICMP_UGT, t_f0, t_f1)); + positive_bb->getInstList().push_back( + icmp_fraction_result2 = CmpInst::Create( + Instruction::ICmp, CmpInst::ICMP_ULT, t_f0, t_f1)); + + } + + BranchInst::Create(middle2_bb, negative_bb); + BranchInst::Create(middle2_bb, positive_bb); + + term = middle_bb->getTerminator(); + BranchInst::Create(negative_bb, positive_bb, t_s0, middle_bb); + term->eraseFromParent(); + + PN2 = PHINode::Create(Int1Ty, 2, ""); + PN2->addIncoming(icmp_fraction_result, negative_bb); + PN2->addIncoming(icmp_fraction_result2, positive_bb); + middle2_bb->getInstList().insert( + BasicBlock::iterator(middle2_bb->getTerminator()), PN2); + + } break; + + default: + continue; + + } + + PHINode *PN = PHINode::Create(Int1Ty, 3, ""); + + switch (FcmpInst->getPredicate()) { + + case CmpInst::FCMP_OEQ: + /* unequal signs cannot be equal values */ + /* goto false branch */ + PN->addIncoming(ConstantInt::get(Int1Ty, 0), bb); + /* unequal exponents cannot be equal values, too */ + PN->addIncoming(ConstantInt::get(Int1Ty, 0), signequal_bb); + /* fractions comparison */ + PN->addIncoming(icmp_fraction_result, middle2_bb); + break; + case CmpInst::FCMP_ONE: + case 
CmpInst::FCMP_UNE: + /* unequal signs are unequal values */ + /* goto true branch */ + PN->addIncoming(ConstantInt::get(Int1Ty, 1), bb); + /* unequal exponents are unequal values, too */ + PN->addIncoming(icmp_exponent_result, signequal_bb); + /* fractions comparison */ + PN->addIncoming(icmp_fraction_result, middle2_bb); + break; + case CmpInst::FCMP_OGT: + case CmpInst::FCMP_UGT: + /* if op1 is negative goto true branch, + else go on comparing */ + PN->addIncoming(t_s1, bb); + PN->addIncoming(icmp_exponent_result, signequal2_bb); + PN->addIncoming(PN2, middle2_bb); + break; + case CmpInst::FCMP_OLT: + case CmpInst::FCMP_ULT: + /* if op0 is negative goto true branch, + else go on comparing */ + PN->addIncoming(t_s0, bb); + PN->addIncoming(icmp_exponent_result, signequal2_bb); + PN->addIncoming(PN2, middle2_bb); + break; + default: + continue; + + } + + BasicBlock::iterator ii(FcmpInst); + ReplaceInstWithInst(FcmpInst->getParent()->getInstList(), ii, PN); + ++count; + + } + + return count; + +} + +/* splits icmps of size bitw into two nested icmps with bitw/2 size each */ +size_t SplitComparesTransform::splitIntCompares(Module &M, unsigned bitw) { + + size_t count = 0; + + LLVMContext &C = M.getContext(); + + IntegerType *Int1Ty = IntegerType::getInt1Ty(C); + IntegerType *OldIntType = IntegerType::get(C, bitw); + IntegerType *NewIntType = IntegerType::get(C, bitw / 2); + + std::vector<Instruction *> icomps; + + if (bitw % 2) { return 0; } + + /* not supported yet */ + if (bitw > 64) { return 0; } + + /* get all EQ, NE, UGT, and ULT icmps of width bitw. if the + * functions simplifyCompares() and simplifyIntSignedness() + * were executed only these four predicates should exist */ + for (auto &F : M) { + + if (!isInInstrumentList(&F)) continue; + + for (auto &BB : F) { + + for (auto &IN : BB) { + + CmpInst *selectcmpInst = nullptr; + + if ((selectcmpInst = dyn_cast<CmpInst>(&IN))) { + + if (selectcmpInst->getPredicate() == CmpInst::ICMP_EQ || + selectcmpInst->getPredicate() == CmpInst::ICMP_NE || + selectcmpInst->getPredicate() == CmpInst::ICMP_UGT || + selectcmpInst->getPredicate() == CmpInst::ICMP_ULT) { + + auto op0 = selectcmpInst->getOperand(0); + auto op1 = selectcmpInst->getOperand(1); + + IntegerType *intTyOp0 = dyn_cast<IntegerType>(op0->getType()); + IntegerType *intTyOp1 = dyn_cast<IntegerType>(op1->getType()); + + if (!intTyOp0 || !intTyOp1) { continue; } + + /* check if the bitwidths are the one we are looking for */ + if (intTyOp0->getBitWidth() != bitw || + intTyOp1->getBitWidth() != bitw) { + + continue; + + } + + icomps.push_back(selectcmpInst); + + } + + } + + } + + } + + } + + if (!icomps.size()) { return 0; } + + for (auto &IcmpInst : icomps) { + + BasicBlock *bb = IcmpInst->getParent(); + + auto op0 = IcmpInst->getOperand(0); + auto op1 = IcmpInst->getOperand(1); + + auto pred = dyn_cast<CmpInst>(IcmpInst)->getPredicate(); + + BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(IcmpInst)); + + /* create the comparison of the top halves of the original operands */ + Instruction *s_op0, *op0_high, *s_op1, *op1_high, *icmp_high; + + s_op0 = BinaryOperator::Create(Instruction::LShr, op0, + ConstantInt::get(OldIntType, bitw / 2)); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op0); + op0_high = new TruncInst(s_op0, NewIntType); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), + op0_high); + + s_op1 = BinaryOperator::Create(Instruction::LShr, op1, + ConstantInt::get(OldIntType, bitw / 2)); + 
bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op1); + op1_high = new TruncInst(s_op1, NewIntType); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), + op1_high); + + icmp_high = CmpInst::Create(Instruction::ICmp, pred, op0_high, op1_high); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), + icmp_high); + + /* now we have to destinguish between == != and > < */ + if (pred == CmpInst::ICMP_EQ || pred == CmpInst::ICMP_NE) { + + /* transformation for == and != icmps */ + + /* create a compare for the lower half of the original operands */ + Instruction *op0_low, *op1_low, *icmp_low; + BasicBlock * cmp_low_bb = + BasicBlock::Create(C, "injected", end_bb->getParent(), end_bb); + + op0_low = new TruncInst(op0, NewIntType); + cmp_low_bb->getInstList().push_back(op0_low); + + op1_low = new TruncInst(op1, NewIntType); + cmp_low_bb->getInstList().push_back(op1_low); + + icmp_low = CmpInst::Create(Instruction::ICmp, pred, op0_low, op1_low); + cmp_low_bb->getInstList().push_back(icmp_low); + BranchInst::Create(end_bb, cmp_low_bb); + + /* dependent on the cmp of the high parts go to the end or go on with + * the comparison */ + auto term = bb->getTerminator(); + if (pred == CmpInst::ICMP_EQ) { + + BranchInst::Create(cmp_low_bb, end_bb, icmp_high, bb); + + } else { + + /* CmpInst::ICMP_NE */ + BranchInst::Create(end_bb, cmp_low_bb, icmp_high, bb); + + } + + term->eraseFromParent(); + + /* create the PHI and connect the edges accordingly */ + PHINode *PN = PHINode::Create(Int1Ty, 2, ""); + PN->addIncoming(icmp_low, cmp_low_bb); + if (pred == CmpInst::ICMP_EQ) { + + PN->addIncoming(ConstantInt::get(Int1Ty, 0), bb); + + } else { + + /* CmpInst::ICMP_NE */ + PN->addIncoming(ConstantInt::get(Int1Ty, 1), bb); + + } + + /* replace the old icmp with the new PHI */ + BasicBlock::iterator ii(IcmpInst); + ReplaceInstWithInst(IcmpInst->getParent()->getInstList(), ii, PN); + + } else { + + /* CmpInst::ICMP_UGT and CmpInst::ICMP_ULT */ + /* transformations for < and > */ + + /* create a basic block which checks for the inverse predicate. 
+ * if this is true we can go to the end if not we have to go to the + * bb which checks the lower half of the operands */ + Instruction *icmp_inv_cmp, *op0_low, *op1_low, *icmp_low; + BasicBlock * inv_cmp_bb = + BasicBlock::Create(C, "inv_cmp", end_bb->getParent(), end_bb); + if (pred == CmpInst::ICMP_UGT) { + + icmp_inv_cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, + op0_high, op1_high); + + } else { + + icmp_inv_cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_UGT, + op0_high, op1_high); + + } + + inv_cmp_bb->getInstList().push_back(icmp_inv_cmp); + + auto term = bb->getTerminator(); + term->eraseFromParent(); + BranchInst::Create(end_bb, inv_cmp_bb, icmp_high, bb); + + /* create a bb which handles the cmp of the lower halves */ + BasicBlock *cmp_low_bb = + BasicBlock::Create(C, "injected", end_bb->getParent(), end_bb); + op0_low = new TruncInst(op0, NewIntType); + cmp_low_bb->getInstList().push_back(op0_low); + op1_low = new TruncInst(op1, NewIntType); + cmp_low_bb->getInstList().push_back(op1_low); + + icmp_low = CmpInst::Create(Instruction::ICmp, pred, op0_low, op1_low); + cmp_low_bb->getInstList().push_back(icmp_low); + BranchInst::Create(end_bb, cmp_low_bb); + + BranchInst::Create(end_bb, cmp_low_bb, icmp_inv_cmp, inv_cmp_bb); + + PHINode *PN = PHINode::Create(Int1Ty, 3); + PN->addIncoming(icmp_low, cmp_low_bb); + PN->addIncoming(ConstantInt::get(Int1Ty, 1), bb); + PN->addIncoming(ConstantInt::get(Int1Ty, 0), inv_cmp_bb); + + BasicBlock::iterator ii(IcmpInst); + ReplaceInstWithInst(IcmpInst->getParent()->getInstList(), ii, PN); + + } + + ++count; + + } + + return count; + +} + +bool SplitComparesTransform::runOnModule(Module &M) { + + int bitw = 64; + size_t count; + + char *bitw_env = getenv("AFL_LLVM_LAF_SPLIT_COMPARES_BITW"); + if (!bitw_env) bitw_env = getenv("LAF_SPLIT_COMPARES_BITW"); + if (bitw_env) { bitw = atoi(bitw_env); } + + enableFPSplit = getenv("AFL_LLVM_LAF_SPLIT_FLOATS") != NULL; + + if ((isatty(2) && getenv("AFL_QUIET") == NULL) || + getenv("AFL_DEBUG") != NULL) { + + errs() << "Split-compare-pass by laf.intel@gmail.com, extended by " + "heiko@hexco.de\n"; + + } else { + + be_quiet = 1; + + } + + if (enableFPSplit) { + + count = splitFPCompares(M); + + if (!be_quiet) { + + errs() << "Split-floatingpoint-compare-pass: " << count + << " FP comparisons split\n"; + + } + + simplifyFPCompares(M); + + } + + simplifyCompares(M); + + simplifyIntSignedness(M); + + switch (bitw) { + + case 64: + count = splitIntCompares(M, bitw); + if (!be_quiet) + errs() << "Split-integer-compare-pass " << bitw << "bit: " << count + << " split\n"; + + bitw >>= 1; +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 7) + [[clang::fallthrough]]; /*FALLTHRU*/ /* FALLTHROUGH */ +#endif + case 32: + count = splitIntCompares(M, bitw); + if (!be_quiet) + errs() << "Split-integer-compare-pass " << bitw << "bit: " << count + << " split\n"; + + bitw >>= 1; +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 7) + [[clang::fallthrough]]; /*FALLTHRU*/ /* FALLTHROUGH */ +#endif + case 16: + count = splitIntCompares(M, bitw); + if (!be_quiet) + errs() << "Split-integer-compare-pass " << bitw << "bit: " << count + << " split\n"; + + bitw >>= 1; + break; + + default: + if (!be_quiet) errs() << "NOT Running split-compare-pass \n"; + return false; + break; + + } + + verifyModule(M); + return true; + +} + +static void registerSplitComparesPass(const PassManagerBuilder &, + legacy::PassManagerBase &PM) { + + PM.add(new 
SplitComparesTransform()); + +} + +static RegisterStandardPasses RegisterSplitComparesPass( + PassManagerBuilder::EP_OptimizerLast, registerSplitComparesPass); + +static RegisterStandardPasses RegisterSplitComparesTransPass0( + PassManagerBuilder::EP_EnabledOnOptLevel0, registerSplitComparesPass); + +#if LLVM_VERSION_MAJOR >= 11 +static RegisterStandardPasses RegisterSplitComparesTransPassLTO( + PassManagerBuilder::EP_FullLinkTimeOptimizationLast, + registerSplitComparesPass); +#endif + diff --git a/instrumentation/split-switches-pass.so.cc b/instrumentation/split-switches-pass.so.cc new file mode 100644 index 00000000..a79d4114 --- /dev/null +++ b/instrumentation/split-switches-pass.so.cc @@ -0,0 +1,447 @@ +/* + * Copyright 2016 laf-intel + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <list> +#include <string> +#include <fstream> +#include <sys/time.h> + +#include "llvm/Config/llvm-config.h" + +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/ValueTracking.h" + +#include "llvm/IR/IRBuilder.h" +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) + #include "llvm/IR/Verifier.h" + #include "llvm/IR/DebugInfo.h" +#else + #include "llvm/Analysis/Verifier.h" + #include "llvm/DebugInfo.h" + #define nullptr 0 +#endif + +#include <set> +#include "afl-llvm-common.h" + +using namespace llvm; + +namespace { + +class SplitSwitchesTransform : public ModulePass { + + public: + static char ID; + SplitSwitchesTransform() : ModulePass(ID) { + + initInstrumentList(); + + } + + bool runOnModule(Module &M) override; + +#if LLVM_VERSION_MAJOR >= 4 + StringRef getPassName() const override { + +#else + const char *getPassName() const override { + +#endif + return "splits switch constructs"; + + } + + struct CaseExpr { + + ConstantInt *Val; + BasicBlock * BB; + + CaseExpr(ConstantInt *val = nullptr, BasicBlock *bb = nullptr) + : Val(val), BB(bb) { + + } + + }; + + typedef std::vector<CaseExpr> CaseVector; + + private: + bool splitSwitches(Module &M); + bool transformCmps(Module &M, const bool processStrcmp, + const bool processMemcmp); + BasicBlock *switchConvert(CaseVector Cases, std::vector<bool> bytesChecked, + BasicBlock *OrigBlock, BasicBlock *NewDefault, + Value *Val, unsigned level); + +}; + +} // namespace + +char SplitSwitchesTransform::ID = 0; + +/* switchConvert - Transform simple list of Cases into list of CaseRange's */ +BasicBlock *SplitSwitchesTransform::switchConvert( + CaseVector Cases, std::vector<bool> bytesChecked, BasicBlock *OrigBlock, + BasicBlock *NewDefault, Value *Val, unsigned level) { + + unsigned ValTypeBitWidth = Cases[0].Val->getBitWidth(); + IntegerType 
*ValType = + IntegerType::get(OrigBlock->getContext(), ValTypeBitWidth); + IntegerType * ByteType = IntegerType::get(OrigBlock->getContext(), 8); + unsigned BytesInValue = bytesChecked.size(); + std::vector<uint8_t> setSizes; + std::vector<std::set<uint8_t> > byteSets(BytesInValue, std::set<uint8_t>()); + + assert(ValTypeBitWidth >= 8 && ValTypeBitWidth <= 64); + + /* for each of the possible cases we iterate over all bytes of the values + * build a set of possible values at each byte position in byteSets */ + for (CaseExpr &Case : Cases) { + + for (unsigned i = 0; i < BytesInValue; i++) { + + uint8_t byte = (Case.Val->getZExtValue() >> (i * 8)) & 0xFF; + byteSets[i].insert(byte); + + } + + } + + /* find the index of the first byte position that was not yet checked. then + * save the number of possible values at that byte position */ + unsigned smallestIndex = 0; + unsigned smallestSize = 257; + for (unsigned i = 0; i < byteSets.size(); i++) { + + if (bytesChecked[i]) continue; + if (byteSets[i].size() < smallestSize) { + + smallestIndex = i; + smallestSize = byteSets[i].size(); + + } + + } + + assert(bytesChecked[smallestIndex] == false); + + /* there are only smallestSize different bytes at index smallestIndex */ + + Instruction *Shift, *Trunc; + Function * F = OrigBlock->getParent(); + BasicBlock * NewNode = BasicBlock::Create(Val->getContext(), "NodeBlock", F); + Shift = BinaryOperator::Create(Instruction::LShr, Val, + ConstantInt::get(ValType, smallestIndex * 8)); + NewNode->getInstList().push_back(Shift); + + if (ValTypeBitWidth > 8) { + + Trunc = new TruncInst(Shift, ByteType); + NewNode->getInstList().push_back(Trunc); + + } else { + + /* not necessary to trunc */ + Trunc = Shift; + + } + + /* this is a trivial case, we can directly check for the byte, + * if the byte is not found go to default. if the byte was found + * mark the byte as checked. if this was the last byte to check + * we can finally execute the block belonging to this case */ + + if (smallestSize == 1) { + + uint8_t byte = *(byteSets[smallestIndex].begin()); + + /* insert instructions to check whether the value we are switching on is + * equal to byte */ + ICmpInst *Comp = + new ICmpInst(ICmpInst::ICMP_EQ, Trunc, ConstantInt::get(ByteType, byte), + "byteMatch"); + NewNode->getInstList().push_back(Comp); + + bytesChecked[smallestIndex] = true; + bool allBytesAreChecked = true; + + for (std::vector<bool>::iterator BCI = bytesChecked.begin(), + E = bytesChecked.end(); + BCI != E; ++BCI) { + + if (!*BCI) { + + allBytesAreChecked = false; + break; + + } + + } + + // if (std::all_of(bytesChecked.begin(), bytesChecked.end(), + // [](bool b) { return b; })) { + + if (allBytesAreChecked) { + + assert(Cases.size() == 1); + BranchInst::Create(Cases[0].BB, NewDefault, Comp, NewNode); + + /* we have to update the phi nodes! */ + for (BasicBlock::iterator I = Cases[0].BB->begin(); + I != Cases[0].BB->end(); ++I) { + + if (!isa<PHINode>(&*I)) { continue; } + PHINode *PN = cast<PHINode>(I); + + /* Only update the first occurrence. 
*/ + unsigned Idx = 0, E = PN->getNumIncomingValues(); + for (; Idx != E; ++Idx) { + + if (PN->getIncomingBlock(Idx) == OrigBlock) { + + PN->setIncomingBlock(Idx, NewNode); + break; + + } + + } + + } + + } else { + + BasicBlock *BB = switchConvert(Cases, bytesChecked, OrigBlock, NewDefault, + Val, level + 1); + BranchInst::Create(BB, NewDefault, Comp, NewNode); + + } + + } + + /* there is no byte which we can directly check on, split the tree */ + else { + + std::vector<uint8_t> byteVector; + std::copy(byteSets[smallestIndex].begin(), byteSets[smallestIndex].end(), + std::back_inserter(byteVector)); + std::sort(byteVector.begin(), byteVector.end()); + uint8_t pivot = byteVector[byteVector.size() / 2]; + + /* we already chose to divide the cases based on the value of byte at index + * smallestIndex the pivot value determines the threshold for the decicion; + * if a case value + * is smaller at this byte index move it to the LHS vector, otherwise to the + * RHS vector */ + + CaseVector LHSCases, RHSCases; + + for (CaseExpr &Case : Cases) { + + uint8_t byte = (Case.Val->getZExtValue() >> (smallestIndex * 8)) & 0xFF; + + if (byte < pivot) { + + LHSCases.push_back(Case); + + } else { + + RHSCases.push_back(Case); + + } + + } + + BasicBlock *LBB, *RBB; + LBB = switchConvert(LHSCases, bytesChecked, OrigBlock, NewDefault, Val, + level + 1); + RBB = switchConvert(RHSCases, bytesChecked, OrigBlock, NewDefault, Val, + level + 1); + + /* insert instructions to check whether the value we are switching on is + * equal to byte */ + ICmpInst *Comp = + new ICmpInst(ICmpInst::ICMP_ULT, Trunc, + ConstantInt::get(ByteType, pivot), "byteMatch"); + NewNode->getInstList().push_back(Comp); + BranchInst::Create(LBB, RBB, Comp, NewNode); + + } + + return NewNode; + +} + +bool SplitSwitchesTransform::splitSwitches(Module &M) { + +#if (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 7) + LLVMContext &C = M.getContext(); +#endif + + std::vector<SwitchInst *> switches; + + /* iterate over all functions, bbs and instruction and add + * all switches to switches vector for later processing */ + for (auto &F : M) { + + if (!isInInstrumentList(&F)) continue; + + for (auto &BB : F) { + + SwitchInst *switchInst = nullptr; + + if ((switchInst = dyn_cast<SwitchInst>(BB.getTerminator()))) { + + if (switchInst->getNumCases() < 1) continue; + switches.push_back(switchInst); + + } + + } + + } + + if (!switches.size()) return false; + if (!be_quiet) + errs() << "Rewriting " << switches.size() << " switch statements " + << "\n"; + + for (auto &SI : switches) { + + BasicBlock *CurBlock = SI->getParent(); + BasicBlock *OrigBlock = CurBlock; + Function * F = CurBlock->getParent(); + /* this is the value we are switching on */ + Value * Val = SI->getCondition(); + BasicBlock *Default = SI->getDefaultDest(); + unsigned bitw = Val->getType()->getIntegerBitWidth(); + + if (!be_quiet) + errs() << "switch: " << SI->getNumCases() << " cases " << bitw + << " bit\n"; + + /* If there is only the default destination or the condition checks 8 bit or + * less, don't bother with the code below. */ + if (!SI->getNumCases() || bitw <= 8) { + + if (!be_quiet) errs() << "skip trivial switch..\n"; + continue; + + } + + /* Create a new, empty default block so that the new hierarchy of + * if-then statements go to this and the PHI nodes are happy. 
+ * if the default block is set as an unreachable we avoid creating one + * because will never be a valid target.*/ + BasicBlock *NewDefault = nullptr; + NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault", F, Default); + BranchInst::Create(Default, NewDefault); + + /* Prepare cases vector. */ + CaseVector Cases; + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; + ++i) +#if LLVM_VERSION_MAJOR < 5 + Cases.push_back(CaseExpr(i.getCaseValue(), i.getCaseSuccessor())); +#else + Cases.push_back(CaseExpr(i->getCaseValue(), i->getCaseSuccessor())); +#endif + /* bugfix thanks to pbst + * round up bytesChecked (in case getBitWidth() % 8 != 0) */ + std::vector<bool> bytesChecked((7 + Cases[0].Val->getBitWidth()) / 8, + false); + BasicBlock * SwitchBlock = + switchConvert(Cases, bytesChecked, OrigBlock, NewDefault, Val, 0); + + /* Branch to our shiny new if-then stuff... */ + BranchInst::Create(SwitchBlock, OrigBlock); + + /* We are now done with the switch instruction, delete it. */ + CurBlock->getInstList().erase(SI); + + /* we have to update the phi nodes! */ + for (BasicBlock::iterator I = Default->begin(); I != Default->end(); ++I) { + + if (!isa<PHINode>(&*I)) { continue; } + PHINode *PN = cast<PHINode>(I); + + /* Only update the first occurrence. */ + unsigned Idx = 0, E = PN->getNumIncomingValues(); + for (; Idx != E; ++Idx) { + + if (PN->getIncomingBlock(Idx) == OrigBlock) { + + PN->setIncomingBlock(Idx, NewDefault); + break; + + } + + } + + } + + } + + verifyModule(M); + return true; + +} + +bool SplitSwitchesTransform::runOnModule(Module &M) { + + if ((isatty(2) && getenv("AFL_QUIET") == NULL) || getenv("AFL_DEBUG") != NULL) + llvm::errs() << "Running split-switches-pass by laf.intel@gmail.com\n"; + else + be_quiet = 1; + splitSwitches(M); + verifyModule(M); + + return true; + +} + +static void registerSplitSwitchesTransPass(const PassManagerBuilder &, + legacy::PassManagerBase &PM) { + + auto p = new SplitSwitchesTransform(); + PM.add(p); + +} + +static RegisterStandardPasses RegisterSplitSwitchesTransPass( + PassManagerBuilder::EP_OptimizerLast, registerSplitSwitchesTransPass); + +static RegisterStandardPasses RegisterSplitSwitchesTransPass0( + PassManagerBuilder::EP_EnabledOnOptLevel0, registerSplitSwitchesTransPass); + +#if LLVM_VERSION_MAJOR >= 11 +static RegisterStandardPasses RegisterSplitSwitchesTransPassLTO( + PassManagerBuilder::EP_FullLinkTimeOptimizationLast, + registerSplitSwitchesTransPass); +#endif + |
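The two files above implement the laf-intel style comparison splitting: splitIntCompares() rewrites a wide integer compare into a compare of the high halves that only falls through to the low halves when the high halves do not already decide the result, and SplitSwitchesTransform applies the same idea byte-wise to switch terminators. As a reading aid, here is a minimal source-level sketch of the semantics produced for a 64-bit unsigned compare after one splitting step; the helper names and the C++ form are purely illustrative (the pass rewrites LLVM IR, it does not emit source code):

#include <cstdint>

/* Illustration only: what a 64-bit ULT / EQ behaves like after one pass of
   splitIntCompares(M, 64). Helper names are hypothetical. */

static bool split_ult64(uint64_t a, uint64_t b) {

  uint32_t a_hi = (uint32_t)(a >> 32), b_hi = (uint32_t)(b >> 32);

  if (a_hi < b_hi) return true;      /* icmp_high on the upper halves decides */
  if (a_hi > b_hi) return false;     /* inverse compare (inv_cmp_bb) decides  */
  return (uint32_t)a < (uint32_t)b;  /* equal upper halves: cmp_low_bb        */

}

static bool split_eq64(uint64_t a, uint64_t b) {

  if ((uint32_t)(a >> 32) != (uint32_t)(b >> 32)) return false; /* icmp_high  */
  return (uint32_t)a == (uint32_t)b;                            /* cmp_low_bb */

}

The fallthrough cases in runOnModule() then split the resulting 32-bit compares into 16-bit ones (64 -> 32 -> 16, width selectable via AFL_LLVM_LAF_SPLIT_COMPARES_BITW), so the fuzzer gets coverage feedback for each partially matching half instead of a single hard wide comparison; the switch pass yields the same effect for switch conditions wider than 8 bit by building a byte-wise decision tree.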