diff options
Diffstat (limited to 'instrumentation')
-rw-r--r-- | instrumentation/README.laf-intel.md | 5 | ||||
-rw-r--r-- | instrumentation/README.llvm.md | 4 | ||||
-rw-r--r-- | instrumentation/SanitizerCoverageLTO.so.cc | 232 | ||||
-rw-r--r-- | instrumentation/SanitizerCoveragePCGUARD.so.cc | 309 | ||||
-rw-r--r-- | instrumentation/afl-compiler-rt.o.c | 235 | ||||
-rw-r--r-- | instrumentation/afl-gcc-pass.so.cc | 2 | ||||
-rw-r--r-- | instrumentation/afl-llvm-dict2file.so.cc | 42 | ||||
-rw-r--r-- | instrumentation/afl-llvm-lto-instrumentation.so.cc | 1118 | ||||
-rw-r--r-- | instrumentation/afl-llvm-lto-instrumentlist.so.cc | 9 | ||||
-rw-r--r-- | instrumentation/afl-llvm-pass.so.cc | 5 | ||||
-rw-r--r-- | instrumentation/afl-llvm-rt-lto.o.c | 2 | ||||
-rw-r--r-- | instrumentation/cmplog-instructions-pass.cc | 215 | ||||
-rw-r--r-- | instrumentation/cmplog-routines-pass.cc | 246 | ||||
-rw-r--r-- | instrumentation/cmplog-switches-pass.cc | 2 | ||||
-rw-r--r-- | instrumentation/compare-transform-pass.so.cc | 10 | ||||
-rw-r--r-- | instrumentation/split-compares-pass.so.cc | 14 | ||||
-rw-r--r-- | instrumentation/split-switches-pass.so.cc | 2 |
17 files changed, 1132 insertions, 1320 deletions
diff --git a/instrumentation/README.laf-intel.md b/instrumentation/README.laf-intel.md index 229807e8..789055ed 100644 --- a/instrumentation/README.laf-intel.md +++ b/instrumentation/README.laf-intel.md @@ -3,9 +3,8 @@ ## Introduction This originally is the work of an individual nicknamed laf-intel. -His blog [Circumventing Fuzzing Roadblocks with Compiler Transformations] -(https://lafintel.wordpress.com/) and gitlab repo [laf-llvm-pass] -(https://gitlab.com/laf-intel/laf-llvm-pass/) +His blog [Circumventing Fuzzing Roadblocks with Compiler Transformations](https://lafintel.wordpress.com/) +and gitlab repo [laf-llvm-pass](https://gitlab.com/laf-intel/laf-llvm-pass/) describe some code transformations that help AFL++ to enter conditional blocks, where conditions consist of comparisons of large values. diff --git a/instrumentation/README.llvm.md b/instrumentation/README.llvm.md index 6e210a7c..dbb604f2 100644 --- a/instrumentation/README.llvm.md +++ b/instrumentation/README.llvm.md @@ -2,7 +2,7 @@ (See [../README.md](../README.md) for the general instruction manual.) - (See [README.gcc_plugin.md](../README.gcc_plugin.md) for the GCC-based instrumentation.) + (See [README.gcc_plugin.md](README.gcc_plugin.md) for the GCC-based instrumentation.) ## 1) Introduction @@ -75,7 +75,7 @@ load modules (you'll see "Service unavailable" when loading afl-llvm-pass.so). To solve all your problems, you can grab pre-built binaries for your OS from: - http://llvm.org/releases/download.html + https://llvm.org/releases/download.html ...and then put the bin/ directory from the tarball at the beginning of your $PATH when compiling the feature and building packages later on. You don't need diff --git a/instrumentation/SanitizerCoverageLTO.so.cc b/instrumentation/SanitizerCoverageLTO.so.cc index e06f8b93..37726607 100644 --- a/instrumentation/SanitizerCoverageLTO.so.cc +++ b/instrumentation/SanitizerCoverageLTO.so.cc @@ -235,6 +235,8 @@ class ModuleSanitizerCoverage { uint32_t autodictionary = 1; uint32_t inst = 0; uint32_t afl_global_id = 0; + uint32_t unhandled = 0; + uint32_t select_cnt = 0; uint64_t map_addr = 0; const char * skip_nozero = NULL; const char * use_threadsafe_counters = nullptr; @@ -250,7 +252,7 @@ class ModuleSanitizerCoverage { Module * Mo = NULL; GlobalVariable * AFLMapPtr = NULL; Value * MapPtrFixed = NULL; - FILE * documentFile = NULL; + std::ofstream dFile; size_t found = 0; // afl++ END @@ -446,8 +448,8 @@ bool ModuleSanitizerCoverage::instrumentModule( if ((ptr = getenv("AFL_LLVM_DOCUMENT_IDS")) != NULL) { - if ((documentFile = fopen(ptr, "a")) == NULL) - WARNF("Cannot access document file %s", ptr); + dFile.open(ptr, std::ofstream::out | std::ofstream::app); + if (dFile.is_open()) WARNF("Cannot access document file %s", ptr); } @@ -619,7 +621,6 @@ bool ModuleSanitizerCoverage::instrumentModule( bool isStrncasecmp = true; bool isIntMemcpy = true; bool isStdString = true; - bool addedNull = false; size_t optLen = 0; Function *Callee = callInst->getCalledFunction(); @@ -799,7 +800,6 @@ bool ModuleSanitizerCoverage::instrumentModule( if (literalLength + 1 == optLength) { Str2.append("\0", 1); // add null byte - // addedNull = true; } @@ -907,8 +907,8 @@ bool ModuleSanitizerCoverage::instrumentModule( if (optLen < 2) { continue; } if (literalLength + 1 == optLen) { // add null byte + thestring.append("\0", 1); - addedNull = true; } @@ -920,14 +920,18 @@ bool ModuleSanitizerCoverage::instrumentModule( // was not already added if (!isMemcmp) { - if (addedNull == false && thestring[optLen - 1] != '\0') { + /* + if (addedNull == false && thestring[optLen - 1] != + '\0') { - thestring.append("\0", 1); // add null byte - optLen++; + thestring.append("\0", 1); // add null byte + optLen++; - } + } - if (!isStdString) { + */ + if (!isStdString && + thestring.find('\0', 0) != std::string::npos) { // ensure we do not have garbage size_t offset = thestring.find('\0', 0); @@ -1003,12 +1007,7 @@ bool ModuleSanitizerCoverage::instrumentModule( instrumentFunction(F, DTCallback, PDTCallback); // afl++ START - if (documentFile) { - - fclose(documentFile); - documentFile = NULL; - - } + if (dFile.is_open()) dFile.close(); if (!getenv("AFL_LLVM_LTO_DONTWRITEID") || dictionary.size() || map_addr) { @@ -1045,8 +1044,7 @@ bool ModuleSanitizerCoverage::instrumentModule( M, Int64Tyi, true, GlobalValue::ExternalLinkage, 0, "__afl_map_addr"); ConstantInt *MapAddr = ConstantInt::get(Int64Tyi, map_addr); StoreInst * StoreMapAddr = IRB.CreateStore(MapAddr, AFLMapAddrFixed); - StoreMapAddr->setMetadata(M.getMDKindID("nosanitize"), - MDNode::get(Ctx, None)); + ModuleSanitizerCoverage::SetNoSanitizeMetadata(StoreMapAddr); } @@ -1054,22 +1052,20 @@ bool ModuleSanitizerCoverage::instrumentModule( uint32_t write_loc = afl_global_id; - if (afl_global_id % 8) write_loc = (((afl_global_id + 8) >> 3) << 3); + write_loc = (((afl_global_id + 8) >> 3) << 3); GlobalVariable *AFLFinalLoc = new GlobalVariable(M, Int32Tyi, true, GlobalValue::ExternalLinkage, 0, "__afl_final_loc"); ConstantInt *const_loc = ConstantInt::get(Int32Tyi, write_loc); StoreInst * StoreFinalLoc = IRB.CreateStore(const_loc, AFLFinalLoc); - StoreFinalLoc->setMetadata(M.getMDKindID("nosanitize"), - MDNode::get(Ctx, None)); + ModuleSanitizerCoverage::SetNoSanitizeMetadata(StoreFinalLoc); } if (dictionary.size()) { size_t memlen = 0, count = 0, offset = 0; - char * ptr; // sort and unique the dictionary std::sort(dictionary.begin(), dictionary.end()); @@ -1089,13 +1085,7 @@ bool ModuleSanitizerCoverage::instrumentModule( if (count) { - if ((ptr = (char *)malloc(memlen + count)) == NULL) { - - fprintf(stderr, "Error: malloc for %lu bytes failed!\n", - memlen + count); - exit(-1); - - } + auto ptrhld = std::unique_ptr<char[]>(new char[memlen + count]); count = 0; @@ -1103,8 +1093,8 @@ bool ModuleSanitizerCoverage::instrumentModule( if (offset + token.length() < 0xfffff0 && count < MAX_AUTO_EXTRAS) { - ptr[offset++] = (uint8_t)token.length(); - memcpy(ptr + offset, token.c_str(), token.length()); + ptrhld.get()[offset++] = (uint8_t)token.length(); + memcpy(ptrhld.get() + offset, token.c_str(), token.length()); offset += token.length(); count++; @@ -1117,17 +1107,16 @@ bool ModuleSanitizerCoverage::instrumentModule( 0, "__afl_dictionary_len"); ConstantInt *const_len = ConstantInt::get(Int32Tyi, offset); StoreInst *StoreDictLen = IRB.CreateStore(const_len, AFLDictionaryLen); - StoreDictLen->setMetadata(M.getMDKindID("nosanitize"), - MDNode::get(Ctx, None)); + ModuleSanitizerCoverage::SetNoSanitizeMetadata(StoreDictLen); ArrayType *ArrayTy = ArrayType::get(IntegerType::get(Ctx, 8), offset); GlobalVariable *AFLInternalDictionary = new GlobalVariable( M, ArrayTy, true, GlobalValue::ExternalLinkage, ConstantDataArray::get(Ctx, - *(new ArrayRef<char>((char *)ptr, offset))), + *(new ArrayRef<char>(ptrhld.get(), offset))), "__afl_internal_dictionary"); AFLInternalDictionary->setInitializer(ConstantDataArray::get( - Ctx, *(new ArrayRef<char>((char *)ptr, offset)))); + Ctx, *(new ArrayRef<char>(ptrhld.get(), offset)))); AFLInternalDictionary->setConstant(true); GlobalVariable *AFLDictionary = new GlobalVariable( @@ -1138,8 +1127,7 @@ bool ModuleSanitizerCoverage::instrumentModule( Value *AFLDictPtr = IRB.CreatePointerCast(AFLDictOff, PointerType::get(Int8Tyi, 0)); StoreInst *StoreDict = IRB.CreateStore(AFLDictPtr, AFLDictionary); - StoreDict->setMetadata(M.getMDKindID("nosanitize"), - MDNode::get(Ctx, None)); + ModuleSanitizerCoverage::SetNoSanitizeMetadata(StoreDict); } @@ -1156,15 +1144,16 @@ bool ModuleSanitizerCoverage::instrumentModule( else { char modeline[100]; - snprintf(modeline, sizeof(modeline), "%s%s%s%s%s", + snprintf(modeline, sizeof(modeline), "%s%s%s%s%s%s", getenv("AFL_HARDEN") ? "hardened" : "non-hardened", getenv("AFL_USE_ASAN") ? ", ASAN" : "", getenv("AFL_USE_MSAN") ? ", MSAN" : "", + getenv("AFL_USE_TSAN") ? ", TSAN" : "", getenv("AFL_USE_CFISAN") ? ", CFISAN" : "", getenv("AFL_USE_UBSAN") ? ", UBSAN" : ""); - OKF("Instrumented %u locations with no collisions (on average %llu " - "collisions would be in afl-gcc/vanilla AFL) (%s mode).", - inst, calculateCollisions(inst), modeline); + OKF("Instrumented %u locations (%u selects) without collisions (%llu " + "collisions have been avoided) (%s mode).", + inst, select_cnt, calculateCollisions(inst), modeline); } @@ -1286,6 +1275,7 @@ void ModuleSanitizerCoverage::instrumentFunction( const DominatorTree * DT = DTCallback(F); const PostDominatorTree *PDT = PDTCallback(F); bool IsLeafFunc = true; + uint32_t skip_next = 0, local_selects = 0; for (auto &BB : F) { @@ -1303,6 +1293,148 @@ void ModuleSanitizerCoverage::instrumentFunction( Value *val = ConstantInt::get(Int32Ty, ++afl_global_id); callInst->setOperand(1, val); + ++inst; + + } + + SelectInst *selectInst = nullptr; + + /* + std::string errMsg; + raw_string_ostream os(errMsg); + IN.print(os); + fprintf(stderr, "X(%u): %s\n", skip_next, os.str().c_str()); + */ + if (!skip_next && (selectInst = dyn_cast<SelectInst>(&IN))) { + + uint32_t vector_cnt = 0; + Value * condition = selectInst->getCondition(); + Value * result; + auto t = condition->getType(); + IRBuilder<> IRB(selectInst->getNextNode()); + + ++select_cnt; + + if (t->getTypeID() == llvm::Type::IntegerTyID) { + + Value *val1 = ConstantInt::get(Int32Ty, ++afl_global_id); + Value *val2 = ConstantInt::get(Int32Ty, ++afl_global_id); + result = IRB.CreateSelect(condition, val1, val2); + skip_next = 1; + inst += 2; + + } else + +#if LLVM_VERSION_MAJOR > 13 + if (t->getTypeID() == llvm::Type::FixedVectorTyID) { + + FixedVectorType *tt = dyn_cast<FixedVectorType>(t); + if (tt) { + + uint32_t elements = tt->getElementCount().getFixedValue(); + vector_cnt = elements; + inst += vector_cnt * 2; + if (elements) { + + FixedVectorType *GuardPtr1 = + FixedVectorType::get(Int32Ty, elements); + FixedVectorType *GuardPtr2 = + FixedVectorType::get(Int32Ty, elements); + Value *x, *y; + + Value *val1 = ConstantInt::get(Int32Ty, ++afl_global_id); + Value *val2 = ConstantInt::get(Int32Ty, ++afl_global_id); + x = IRB.CreateInsertElement(GuardPtr1, val1, (uint64_t)0); + y = IRB.CreateInsertElement(GuardPtr2, val2, (uint64_t)0); + + for (uint64_t i = 1; i < elements; i++) { + + val1 = ConstantInt::get(Int32Ty, ++afl_global_id); + val2 = ConstantInt::get(Int32Ty, ++afl_global_id); + x = IRB.CreateInsertElement(GuardPtr1, val1, i); + y = IRB.CreateInsertElement(GuardPtr2, val2, i); + + } + + result = IRB.CreateSelect(condition, x, y); + skip_next = 1; + + } + + } + + } else + +#endif + { + + unhandled++; + continue; + + } + + local_selects++; + uint32_t vector_cur = 0; + /* Load SHM pointer */ + LoadInst *MapPtr = IRB.CreateLoad(AFLMapPtr); + ModuleSanitizerCoverage::SetNoSanitizeMetadata(MapPtr); + + while (1) { + + /* Get CurLoc */ + Value *MapPtrIdx = nullptr; + + /* Load counter for CurLoc */ + if (!vector_cnt) { + + MapPtrIdx = IRB.CreateGEP(MapPtr, result); + + } else { + + auto element = IRB.CreateExtractElement(result, vector_cur++); + MapPtrIdx = IRB.CreateGEP(MapPtr, element); + + } + + if (use_threadsafe_counters) { + + IRB.CreateAtomicRMW(llvm::AtomicRMWInst::BinOp::Add, MapPtrIdx, One, +#if LLVM_VERSION_MAJOR >= 13 + llvm::MaybeAlign(1), +#endif + llvm::AtomicOrdering::Monotonic); + + } else { + + LoadInst *Counter = IRB.CreateLoad(MapPtrIdx); + ModuleSanitizerCoverage::SetNoSanitizeMetadata(Counter); + + /* Update bitmap */ + + Value *Incr = IRB.CreateAdd(Counter, One); + + if (skip_nozero == NULL) { + + auto cf = IRB.CreateICmpEQ(Incr, Zero); + auto carry = IRB.CreateZExt(cf, Int8Ty); + Incr = IRB.CreateAdd(Incr, carry); + + } + + auto nosan = IRB.CreateStore(Incr, MapPtrIdx); + ModuleSanitizerCoverage::SetNoSanitizeMetadata(nosan); + + } + + if (!vector_cnt || vector_cnt == vector_cur) { break; } + + } + + skip_next = 1; + + } else { + + skip_next = 0; } @@ -1509,12 +1641,12 @@ void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB, // afl++ START ++afl_global_id; - if (documentFile) { + if (dFile.is_open()) { unsigned long long int moduleID = (((unsigned long long int)(rand() & 0xffffffff)) << 32) | getpid(); - fprintf(documentFile, "ModuleID=%llu Function=%s edgeID=%u\n", moduleID, - F.getName().str().c_str(), afl_global_id); + dFile << "ModuleID=" << moduleID << " Function=" << F.getName().str() + << " edgeID=" << afl_global_id << "\n"; } @@ -1533,8 +1665,7 @@ void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB, } else { LoadInst *MapPtr = IRB.CreateLoad(AFLMapPtr); - MapPtr->setMetadata(Mo->getMDKindID("nosanitize"), - MDNode::get(*Ct, None)); + ModuleSanitizerCoverage::SetNoSanitizeMetadata(MapPtr); MapPtrIdx = IRB.CreateGEP(MapPtr, CurLoc); } @@ -1551,8 +1682,7 @@ void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB, } else { LoadInst *Counter = IRB.CreateLoad(MapPtrIdx); - Counter->setMetadata(Mo->getMDKindID("nosanitize"), - MDNode::get(*Ct, None)); + ModuleSanitizerCoverage::SetNoSanitizeMetadata(Counter); Value *Incr = IRB.CreateAdd(Counter, One); @@ -1564,8 +1694,8 @@ void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB, } - IRB.CreateStore(Incr, MapPtrIdx) - ->setMetadata(Mo->getMDKindID("nosanitize"), MDNode::get(*Ct, None)); + auto nosan = IRB.CreateStore(Incr, MapPtrIdx); + ModuleSanitizerCoverage::SetNoSanitizeMetadata(nosan); } diff --git a/instrumentation/SanitizerCoveragePCGUARD.so.cc b/instrumentation/SanitizerCoveragePCGUARD.so.cc index 48ad2d02..76bb2448 100644 --- a/instrumentation/SanitizerCoveragePCGUARD.so.cc +++ b/instrumentation/SanitizerCoveragePCGUARD.so.cc @@ -203,7 +203,7 @@ class ModuleSanitizerCoverage { SanitizerCoverageOptions Options; - uint32_t instr = 0; + uint32_t instr = 0, selects = 0, unhandled = 0; GlobalVariable *AFLMapPtr = NULL; ConstantInt * One = NULL; ConstantInt * Zero = NULL; @@ -547,14 +547,16 @@ bool ModuleSanitizerCoverage::instrumentModule( else { char modeline[100]; - snprintf(modeline, sizeof(modeline), "%s%s%s%s%s", + snprintf(modeline, sizeof(modeline), "%s%s%s%s%s%s", getenv("AFL_HARDEN") ? "hardened" : "non-hardened", getenv("AFL_USE_ASAN") ? ", ASAN" : "", getenv("AFL_USE_MSAN") ? ", MSAN" : "", + getenv("AFL_USE_TSAN") ? ", TSAN" : "", getenv("AFL_USE_CFISAN") ? ", CFISAN" : "", getenv("AFL_USE_UBSAN") ? ", UBSAN" : ""); - OKF("Instrumented %u locations with no collisions (%s mode).", instr, - modeline); + OKF("Instrumented %u locations with no collisions (%s mode) of which are " + "%u handled and %u unhandled selects.", + instr, modeline, selects, unhandled); } @@ -833,9 +835,8 @@ bool ModuleSanitizerCoverage::InjectCoverage(Function & F, ArrayRef<BasicBlock *> AllBlocks, bool IsLeafFunc) { - if (AllBlocks.empty()) return false; + uint32_t cnt_cov = 0, cnt_sel = 0, cnt_sel_inc = 0; - uint32_t special = 0; for (auto &BB : F) { for (auto &IN : BB) { @@ -850,9 +851,37 @@ bool ModuleSanitizerCoverage::InjectCoverage(Function & F, StringRef FuncName = Callee->getName(); if (FuncName.compare(StringRef("__afl_coverage_interesting"))) continue; - uint32_t id = 1 + instr + (uint32_t)AllBlocks.size() + special++; - Value * val = ConstantInt::get(Int32Ty, id); - callInst->setOperand(1, val); + cnt_cov++; + + } + + SelectInst *selectInst = nullptr; + + if ((selectInst = dyn_cast<SelectInst>(&IN))) { + + Value *c = selectInst->getCondition(); + auto t = c->getType(); + if (t->getTypeID() == llvm::Type::IntegerTyID) { + + cnt_sel++; + cnt_sel_inc += 2; + + } + +#if LLVM__MAJOR > 11 + else if (t->getTypeID() == llvm::Type::FixedVectorTyID) { + + FixedVectorType *tt = dyn_cast<FixedVectorType>(t); + if (tt) { + + cnt_sel++; + cnt_sel_inc += tt->getElementCount().getFixedValue(); + + } + + } + +#endif } @@ -860,11 +889,256 @@ bool ModuleSanitizerCoverage::InjectCoverage(Function & F, } - CreateFunctionLocalArrays(F, AllBlocks, special); - for (size_t i = 0, N = AllBlocks.size(); i < N; i++) - InjectCoverageAtBlock(F, *AllBlocks[i], i, IsLeafFunc); + /* Create PCGUARD array */ + CreateFunctionLocalArrays(F, AllBlocks, cnt_cov + cnt_sel_inc); + selects += cnt_sel; + + uint32_t special = 0, local_selects = 0, skip_next = 0; + + for (auto &BB : F) { + + for (auto &IN : BB) { + + CallInst *callInst = nullptr; + + /* + std::string errMsg; + raw_string_ostream os(errMsg); + IN.print(os); + fprintf(stderr, "X: %s\n", os.str().c_str()); + */ + if ((callInst = dyn_cast<CallInst>(&IN))) { + + Function *Callee = callInst->getCalledFunction(); + if (!Callee) continue; + if (callInst->getCallingConv() != llvm::CallingConv::C) continue; + StringRef FuncName = Callee->getName(); + if (FuncName.compare(StringRef("__afl_coverage_interesting"))) continue; + + IRBuilder<> IRB(callInst); + + Value *GuardPtr = IRB.CreateIntToPtr( + IRB.CreateAdd( + IRB.CreatePointerCast(FunctionGuardArray, IntptrTy), + ConstantInt::get(IntptrTy, (++special + AllBlocks.size()) * 4)), + Int32PtrTy); + + LoadInst *Idx = IRB.CreateLoad(GuardPtr); + ModuleSanitizerCoverage::SetNoSanitizeMetadata(Idx); + + callInst->setOperand(1, Idx); + + } + + SelectInst *selectInst = nullptr; + + if (!skip_next && (selectInst = dyn_cast<SelectInst>(&IN))) { + + uint32_t vector_cnt = 0; + Value * condition = selectInst->getCondition(); + Value * result; + auto t = condition->getType(); + IRBuilder<> IRB(selectInst->getNextNode()); + + if (t->getTypeID() == llvm::Type::IntegerTyID) { + + auto GuardPtr1 = IRB.CreateIntToPtr( + IRB.CreateAdd( + IRB.CreatePointerCast(FunctionGuardArray, IntptrTy), + ConstantInt::get( + IntptrTy, + (cnt_cov + ++local_selects + AllBlocks.size()) * 4)), + Int32PtrTy); + + auto GuardPtr2 = IRB.CreateIntToPtr( + IRB.CreateAdd( + IRB.CreatePointerCast(FunctionGuardArray, IntptrTy), + ConstantInt::get( + IntptrTy, + (cnt_cov + ++local_selects + AllBlocks.size()) * 4)), + Int32PtrTy); + + result = IRB.CreateSelect(condition, GuardPtr1, GuardPtr2); + + } else + +#if LLVM_VERSION_MAJOR > 13 + if (t->getTypeID() == llvm::Type::FixedVectorTyID) { + + FixedVectorType *tt = dyn_cast<FixedVectorType>(t); + if (tt) { + + uint32_t elements = tt->getElementCount().getFixedValue(); + vector_cnt = elements; + if (elements) { + + FixedVectorType *GuardPtr1 = + FixedVectorType::get(Int32PtrTy, elements); + FixedVectorType *GuardPtr2 = + FixedVectorType::get(Int32PtrTy, elements); + Value *x, *y; + + Value *val1 = IRB.CreateIntToPtr( + IRB.CreateAdd( + IRB.CreatePointerCast(FunctionGuardArray, IntptrTy), + ConstantInt::get( + IntptrTy, + (cnt_cov + ++local_selects + AllBlocks.size()) * 4)), + Int32PtrTy); + x = IRB.CreateInsertElement(GuardPtr1, val1, (uint64_t)0); + + Value *val2 = IRB.CreateIntToPtr( + IRB.CreateAdd( + IRB.CreatePointerCast(FunctionGuardArray, IntptrTy), + ConstantInt::get( + IntptrTy, + (cnt_cov + ++local_selects + AllBlocks.size()) * 4)), + Int32PtrTy); + y = IRB.CreateInsertElement(GuardPtr2, val2, (uint64_t)0); + + for (uint64_t i = 1; i < elements; i++) { + + val1 = IRB.CreateIntToPtr( + IRB.CreateAdd( + IRB.CreatePointerCast(FunctionGuardArray, IntptrTy), + ConstantInt::get(IntptrTy, (cnt_cov + ++local_selects + + AllBlocks.size()) * + 4)), + Int32PtrTy); + x = IRB.CreateInsertElement(x, val1, i); + + val2 = IRB.CreateIntToPtr( + IRB.CreateAdd( + IRB.CreatePointerCast(FunctionGuardArray, IntptrTy), + ConstantInt::get(IntptrTy, (cnt_cov + ++local_selects + + AllBlocks.size()) * + 4)), + Int32PtrTy); + y = IRB.CreateInsertElement(y, val2, i); + + } + + /* + std::string errMsg; + raw_string_ostream os(errMsg); + x->print(os); + fprintf(stderr, "X: %s\n", os.str().c_str()); + */ + result = IRB.CreateSelect(condition, x, y); + + } + + } + + } else + +#endif + { + + unhandled++; + continue; + + } + + local_selects++; + uint32_t vector_cur = 0; - instr += special; + /* Load SHM pointer */ + + LoadInst *MapPtr = IRB.CreateLoad(AFLMapPtr); + ModuleSanitizerCoverage::SetNoSanitizeMetadata(MapPtr); + + /* + std::string errMsg; + raw_string_ostream os(errMsg); + result->print(os); + fprintf(stderr, "X: %s\n", os.str().c_str()); + */ + + while (1) { + + /* Get CurLoc */ + LoadInst *CurLoc = nullptr; + Value * MapPtrIdx = nullptr; + + /* Load counter for CurLoc */ + if (!vector_cnt) { + + CurLoc = IRB.CreateLoad(result); + ModuleSanitizerCoverage::SetNoSanitizeMetadata(CurLoc); + MapPtrIdx = IRB.CreateGEP(MapPtr, CurLoc); + + } else { + + auto element = IRB.CreateExtractElement(result, vector_cur++); + auto elementptr = IRB.CreateIntToPtr(element, Int32PtrTy); + auto elementld = IRB.CreateLoad(elementptr); + ModuleSanitizerCoverage::SetNoSanitizeMetadata(elementld); + MapPtrIdx = IRB.CreateGEP(MapPtr, elementld); + + } + + if (use_threadsafe_counters) { + + IRB.CreateAtomicRMW(llvm::AtomicRMWInst::BinOp::Add, MapPtrIdx, One, +#if LLVM_VERSION_MAJOR >= 13 + llvm::MaybeAlign(1), +#endif + llvm::AtomicOrdering::Monotonic); + + } else { + + LoadInst *Counter = IRB.CreateLoad(MapPtrIdx); + ModuleSanitizerCoverage::SetNoSanitizeMetadata(Counter); + + /* Update bitmap */ + + Value *Incr = IRB.CreateAdd(Counter, One); + + if (skip_nozero == NULL) { + + auto cf = IRB.CreateICmpEQ(Incr, Zero); + auto carry = IRB.CreateZExt(cf, Int8Ty); + Incr = IRB.CreateAdd(Incr, carry); + + } + + StoreInst *StoreCtx = IRB.CreateStore(Incr, MapPtrIdx); + ModuleSanitizerCoverage::SetNoSanitizeMetadata(StoreCtx); + + } + + if (!vector_cnt) { + + vector_cnt = 2; + break; + + } else if (vector_cnt == vector_cur) { + + break; + + } + + } + + skip_next = 1; + instr += vector_cnt; + + } else { + + skip_next = 0; + + } + + } + + } + + if (AllBlocks.empty() && !special && !local_selects) return false; + + if (!AllBlocks.empty()) + for (size_t i = 0, N = AllBlocks.size(); i < N; i++) + InjectCoverageAtBlock(F, *AllBlocks[i], i, IsLeafFunc); return true; @@ -881,8 +1155,6 @@ void ModuleSanitizerCoverage::InjectCoverageForIndirectCalls( Function &F, ArrayRef<Instruction *> IndirCalls) { if (IndirCalls.empty()) return; - assert(Options.TracePC || Options.TracePCGuard || - Options.Inline8bitCounters /*|| Options.InlineBoolFlag*/); for (auto I : IndirCalls) { IRBuilder<> IRB(I); @@ -1062,10 +1334,12 @@ void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB, Int32PtrTy); LoadInst *CurLoc = IRB.CreateLoad(GuardPtr); + ModuleSanitizerCoverage::SetNoSanitizeMetadata(CurLoc); /* Load SHM pointer */ LoadInst *MapPtr = IRB.CreateLoad(AFLMapPtr); + ModuleSanitizerCoverage::SetNoSanitizeMetadata(MapPtr); /* Load counter for CurLoc */ @@ -1082,6 +1356,8 @@ void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB, } else { LoadInst *Counter = IRB.CreateLoad(MapPtrIdx); + ModuleSanitizerCoverage::SetNoSanitizeMetadata(Counter); + /* Update bitmap */ Value *Incr = IRB.CreateAdd(Counter, One); @@ -1094,7 +1370,8 @@ void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB, } - IRB.CreateStore(Incr, MapPtrIdx); + StoreInst *StoreCtx = IRB.CreateStore(Incr, MapPtrIdx); + ModuleSanitizerCoverage::SetNoSanitizeMetadata(StoreCtx); } diff --git a/instrumentation/afl-compiler-rt.o.c b/instrumentation/afl-compiler-rt.o.c index 9acab4e7..5d198ada 100644 --- a/instrumentation/afl-compiler-rt.o.c +++ b/instrumentation/afl-compiler-rt.o.c @@ -9,7 +9,7 @@ you may not use this file except in compliance with the License. You may obtain a copy of the License at: - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 */ @@ -22,6 +22,10 @@ #include "cmplog.h" #include "llvm-alternative-coverage.h" +#define XXH_INLINE_ALL +#include "xxhash.h" +#undef XXH_INLINE_ALL + #include <stdio.h> #include <stdlib.h> #include <signal.h> @@ -154,6 +158,8 @@ static void at_exit(int signal) { } +#define default_hash(a, b) XXH3_64bits(a, b) + /* Uninspired gcc plugin instrumentation */ void __afl_trace(const u32 x) { @@ -664,12 +670,12 @@ static void __afl_start_snapshots(void) { u8 child_stopped = 0; - void (*old_sigchld_handler)(int) = 0; // = signal(SIGCHLD, SIG_DFL); + void (*old_sigchld_handler)(int) = signal(SIGCHLD, SIG_DFL); /* Phone home and tell the parent that we're OK. If parent isn't there, assume we're not running in forkserver mode and just execute program. */ - status |= (FS_OPT_ENABLED | FS_OPT_SNAPSHOT); + status |= (FS_OPT_ENABLED | FS_OPT_SNAPSHOT | FS_OPT_NEWCMPLOG); if (__afl_sharedmem_fuzzing != 0) status |= FS_OPT_SHDMEM_FUZZ; if (__afl_map_size <= FS_OPT_MAX_MAPSIZE) status |= (FS_OPT_SET_MAPSIZE(__afl_map_size) | FS_OPT_MAPSIZE); @@ -920,7 +926,7 @@ static void __afl_start_forkserver(void) { u8 child_stopped = 0; - void (*old_sigchld_handler)(int) = 0; // = signal(SIGCHLD, SIG_DFL); + void (*old_sigchld_handler)(int) = signal(SIGCHLD, SIG_DFL); if (__afl_map_size <= FS_OPT_MAX_MAPSIZE) { @@ -935,7 +941,12 @@ static void __afl_start_forkserver(void) { } if (__afl_sharedmem_fuzzing != 0) { status_for_fsrv |= FS_OPT_SHDMEM_FUZZ; } - if (status_for_fsrv) { status_for_fsrv |= (FS_OPT_ENABLED); } + if (status_for_fsrv) { + + status_for_fsrv |= (FS_OPT_ENABLED | FS_OPT_NEWCMPLOG); + + } + memcpy(tmp, &status_for_fsrv, 4); /* Phone home and tell the parent that we're OK. If parent isn't there, @@ -1499,8 +1510,7 @@ void __cmplog_ins_hook1(uint8_t arg1, uint8_t arg2, uint8_t attr) { if (unlikely(!__afl_cmp_map || arg1 == arg2)) return; uintptr_t k = (uintptr_t)__builtin_return_address(0); - k = (k >> 4) ^ (k << 8); - k &= CMP_MAP_W - 1; + k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1)); u32 hits; @@ -1530,8 +1540,7 @@ void __cmplog_ins_hook2(uint16_t arg1, uint16_t arg2, uint8_t attr) { if (unlikely(!__afl_cmp_map || arg1 == arg2)) return; uintptr_t k = (uintptr_t)__builtin_return_address(0); - k = (k >> 4) ^ (k << 8); - k &= CMP_MAP_W - 1; + k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1)); u32 hits; @@ -1569,8 +1578,7 @@ void __cmplog_ins_hook4(uint32_t arg1, uint32_t arg2, uint8_t attr) { if (unlikely(!__afl_cmp_map || arg1 == arg2)) return; uintptr_t k = (uintptr_t)__builtin_return_address(0); - k = (k >> 4) ^ (k << 8); - k &= CMP_MAP_W - 1; + k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1)); u32 hits; @@ -1608,8 +1616,7 @@ void __cmplog_ins_hook8(uint64_t arg1, uint64_t arg2, uint8_t attr) { if (unlikely(!__afl_cmp_map || arg1 == arg2)) return; uintptr_t k = (uintptr_t)__builtin_return_address(0); - k = (k >> 4) ^ (k << 8); - k &= CMP_MAP_W - 1; + k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1)); u32 hits; @@ -1652,8 +1659,7 @@ void __cmplog_ins_hookN(uint128_t arg1, uint128_t arg2, uint8_t attr, if (unlikely(!__afl_cmp_map || arg1 == arg2)) return; uintptr_t k = (uintptr_t)__builtin_return_address(0); - k = (k >> 4) ^ (k << 8); - k &= CMP_MAP_W - 1; + k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1)); u32 hits; @@ -1696,8 +1702,7 @@ void __cmplog_ins_hook16(uint128_t arg1, uint128_t arg2, uint8_t attr) { if (likely(!__afl_cmp_map)) return; uintptr_t k = (uintptr_t)__builtin_return_address(0); - k = (k >> 4) ^ (k << 8); - k &= CMP_MAP_W - 1; + k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1)); u32 hits; @@ -1802,8 +1807,8 @@ void __sanitizer_cov_trace_switch(uint64_t val, uint64_t *cases) { for (uint64_t i = 0; i < cases[0]; i++) { uintptr_t k = (uintptr_t)__builtin_return_address(0) + i; - k = (k >> 4) ^ (k << 8); - k &= CMP_MAP_W - 1; + k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & + (CMP_MAP_W - 1)); u32 hits; @@ -1880,11 +1885,108 @@ static int area_is_valid(void *ptr, size_t len) { } +/* hook for string with length functions, eg. strncmp, strncasecmp etc. + Note that we ignore the len parameter and take longer strings if present. */ +void __cmplog_rtn_hook_strn(u8 *ptr1, u8 *ptr2, u64 len) { + + // fprintf(stderr, "RTN1 %p %p %u\n", ptr1, ptr2, len); + if (likely(!__afl_cmp_map)) return; + if (unlikely(!len)) return; + int len0 = MIN(len, 31); + int len1 = strnlen(ptr1, len0); + if (len1 < 31) len1 = area_is_valid(ptr1, len1 + 1); + int len2 = strnlen(ptr2, len0); + if (len2 < 31) len2 = area_is_valid(ptr1, len2 + 1); + int l = MAX(len1, len2); + if (l < 2) return; + + uintptr_t k = (uintptr_t)__builtin_return_address(0); + k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1)); + + u32 hits; + + if (__afl_cmp_map->headers[k].type != CMP_TYPE_RTN) { + + __afl_cmp_map->headers[k].type = CMP_TYPE_RTN; + __afl_cmp_map->headers[k].hits = 1; + __afl_cmp_map->headers[k].shape = l - 1; + hits = 0; + + } else { + + hits = __afl_cmp_map->headers[k].hits++; + + if (__afl_cmp_map->headers[k].shape < l) { + + __afl_cmp_map->headers[k].shape = l - 1; + + } + + } + + struct cmpfn_operands *cmpfn = (struct cmpfn_operands *)__afl_cmp_map->log[k]; + hits &= CMP_MAP_RTN_H - 1; + + cmpfn[hits].v0_len = 0x80 + l; + cmpfn[hits].v1_len = 0x80 + l; + __builtin_memcpy(cmpfn[hits].v0, ptr1, len1); + __builtin_memcpy(cmpfn[hits].v1, ptr2, len2); + // fprintf(stderr, "RTN3\n"); + +} + +/* hook for string functions, eg. strcmp, strcasecmp etc. */ +void __cmplog_rtn_hook_str(u8 *ptr1, u8 *ptr2) { + + // fprintf(stderr, "RTN1 %p %p\n", ptr1, ptr2); + if (likely(!__afl_cmp_map)) return; + if (unlikely(!ptr1 || !ptr2)) return; + int len1 = strnlen(ptr1, 30) + 1; + int len2 = strnlen(ptr2, 30) + 1; + int l = MAX(len1, len2); + if (l < 3) return; + + uintptr_t k = (uintptr_t)__builtin_return_address(0); + k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1)); + + u32 hits; + + if (__afl_cmp_map->headers[k].type != CMP_TYPE_RTN) { + + __afl_cmp_map->headers[k].type = CMP_TYPE_RTN; + __afl_cmp_map->headers[k].hits = 1; + __afl_cmp_map->headers[k].shape = l - 1; + hits = 0; + + } else { + + hits = __afl_cmp_map->headers[k].hits++; + + if (__afl_cmp_map->headers[k].shape < l) { + + __afl_cmp_map->headers[k].shape = l - 1; + + } + + } + + struct cmpfn_operands *cmpfn = (struct cmpfn_operands *)__afl_cmp_map->log[k]; + hits &= CMP_MAP_RTN_H - 1; + + cmpfn[hits].v0_len = 0x80 + len1; + cmpfn[hits].v1_len = 0x80 + len2; + __builtin_memcpy(cmpfn[hits].v0, ptr1, len1); + __builtin_memcpy(cmpfn[hits].v1, ptr2, len2); + // fprintf(stderr, "RTN3\n"); + +} + +/* hook function for all other func(ptr, ptr, ...) variants */ void __cmplog_rtn_hook(u8 *ptr1, u8 *ptr2) { /* u32 i; - if (area_is_valid(ptr1, 32) <= 0 || area_is_valid(ptr2, 32) <= 0) return; + if (area_is_valid(ptr1, 31) <= 0 || area_is_valid(ptr2, 31) <= 0) return; fprintf(stderr, "rtn arg0="); for (i = 0; i < 32; i++) fprintf(stderr, "%02x", ptr1[i]); @@ -1894,18 +1996,17 @@ void __cmplog_rtn_hook(u8 *ptr1, u8 *ptr2) { fprintf(stderr, "\n"); */ - if (likely(!__afl_cmp_map)) return; // fprintf(stderr, "RTN1 %p %p\n", ptr1, ptr2); + if (likely(!__afl_cmp_map)) return; int l1, l2; - if ((l1 = area_is_valid(ptr1, 32)) <= 0 || - (l2 = area_is_valid(ptr2, 32)) <= 0) + if ((l1 = area_is_valid(ptr1, 31)) <= 0 || + (l2 = area_is_valid(ptr2, 31)) <= 0) return; - int len = MIN(l1, l2); + int len = MIN(31, MIN(l1, l2)); // fprintf(stderr, "RTN2 %u\n", len); uintptr_t k = (uintptr_t)__builtin_return_address(0); - k = (k >> 4) ^ (k << 8); - k &= CMP_MAP_W - 1; + k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1)); u32 hits; @@ -1928,12 +2029,80 @@ void __cmplog_rtn_hook(u8 *ptr1, u8 *ptr2) { } + struct cmpfn_operands *cmpfn = (struct cmpfn_operands *)__afl_cmp_map->log[k]; + hits &= CMP_MAP_RTN_H - 1; + + cmpfn[hits].v0_len = len; + cmpfn[hits].v1_len = len; + __builtin_memcpy(cmpfn[hits].v0, ptr1, len); + __builtin_memcpy(cmpfn[hits].v1, ptr2, len); + // fprintf(stderr, "RTN3\n"); + +} + +/* hook for func(ptr, ptr, len, ...) looking functions. + Note that for the time being we ignore len as this could be wrong + information and pass it on to the standard binary rtn hook */ +void __cmplog_rtn_hook_n(u8 *ptr1, u8 *ptr2, u64 len) { + + (void)(len); + __cmplog_rtn_hook(ptr1, ptr2); + +#if 0 + /* + u32 i; + if (area_is_valid(ptr1, 31) <= 0 || area_is_valid(ptr2, 31) <= 0) return; + fprintf(stderr, "rtn_n len=%u arg0=", len); + for (i = 0; i < len; i++) + fprintf(stderr, "%02x", ptr1[i]); + fprintf(stderr, " arg1="); + for (i = 0; i < len; i++) + fprintf(stderr, "%02x", ptr2[i]); + fprintf(stderr, "\n"); + */ + + // fprintf(stderr, "RTN1 %p %p %u\n", ptr1, ptr2, len); + if (likely(!__afl_cmp_map)) return; + if (unlikely(!len)) return; + int l = MIN(31, len); + + if ((l = area_is_valid(ptr1, l)) <= 0 || (l = area_is_valid(ptr2, l)) <= 0) + return; + + // fprintf(stderr, "RTN2 %u\n", l); + uintptr_t k = (uintptr_t)__builtin_return_address(0); + k = (uintptr_t)(default_hash((u8 *)&k, sizeof(uintptr_t)) & (CMP_MAP_W - 1)); + + u32 hits; + + if (__afl_cmp_map->headers[k].type != CMP_TYPE_RTN) { + + __afl_cmp_map->headers[k].type = CMP_TYPE_RTN; + __afl_cmp_map->headers[k].hits = 1; + __afl_cmp_map->headers[k].shape = l - 1; + hits = 0; + + } else { + + hits = __afl_cmp_map->headers[k].hits++; + + if (__afl_cmp_map->headers[k].shape < l) { + + __afl_cmp_map->headers[k].shape = l - 1; + + } + + } + + struct cmpfn_operands *cmpfn = (struct cmpfn_operands *)__afl_cmp_map->log[k]; hits &= CMP_MAP_RTN_H - 1; - __builtin_memcpy(((struct cmpfn_operands *)__afl_cmp_map->log[k])[hits].v0, - ptr1, len); - __builtin_memcpy(((struct cmpfn_operands *)__afl_cmp_map->log[k])[hits].v1, - ptr2, len); + + cmpfn[hits].v0_len = l; + cmpfn[hits].v1_len = l; + __builtin_memcpy(cmpfn[hits].v0, ptr1, l); + __builtin_memcpy(cmpfn[hits].v1, ptr2, l); // fprintf(stderr, "RTN3\n"); +#endif } @@ -2084,5 +2253,11 @@ void __afl_coverage_interesting(u8 val, u32 id) { } +void __afl_set_persistent_mode(u8 mode) { + + is_persistent = mode; + +} + #undef write_error diff --git a/instrumentation/afl-gcc-pass.so.cc b/instrumentation/afl-gcc-pass.so.cc index 3b7eb878..df2b6f2a 100644 --- a/instrumentation/afl-gcc-pass.so.cc +++ b/instrumentation/afl-gcc-pass.so.cc @@ -30,7 +30,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. + along with this program. If not, see <https://www.gnu.org/licenses/>. */ diff --git a/instrumentation/afl-llvm-dict2file.so.cc b/instrumentation/afl-llvm-dict2file.so.cc index 4622e488..bf07a154 100644 --- a/instrumentation/afl-llvm-dict2file.so.cc +++ b/instrumentation/afl-llvm-dict2file.so.cc @@ -10,7 +10,7 @@ you may not use this file except in compliance with the License. You may obtain a copy of the License at: - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 This library is plugged into LLVM when invoking clang through afl-clang-lto. @@ -66,6 +66,9 @@ namespace { class AFLdict2filePass : public ModulePass { + std::ofstream of; + void dict2file(u8 *, u32); + public: static char ID; @@ -81,7 +84,7 @@ class AFLdict2filePass : public ModulePass { } // namespace -void dict2file(int fd, u8 *mem, u32 len) { +void AFLdict2filePass::dict2file(u8 *mem, u32 len) { u32 i, j, binary = 0; char line[MAX_AUTO_EXTRA * 8], tmp[8]; @@ -113,9 +116,8 @@ void dict2file(int fd, u8 *mem, u32 len) { line[j] = 0; strcat(line, "\"\n"); - if (write(fd, line, strlen(line)) <= 0) - PFATAL("Could not write to dictionary file"); - fsync(fd); + of << line; + of.flush(); if (!be_quiet) fprintf(stderr, "Found dictionary token: %s", line); @@ -125,7 +127,7 @@ bool AFLdict2filePass::runOnModule(Module &M) { DenseMap<Value *, std::string *> valueMap; char * ptr; - int fd, found = 0; + int found = 0; /* Show a banner */ setvbuf(stdout, NULL, _IONBF, 0); @@ -146,8 +148,8 @@ bool AFLdict2filePass::runOnModule(Module &M) { if (!ptr || *ptr != '/') FATAL("AFL_LLVM_DICT2FILE is not set to an absolute path: %s", ptr); - if ((fd = open(ptr, O_WRONLY | O_APPEND | O_CREAT | O_DSYNC, 0644)) < 0) - PFATAL("Could not open/create %s.", ptr); + of.open(ptr, std::ofstream::out | std::ofstream::app); + if (!of.is_open()) PFATAL("Could not open/create %s.", ptr); /* Instrument all the things! */ @@ -264,11 +266,11 @@ bool AFLdict2filePass::runOnModule(Module &M) { } - dict2file(fd, (u8 *)&val, len); + dict2file((u8 *)&val, len); found++; if (val2) { - dict2file(fd, (u8 *)&val2, len); + dict2file((u8 *)&val2, len); found++; } @@ -289,7 +291,6 @@ bool AFLdict2filePass::runOnModule(Module &M) { bool isIntMemcpy = true; bool isStdString = true; bool isStrstr = true; - bool addedNull = false; size_t optLen = 0; Function *Callee = callInst->getCalledFunction(); @@ -588,8 +589,8 @@ bool AFLdict2filePass::runOnModule(Module &M) { if (optLen < 2) { continue; } if (literalLength + 1 == optLen) { // add null byte + thestring.append("\0", 1); - addedNull = true; } @@ -601,14 +602,17 @@ bool AFLdict2filePass::runOnModule(Module &M) { // was not already added if (!isMemcmp) { - if (addedNull == false && thestring[optLen - 1] != '\0') { + /* + if (addedNull == false && thestring[optLen - 1] != '\0') + { - thestring.append("\0", 1); // add null byte - optLen++; + thestring.append("\0", 1); // add null byte + optLen++; - } + } - if (!isStdString) { + */ + if (!isStdString && thestring.find('\0', 0) != std::string::npos) { // ensure we do not have garbage size_t offset = thestring.find('\0', 0); @@ -630,7 +634,7 @@ bool AFLdict2filePass::runOnModule(Module &M) { ptr = (char *)thestring.c_str(); - dict2file(fd, (u8 *)ptr, optLen); + dict2file((u8 *)ptr, optLen); found++; } @@ -641,7 +645,7 @@ bool AFLdict2filePass::runOnModule(Module &M) { } - close(fd); + of.close(); /* Say something nice. */ diff --git a/instrumentation/afl-llvm-lto-instrumentation.so.cc b/instrumentation/afl-llvm-lto-instrumentation.so.cc deleted file mode 100644 index e300044c..00000000 --- a/instrumentation/afl-llvm-lto-instrumentation.so.cc +++ /dev/null @@ -1,1118 +0,0 @@ -/* - american fuzzy lop++ - LLVM LTO instrumentation pass - ---------------------------------------------------- - - Written by Marc Heuse <mh@mh-sec.de> - - Copyright 2019-2020 AFLplusplus Project. All rights reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at: - - http://www.apache.org/licenses/LICENSE-2.0 - - This library is plugged into LLVM when invoking clang through afl-clang-lto. - - */ - -#define AFL_LLVM_PASS - -#include "config.h" -#include "debug.h" - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <string.h> -#include <sys/time.h> - -#include <list> -#include <string> -#include <fstream> -#include <set> -#include <iostream> - -#include "llvm/Config/llvm-config.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/DebugInfo.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/Verifier.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/MemorySSAUpdater.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Pass.h" -#include "llvm/IR/Constants.h" - -#include "afl-llvm-common.h" - -using namespace llvm; - -namespace { - -class AFLLTOPass : public ModulePass { - - public: - static char ID; - - AFLLTOPass() : ModulePass(ID) { - - char *ptr; - - if (getenv("AFL_DEBUG")) debug = 1; - if ((ptr = getenv("AFL_LLVM_LTO_STARTID")) != NULL) - if ((afl_global_id = (uint32_t)atoi(ptr)) < 0 || - afl_global_id >= MAP_SIZE) - FATAL("AFL_LLVM_LTO_STARTID value of \"%s\" is not between 0 and %u\n", - ptr, MAP_SIZE - 1); - - skip_nozero = getenv("AFL_LLVM_SKIP_NEVERZERO"); - - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - - ModulePass::getAnalysisUsage(AU); - AU.addRequired<DominatorTreeWrapperPass>(); - AU.addRequired<LoopInfoWrapperPass>(); - - } - - bool runOnModule(Module &M) override; - - protected: - uint32_t afl_global_id = 1, autodictionary = 1; - uint32_t function_minimum_size = 1; - uint32_t inst_blocks = 0, inst_funcs = 0, total_instr = 0; - unsigned long long int map_addr = 0x10000; - const char * skip_nozero = NULL; - const char * use_threadsafe_counters = nullptr; - -}; - -} // namespace - -bool AFLLTOPass::runOnModule(Module &M) { - - LLVMContext & C = M.getContext(); - std::vector<std::string> dictionary; - // std::vector<CallInst *> calls; - DenseMap<Value *, std::string *> valueMap; - std::vector<BasicBlock *> BlockList; - char * ptr; - FILE * documentFile = NULL; - size_t found = 0; - - srand((unsigned int)time(NULL)); - - unsigned long long int moduleID = - (((unsigned long long int)(rand() & 0xffffffff)) << 32) | getpid(); - - IntegerType *Int8Ty = IntegerType::getInt8Ty(C); - IntegerType *Int32Ty = IntegerType::getInt32Ty(C); - IntegerType *Int64Ty = IntegerType::getInt64Ty(C); - - /* Show a banner */ - setvbuf(stdout, NULL, _IONBF, 0); - - if ((isatty(2) && !getenv("AFL_QUIET")) || debug) { - - SAYF(cCYA "afl-llvm-lto" VERSION cRST - " by Marc \"vanHauser\" Heuse <mh@mh-sec.de>\n"); - - } else - - be_quiet = 1; - - use_threadsafe_counters = getenv("AFL_LLVM_THREADSAFE_INST"); - - if ((ptr = getenv("AFL_LLVM_DOCUMENT_IDS")) != NULL) { - - if ((documentFile = fopen(ptr, "a")) == NULL) - WARNF("Cannot access document file %s", ptr); - - } - - // we make this the default as the fixed map has problems with - // defered forkserver, early constructors, ifuncs and maybe more - /*if (getenv("AFL_LLVM_MAP_DYNAMIC"))*/ - map_addr = 0; - - if ((ptr = getenv("AFL_LLVM_MAP_ADDR"))) { - - uint64_t val; - if (!*ptr || !strcmp(ptr, "0") || !strcmp(ptr, "0x0")) { - - map_addr = 0; - - } else if (getenv("AFL_LLVM_MAP_DYNAMIC")) { - - FATAL( - "AFL_LLVM_MAP_ADDR and AFL_LLVM_MAP_DYNAMIC cannot be used together"); - - } else if (strncmp(ptr, "0x", 2) != 0) { - - map_addr = 0x10000; // the default - - } else { - - val = strtoull(ptr, NULL, 16); - if (val < 0x100 || val > 0xffffffff00000000) { - - FATAL( - "AFL_LLVM_MAP_ADDR must be a value between 0x100 and " - "0xffffffff00000000"); - - } - - map_addr = val; - - } - - } - - if (debug) { fprintf(stderr, "map address is 0x%llx\n", map_addr); } - - /* Get/set the globals for the SHM region. */ - - GlobalVariable *AFLMapPtr = NULL; - Value * MapPtrFixed = NULL; - - if (!map_addr) { - - AFLMapPtr = - new GlobalVariable(M, PointerType::get(Int8Ty, 0), false, - GlobalValue::ExternalLinkage, 0, "__afl_area_ptr"); - - } else { - - ConstantInt *MapAddr = ConstantInt::get(Int64Ty, map_addr); - MapPtrFixed = - ConstantExpr::getIntToPtr(MapAddr, PointerType::getUnqual(Int8Ty)); - - } - - ConstantInt *Zero = ConstantInt::get(Int8Ty, 0); - ConstantInt *One = ConstantInt::get(Int8Ty, 1); - - // This dumps all inialized global strings - might be useful in the future - /* - for (auto G=M.getGlobalList().begin(); G!=M.getGlobalList().end(); G++) { - - GlobalVariable &GV=*G; - if (!GV.getName().str().empty()) { - - fprintf(stderr, "Global Variable: %s", GV.getName().str().c_str()); - if (GV.hasInitializer()) - if (auto *Val = dyn_cast<ConstantDataArray>(GV.getInitializer())) - fprintf(stderr, " Value: \"%s\"", Val->getAsString().str().c_str()); - fprintf(stderr, "\n"); - - } - - } - - */ - - scanForDangerousFunctions(&M); - - /* Instrument all the things! */ - - int inst_blocks = 0; - - for (auto &F : M) { - - /*For debugging - AttributeSet X = F.getAttributes().getFnAttributes(); - fprintf(stderr, "DEBUG: Module %s Function %s attributes %u\n", - M.getName().str().c_str(), F.getName().str().c_str(), - X.getNumAttributes()); - */ - - if (F.size() < function_minimum_size) continue; - if (isIgnoreFunction(&F)) continue; - - // the instrument file list check - AttributeList Attrs = F.getAttributes(); - if (Attrs.hasAttribute(-1, StringRef("skipinstrument"))) { - - if (debug) - fprintf(stderr, - "DEBUG: Function %s is not in a source file that was specified " - "in the instrument file list\n", - F.getName().str().c_str()); - continue; - - } - - std::vector<BasicBlock *> InsBlocks; - - if (autodictionary) { - - /* Some implementation notes. - * - * We try to handle 3 cases: - * - memcmp("foo", arg, 3) <- literal string - * - static char globalvar[] = "foo"; - * memcmp(globalvar, arg, 3) <- global variable - * - char localvar[] = "foo"; - * memcmp(locallvar, arg, 3) <- local variable - * - * The local variable case is the hardest. We can only detect that - * case if there is no reassignment or change in the variable. - * And it might not work across llvm version. - * What we do is hooking the initializer function for local variables - * (llvm.memcpy.p0i8.p0i8.i64) and note the string and the assigned - * variable. And if that variable is then used in a compare function - * we use that noted string. - * This seems not to work for tokens that have a size <= 4 :-( - * - * - if the compared length is smaller than the string length we - * save the full string. This is likely better for fuzzing but - * might be wrong in a few cases depending on optimizers - * - * - not using StringRef because there is a bug in the llvm 11 - * checkout I am using which sometimes points to wrong strings - * - * Over and out. Took me a full day. damn. mh/vh - */ - - for (auto &BB : F) { - - for (auto &IN : BB) { - - CallInst *callInst = nullptr; - CmpInst * cmpInst = nullptr; - - if ((cmpInst = dyn_cast<CmpInst>(&IN))) { - - Value * op = cmpInst->getOperand(1); - ConstantInt *ilen = dyn_cast<ConstantInt>(op); - - if (ilen && ilen->uge(0xffffffffffffffff) == false) { - - u64 val2 = 0, val = ilen->getZExtValue(); - u32 len = 0; - if (val > 0x10000 && val < 0xffffffff) len = 4; - if (val > 0x100000001 && val < 0xffffffffffffffff) len = 8; - - if (len) { - - auto c = cmpInst->getPredicate(); - - switch (c) { - - case CmpInst::FCMP_OGT: // fall through - case CmpInst::FCMP_OLE: // fall through - case CmpInst::ICMP_SLE: // fall through - case CmpInst::ICMP_SGT: - - // signed comparison and it is a negative constant - if ((len == 4 && (val & 80000000)) || - (len == 8 && (val & 8000000000000000))) { - - if ((val & 0xffff) != 1) val2 = val - 1; - break; - - } - - // fall through - - case CmpInst::FCMP_UGT: // fall through - case CmpInst::FCMP_ULE: // fall through - case CmpInst::ICMP_UGT: // fall through - case CmpInst::ICMP_ULE: - if ((val & 0xffff) != 0xfffe) val2 = val + 1; - break; - - case CmpInst::FCMP_OLT: // fall through - case CmpInst::FCMP_OGE: // fall through - case CmpInst::ICMP_SLT: // fall through - case CmpInst::ICMP_SGE: - - // signed comparison and it is a negative constant - if ((len == 4 && (val & 80000000)) || - (len == 8 && (val & 8000000000000000))) { - - if ((val & 0xffff) != 1) val2 = val - 1; - break; - - } - - // fall through - - case CmpInst::FCMP_ULT: // fall through - case CmpInst::FCMP_UGE: // fall through - case CmpInst::ICMP_ULT: // fall through - case CmpInst::ICMP_UGE: - if ((val & 0xffff) != 1) val2 = val - 1; - break; - - default: - val2 = 0; - - } - - dictionary.push_back(std::string((char *)&val, len)); - found++; - - if (val2) { - - dictionary.push_back(std::string((char *)&val2, len)); - found++; - - } - - } - - } - - } - - if ((callInst = dyn_cast<CallInst>(&IN))) { - - bool isStrcmp = true; - bool isMemcmp = true; - bool isStrncmp = true; - bool isStrcasecmp = true; - bool isStrncasecmp = true; - bool isIntMemcpy = true; - bool isStdString = true; - bool addedNull = false; - size_t optLen = 0; - - Function *Callee = callInst->getCalledFunction(); - if (!Callee) continue; - if (callInst->getCallingConv() != llvm::CallingConv::C) continue; - std::string FuncName = Callee->getName().str(); - - isStrcmp &= (!FuncName.compare("strcmp") || - !FuncName.compare("xmlStrcmp") || - !FuncName.compare("xmlStrEqual") || - !FuncName.compare("g_strcmp0") || - !FuncName.compare("curl_strequal") || - !FuncName.compare("strcsequal")); - isMemcmp &= - (!FuncName.compare("memcmp") || !FuncName.compare("bcmp") || - !FuncName.compare("CRYPTO_memcmp") || - !FuncName.compare("OPENSSL_memcmp") || - !FuncName.compare("memcmp_const_time") || - !FuncName.compare("memcmpct")); - isStrncmp &= (!FuncName.compare("strncmp") || - !FuncName.compare("xmlStrncmp") || - !FuncName.compare("curl_strnequal")); - isStrcasecmp &= (!FuncName.compare("strcasecmp") || - !FuncName.compare("stricmp") || - !FuncName.compare("ap_cstr_casecmp") || - !FuncName.compare("OPENSSL_strcasecmp") || - !FuncName.compare("xmlStrcasecmp") || - !FuncName.compare("g_strcasecmp") || - !FuncName.compare("g_ascii_strcasecmp") || - !FuncName.compare("Curl_strcasecompare") || - !FuncName.compare("Curl_safe_strcasecompare") || - !FuncName.compare("cmsstrcasecmp")); - isStrncasecmp &= (!FuncName.compare("strncasecmp") || - !FuncName.compare("strnicmp") || - !FuncName.compare("ap_cstr_casecmpn") || - !FuncName.compare("OPENSSL_strncasecmp") || - !FuncName.compare("xmlStrncasecmp") || - !FuncName.compare("g_ascii_strncasecmp") || - !FuncName.compare("Curl_strncasecompare") || - !FuncName.compare("g_strncasecmp")); - isIntMemcpy &= !FuncName.compare("llvm.memcpy.p0i8.p0i8.i64"); - isStdString &= - ((FuncName.find("basic_string") != std::string::npos && - FuncName.find("compare") != std::string::npos) || - (FuncName.find("basic_string") != std::string::npos && - FuncName.find("find") != std::string::npos)); - - /* we do something different here, putting this BB and the - successors in a block map */ - if (!FuncName.compare("__afl_persistent_loop")) { - - BlockList.push_back(&BB); - /* - for (succ_iterator SI = succ_begin(&BB), SE = - succ_end(&BB); SI != SE; ++SI) { - - BasicBlock *succ = *SI; - BlockList.push_back(succ); - - } - - */ - - } - - if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp && - !isStrncasecmp && !isIntMemcpy && !isStdString) - continue; - - /* Verify the strcmp/memcmp/strncmp/strcasecmp/strncasecmp function - * prototype */ - FunctionType *FT = Callee->getFunctionType(); - - isStrcmp &= FT->getNumParams() == 2 && - FT->getReturnType()->isIntegerTy(32) && - FT->getParamType(0) == FT->getParamType(1) && - FT->getParamType(0) == - IntegerType::getInt8PtrTy(M.getContext()); - isStrcasecmp &= FT->getNumParams() == 2 && - FT->getReturnType()->isIntegerTy(32) && - FT->getParamType(0) == FT->getParamType(1) && - FT->getParamType(0) == - IntegerType::getInt8PtrTy(M.getContext()); - isMemcmp &= FT->getNumParams() == 3 && - FT->getReturnType()->isIntegerTy(32) && - FT->getParamType(0)->isPointerTy() && - FT->getParamType(1)->isPointerTy() && - FT->getParamType(2)->isIntegerTy(); - isStrncmp &= FT->getNumParams() == 3 && - FT->getReturnType()->isIntegerTy(32) && - FT->getParamType(0) == FT->getParamType(1) && - FT->getParamType(0) == - IntegerType::getInt8PtrTy(M.getContext()) && - FT->getParamType(2)->isIntegerTy(); - isStrncasecmp &= FT->getNumParams() == 3 && - FT->getReturnType()->isIntegerTy(32) && - FT->getParamType(0) == FT->getParamType(1) && - FT->getParamType(0) == - IntegerType::getInt8PtrTy(M.getContext()) && - FT->getParamType(2)->isIntegerTy(); - isStdString &= FT->getNumParams() >= 2 && - FT->getParamType(0)->isPointerTy() && - FT->getParamType(1)->isPointerTy(); - - if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp && - !isStrncasecmp && !isIntMemcpy && !isStdString) - continue; - - /* is a str{n,}{case,}cmp/memcmp, check if we have - * str{case,}cmp(x, "const") or str{case,}cmp("const", x) - * strn{case,}cmp(x, "const", ..) or strn{case,}cmp("const", x, ..) - * memcmp(x, "const", ..) or memcmp("const", x, ..) */ - Value *Str1P = callInst->getArgOperand(0), - *Str2P = callInst->getArgOperand(1); - std::string Str1, Str2; - StringRef TmpStr; - bool HasStr1; - getConstantStringInfo(Str1P, TmpStr); - if (TmpStr.empty()) { - - HasStr1 = false; - - } else { - - HasStr1 = true; - Str1 = TmpStr.str(); - - } - - bool HasStr2; - getConstantStringInfo(Str2P, TmpStr); - if (TmpStr.empty()) { - - HasStr2 = false; - - } else { - - HasStr2 = true; - Str2 = TmpStr.str(); - - } - - if (debug) - fprintf(stderr, "F:%s %p(%s)->\"%s\"(%s) %p(%s)->\"%s\"(%s)\n", - FuncName.c_str(), Str1P, Str1P->getName().str().c_str(), - Str1.c_str(), HasStr1 == true ? "true" : "false", Str2P, - Str2P->getName().str().c_str(), Str2.c_str(), - HasStr2 == true ? "true" : "false"); - - // we handle the 2nd parameter first because of llvm memcpy - if (!HasStr2) { - - auto *Ptr = dyn_cast<ConstantExpr>(Str2P); - if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) { - - if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) { - - if (Var->hasInitializer()) { - - if (auto *Array = dyn_cast<ConstantDataArray>( - Var->getInitializer())) { - - HasStr2 = true; - Str2 = Array->getRawDataValues().str(); - - } - - } - - } - - } - - } - - // for the internal memcpy routine we only care for the second - // parameter and are not reporting anything. - if (isIntMemcpy == true) { - - if (HasStr2 == true) { - - Value * op2 = callInst->getArgOperand(2); - ConstantInt *ilen = dyn_cast<ConstantInt>(op2); - if (ilen) { - - uint64_t literalLength = Str2.size(); - uint64_t optLength = ilen->getZExtValue(); - if (optLength > literalLength + 1) { - - optLength = Str2.length() + 1; - - } - - if (literalLength + 1 == optLength) { - - Str2.append("\0", 1); // add null byte - // addedNull = true; - - } - - } - - valueMap[Str1P] = new std::string(Str2); - - if (debug) - fprintf(stderr, "Saved: %s for %p\n", Str2.c_str(), Str1P); - continue; - - } - - continue; - - } - - // Neither a literal nor a global variable? - // maybe it is a local variable that we saved - if (!HasStr2) { - - std::string *strng = valueMap[Str2P]; - if (strng && !strng->empty()) { - - Str2 = *strng; - HasStr2 = true; - if (debug) - fprintf(stderr, "Filled2: %s for %p\n", strng->c_str(), - Str2P); - - } - - } - - if (!HasStr1) { - - auto Ptr = dyn_cast<ConstantExpr>(Str1P); - - if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) { - - if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) { - - if (Var->hasInitializer()) { - - if (auto *Array = dyn_cast<ConstantDataArray>( - Var->getInitializer())) { - - HasStr1 = true; - Str1 = Array->getRawDataValues().str(); - - } - - } - - } - - } - - } - - // Neither a literal nor a global variable? - // maybe it is a local variable that we saved - if (!HasStr1) { - - std::string *strng = valueMap[Str1P]; - if (strng && !strng->empty()) { - - Str1 = *strng; - HasStr1 = true; - if (debug) - fprintf(stderr, "Filled1: %s for %p\n", strng->c_str(), - Str1P); - - } - - } - - /* handle cases of one string is const, one string is variable */ - if (!(HasStr1 ^ HasStr2)) continue; - - std::string thestring; - - if (HasStr1) - thestring = Str1; - else - thestring = Str2; - - optLen = thestring.length(); - if (optLen < 2 || (optLen == 2 && !thestring[1])) { continue; } - - if (isMemcmp || isStrncmp || isStrncasecmp) { - - Value * op2 = callInst->getArgOperand(2); - ConstantInt *ilen = dyn_cast<ConstantInt>(op2); - - if (ilen) { - - uint64_t literalLength = optLen; - optLen = ilen->getZExtValue(); - if (optLen > literalLength + 1) { optLen = literalLength + 1; } - if (optLen < 2) { continue; } - if (literalLength + 1 == optLen) { // add null byte - thestring.append("\0", 1); - addedNull = true; - - } - - } - - } - - // add null byte if this is a string compare function and a null - // was not already added - if (!isMemcmp) { - - if (addedNull == false && thestring[optLen - 1] != '\0') { - - thestring.append("\0", 1); // add null byte - optLen++; - - } - - if (!isStdString) { - - // ensure we do not have garbage - size_t offset = thestring.find('\0', 0); - if (offset + 1 < optLen) optLen = offset + 1; - thestring = thestring.substr(0, optLen); - - } - - } - - if (!be_quiet) { - - fprintf(stderr, "%s: length %zu/%zu \"", FuncName.c_str(), optLen, - thestring.length()); - for (uint8_t i = 0; i < thestring.length(); i++) { - - uint8_t c = thestring[i]; - if (c <= 32 || c >= 127) - fprintf(stderr, "\\x%02x", c); - else - fprintf(stderr, "%c", c); - - } - - fprintf(stderr, "\"\n"); - - } - - // we take the longer string, even if the compare was to a - // shorter part. Note that depending on the optimizer of the - // compiler this can be wrong, but it is more likely that this - // is helping the fuzzer - if (optLen != thestring.length()) optLen = thestring.length(); - if (optLen > MAX_AUTO_EXTRA) optLen = MAX_AUTO_EXTRA; - if (optLen < MIN_AUTO_EXTRA) // too short? skip - continue; - - dictionary.push_back(thestring.substr(0, optLen)); - - } - - } - - } - - } - - for (auto &BB : F) { - - if (F.size() == 1) { - - InsBlocks.push_back(&BB); - continue; - - } - - uint32_t succ = 0; - for (succ_iterator SI = succ_begin(&BB), SE = succ_end(&BB); SI != SE; - ++SI) - if ((*SI)->size() > 0) succ++; - if (succ < 2) // no need to instrument - continue; - - if (BlockList.size()) { - - int skip = 0; - for (uint32_t k = 0; k < BlockList.size(); k++) { - - if (&BB == BlockList[k]) { - - if (debug) - fprintf(stderr, - "DEBUG: Function %s skipping BB with/after __afl_loop\n", - F.getName().str().c_str()); - skip = 1; - - } - - } - - if (skip) continue; - - } - - InsBlocks.push_back(&BB); - - } - - if (InsBlocks.size() > 0) { - - uint32_t i = InsBlocks.size(); - - do { - - --i; - BasicBlock * newBB = NULL; - BasicBlock * origBB = &(*InsBlocks[i]); - std::vector<BasicBlock *> Successors; - Instruction * TI = origBB->getTerminator(); - uint32_t fs = origBB->getParent()->size(); - uint32_t countto; - - for (succ_iterator SI = succ_begin(origBB), SE = succ_end(origBB); - SI != SE; ++SI) { - - BasicBlock *succ = *SI; - Successors.push_back(succ); - - } - - if (fs == 1) { - - newBB = origBB; - countto = 1; - - } else { - - if (TI == NULL || TI->getNumSuccessors() < 2) continue; - countto = Successors.size(); - - } - - // if (Successors.size() != TI->getNumSuccessors()) - // FATAL("Different successor numbers %lu <-> %u\n", Successors.size(), - // TI->getNumSuccessors()); - - for (uint32_t j = 0; j < countto; j++) { - - if (fs != 1) newBB = llvm::SplitEdge(origBB, Successors[j]); - - if (!newBB) { - - if (!be_quiet) WARNF("Split failed!"); - continue; - - } - - if (documentFile) { - - fprintf(documentFile, "ModuleID=%llu Function=%s edgeID=%u\n", - moduleID, F.getName().str().c_str(), afl_global_id); - - } - - BasicBlock::iterator IP = newBB->getFirstInsertionPt(); - IRBuilder<> IRB(&(*IP)); - - /* Set the ID of the inserted basic block */ - - ConstantInt *CurLoc = ConstantInt::get(Int32Ty, afl_global_id++); - - /* Load SHM pointer */ - - Value *MapPtrIdx; - - if (map_addr) { - - MapPtrIdx = IRB.CreateGEP(MapPtrFixed, CurLoc); - - } else { - - LoadInst *MapPtr = IRB.CreateLoad(AFLMapPtr); - MapPtr->setMetadata(M.getMDKindID("nosanitize"), - MDNode::get(C, None)); - MapPtrIdx = IRB.CreateGEP(MapPtr, CurLoc); - - } - - /* Update bitmap */ - - if (use_threadsafe_counters) { - - IRB.CreateAtomicRMW(llvm::AtomicRMWInst::BinOp::Add, MapPtrIdx, One, -#if LLVM_VERSION_MAJOR >= 13 - llvm::MaybeAlign(1), -#endif - llvm::AtomicOrdering::Monotonic); - - } else { - - LoadInst *Counter = IRB.CreateLoad(MapPtrIdx); - Counter->setMetadata(M.getMDKindID("nosanitize"), - MDNode::get(C, None)); - - Value *Incr = IRB.CreateAdd(Counter, One); - - if (skip_nozero == NULL) { - - auto cf = IRB.CreateICmpEQ(Incr, Zero); - auto carry = IRB.CreateZExt(cf, Int8Ty); - Incr = IRB.CreateAdd(Incr, carry); - - } - - IRB.CreateStore(Incr, MapPtrIdx) - ->setMetadata(M.getMDKindID("nosanitize"), - MDNode::get(C, None)); - - } - - // done :) - - inst_blocks++; - - } - - } while (i > 0); - - } - - } - - if (documentFile) fclose(documentFile); - documentFile = NULL; - - // save highest location ID to global variable - // do this after each function to fail faster - if (!be_quiet && afl_global_id > MAP_SIZE && - afl_global_id > FS_OPT_MAX_MAPSIZE) { - - uint32_t pow2map = 1, map = afl_global_id; - while ((map = map >> 1)) - pow2map++; - WARNF( - "We have %u blocks to instrument but the map size is only %u. Either " - "edit config.h and set MAP_SIZE_POW2 from %d to %u, then recompile " - "afl-fuzz and llvm_mode and then make this target - or set " - "AFL_MAP_SIZE with at least size %u when running afl-fuzz with this " - "target.", - afl_global_id, MAP_SIZE, MAP_SIZE_POW2, pow2map, afl_global_id); - - } - - if (!getenv("AFL_LLVM_LTO_DONTWRITEID") || dictionary.size() || map_addr) { - - // yes we could create our own function, insert it into ctors ... - // but this would be a pain in the butt ... so we use afl-llvm-rt-lto.o - - Function *f = M.getFunction("__afl_auto_init_globals"); - - if (!f) { - - fprintf(stderr, - "Error: init function could not be found (this should not " - "happen)\n"); - exit(-1); - - } - - BasicBlock *bb = &f->getEntryBlock(); - if (!bb) { - - fprintf(stderr, - "Error: init function does not have an EntryBlock (this should " - "not happen)\n"); - exit(-1); - - } - - BasicBlock::iterator IP = bb->getFirstInsertionPt(); - IRBuilder<> IRB(&(*IP)); - - if (map_addr) { - - GlobalVariable *AFLMapAddrFixed = new GlobalVariable( - M, Int64Ty, true, GlobalValue::ExternalLinkage, 0, "__afl_map_addr"); - ConstantInt *MapAddr = ConstantInt::get(Int64Ty, map_addr); - StoreInst * StoreMapAddr = IRB.CreateStore(MapAddr, AFLMapAddrFixed); - StoreMapAddr->setMetadata(M.getMDKindID("nosanitize"), - MDNode::get(C, None)); - - } - - if (getenv("AFL_LLVM_LTO_DONTWRITEID") == NULL) { - - uint32_t write_loc = (((afl_global_id + 63) >> 6) << 6); - - GlobalVariable *AFLFinalLoc = new GlobalVariable( - M, Int32Ty, true, GlobalValue::ExternalLinkage, 0, "__afl_final_loc"); - ConstantInt *const_loc = ConstantInt::get(Int32Ty, write_loc); - StoreInst * StoreFinalLoc = IRB.CreateStore(const_loc, AFLFinalLoc); - StoreFinalLoc->setMetadata(M.getMDKindID("nosanitize"), - MDNode::get(C, None)); - - } - - if (dictionary.size()) { - - size_t memlen = 0, count = 0; - - // sort and unique the dictionary - std::sort(dictionary.begin(), dictionary.end()); - auto last = std::unique(dictionary.begin(), dictionary.end()); - dictionary.erase(last, dictionary.end()); - - for (auto token : dictionary) { - - memlen += token.length(); - count++; - - } - - if (!be_quiet) - printf("AUTODICTIONARY: %zu string%s found\n", count, - count == 1 ? "" : "s"); - - if (count) { - - if ((ptr = (char *)malloc(memlen + count)) == NULL) { - - fprintf(stderr, "Error: malloc for %zu bytes failed!\n", - memlen + count); - exit(-1); - - } - - count = 0; - - size_t offset = 0; - for (auto token : dictionary) { - - if (offset + token.length() < 0xfffff0 && count < MAX_AUTO_EXTRAS) { - - ptr[offset++] = (uint8_t)token.length(); - memcpy(ptr + offset, token.c_str(), token.length()); - offset += token.length(); - count++; - - } - - } - - GlobalVariable *AFLDictionaryLen = - new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage, - 0, "__afl_dictionary_len"); - ConstantInt *const_len = ConstantInt::get(Int32Ty, offset); - StoreInst *StoreDictLen = IRB.CreateStore(const_len, AFLDictionaryLen); - StoreDictLen->setMetadata(M.getMDKindID("nosanitize"), - MDNode::get(C, None)); - - ArrayType *ArrayTy = ArrayType::get(IntegerType::get(C, 8), offset); - GlobalVariable *AFLInternalDictionary = new GlobalVariable( - M, ArrayTy, true, GlobalValue::ExternalLinkage, - ConstantDataArray::get(C, - *(new ArrayRef<char>((char *)ptr, offset))), - "__afl_internal_dictionary"); - AFLInternalDictionary->setInitializer(ConstantDataArray::get( - C, *(new ArrayRef<char>((char *)ptr, offset)))); - AFLInternalDictionary->setConstant(true); - - GlobalVariable *AFLDictionary = new GlobalVariable( - M, PointerType::get(Int8Ty, 0), false, GlobalValue::ExternalLinkage, - 0, "__afl_dictionary"); - - Value *AFLDictOff = IRB.CreateGEP(AFLInternalDictionary, Zero); - Value *AFLDictPtr = - IRB.CreatePointerCast(AFLDictOff, PointerType::get(Int8Ty, 0)); - StoreInst *StoreDict = IRB.CreateStore(AFLDictPtr, AFLDictionary); - StoreDict->setMetadata(M.getMDKindID("nosanitize"), - MDNode::get(C, None)); - - } - - } - - } - - /* Say something nice. */ - - if (!be_quiet) { - - if (!inst_blocks) - WARNF("No instrumentation targets found."); - else { - - char modeline[100]; - snprintf(modeline, sizeof(modeline), "%s%s%s%s%s", - getenv("AFL_HARDEN") ? "hardened" : "non-hardened", - getenv("AFL_USE_ASAN") ? ", ASAN" : "", - getenv("AFL_USE_MSAN") ? ", MSAN" : "", - getenv("AFL_USE_CFISAN") ? ", CFISAN" : "", - getenv("AFL_USE_UBSAN") ? ", UBSAN" : ""); - OKF("Instrumented %d locations with no collisions (on average %llu " - "collisions would be in afl-gcc/vanilla AFL) (%s mode).", - inst_blocks, calculateCollisions(inst_blocks), modeline); - - } - - } - - return true; - -} - -char AFLLTOPass::ID = 0; - -static void registerAFLLTOPass(const PassManagerBuilder &, - legacy::PassManagerBase &PM) { - - PM.add(new AFLLTOPass()); - -} - -static RegisterPass<AFLLTOPass> X("afl-lto", "afl++ LTO instrumentation pass", - false, false); - -static RegisterStandardPasses RegisterAFLLTOPass( - PassManagerBuilder::EP_FullLinkTimeOptimizationLast, registerAFLLTOPass); - diff --git a/instrumentation/afl-llvm-lto-instrumentlist.so.cc b/instrumentation/afl-llvm-lto-instrumentlist.so.cc index 416dbb88..cf26f912 100644 --- a/instrumentation/afl-llvm-lto-instrumentlist.so.cc +++ b/instrumentation/afl-llvm-lto-instrumentlist.so.cc @@ -15,7 +15,7 @@ you may not use this file except in compliance with the License. You may obtain a copy of the License at: - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 This library is plugged into LLVM when invoking clang through afl-clang-fast. It tells the compiler to add code roughly equivalent to the bits discussed @@ -116,10 +116,15 @@ bool AFLcheckIfInstrument::runOnModule(Module &M) { auto & Ctx = F.getContext(); AttributeList Attrs = F.getAttributes(); - AttrBuilder NewAttrs; +#if LLVM_VERSION_MAJOR < 14 + AttrBuilder NewAttrs; NewAttrs.addAttribute("skipinstrument"); F.setAttributes( Attrs.addAttributes(Ctx, AttributeList::FunctionIndex, NewAttrs)); +#else + AttributeList NewAttrs = Attrs.addFnAttribute(Ctx, "skipinstrument"); + F.setAttributes(NewAttrs); +#endif } diff --git a/instrumentation/afl-llvm-pass.so.cc b/instrumentation/afl-llvm-pass.so.cc index ecf28f31..21ce0cf9 100644 --- a/instrumentation/afl-llvm-pass.so.cc +++ b/instrumentation/afl-llvm-pass.so.cc @@ -18,7 +18,7 @@ you may not use this file except in compliance with the License. You may obtain a copy of the License at: - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 This library is plugged into LLVM when invoking clang through afl-clang-fast. It tells the compiler to add code roughly equivalent to the bits discussed @@ -956,11 +956,12 @@ bool AFLCoverage::runOnModule(Module &M) { else { char modeline[100]; - snprintf(modeline, sizeof(modeline), "%s%s%s%s%s", + snprintf(modeline, sizeof(modeline), "%s%s%s%s%s%s", getenv("AFL_HARDEN") ? "hardened" : "non-hardened", getenv("AFL_USE_ASAN") ? ", ASAN" : "", getenv("AFL_USE_MSAN") ? ", MSAN" : "", getenv("AFL_USE_CFISAN") ? ", CFISAN" : "", + getenv("AFL_USE_TSAN") ? ", TSAN" : "", getenv("AFL_USE_UBSAN") ? ", UBSAN" : ""); OKF("Instrumented %d locations (%s mode, ratio %u%%).", inst_blocks, modeline, inst_ratio); diff --git a/instrumentation/afl-llvm-rt-lto.o.c b/instrumentation/afl-llvm-rt-lto.o.c index e53785ff..eb346157 100644 --- a/instrumentation/afl-llvm-rt-lto.o.c +++ b/instrumentation/afl-llvm-rt-lto.o.c @@ -6,7 +6,7 @@ you may not use this file except in compliance with the License. You may obtain a copy of the License at: - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 */ diff --git a/instrumentation/cmplog-instructions-pass.cc b/instrumentation/cmplog-instructions-pass.cc index 0562c5b2..a7b7aac8 100644 --- a/instrumentation/cmplog-instructions-pass.cc +++ b/instrumentation/cmplog-instructions-pass.cc @@ -11,7 +11,7 @@ you may not use this file except in compliance with the License. You may obtain a copy of the License at: - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 */ @@ -274,14 +274,15 @@ bool CmpLogInstructions::hookInstrs(Module &M) { Value *op0 = selectcmpInst->getOperand(0); Value *op1 = selectcmpInst->getOperand(1); + Value *op0_saved = op0, *op1_saved = op1; + auto ty0 = op0->getType(); + auto ty1 = op1->getType(); - IntegerType * intTyOp0 = NULL; - IntegerType * intTyOp1 = NULL; - unsigned max_size = 0, cast_size = 0; - unsigned char attr = 0; - std::vector<Value *> args; - - CmpInst *cmpInst = dyn_cast<CmpInst>(selectcmpInst); + IntegerType *intTyOp0 = NULL; + IntegerType *intTyOp1 = NULL; + unsigned max_size = 0, cast_size = 0; + unsigned attr = 0, vector_cnt = 0; + CmpInst * cmpInst = dyn_cast<CmpInst>(selectcmpInst); if (!cmpInst) { continue; } @@ -327,7 +328,23 @@ bool CmpLogInstructions::hookInstrs(Module &M) { if (selectcmpInst->getOpcode() == Instruction::FCmp) { - auto ty0 = op0->getType(); + if (ty0->isVectorTy()) { + + VectorType *tt = dyn_cast<VectorType>(ty0); + if (!tt) { + + fprintf(stderr, "Warning: cmplog cmp vector is not a vector!\n"); + continue; + + } + +#if LLVM_MAJOR > 11 + vector_cnt = tt->getElementCount().getKnownMinValue(); + ty0 = tt->getElementType(); +#endif + + } + if (ty0->isHalfTy() #if LLVM_VERSION_MAJOR >= 11 || ty0->isBFloatTy() @@ -342,13 +359,35 @@ bool CmpLogInstructions::hookInstrs(Module &M) { max_size = 80; else if (ty0->isFP128Ty() || ty0->isPPC_FP128Ty()) max_size = 128; +#if LLVM_MAJOR > 11 + else if (ty0->getTypeID() != llvm::Type::PointerTyID && !be_quiet) + fprintf(stderr, "Warning: unsupported cmp type for cmplog: %u!\n", + ty0->getTypeID()); +#endif attr += 8; } else { - intTyOp0 = dyn_cast<IntegerType>(op0->getType()); - intTyOp1 = dyn_cast<IntegerType>(op1->getType()); + if (ty0->isVectorTy()) { + +#if LLVM_MAJOR > 11 + VectorType *tt = dyn_cast<VectorType>(ty0); + if (!tt) { + + fprintf(stderr, "Warning: cmplog cmp vector is not a vector!\n"); + continue; + + } + + vector_cnt = tt->getElementCount().getKnownMinValue(); + ty1 = ty0 = tt->getElementType(); +#endif + + } + + intTyOp0 = dyn_cast<IntegerType>(ty0); + intTyOp1 = dyn_cast<IntegerType>(ty1); if (intTyOp0 && intTyOp1) { @@ -356,11 +395,28 @@ bool CmpLogInstructions::hookInstrs(Module &M) { ? intTyOp0->getBitWidth() : intTyOp1->getBitWidth(); + } else { + +#if LLVM_MAJOR > 11 + if (ty0->getTypeID() != llvm::Type::PointerTyID && !be_quiet) { + + fprintf(stderr, "Warning: unsupported cmp type for cmplog: %u\n", + ty0->getTypeID()); + + } + +#endif + } } - if (!max_size || max_size < 16) { continue; } + if (!max_size || max_size < 16) { + + // fprintf(stderr, "too small\n"); + continue; + + } if (max_size % 8) { max_size = (((max_size / 8) + 1) * 8); } @@ -393,67 +449,110 @@ bool CmpLogInstructions::hookInstrs(Module &M) { } - // errs() << "[CMPLOG] cmp " << *cmpInst << "(in function " << - // cmpInst->getFunction()->getName() << ")\n"; + uint64_t cur = 0, last_val0 = 0, last_val1 = 0, cur_val; - // first bitcast to integer type of the same bitsize as the original - // type (this is a nop, if already integer) - Value *op0_i = IRB.CreateBitCast( - op0, IntegerType::get(C, op0->getType()->getPrimitiveSizeInBits())); - // then create a int cast, which does zext, trunc or bitcast. In our case - // usually zext to the next larger supported type (this is a nop if - // already the right type) - Value *V0 = - IRB.CreateIntCast(op0_i, IntegerType::get(C, cast_size), false); - args.push_back(V0); - Value *op1_i = IRB.CreateBitCast( - op1, IntegerType::get(C, op1->getType()->getPrimitiveSizeInBits())); - Value *V1 = - IRB.CreateIntCast(op1_i, IntegerType::get(C, cast_size), false); - args.push_back(V1); + while (1) { - // errs() << "[CMPLOG] casted parameters:\n0: " << *V0 << "\n1: " << *V1 - // << "\n"; + std::vector<Value *> args; + uint32_t skip = 0; - ConstantInt *attribute = ConstantInt::get(Int8Ty, attr); - args.push_back(attribute); + if (vector_cnt) { - if (cast_size != max_size) { + op0 = IRB.CreateExtractElement(op0_saved, cur); + op1 = IRB.CreateExtractElement(op1_saved, cur); + ConstantInt *i0 = dyn_cast<ConstantInt>(op0); + ConstantInt *i1 = dyn_cast<ConstantInt>(op1); + if (i0 && i0->uge(0xffffffffffffffff) == false) { - ConstantInt *bitsize = ConstantInt::get(Int8Ty, (max_size / 8) - 1); - args.push_back(bitsize); + cur_val = i0->getZExtValue(); + if (last_val0 && last_val0 == cur_val) { skip = 1; } + last_val0 = cur_val; - } + } - // fprintf(stderr, "_ExtInt(%u) castTo %u with attr %u didcast %u\n", - // max_size, cast_size, attr); + if (i1 && i1->uge(0xffffffffffffffff) == false) { - switch (cast_size) { + cur_val = i1->getZExtValue(); + if (last_val1 && last_val1 == cur_val) { skip = 1; } + last_val1 = cur_val; - case 8: - IRB.CreateCall(cmplogHookIns1, args); - break; - case 16: - IRB.CreateCall(cmplogHookIns2, args); - break; - case 32: - IRB.CreateCall(cmplogHookIns4, args); - break; - case 64: - IRB.CreateCall(cmplogHookIns8, args); - break; - case 128: - if (max_size == 128) { + } + + } + + if (!skip) { + + // errs() << "[CMPLOG] cmp " << *cmpInst << "(in function " << + // cmpInst->getFunction()->getName() << ")\n"; + + // first bitcast to integer type of the same bitsize as the original + // type (this is a nop, if already integer) + Value *op0_i = IRB.CreateBitCast( + op0, IntegerType::get(C, ty0->getPrimitiveSizeInBits())); + // then create a int cast, which does zext, trunc or bitcast. In our + // case usually zext to the next larger supported type (this is a nop + // if already the right type) + Value *V0 = + IRB.CreateIntCast(op0_i, IntegerType::get(C, cast_size), false); + args.push_back(V0); + Value *op1_i = IRB.CreateBitCast( + op1, IntegerType::get(C, ty1->getPrimitiveSizeInBits())); + Value *V1 = + IRB.CreateIntCast(op1_i, IntegerType::get(C, cast_size), false); + args.push_back(V1); - IRB.CreateCall(cmplogHookIns16, args); + // errs() << "[CMPLOG] casted parameters:\n0: " << *V0 << "\n1: " << + // *V1 + // << "\n"; - } else { + ConstantInt *attribute = ConstantInt::get(Int8Ty, attr); + args.push_back(attribute); - IRB.CreateCall(cmplogHookInsN, args); + if (cast_size != max_size) { + + ConstantInt *bitsize = ConstantInt::get(Int8Ty, (max_size / 8) - 1); + args.push_back(bitsize); } - break; + // fprintf(stderr, "_ExtInt(%u) castTo %u with attr %u didcast %u\n", + // max_size, cast_size, attr); + + switch (cast_size) { + + case 8: + IRB.CreateCall(cmplogHookIns1, args); + break; + case 16: + IRB.CreateCall(cmplogHookIns2, args); + break; + case 32: + IRB.CreateCall(cmplogHookIns4, args); + break; + case 64: + IRB.CreateCall(cmplogHookIns8, args); + break; + case 128: + if (max_size == 128) { + + IRB.CreateCall(cmplogHookIns16, args); + + } else { + + IRB.CreateCall(cmplogHookInsN, args); + + } + + break; + + } + + } + + /* else fprintf(stderr, "skipped\n"); */ + + ++cur; + if (cur >= vector_cnt) { break; } } diff --git a/instrumentation/cmplog-routines-pass.cc b/instrumentation/cmplog-routines-pass.cc index 1e2610f2..fb514edc 100644 --- a/instrumentation/cmplog-routines-pass.cc +++ b/instrumentation/cmplog-routines-pass.cc @@ -11,7 +11,7 @@ you may not use this file except in compliance with the License. You may obtain a copy of the License at: - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 */ @@ -87,12 +87,14 @@ char CmpLogRoutines::ID = 0; bool CmpLogRoutines::hookRtns(Module &M) { - std::vector<CallInst *> calls, llvmStdStd, llvmStdC, gccStdStd, gccStdC; - LLVMContext & C = M.getContext(); + std::vector<CallInst *> calls, llvmStdStd, llvmStdC, gccStdStd, gccStdC, + Memcmp, Strcmp, Strncmp; + LLVMContext &C = M.getContext(); Type *VoidTy = Type::getVoidTy(C); // PointerType *VoidPtrTy = PointerType::get(VoidTy, 0); IntegerType *Int8Ty = IntegerType::getInt8Ty(C); + IntegerType *Int64Ty = IntegerType::getInt64Ty(C); PointerType *i8PtrTy = PointerType::get(Int8Ty, 0); #if LLVM_VERSION_MAJOR < 9 @@ -184,6 +186,60 @@ bool CmpLogRoutines::hookRtns(Module &M) { FunctionCallee cmplogGccStdC = c4; #endif +#if LLVM_VERSION_MAJOR < 9 + Constant * +#else + FunctionCallee +#endif + c5 = M.getOrInsertFunction("__cmplog_rtn_hook_n", VoidTy, i8PtrTy, + i8PtrTy, Int64Ty +#if LLVM_VERSION_MAJOR < 5 + , + NULL +#endif + ); +#if LLVM_VERSION_MAJOR < 9 + Function *cmplogHookFnN = cast<Function>(c5); +#else + FunctionCallee cmplogHookFnN = c5; +#endif + +#if LLVM_VERSION_MAJOR < 9 + Constant * +#else + FunctionCallee +#endif + c6 = M.getOrInsertFunction("__cmplog_rtn_hook_strn", VoidTy, i8PtrTy, + i8PtrTy, Int64Ty +#if LLVM_VERSION_MAJOR < 5 + , + NULL +#endif + ); +#if LLVM_VERSION_MAJOR < 9 + Function *cmplogHookFnStrN = cast<Function>(c6); +#else + FunctionCallee cmplogHookFnStrN = c6; +#endif + +#if LLVM_VERSION_MAJOR < 9 + Constant * +#else + FunctionCallee +#endif + c7 = M.getOrInsertFunction("__cmplog_rtn_hook_str", VoidTy, i8PtrTy, + i8PtrTy +#if LLVM_VERSION_MAJOR < 5 + , + NULL +#endif + ); +#if LLVM_VERSION_MAJOR < 9 + Function *cmplogHookFnStr = cast<Function>(c7); +#else + FunctionCallee cmplogHookFnStr = c7; +#endif + GlobalVariable *AFLCmplogPtr = M.getNamedGlobal("__afl_cmp_map"); if (!AFLCmplogPtr) { @@ -214,12 +270,93 @@ bool CmpLogRoutines::hookRtns(Module &M) { if (callInst->getCallingConv() != llvm::CallingConv::C) continue; FunctionType *FT = Callee->getFunctionType(); + std::string FuncName = Callee->getName().str(); bool isPtrRtn = FT->getNumParams() >= 2 && !FT->getReturnType()->isVoidTy() && FT->getParamType(0) == FT->getParamType(1) && FT->getParamType(0)->isPointerTy(); + bool isPtrRtnN = FT->getNumParams() >= 3 && + !FT->getReturnType()->isVoidTy() && + FT->getParamType(0) == FT->getParamType(1) && + FT->getParamType(0)->isPointerTy() && + FT->getParamType(2)->isIntegerTy(); + if (isPtrRtnN) { + + auto intTyOp = + dyn_cast<IntegerType>(callInst->getArgOperand(2)->getType()); + if (intTyOp) { + + if (intTyOp->getBitWidth() != 32 && + intTyOp->getBitWidth() != 64) { + + isPtrRtnN = false; + + } + + } + + } + + bool isMemcmp = + (!FuncName.compare("memcmp") || !FuncName.compare("bcmp") || + !FuncName.compare("CRYPTO_memcmp") || + !FuncName.compare("OPENSSL_memcmp") || + !FuncName.compare("memcmp_const_time") || + !FuncName.compare("memcmpct")); + isMemcmp &= FT->getNumParams() == 3 && + FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0)->isPointerTy() && + FT->getParamType(1)->isPointerTy() && + FT->getParamType(2)->isIntegerTy(); + + bool isStrcmp = + (!FuncName.compare("strcmp") || !FuncName.compare("xmlStrcmp") || + !FuncName.compare("xmlStrEqual") || + !FuncName.compare("g_strcmp0") || + !FuncName.compare("curl_strequal") || + !FuncName.compare("strcsequal") || + !FuncName.compare("strcasecmp") || + !FuncName.compare("stricmp") || + !FuncName.compare("ap_cstr_casecmp") || + !FuncName.compare("OPENSSL_strcasecmp") || + !FuncName.compare("xmlStrcasecmp") || + !FuncName.compare("g_strcasecmp") || + !FuncName.compare("g_ascii_strcasecmp") || + !FuncName.compare("Curl_strcasecompare") || + !FuncName.compare("Curl_safe_strcasecompare") || + !FuncName.compare("cmsstrcasecmp") || + !FuncName.compare("strstr") || + !FuncName.compare("g_strstr_len") || + !FuncName.compare("ap_strcasestr") || + !FuncName.compare("xmlStrstr") || + !FuncName.compare("xmlStrcasestr") || + !FuncName.compare("g_str_has_prefix") || + !FuncName.compare("g_str_has_suffix")); + isStrcmp &= + FT->getNumParams() == 2 && FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0) == FT->getParamType(1) && + FT->getParamType(0) == IntegerType::getInt8PtrTy(M.getContext()); + + bool isStrncmp = (!FuncName.compare("strncmp") || + !FuncName.compare("xmlStrncmp") || + !FuncName.compare("curl_strnequal") || + !FuncName.compare("strncasecmp") || + !FuncName.compare("strnicmp") || + !FuncName.compare("ap_cstr_casecmpn") || + !FuncName.compare("OPENSSL_strncasecmp") || + !FuncName.compare("xmlStrncasecmp") || + !FuncName.compare("g_ascii_strncasecmp") || + !FuncName.compare("Curl_strncasecompare") || + !FuncName.compare("g_strncasecmp")); + isStrncmp &= FT->getNumParams() == 3 && + FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0) == FT->getParamType(1) && + FT->getParamType(0) == + IntegerType::getInt8PtrTy(M.getContext()) && + FT->getParamType(2)->isIntegerTy(); + bool isGccStdStringStdString = Callee->getName().find("__is_charIT_EE7__value") != std::string::npos && @@ -267,13 +404,19 @@ bool CmpLogRoutines::hookRtns(Module &M) { */ if (isGccStdStringCString || isGccStdStringStdString || - isLlvmStdStringStdString || isLlvmStdStringCString) { + isLlvmStdStringStdString || isLlvmStdStringCString || isMemcmp || + isStrcmp || isStrncmp) { - isPtrRtn = false; + isPtrRtnN = isPtrRtn = false; } + if (isPtrRtnN) { isPtrRtn = false; } + if (isPtrRtn) { calls.push_back(callInst); } + if (isMemcmp || isPtrRtnN) { Memcmp.push_back(callInst); } + if (isStrcmp) { Strcmp.push_back(callInst); } + if (isStrncmp) { Strncmp.push_back(callInst); } if (isGccStdStringStdString) { gccStdStd.push_back(callInst); } if (isGccStdStringCString) { gccStdC.push_back(callInst); } if (isLlvmStdStringStdString) { llvmStdStd.push_back(callInst); } @@ -288,7 +431,8 @@ bool CmpLogRoutines::hookRtns(Module &M) { } if (!calls.size() && !gccStdStd.size() && !gccStdC.size() && - !llvmStdStd.size() && !llvmStdC.size()) + !llvmStdStd.size() && !llvmStdC.size() && !Memcmp.size() && + Strcmp.size() && Strncmp.size()) return false; /* @@ -323,6 +467,96 @@ bool CmpLogRoutines::hookRtns(Module &M) { } + for (auto &callInst : Memcmp) { + + Value *v1P = callInst->getArgOperand(0), *v2P = callInst->getArgOperand(1), + *v3P = callInst->getArgOperand(2); + + IRBuilder<> IRB2(callInst->getParent()); + IRB2.SetInsertPoint(callInst); + + LoadInst *CmpPtr = IRB2.CreateLoad(AFLCmplogPtr); + CmpPtr->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + auto is_not_null = IRB2.CreateICmpNE(CmpPtr, Null); + auto ThenTerm = SplitBlockAndInsertIfThen(is_not_null, callInst, false); + + IRBuilder<> IRB(ThenTerm); + + std::vector<Value *> args; + Value * v1Pcasted = IRB.CreatePointerCast(v1P, i8PtrTy); + Value * v2Pcasted = IRB.CreatePointerCast(v2P, i8PtrTy); + Value * v3Pbitcast = IRB.CreateBitCast( + v3P, IntegerType::get(C, v3P->getType()->getPrimitiveSizeInBits())); + Value *v3Pcasted = + IRB.CreateIntCast(v3Pbitcast, IntegerType::get(C, 64), false); + args.push_back(v1Pcasted); + args.push_back(v2Pcasted); + args.push_back(v3Pcasted); + + IRB.CreateCall(cmplogHookFnN, args); + + // errs() << callInst->getCalledFunction()->getName() << "\n"; + + } + + for (auto &callInst : Strcmp) { + + Value *v1P = callInst->getArgOperand(0), *v2P = callInst->getArgOperand(1); + + IRBuilder<> IRB2(callInst->getParent()); + IRB2.SetInsertPoint(callInst); + + LoadInst *CmpPtr = IRB2.CreateLoad(AFLCmplogPtr); + CmpPtr->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + auto is_not_null = IRB2.CreateICmpNE(CmpPtr, Null); + auto ThenTerm = SplitBlockAndInsertIfThen(is_not_null, callInst, false); + + IRBuilder<> IRB(ThenTerm); + + std::vector<Value *> args; + Value * v1Pcasted = IRB.CreatePointerCast(v1P, i8PtrTy); + Value * v2Pcasted = IRB.CreatePointerCast(v2P, i8PtrTy); + args.push_back(v1Pcasted); + args.push_back(v2Pcasted); + + IRB.CreateCall(cmplogHookFnStr, args); + + // errs() << callInst->getCalledFunction()->getName() << "\n"; + + } + + for (auto &callInst : Strncmp) { + + Value *v1P = callInst->getArgOperand(0), *v2P = callInst->getArgOperand(1), + *v3P = callInst->getArgOperand(2); + + IRBuilder<> IRB2(callInst->getParent()); + IRB2.SetInsertPoint(callInst); + + LoadInst *CmpPtr = IRB2.CreateLoad(AFLCmplogPtr); + CmpPtr->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + auto is_not_null = IRB2.CreateICmpNE(CmpPtr, Null); + auto ThenTerm = SplitBlockAndInsertIfThen(is_not_null, callInst, false); + + IRBuilder<> IRB(ThenTerm); + + std::vector<Value *> args; + Value * v1Pcasted = IRB.CreatePointerCast(v1P, i8PtrTy); + Value * v2Pcasted = IRB.CreatePointerCast(v2P, i8PtrTy); + Value * v3Pbitcast = IRB.CreateBitCast( + v3P, IntegerType::get(C, v3P->getType()->getPrimitiveSizeInBits())); + Value *v3Pcasted = + IRB.CreateIntCast(v3Pbitcast, IntegerType::get(C, 64), false); + args.push_back(v1Pcasted); + args.push_back(v2Pcasted); + args.push_back(v3Pcasted); + + IRB.CreateCall(cmplogHookFnStrN, args); + + // errs() << callInst->getCalledFunction()->getName() << "\n"; + + } + for (auto &callInst : gccStdStd) { Value *v1P = callInst->getArgOperand(0), *v2P = callInst->getArgOperand(1); diff --git a/instrumentation/cmplog-switches-pass.cc b/instrumentation/cmplog-switches-pass.cc index c42d44fe..aa719013 100644 --- a/instrumentation/cmplog-switches-pass.cc +++ b/instrumentation/cmplog-switches-pass.cc @@ -11,7 +11,7 @@ you may not use this file except in compliance with the License. You may obtain a copy of the License at: - http://www.apache.org/licenses/LICENSE-2.0 + https://www.apache.org/licenses/LICENSE-2.0 */ diff --git a/instrumentation/compare-transform-pass.so.cc b/instrumentation/compare-transform-pass.so.cc index 288e8282..1ec2bbfe 100644 --- a/instrumentation/compare-transform-pass.so.cc +++ b/instrumentation/compare-transform-pass.so.cc @@ -5,7 +5,7 @@ * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -445,6 +445,10 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, } + // the following is in general OK, but strncmp is sometimes used in binary + // data structures and this can result in crashes :( so it is commented out + /* + // add null termination character implicit in c strings if (!isMemcmp && TmpConstStr[TmpConstStr.length() - 1]) { @@ -452,10 +456,12 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, } + */ + // in the unusual case the const str has embedded null // characters, the string comparison functions should terminate // at the first null - if (!isMemcmp) { + if (!isMemcmp && TmpConstStr.find('\0') != std::string::npos) { TmpConstStr.assign(TmpConstStr, 0, TmpConstStr.find('\0') + 1); diff --git a/instrumentation/split-compares-pass.so.cc b/instrumentation/split-compares-pass.so.cc index 13f45b69..d1254e40 100644 --- a/instrumentation/split-compares-pass.so.cc +++ b/instrumentation/split-compares-pass.so.cc @@ -6,7 +6,7 @@ * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -578,16 +578,16 @@ bool SplitComparesTransform::splitCompare(CmpInst *cmp_inst, Module &M, /* dependent on the cmp of the high parts go to the end or go on with * the comparison */ - auto term = bb->getTerminator(); - BranchInst *br = nullptr; + auto term = bb->getTerminator(); + if (pred == CmpInst::ICMP_EQ) { - br = BranchInst::Create(cmp_low_bb, end_bb, icmp_high, bb); + BranchInst::Create(cmp_low_bb, end_bb, icmp_high, bb); } else { - /* CmpInst::ICMP_NE */ - br = BranchInst::Create(end_bb, cmp_low_bb, icmp_high, bb); + // CmpInst::ICMP_NE + BranchInst::Create(end_bb, cmp_low_bb, icmp_high, bb); } @@ -675,7 +675,7 @@ bool SplitComparesTransform::splitCompare(CmpInst *cmp_inst, Module &M, ReplaceInstWithInst(cmp_inst->getParent()->getInstList(), ii, PN); // We split the comparison into low and high. If this isn't our target - // bitwidth we recursivly split the low and high parts again until we have + // bitwidth we recursively split the low and high parts again until we have // target bitwidth. if ((bitw / 2) > target_bitwidth) { diff --git a/instrumentation/split-switches-pass.so.cc b/instrumentation/split-switches-pass.so.cc index 82f198aa..1e32a31d 100644 --- a/instrumentation/split-switches-pass.so.cc +++ b/instrumentation/split-switches-pass.so.cc @@ -5,7 +5,7 @@ * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * https://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, |