From 46010a87049bdf32ef23b08d25c186aba3aae442 Mon Sep 17 00:00:00 2001 From: van Hauser Date: Fri, 22 Jan 2021 13:50:16 +0100 Subject: prepare for cmplog rtn std::string support for llvm and g++ --- instrumentation/cmplog-routines-pass.cc | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'instrumentation/cmplog-routines-pass.cc') diff --git a/instrumentation/cmplog-routines-pass.cc b/instrumentation/cmplog-routines-pass.cc index e92883ae..8adf42d5 100644 --- a/instrumentation/cmplog-routines-pass.cc +++ b/instrumentation/cmplog-routines-pass.cc @@ -131,6 +131,11 @@ bool CmpLogRoutines::hookRtns(Module &M) { FunctionType *FT = Callee->getFunctionType(); + // _ZNKSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEE7compareEmmPKcm + // => libc++ => llvm => __cmplog_rtn_llvm_stdstring_cstring(u8 *stdstring1, u8 *stdstring2) + // _ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE7compareEPKc + // => libstdc++ => gcc => __cmplog_rtn_gcc_stdstring_cstring + bool isPtrRtn = FT->getNumParams() >= 2 && !FT->getReturnType()->isVoidTy() && FT->getParamType(0) == FT->getParamType(1) && -- cgit 1.4.1 From baf1ac2e69145d9e411f13df8ab4e7060f2f2685 Mon Sep 17 00:00:00 2001 From: van Hauser Date: Fri, 22 Jan 2021 15:58:12 +0100 Subject: basic cmplog std::string support --- docs/Changelog.md | 2 + instrumentation/afl-compiler-rt.o.c | 4 +- instrumentation/cmplog-routines-pass.cc | 214 ++++++++++++++++++++++++++++++-- 3 files changed, 209 insertions(+), 11 deletions(-) (limited to 'instrumentation/cmplog-routines-pass.cc') diff --git a/docs/Changelog.md b/docs/Changelog.md index 63e8e66e..2b8b0e8d 100644 --- a/docs/Changelog.md +++ b/docs/Changelog.md @@ -32,6 +32,8 @@ sending a mail to . - fixed endless loop for allow/blocklist lines starting with a comment (thanks to Zherya for reporting) - cmplog/redqueen now also tracks floating point, _ExtInt() + 128bit + - cmplog/redqueen can now process basic libc++ and libstdc++ + std::string comparisons (though no position or length type variants) - added AFL_LLVM_INSTRUMENT option NATIVE for native clang pc-guard support (less performant than our own), GCC for old afl-gcc and CLANG for old afl-clang diff --git a/instrumentation/afl-compiler-rt.o.c b/instrumentation/afl-compiler-rt.o.c index 322141ba..433a1d89 100644 --- a/instrumentation/afl-compiler-rt.o.c +++ b/instrumentation/afl-compiler-rt.o.c @@ -1549,10 +1549,10 @@ void __cmplog_rtn_hook(u8 *ptr1, u8 *ptr2) { u32 i; if (!area_is_mapped(ptr1, 32) || !area_is_mapped(ptr2, 32)) return; fprintf(stderr, "rtn arg0="); - for (i = 0; i < 8; i++) + for (i = 0; i < 24; i++) fprintf(stderr, "%02x", ptr1[i]); fprintf(stderr, " arg1="); - for (i = 0; i < 8; i++) + for (i = 0; i < 24; i++) fprintf(stderr, "%02x", ptr2[i]); fprintf(stderr, "\n"); */ diff --git a/instrumentation/cmplog-routines-pass.cc b/instrumentation/cmplog-routines-pass.cc index 8adf42d5..9d6999ca 100644 --- a/instrumentation/cmplog-routines-pass.cc +++ b/instrumentation/cmplog-routines-pass.cc @@ -87,7 +87,7 @@ char CmpLogRoutines::ID = 0; bool CmpLogRoutines::hookRtns(Module &M) { - std::vector calls; + std::vector calls, llvmStdStd, llvmStdC, gccStdStd, gccStdC; LLVMContext & C = M.getContext(); Type *VoidTy = Type::getVoidTy(C); @@ -112,6 +112,78 @@ bool CmpLogRoutines::hookRtns(Module &M) { FunctionCallee cmplogHookFn = c; #endif +#if LLVM_VERSION_MAJOR < 9 + Constant * +#else + FunctionCallee +#endif + c1 = M.getOrInsertFunction("__cmplog_rtn_llvm_stdstring_stdstring", + VoidTy, i8PtrTy, i8PtrTy +#if LLVM_VERSION_MAJOR < 5 + , + NULL +#endif + ); +#if LLVM_VERSION_MAJOR < 9 + Function *cmplogLlvmStdStd = cast(c1); +#else + FunctionCallee cmplogLlvmStdStd = c1; +#endif + +#if LLVM_VERSION_MAJOR < 9 + Constant * +#else + FunctionCallee +#endif + c2 = M.getOrInsertFunction("__cmplog_rtn_llvm_stdstring_cstring", VoidTy, + i8PtrTy, i8PtrTy +#if LLVM_VERSION_MAJOR < 5 + , + NULL +#endif + ); +#if LLVM_VERSION_MAJOR < 9 + Function *cmplogLlvmStdC = cast(c2); +#else + FunctionCallee cmplogLlvmStdC = c2; +#endif + +#if LLVM_VERSION_MAJOR < 9 + Constant * +#else + FunctionCallee +#endif + c3 = M.getOrInsertFunction("__cmplog_rtn_gcc_stdstring_stdstring", VoidTy, + i8PtrTy, i8PtrTy +#if LLVM_VERSION_MAJOR < 5 + , + NULL +#endif + ); +#if LLVM_VERSION_MAJOR < 9 + Function *cmplogGccStdStd = cast(c3); +#else + FunctionCallee cmplogGccStdStd = c3; +#endif + +#if LLVM_VERSION_MAJOR < 9 + Constant * +#else + FunctionCallee +#endif + c4 = M.getOrInsertFunction("__cmplog_rtn_gcc_stdstring_cstring", VoidTy, + i8PtrTy, i8PtrTy +#if LLVM_VERSION_MAJOR < 5 + , + NULL +#endif + ); +#if LLVM_VERSION_MAJOR < 9 + Function *cmplogGccStdC = cast(c4); +#else + FunctionCallee cmplogGccStdC = c4; +#endif + /* iterate over all functions, bbs and instruction and add suitable calls */ for (auto &F : M) { @@ -131,19 +203,67 @@ bool CmpLogRoutines::hookRtns(Module &M) { FunctionType *FT = Callee->getFunctionType(); - // _ZNKSt3__112basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEE7compareEmmPKcm - // => libc++ => llvm => __cmplog_rtn_llvm_stdstring_cstring(u8 *stdstring1, u8 *stdstring2) - // _ZNKSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE7compareEPKc - // => libstdc++ => gcc => __cmplog_rtn_gcc_stdstring_cstring - bool isPtrRtn = FT->getNumParams() >= 2 && !FT->getReturnType()->isVoidTy() && FT->getParamType(0) == FT->getParamType(1) && FT->getParamType(0)->isPointerTy(); - if (!isPtrRtn) continue; - - calls.push_back(callInst); + bool isGccStdStringStdString = + Callee->getName().find("__is_charIT_EE7__value") != + std::string::npos && + Callee->getName().find( + "St7__cxx1112basic_stringIS2_St11char_traits") != + std::string::npos && + FT->getNumParams() >= 2 && + FT->getParamType(0) == FT->getParamType(1) && + FT->getParamType(0)->isPointerTy(); + + bool isGccStdStringCString = + Callee->getName().find( + "St7__cxx1112basic_stringIcSt11char_" + "traitsIcESaIcEE7compareEPK") != std::string::npos && + FT->getNumParams() >= 2 && FT->getParamType(0)->isPointerTy() && + FT->getParamType(1)->isPointerTy(); + + bool isLlvmStdStringStdString = + Callee->getName().find("_ZNSt3__1eqINS") != std::string::npos && + Callee->getName().find("_12basic_stringIcNS_11char_traits") != + std::string::npos && + FT->getNumParams() >= 2 && FT->getParamType(0)->isPointerTy() && + FT->getParamType(1)->isPointerTy(); + + bool isLlvmStdStringCString = + Callee->getName().find("ZNSt3__1eqIcNS") != std::string::npos && + Callee->getName().find("_12basic_stringI") != std::string::npos && + FT->getNumParams() >= 2 && FT->getParamType(0)->isPointerTy() && + FT->getParamType(1)->isPointerTy(); + + /* + fprintf(stderr, "F:%s C:%s argc:%u\n", + F.getName().str().c_str(), Callee->getName().str().c_str(), + FT->getNumParams()); fprintf(stderr, "ptr0:%u ptr1:%u ptr2:%u\n", + FT->getParamType(0)->isPointerTy(), + FT->getParamType(1)->isPointerTy(), + FT->getNumParams() > 2 ? FT->getParamType(2)->isPointerTy() + : 22 ); + */ + + if (isGccStdStringCString || isGccStdStringStdString || + isLlvmStdStringStdString || isLlvmStdStringCString) { + + isPtrRtn = false; + + fprintf(stderr, "%u %u %u %u\n", isGccStdStringCString, + isGccStdStringStdString, isLlvmStdStringStdString, + isLlvmStdStringCString); + + } + + if (isPtrRtn) { calls.push_back(callInst); } + if (isGccStdStringStdString) { gccStdStd.push_back(callInst); } + if (isGccStdStringCString) { gccStdC.push_back(callInst); } + if (isLlvmStdStringStdString) { llvmStdStd.push_back(callInst); } + if (isLlvmStdStringCString) { llvmStdC.push_back(callInst); } } @@ -179,6 +299,82 @@ bool CmpLogRoutines::hookRtns(Module &M) { } + for (auto &callInst : gccStdStd) { + + Value *v1P = callInst->getArgOperand(0), *v2P = callInst->getArgOperand(1); + + IRBuilder<> IRB(callInst->getParent()); + IRB.SetInsertPoint(callInst); + + std::vector args; + Value * v1Pcasted = IRB.CreatePointerCast(v1P, i8PtrTy); + Value * v2Pcasted = IRB.CreatePointerCast(v2P, i8PtrTy); + args.push_back(v1Pcasted); + args.push_back(v2Pcasted); + + IRB.CreateCall(cmplogGccStdStd, args); + + // errs() << callInst->getCalledFunction()->getName() << "\n"; + + } + + for (auto &callInst : gccStdC) { + + Value *v1P = callInst->getArgOperand(0), *v2P = callInst->getArgOperand(1); + + IRBuilder<> IRB(callInst->getParent()); + IRB.SetInsertPoint(callInst); + + std::vector args; + Value * v1Pcasted = IRB.CreatePointerCast(v1P, i8PtrTy); + Value * v2Pcasted = IRB.CreatePointerCast(v2P, i8PtrTy); + args.push_back(v1Pcasted); + args.push_back(v2Pcasted); + + IRB.CreateCall(cmplogGccStdC, args); + + // errs() << callInst->getCalledFunction()->getName() << "\n"; + + } + + for (auto &callInst : llvmStdStd) { + + Value *v1P = callInst->getArgOperand(0), *v2P = callInst->getArgOperand(1); + + IRBuilder<> IRB(callInst->getParent()); + IRB.SetInsertPoint(callInst); + + std::vector args; + Value * v1Pcasted = IRB.CreatePointerCast(v1P, i8PtrTy); + Value * v2Pcasted = IRB.CreatePointerCast(v2P, i8PtrTy); + args.push_back(v1Pcasted); + args.push_back(v2Pcasted); + + IRB.CreateCall(cmplogLlvmStdStd, args); + + // errs() << callInst->getCalledFunction()->getName() << "\n"; + + } + + for (auto &callInst : llvmStdC) { + + Value *v1P = callInst->getArgOperand(0), *v2P = callInst->getArgOperand(1); + + IRBuilder<> IRB(callInst->getParent()); + IRB.SetInsertPoint(callInst); + + std::vector args; + Value * v1Pcasted = IRB.CreatePointerCast(v1P, i8PtrTy); + Value * v2Pcasted = IRB.CreatePointerCast(v2P, i8PtrTy); + args.push_back(v1Pcasted); + args.push_back(v2Pcasted); + + IRB.CreateCall(cmplogLlvmStdC, args); + + // errs() << callInst->getCalledFunction()->getName() << "\n"; + + } + return true; } -- cgit 1.4.1 From 9ed533a0e3908a40e3abc28b95c6f5bd4e413c44 Mon Sep 17 00:00:00 2001 From: van Hauser Date: Fri, 22 Jan 2021 16:01:00 +0100 Subject: fix debug output --- instrumentation/cmplog-routines-pass.cc | 4 ---- 1 file changed, 4 deletions(-) (limited to 'instrumentation/cmplog-routines-pass.cc') diff --git a/instrumentation/cmplog-routines-pass.cc b/instrumentation/cmplog-routines-pass.cc index 9d6999ca..9f19b062 100644 --- a/instrumentation/cmplog-routines-pass.cc +++ b/instrumentation/cmplog-routines-pass.cc @@ -253,10 +253,6 @@ bool CmpLogRoutines::hookRtns(Module &M) { isPtrRtn = false; - fprintf(stderr, "%u %u %u %u\n", isGccStdStringCString, - isGccStdStringStdString, isLlvmStdStringStdString, - isLlvmStdStringCString); - } if (isPtrRtn) { calls.push_back(callInst); } -- cgit 1.4.1 From 1c19804834d2ea4f169be0a99b8ce493a2f10167 Mon Sep 17 00:00:00 2001 From: van Hauser Date: Fri, 22 Jan 2021 16:41:31 +0100 Subject: fix for cmplog stdstring --- instrumentation/cmplog-routines-pass.cc | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) (limited to 'instrumentation/cmplog-routines-pass.cc') diff --git a/instrumentation/cmplog-routines-pass.cc b/instrumentation/cmplog-routines-pass.cc index 9f19b062..a5992c9a 100644 --- a/instrumentation/cmplog-routines-pass.cc +++ b/instrumentation/cmplog-routines-pass.cc @@ -226,26 +226,32 @@ bool CmpLogRoutines::hookRtns(Module &M) { FT->getParamType(1)->isPointerTy(); bool isLlvmStdStringStdString = - Callee->getName().find("_ZNSt3__1eqINS") != std::string::npos && - Callee->getName().find("_12basic_stringIcNS_11char_traits") != - std::string::npos && + Callee->getName().find("_ZNSt3__1eqI") != std::string::npos && + Callee->getName().find("_12basic_stringI") != std::string::npos && + Callee->getName().find("_11char_traits") != std::string::npos && FT->getNumParams() >= 2 && FT->getParamType(0)->isPointerTy() && FT->getParamType(1)->isPointerTy(); bool isLlvmStdStringCString = - Callee->getName().find("ZNSt3__1eqIcNS") != std::string::npos && + Callee->getName().find("_ZNSt3__1eqI") != std::string::npos && Callee->getName().find("_12basic_stringI") != std::string::npos && FT->getNumParams() >= 2 && FT->getParamType(0)->isPointerTy() && FT->getParamType(1)->isPointerTy(); /* - fprintf(stderr, "F:%s C:%s argc:%u\n", - F.getName().str().c_str(), Callee->getName().str().c_str(), - FT->getNumParams()); fprintf(stderr, "ptr0:%u ptr1:%u ptr2:%u\n", - FT->getParamType(0)->isPointerTy(), - FT->getParamType(1)->isPointerTy(), - FT->getNumParams() > 2 ? FT->getParamType(2)->isPointerTy() - : 22 ); + { + + fprintf(stderr, "F:%s C:%s argc:%u\n", + F.getName().str().c_str(), + Callee->getName().str().c_str(), FT->getNumParams()); + fprintf(stderr, "ptr0:%u ptr1:%u ptr2:%u\n", + FT->getParamType(0)->isPointerTy(), + FT->getParamType(1)->isPointerTy(), + FT->getNumParams() > 2 ? + FT->getParamType(2)->isPointerTy() : 22 ); + + } + */ if (isGccStdStringCString || isGccStdStringStdString || @@ -269,7 +275,10 @@ bool CmpLogRoutines::hookRtns(Module &M) { } - if (!calls.size()) return false; + if (!calls.size() && !gccStdStd.size() && !gccStdC.size() && + !llvmStdStd.size() && !llvmStdC.size()) + return false; + /* if (!be_quiet) errs() << "Hooking " << calls.size() -- cgit 1.4.1