diff options
-rw-r--r-- | llvm_mode/README.lto.md | 2 | ||||
-rw-r--r-- | llvm_mode/afl-clang-fast.c | 4 | ||||
-rw-r--r-- | llvm_mode/afl-llvm-lto-instrumentation.so.cc | 330 | ||||
-rw-r--r-- | test/test-compcov.c | 10 |
4 files changed, 251 insertions, 95 deletions
diff --git a/llvm_mode/README.lto.md b/llvm_mode/README.lto.md index 9fc444df..51b50544 100644 --- a/llvm_mode/README.lto.md +++ b/llvm_mode/README.lto.md @@ -57,7 +57,7 @@ AUTODICTIONARY: 11 strings found ## Building llvm 11 ``` -$ sudo apt install binutils-dev +$ sudo apt install binutils-dev # this is *essential*! $ git clone https://github.com/llvm/llvm-project $ cd llvm-project $ mkdir build diff --git a/llvm_mode/afl-clang-fast.c b/llvm_mode/afl-clang-fast.c index cdb22cb9..fa76a11e 100644 --- a/llvm_mode/afl-clang-fast.c +++ b/llvm_mode/afl-clang-fast.c @@ -401,7 +401,9 @@ static void edit_params(u32 argc, char **argv, char **envp) { } - if (getenv("AFL_NO_BUILTIN")) { + if (getenv("AFL_NO_BUILTIN") || (instrument_mode == INSTRUMENT_LTO && + (getenv("AFL_LLVM_LTO_AUTODICTIONARY") || + getenv("AFL_LLVM_AUTODICTIONARY")))) { cc_params[cc_par_cnt++] = "-fno-builtin-strcmp"; cc_params[cc_par_cnt++] = "-fno-builtin-strncmp"; diff --git a/llvm_mode/afl-llvm-lto-instrumentation.so.cc b/llvm_mode/afl-llvm-lto-instrumentation.so.cc index 5cdf0b70..c5e7a2b7 100644 --- a/llvm_mode/afl-llvm-lto-instrumentation.so.cc +++ b/llvm_mode/afl-llvm-lto-instrumentation.so.cc @@ -161,9 +161,10 @@ class AFLLTOPass : public ModulePass { bool AFLLTOPass::runOnModule(Module &M) { - LLVMContext & C = M.getContext(); - std::vector<std::string> dictionary; - std::vector<CallInst *> calls; + LLVMContext & C = M.getContext(); + std::vector<std::string> dictionary; + std::vector<CallInst *> calls; + DenseMap<Value *, std::string *> valueMap; IntegerType *Int8Ty = IntegerType::getInt8Ty(C); IntegerType *Int32Ty = IntegerType::getInt32Ty(C); @@ -208,6 +209,34 @@ bool AFLLTOPass::runOnModule(Module &M) { if (autodictionary) { + /* Some implementation notes. + * + * We try to handle 3 cases: + * - memcmp("foo", arg, 3) <- literal string + * - static char globalvar[] = "foo"; + * memcmp(globalvar, arg, 3) <- global variable + * - char localvar[] = "foo"; + * memcmp(locallvar, arg, 3) <- local variable + * + * The local variable case is the hardest. We can only detect that + * case if there is no reassignment or change in the variable. + * And it might not work across llvm version. + * What we do is hooking the initializer function for local variables + * (llvm.memcpy.p0i8.p0i8.i64) and note the string and the assigned + * variable. And if that variable is then used in a compare function + * we use that noted string. + * This seems not to work for tokens that have a size <= 4 :-( + * + * - if the compared length is smaller than the string length we + * save the full string. This is likely better for fuzzing but + * might be wrong in a few cases depending on optimizers + * + * - not using StringRef because there is a bug in the llvm 11 + * checkout I am using which sometimes points to wrong strings + * + * Over and out. Took me a full day. damn. mh/vh + */ + for (auto &BB : F) { for (auto &IN : BB) { @@ -216,24 +245,28 @@ bool AFLLTOPass::runOnModule(Module &M) { if ((callInst = dyn_cast<CallInst>(&IN))) { - bool isStrcmp = true; - bool isMemcmp = true; - bool isStrncmp = true; - bool isStrcasecmp = true; - bool isStrncasecmp = true; + bool isStrcmp = true; + bool isMemcmp = true; + bool isStrncmp = true; + bool isStrcasecmp = true; + bool isStrncasecmp = true; + bool isIntMemcpy = true; + bool addedNull = false; + uint8_t optLen = 0; Function *Callee = callInst->getCalledFunction(); if (!Callee) continue; if (callInst->getCallingConv() != llvm::CallingConv::C) continue; - StringRef FuncName = Callee->getName(); - isStrcmp &= !FuncName.compare(StringRef("strcmp")); - isMemcmp &= !FuncName.compare(StringRef("memcmp")); - isStrncmp &= !FuncName.compare(StringRef("strncmp")); - isStrcasecmp &= !FuncName.compare(StringRef("strcasecmp")); - isStrncasecmp &= !FuncName.compare(StringRef("strncasecmp")); + std::string FuncName = Callee->getName().str(); + isStrcmp &= !FuncName.compare("strcmp"); + isMemcmp &= !FuncName.compare("memcmp"); + isStrncmp &= !FuncName.compare("strncmp"); + isStrcasecmp &= !FuncName.compare("strcasecmp"); + isStrncasecmp &= !FuncName.compare("strncasecmp"); + isIntMemcpy &= !FuncName.compare("llvm.memcpy.p0i8.p0i8.i64"); if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp && - !isStrncasecmp) + !isStrncasecmp && !isIntMemcpy) continue; /* Verify the strcmp/memcmp/strncmp/strcasecmp/strncasecmp function @@ -269,7 +302,7 @@ bool AFLLTOPass::runOnModule(Module &M) { FT->getParamType(2)->isIntegerTy(); if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp && - !isStrncasecmp) + !isStrncasecmp && !isIntMemcpy) continue; /* is a str{n,}{case,}cmp/memcmp, check if we have @@ -278,29 +311,205 @@ bool AFLLTOPass::runOnModule(Module &M) { * memcmp(x, "const", ..) or memcmp("const", x, ..) */ Value *Str1P = callInst->getArgOperand(0), *Str2P = callInst->getArgOperand(1); - StringRef Str1, Str2; - bool HasStr1 = getConstantStringInfo(Str1P, Str1); - bool HasStr2 = getConstantStringInfo(Str2P, Str2); + std::string Str1, Str2; + StringRef TmpStr; + bool HasStr1 = getConstantStringInfo(Str1P, TmpStr); + if (TmpStr.empty()) + HasStr1 = false; + else + Str1 = TmpStr.str(); + bool HasStr2 = getConstantStringInfo(Str2P, TmpStr); + if (TmpStr.empty()) + HasStr2 = false; + else + Str2 = TmpStr.str(); + + if (debug) + fprintf(stderr, "F:%s %p(%s)->\"%s\"(%s) %p(%s)->\"%s\"(%s)\n", + FuncName.c_str(), Str1P, Str1P->getName().str().c_str(), + Str1.c_str(), HasStr1 == true ? "true" : "false", Str2P, + Str2P->getName().str().c_str(), Str2.c_str(), + HasStr2 == true ? "true" : "false"); + + // we handle the 2nd parameter first because of llvm memcpy + if (!HasStr2) { + + auto *Ptr = dyn_cast<ConstantExpr>(Str2P); + if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) { + + if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) { + + if (auto *Array = + dyn_cast<ConstantDataArray>(Var->getInitializer())) { + + HasStr2 = true; + Str2 = Array->getAsString().str(); + + } + + } + + } + + } + + // for the internal memcpy routine we only care for the second + // parameter and are not reporting anything. + if (isIntMemcpy == true) { + + if (HasStr2 == true) { + + Value * op2 = callInst->getArgOperand(2); + ConstantInt *ilen = dyn_cast<ConstantInt>(op2); + if (ilen) { + + uint64_t literalLength = Str2.size(); + uint64_t optLength = ilen->getZExtValue(); + if (literalLength + 1 == optLength) { + + Str2.append("\0", 1); // add null byte + addedNull = true; + + } + + } + + valueMap[Str1P] = new std::string(Str2); + + if (debug) + fprintf(stderr, "Saved: %s for %p\n", Str2.c_str(), Str1P); + continue; + + } + + continue; + + } + + // Neither a literal nor a global variable? + // maybe it is a local variable that we saved + if (!HasStr2) { + + std::string *strng = valueMap[Str2P]; + if (strng && !strng->empty()) { + + Str2 = *strng; + HasStr2 = true; + if (debug) + fprintf(stderr, "Filled2: %s for %p\n", strng->c_str(), + Str2P); + + } + + } + + if (!HasStr1) { + + auto Ptr = dyn_cast<ConstantExpr>(Str1P); + + if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) { + + if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) { + + if (auto *Array = + dyn_cast<ConstantDataArray>(Var->getInitializer())) { + + HasStr1 = true; + Str1 = Array->getAsString().str(); + + } + + } + + } + + } + + // Neither a literal nor a global variable? + // maybe it is a local variable that we saved + if (!HasStr1) { + + std::string *strng = valueMap[Str1P]; + if (strng && !strng->empty()) { + + Str1 = *strng; + HasStr1 = true; + if (debug) + fprintf(stderr, "Filled1: %s for %p\n", strng->c_str(), + Str1P); + + } + + } /* handle cases of one string is const, one string is variable */ if (!(HasStr1 ^ HasStr2)) continue; + std::string thestring; + + if (HasStr1) + thestring = Str1; + else + thestring = Str2; + + optLen = thestring.length(); + if (isMemcmp || isStrncmp || isStrncasecmp) { - /* check if third operand is a constant integer - * strlen("constStr") and sizeof() are treated as constant */ Value * op2 = callInst->getArgOperand(2); ConstantInt *ilen = dyn_cast<ConstantInt>(op2); - if (!ilen) continue; - /* final precaution: if size of compare is larger than constant - * string skip it*/ - uint64_t literalLength = - HasStr1 ? GetStringLength(Str1P) : GetStringLength(Str2P); - if (literalLength < ilen->getZExtValue()) continue; + if (ilen) { + + uint64_t literalLength = optLen; + optLen = ilen->getZExtValue(); + if (literalLength + 1 == optLen) { // add null byte + thestring.append("\0", 1); + addedNull = true; + + } + + } } - calls.push_back(callInst); + // add null byte if this is a string compare function and a null + // was not already added + if (addedNull == false && !isMemcmp) { + + thestring.append("\0", 1); // add null byte + optLen++; + + } + + if (!be_quiet) { + + std::string outstring; + fprintf(stderr, "%s: length %u/%u \"", FuncName.c_str(), optLen, + (unsigned int)thestring.length()); + for (uint8_t i = 0; i < thestring.length(); i++) { + + uint8_t c = thestring[i]; + if (c <= 32 || c >= 127) + fprintf(stderr, "\\x%02x", c); + else + fprintf(stderr, "%c", c); + + } + + fprintf(stderr, "\"\n"); + + } + + // we take the longer string, even if the compare was to a + // shorter part. Note that depending on the optimizer of the + // compiler this can be wrong, but it is more likely that this + // is helping the fuzzer + if (optLen != thestring.length()) optLen = thestring.length(); + if (optLen > MAX_AUTO_EXTRA) optLen = MAX_AUTO_EXTRA; + if (optLen < MIN_AUTO_EXTRA) // too short? skip + continue; + + dictionary.push_back(thestring.substr(0, optLen)); } @@ -416,71 +625,6 @@ bool AFLLTOPass::runOnModule(Module &M) { } - if (calls.size()) { - - for (auto &callInst : calls) { - - Value *Str1P = callInst->getArgOperand(0), - *Str2P = callInst->getArgOperand(1); - StringRef Str1, Str2, ConstStr; - std::string TmpConstStr; - Value * VarStr; - bool HasStr1 = getConstantStringInfo(Str1P, Str1); - getConstantStringInfo(Str2P, Str2); - uint64_t constLen, sizedLen; - bool isMemcmp = !callInst->getCalledFunction()->getName().compare( - StringRef("memcmp")); - bool isSizedcmp = isMemcmp || - !callInst->getCalledFunction()->getName().compare( - StringRef("strncmp")) || - !callInst->getCalledFunction()->getName().compare( - StringRef("strncasecmp")); - - if (isSizedcmp) { - - Value * op2 = callInst->getArgOperand(2); - ConstantInt *ilen = dyn_cast<ConstantInt>(op2); - sizedLen = ilen->getZExtValue(); - - } else { - - sizedLen = 0; - - } - - if (HasStr1) { - - TmpConstStr = Str1.str(); - VarStr = Str2P; - constLen = isMemcmp ? sizedLen : GetStringLength(Str1P); - - } else { - - TmpConstStr = Str2.str(); - VarStr = Str1P; - constLen = isMemcmp ? sizedLen : GetStringLength(Str2P); - - } - - /* properly handle zero terminated C strings by adding the terminating 0 - * to the StringRef (in comparison to std::string a StringRef has built-in - * runtime bounds checking, which makes debugging easier) */ - TmpConstStr.append("\0", 1); - ConstStr = StringRef(TmpConstStr); - - if (isSizedcmp && constLen > sizedLen) constLen = sizedLen; - - if (debug) - errs() << callInst->getCalledFunction()->getName() << ": len " - << constLen << ": " << ConstStr << "\n"; - - if (constLen >= MIN_AUTO_EXTRA && constLen <= MAX_DICT_FILE) - dictionary.push_back(ConstStr.str().substr(0, constLen)); - - } - - } - if (getenv("AFL_LLVM_LTO_DONTWRITEID") == NULL || dictionary.size()) { // yes we could create our own function, insert it into ctors ... diff --git a/test/test-compcov.c b/test/test-compcov.c index 89611bfb..fff9c759 100644 --- a/test/test-compcov.c +++ b/test/test-compcov.c @@ -3,8 +3,12 @@ #include <unistd.h> #include <string.h> +char global_cmpval[] = "GLOBALVARIABLE"; + int main(int argc, char **argv) { char *input = argv[1], *buf, buffer[20]; + char cmpval[] = "LOCALVARIABLE"; + char shortval[4] = "abc"; if (argc < 2) { ssize_t ret = read(0, buffer, sizeof(buffer) - 1); @@ -24,6 +28,12 @@ int main(int argc, char **argv) { return 0; } else if (*(unsigned int*)input == 0xabadcafe) printf("GG you eat cmp tokens for breakfast!\n"); + else if (memcmp(cmpval, input, 8) == 0) + printf("local var memcmp works!\n"); + else if (memcmp(shortval, input, 4) == 0) + printf("short local var memcmp works!\n"); + else if (memcmp(global_cmpval, input, sizeof(global_cmpval)) == 0) + printf("global var memcmp works!\n"); else printf("I do not know your string\n"); |