aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--llvm_mode/README.lto.md2
-rw-r--r--llvm_mode/afl-clang-fast.c4
-rw-r--r--llvm_mode/afl-llvm-lto-instrumentation.so.cc330
-rw-r--r--test/test-compcov.c10
4 files changed, 251 insertions, 95 deletions
diff --git a/llvm_mode/README.lto.md b/llvm_mode/README.lto.md
index 9fc444df..51b50544 100644
--- a/llvm_mode/README.lto.md
+++ b/llvm_mode/README.lto.md
@@ -57,7 +57,7 @@ AUTODICTIONARY: 11 strings found
## Building llvm 11
```
-$ sudo apt install binutils-dev
+$ sudo apt install binutils-dev # this is *essential*!
$ git clone https://github.com/llvm/llvm-project
$ cd llvm-project
$ mkdir build
diff --git a/llvm_mode/afl-clang-fast.c b/llvm_mode/afl-clang-fast.c
index cdb22cb9..fa76a11e 100644
--- a/llvm_mode/afl-clang-fast.c
+++ b/llvm_mode/afl-clang-fast.c
@@ -401,7 +401,9 @@ static void edit_params(u32 argc, char **argv, char **envp) {
}
- if (getenv("AFL_NO_BUILTIN")) {
+ if (getenv("AFL_NO_BUILTIN") || (instrument_mode == INSTRUMENT_LTO &&
+ (getenv("AFL_LLVM_LTO_AUTODICTIONARY") ||
+ getenv("AFL_LLVM_AUTODICTIONARY")))) {
cc_params[cc_par_cnt++] = "-fno-builtin-strcmp";
cc_params[cc_par_cnt++] = "-fno-builtin-strncmp";
diff --git a/llvm_mode/afl-llvm-lto-instrumentation.so.cc b/llvm_mode/afl-llvm-lto-instrumentation.so.cc
index 5cdf0b70..c5e7a2b7 100644
--- a/llvm_mode/afl-llvm-lto-instrumentation.so.cc
+++ b/llvm_mode/afl-llvm-lto-instrumentation.so.cc
@@ -161,9 +161,10 @@ class AFLLTOPass : public ModulePass {
bool AFLLTOPass::runOnModule(Module &M) {
- LLVMContext & C = M.getContext();
- std::vector<std::string> dictionary;
- std::vector<CallInst *> calls;
+ LLVMContext & C = M.getContext();
+ std::vector<std::string> dictionary;
+ std::vector<CallInst *> calls;
+ DenseMap<Value *, std::string *> valueMap;
IntegerType *Int8Ty = IntegerType::getInt8Ty(C);
IntegerType *Int32Ty = IntegerType::getInt32Ty(C);
@@ -208,6 +209,34 @@ bool AFLLTOPass::runOnModule(Module &M) {
if (autodictionary) {
+ /* Some implementation notes.
+ *
+ * We try to handle 3 cases:
+ * - memcmp("foo", arg, 3) <- literal string
+ * - static char globalvar[] = "foo";
+ * memcmp(globalvar, arg, 3) <- global variable
+ * - char localvar[] = "foo";
+ * memcmp(localvar, arg, 3) <- local variable
+ *
+ * The local variable case is the hardest. We can only detect that
+ * case if there is no reassignment or change in the variable.
+ * And it might not work across llvm versions.
+ * What we do is hook the initializer function for local variables
+ * (llvm.memcpy.p0i8.p0i8.i64) and note the string and the assigned
+ * variable. And if that variable is then used in a compare function
+ * we use that noted string.
+ * This seems not to work for tokens that have a size <= 4 :-(
+ *
+ * - if the compared length is smaller than the string length we
+ * save the full string. This is likely better for fuzzing but
+ * might be wrong in a few cases depending on optimizers
+ *
+ * - not using StringRef because there is a bug in the llvm 11
+ * checkout I am using which sometimes points to wrong strings
+ *
+ * Over and out. Took me a full day. damn. mh/vh
+ */
+
for (auto &BB : F) {
for (auto &IN : BB) {
@@ -216,24 +245,28 @@ bool AFLLTOPass::runOnModule(Module &M) {
if ((callInst = dyn_cast<CallInst>(&IN))) {
- bool isStrcmp = true;
- bool isMemcmp = true;
- bool isStrncmp = true;
- bool isStrcasecmp = true;
- bool isStrncasecmp = true;
+ bool isStrcmp = true;
+ bool isMemcmp = true;
+ bool isStrncmp = true;
+ bool isStrcasecmp = true;
+ bool isStrncasecmp = true;
+ bool isIntMemcpy = true;
+ bool addedNull = false;
+ uint8_t optLen = 0;
Function *Callee = callInst->getCalledFunction();
if (!Callee) continue;
if (callInst->getCallingConv() != llvm::CallingConv::C) continue;
- StringRef FuncName = Callee->getName();
- isStrcmp &= !FuncName.compare(StringRef("strcmp"));
- isMemcmp &= !FuncName.compare(StringRef("memcmp"));
- isStrncmp &= !FuncName.compare(StringRef("strncmp"));
- isStrcasecmp &= !FuncName.compare(StringRef("strcasecmp"));
- isStrncasecmp &= !FuncName.compare(StringRef("strncasecmp"));
+ std::string FuncName = Callee->getName().str();
+ isStrcmp &= !FuncName.compare("strcmp");
+ isMemcmp &= !FuncName.compare("memcmp");
+ isStrncmp &= !FuncName.compare("strncmp");
+ isStrcasecmp &= !FuncName.compare("strcasecmp");
+ isStrncasecmp &= !FuncName.compare("strncasecmp");
+ isIntMemcpy &= !FuncName.compare("llvm.memcpy.p0i8.p0i8.i64");
if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp &&
- !isStrncasecmp)
+ !isStrncasecmp && !isIntMemcpy)
continue;
/* Verify the strcmp/memcmp/strncmp/strcasecmp/strncasecmp function
@@ -269,7 +302,7 @@ bool AFLLTOPass::runOnModule(Module &M) {
FT->getParamType(2)->isIntegerTy();
if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp &&
- !isStrncasecmp)
+ !isStrncasecmp && !isIntMemcpy)
continue;
/* is a str{n,}{case,}cmp/memcmp, check if we have
@@ -278,29 +311,205 @@ bool AFLLTOPass::runOnModule(Module &M) {
* memcmp(x, "const", ..) or memcmp("const", x, ..) */
Value *Str1P = callInst->getArgOperand(0),
*Str2P = callInst->getArgOperand(1);
- StringRef Str1, Str2;
- bool HasStr1 = getConstantStringInfo(Str1P, Str1);
- bool HasStr2 = getConstantStringInfo(Str2P, Str2);
+ std::string Str1, Str2;
+ StringRef TmpStr;
+ bool HasStr1 = getConstantStringInfo(Str1P, TmpStr);
+ if (TmpStr.empty())
+ HasStr1 = false;
+ else
+ Str1 = TmpStr.str();
+ bool HasStr2 = getConstantStringInfo(Str2P, TmpStr);
+ if (TmpStr.empty())
+ HasStr2 = false;
+ else
+ Str2 = TmpStr.str();
+
+ if (debug)
+ fprintf(stderr, "F:%s %p(%s)->\"%s\"(%s) %p(%s)->\"%s\"(%s)\n",
+ FuncName.c_str(), Str1P, Str1P->getName().str().c_str(),
+ Str1.c_str(), HasStr1 == true ? "true" : "false", Str2P,
+ Str2P->getName().str().c_str(), Str2.c_str(),
+ HasStr2 == true ? "true" : "false");
+
+ // we handle the 2nd parameter first because of llvm memcpy
+ if (!HasStr2) {
+
+ auto *Ptr = dyn_cast<ConstantExpr>(Str2P);
+ if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) {
+
+ if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) {
+
+ if (auto *Array =
+ dyn_cast<ConstantDataArray>(Var->getInitializer())) {
+
+ HasStr2 = true;
+ Str2 = Array->getAsString().str();
+
+ }
+
+ }
+
+ }
+
+ }
+
+ // for the internal memcpy routine we only care about the second
+ // parameter and do not report anything.
+ if (isIntMemcpy == true) {
+
+ if (HasStr2 == true) {
+
+ Value * op2 = callInst->getArgOperand(2);
+ ConstantInt *ilen = dyn_cast<ConstantInt>(op2);
+ if (ilen) {
+
+ uint64_t literalLength = Str2.size();
+ uint64_t optLength = ilen->getZExtValue();
+ if (literalLength + 1 == optLength) {
+
+ Str2.append("\0", 1); // add null byte
+ addedNull = true;
+
+ }
+
+ }
+
+ valueMap[Str1P] = new std::string(Str2);
+
+ if (debug)
+ fprintf(stderr, "Saved: %s for %p\n", Str2.c_str(), Str1P);
+ continue;
+
+ }
+
+ continue;
+
+ }
+
+ // Neither a literal nor a global variable?
+ // maybe it is a local variable that we saved
+ if (!HasStr2) {
+
+ std::string *strng = valueMap[Str2P];
+ if (strng && !strng->empty()) {
+
+ Str2 = *strng;
+ HasStr2 = true;
+ if (debug)
+ fprintf(stderr, "Filled2: %s for %p\n", strng->c_str(),
+ Str2P);
+
+ }
+
+ }
+
+ if (!HasStr1) {
+
+ auto Ptr = dyn_cast<ConstantExpr>(Str1P);
+
+ if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) {
+
+ if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) {
+
+ if (auto *Array =
+ dyn_cast<ConstantDataArray>(Var->getInitializer())) {
+
+ HasStr1 = true;
+ Str1 = Array->getAsString().str();
+
+ }
+
+ }
+
+ }
+
+ }
+
+ // Neither a literal nor a global variable?
+ // maybe it is a local variable that we saved
+ if (!HasStr1) {
+
+ std::string *strng = valueMap[Str1P];
+ if (strng && !strng->empty()) {
+
+ Str1 = *strng;
+ HasStr1 = true;
+ if (debug)
+ fprintf(stderr, "Filled1: %s for %p\n", strng->c_str(),
+ Str1P);
+
+ }
+
+ }
/* handle cases of one string is const, one string is variable */
if (!(HasStr1 ^ HasStr2)) continue;
+ std::string thestring;
+
+ if (HasStr1)
+ thestring = Str1;
+ else
+ thestring = Str2;
+
+ optLen = thestring.length();
+
if (isMemcmp || isStrncmp || isStrncasecmp) {
- /* check if third operand is a constant integer
- * strlen("constStr") and sizeof() are treated as constant */
Value * op2 = callInst->getArgOperand(2);
ConstantInt *ilen = dyn_cast<ConstantInt>(op2);
- if (!ilen) continue;
- /* final precaution: if size of compare is larger than constant
- * string skip it*/
- uint64_t literalLength =
- HasStr1 ? GetStringLength(Str1P) : GetStringLength(Str2P);
- if (literalLength < ilen->getZExtValue()) continue;
+ if (ilen) {
+
+ uint64_t literalLength = optLen;
+ optLen = ilen->getZExtValue();
+ if (literalLength + 1 == optLen) { // add null byte
+ thestring.append("\0", 1);
+ addedNull = true;
+
+ }
+
+ }
}
- calls.push_back(callInst);
+ // add null byte if this is a string compare function and a null
+ // was not already added
+ if (addedNull == false && !isMemcmp) {
+
+ thestring.append("\0", 1); // add null byte
+ optLen++;
+
+ }
+
+ if (!be_quiet) {
+
+ std::string outstring;
+ fprintf(stderr, "%s: length %u/%u \"", FuncName.c_str(), optLen,
+ (unsigned int)thestring.length());
+ for (uint8_t i = 0; i < thestring.length(); i++) {
+
+ uint8_t c = thestring[i];
+ if (c <= 32 || c >= 127)
+ fprintf(stderr, "\\x%02x", c);
+ else
+ fprintf(stderr, "%c", c);
+
+ }
+
+ fprintf(stderr, "\"\n");
+
+ }
+
+ // we take the longer string, even if the compare was to a
+ // shorter part. Note that depending on the optimizer of the
+ // compiler this can be wrong, but it is more likely that this
+ // is helping the fuzzer
+ if (optLen != thestring.length()) optLen = thestring.length();
+ if (optLen > MAX_AUTO_EXTRA) optLen = MAX_AUTO_EXTRA;
+ if (optLen < MIN_AUTO_EXTRA) // too short? skip
+ continue;
+
+ dictionary.push_back(thestring.substr(0, optLen));
}
@@ -416,71 +625,6 @@ bool AFLLTOPass::runOnModule(Module &M) {
}
- if (calls.size()) {
-
- for (auto &callInst : calls) {
-
- Value *Str1P = callInst->getArgOperand(0),
- *Str2P = callInst->getArgOperand(1);
- StringRef Str1, Str2, ConstStr;
- std::string TmpConstStr;
- Value * VarStr;
- bool HasStr1 = getConstantStringInfo(Str1P, Str1);
- getConstantStringInfo(Str2P, Str2);
- uint64_t constLen, sizedLen;
- bool isMemcmp = !callInst->getCalledFunction()->getName().compare(
- StringRef("memcmp"));
- bool isSizedcmp = isMemcmp ||
- !callInst->getCalledFunction()->getName().compare(
- StringRef("strncmp")) ||
- !callInst->getCalledFunction()->getName().compare(
- StringRef("strncasecmp"));
-
- if (isSizedcmp) {
-
- Value * op2 = callInst->getArgOperand(2);
- ConstantInt *ilen = dyn_cast<ConstantInt>(op2);
- sizedLen = ilen->getZExtValue();
-
- } else {
-
- sizedLen = 0;
-
- }
-
- if (HasStr1) {
-
- TmpConstStr = Str1.str();
- VarStr = Str2P;
- constLen = isMemcmp ? sizedLen : GetStringLength(Str1P);
-
- } else {
-
- TmpConstStr = Str2.str();
- VarStr = Str1P;
- constLen = isMemcmp ? sizedLen : GetStringLength(Str2P);
-
- }
-
- /* properly handle zero terminated C strings by adding the terminating 0
- * to the StringRef (in comparison to std::string a StringRef has built-in
- * runtime bounds checking, which makes debugging easier) */
- TmpConstStr.append("\0", 1);
- ConstStr = StringRef(TmpConstStr);
-
- if (isSizedcmp && constLen > sizedLen) constLen = sizedLen;
-
- if (debug)
- errs() << callInst->getCalledFunction()->getName() << ": len "
- << constLen << ": " << ConstStr << "\n";
-
- if (constLen >= MIN_AUTO_EXTRA && constLen <= MAX_DICT_FILE)
- dictionary.push_back(ConstStr.str().substr(0, constLen));
-
- }
-
- }
-
if (getenv("AFL_LLVM_LTO_DONTWRITEID") == NULL || dictionary.size()) {
// yes we could create our own function, insert it into ctors ...
diff --git a/test/test-compcov.c b/test/test-compcov.c
index 89611bfb..fff9c759 100644
--- a/test/test-compcov.c
+++ b/test/test-compcov.c
@@ -3,8 +3,12 @@
#include <unistd.h>
#include <string.h>
+char global_cmpval[] = "GLOBALVARIABLE";
+
int main(int argc, char **argv) {
char *input = argv[1], *buf, buffer[20];
+ char cmpval[] = "LOCALVARIABLE";
+ char shortval[4] = "abc";
if (argc < 2) {
ssize_t ret = read(0, buffer, sizeof(buffer) - 1);
@@ -24,6 +28,12 @@ int main(int argc, char **argv) {
return 0;
} else if (*(unsigned int*)input == 0xabadcafe)
printf("GG you eat cmp tokens for breakfast!\n");
+ else if (memcmp(cmpval, input, 8) == 0)
+ printf("local var memcmp works!\n");
+ else if (memcmp(shortval, input, 4) == 0)
+ printf("short local var memcmp works!\n");
+ else if (memcmp(global_cmpval, input, sizeof(global_cmpval)) == 0)
+ printf("global var memcmp works!\n");
else
printf("I do not know your string\n");