diff options
author | van Hauser <vh@thc.org> | 2020-03-20 17:10:44 +0100 |
---|---|---|
committer | van Hauser <vh@thc.org> | 2020-03-20 17:10:44 +0100 |
commit | 5a74cffa0f22b4e3b3dbc829dfb1c8f7c7a6fb76 (patch) | |
tree | 364205f5e22d90706f5ca6e9cc625d3903e19033 | |
parent | f21ff8bac85449728e0ef267afa065b7622ea70f (diff) | |
download | afl++-5a74cffa0f22b4e3b3dbc829dfb1c8f7c7a6fb76.tar.gz |
added llvm_mode ngram coverage
-rw-r--r-- | README.md | 5 | ||||
-rw-r--r-- | docs/Changelog.md | 3 | ||||
-rw-r--r-- | docs/PATCHES.md | 1 | ||||
-rw-r--r-- | docs/env_variables.md | 34 | ||||
-rw-r--r-- | llvm_mode/afl-clang-fast.c | 11 | ||||
-rw-r--r-- | llvm_mode/afl-llvm-pass.so.cc | 134 | ||||
-rw-r--r-- | llvm_mode/afl-llvm-rt.o.c | 13 | ||||
-rw-r--r-- | llvm_mode/llvm-ngram-coverage.h | 18 | ||||
-rw-r--r-- | src/afl-common.c | 8 | ||||
-rw-r--r-- | src/afl-fuzz-stats.c | 2 |
10 files changed, 187 insertions, 42 deletions
diff --git a/README.md b/README.md index 5125928e..1476b440 100644 --- a/README.md +++ b/README.md @@ -73,6 +73,8 @@ * The new CmpLog instrumentation for LLVM and QEMU inspired by [Redqueen](https://www.syssec.ruhr-uni-bochum.de/media/emma/veroeffentlichungen/2018/12/17/NDSS19-Redqueen.pdf) + * llvm_mode ngram coverage by Adrian Herrera [https://github.com/adrianherrera/afl-ngram-pass](https://github.com/adrianherrera/afl-ngram-pass) + A more thorough list is available in the PATCHES file. | Feature/Instrumentation | afl-gcc | llvm_mode | gcc_plugin | qemu_mode | unicorn_mode | @@ -84,6 +86,7 @@ | Whitelist | | x | x | (x)(3) | | | non-colliding coverage | | x(4) | | (x)(5) | | | InsTrim | | x | | | | + | ngram prev_loc coverage | | x(6) | | | | neverZero: @@ -97,6 +100,8 @@ (5) upcoming, development in branch + (6) not compatible with LTO and InsTrim modes + So all in all this is the best-of afl that is currently out there :-) For new versions and additional information, check out: diff --git a/docs/Changelog.md b/docs/Changelog.md index 3eb5d329..ece2c4b5 100644 --- a/docs/Changelog.md +++ b/docs/Changelog.md @@ -31,6 +31,9 @@ sending a mail to <afl-users+subscribe@googlegroups.com>. 
runtime - LTO collision free instrumented added in llvm_mode with afl-clang-lto - note that this mode is amazing, but quite some targets won't compile + - Added llvm_mode NGRAM prev_loc coverage by Adrian Herrera + (https://github.com/adrianherrera/afl-ngram-pass/), activate by setting + AFL_LLVM_NGRAM_SIZE - llvm_mode InsTrim mode: - removed workaround for bug where paths were not instrumented and imported fix by author diff --git a/docs/PATCHES.md b/docs/PATCHES.md index 1dfb6622..a6783523 100644 --- a/docs/PATCHES.md +++ b/docs/PATCHES.md @@ -20,6 +20,7 @@ afl-qemu-speed.diff by abiondo on github afl-qemu-optimize-map.diff by mh(at)mh-sec(dot)de ``` ++ llvm_mode ngram prev_loc coverage (github.com/adrianherrera/afl-ngram-pass) + Custom mutator (native library) (by kyakdan) + unicorn_mode (modernized and updated by domenukk) + instrim (https://github.com/csienslab/instrim) was integrated diff --git a/docs/env_variables.md b/docs/env_variables.md index 8c7510cd..98f27bdf 100644 --- a/docs/env_variables.md +++ b/docs/env_variables.md @@ -93,23 +93,26 @@ Then there are a few specific features that are only available in llvm_mode: ### LTO -This is a different kind way of instrumentation: first it compiles all -code in LTO (link time optimization) and then performs an edge inserting -instrumentation which is 100% collision free (collisions are a big issue -in afl and afl-like instrumentations). This is performed by using -afl-clang-lto/afl-clang-lto++ instead of afl-clang-fast, but is only -built if LLVM 9 or newer is used. - -None of these options are necessary to be used and are rather for manual -use (which only ever the author of this LTO implementation will use ;-) -These are used if several seperated instrumentation are performed which -are then later combined. 
+ This is a different kind of instrumentation: first it compiles all + code in LTO (link time optimization) and then performs an edge inserting + instrumentation which is 100% collision free (collisions are a big issue + in afl and afl-like instrumentations). This is performed by using + afl-clang-lto/afl-clang-lto++ instead of afl-clang-fast, but is only + built if LLVM 9 or newer is used. + + None of these options are necessary to be used and are rather for manual + use (which only ever the author of this LTO implementation will use ;-) + These are used if several separate instrumentations are performed which + are then later combined. - AFL_LLVM_LTO_STARTID sets the starting location ID for the instrumentation. This defaults to 1 - AFL_LLVM_LTO_DONTWRITEID prevents that the highest location ID written into the instrumentation is set in a global variable + Instrim, LTO and ngram modes can not be used together. + See llvm_mode/README.LTO.md for more information. + ### LAF-INTEL This great feature will split compares to series of single byte comparisons @@ -149,8 +152,17 @@ are then later combined. functions with a single basic block. This is useful for most C and some C++ targets. + Instrim, LTO and ngram modes can not be used together. See llvm_mode/README.instrim.md +### NGRAM + + - Setting AFL_LLVM_NGRAM_SIZE activates ngram prev_loc coverage, good + values are 2, 4 or 8. + + Instrim, LTO and ngram modes can not be used together. 
+ See llvm_mode/README.ngram.md + ### NOT_ZERO - Setting AFL_LLVM_NOT_ZERO=1 during compilation will use counters diff --git a/llvm_mode/afl-clang-fast.c b/llvm_mode/afl-clang-fast.c index 313a2533..77cb1c0f 100644 --- a/llvm_mode/afl-clang-fast.c +++ b/llvm_mode/afl-clang-fast.c @@ -158,14 +158,20 @@ static void edit_params(u32 argc, char **argv) { #endif if (lto_flag[0] != '-') FATAL( - "afl-clang-lto not possible because Makefile magic did not identify " - "the correct -flto flag"); + "Using afl-clang-lto is not possible because Makefile magic did not " + "identify the correct -flto flag"); if (getenv("AFL_LLVM_INSTRIM") != NULL) FATAL("afl-clang-lto does not work with InsTrim mode"); + if (getenv("AFL_LLVM_NGRAM_SIZE") != NULL) + FATAL("afl-clang-lto does not work with ngram coverage mode"); lto_mode = 1; } + if (getenv("AFL_LLVM_NGRAM_SIZE") != NULL && + getenv("AFL_LLVM_INSTRIM") != NULL) + FATAL("AFL_LLVM_NGRAM_SIZE and AFL_LLVM_INSTRIM can not be used together"); + if (!strcmp(name, "afl-clang-fast++") || !strcmp(name, "afl-clang-lto++")) { u8 *alt_cxx = getenv("AFL_CXX"); @@ -605,6 +611,7 @@ int main(int argc, char **argv, char **envp) { "AFL_LLVM_LAF_SPLIT_COMPARES_BITW: size limit (default 8)\n" "AFL_LLVM_INSTRIM: use light weight instrumentation InsTrim\n" "AFL_LLVM_INSTRIM_LOOPHEAD: optimize loop tracing for speed\n" + "AFL_LLVM_NGRAM_SIZE: use ngram prev_loc coverage\n" "AFL_LLVM_CMPLOG: log operands of comparisons (RedQueen mutator)\n" "\nafl-clang-fast was built for llvm %s with the llvm binary path " "of " diff --git a/llvm_mode/afl-llvm-pass.so.cc b/llvm_mode/afl-llvm-pass.so.cc index 133c64b4..fefd9edd 100644 --- a/llvm_mode/afl-llvm-pass.so.cc +++ b/llvm_mode/afl-llvm-pass.so.cc @@ -2,12 +2,15 @@ american fuzzy lop++ - LLVM-mode instrumentation pass --------------------------------------------------- - Written by Laszlo Szekeres <lszekeres@google.com> and + Written by Laszlo Szekeres <lszekeres@google.com>, + Adrian Herrera 
<adrian.herrera@anu.edu.au>, Michal Zalewski LLVM integration design comes from Laszlo Szekeres. C bits copied-and-pasted from afl-as.c are Michal's fault. + NGRAM previous location coverage comes from Adrian Herrera. + Copyright 2015, 2016 Google Inc. All rights reserved. Copyright 2019-2020 AFLplusplus Project. All rights reserved. @@ -27,7 +30,6 @@ #include "config.h" #include "debug.h" - #include <stdio.h> #include <stdlib.h> #include <unistd.h> @@ -47,6 +49,7 @@ typedef long double max_align_t; #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #if LLVM_VERSION_MAJOR > 3 || \ @@ -58,6 +61,8 @@ typedef long double max_align_t; #include "llvm/Support/CFG.h" #endif +#include "llvm-ngram-coverage.h" + using namespace llvm; namespace { @@ -118,6 +123,7 @@ class AFLCoverage : public ModulePass { protected: std::list<std::string> myWhitelist; + uint32_t ngram_size = 0; }; @@ -129,8 +135,10 @@ bool AFLCoverage::runOnModule(Module &M) { LLVMContext &C = M.getContext(); - IntegerType * Int8Ty = IntegerType::getInt8Ty(C); - IntegerType * Int32Ty = IntegerType::getInt32Ty(C); + IntegerType *Int8Ty = IntegerType::getInt8Ty(C); + IntegerType *Int32Ty = IntegerType::getInt32Ty(C); + IntegerType *IntLocTy = + IntegerType::getIntNTy(C, sizeof(PREV_LOC_T) * CHAR_BIT); struct timeval tv; struct timezone tz; u32 rand_seed; @@ -147,7 +155,8 @@ bool AFLCoverage::runOnModule(Module &M) { if ((isatty(2) && !getenv("AFL_QUIET")) || getenv("AFL_DEBUG") != NULL) { - SAYF(cCYA "afl-llvm-pass" VERSION cRST " by <lszekeres@google.com>\n"); + SAYF(cCYA "afl-llvm-pass" VERSION cRST + " by <lszekeres@google.com> and <adrian.herrera@anu.edu.au>\n"); } else @@ -170,21 +179,73 @@ bool AFLCoverage::runOnModule(Module &M) { char *neverZero_counters_str = getenv("AFL_LLVM_NOT_ZERO"); #endif + /* Decide previous location vector size (must be a power of two) */ + + char 
*ngram_size_str = getenv("AFL_LLVM_NGRAM_SIZE"); + if (!ngram_size_str) ngram_size_str = getenv("AFL_NGRAM_SIZE"); + + if (ngram_size_str) + if (sscanf(ngram_size_str, "%u", &ngram_size) != 1 || ngram_size < 2 || + ngram_size > MAX_NGRAM_SIZE) + FATAL( + "Bad value of AFL_NGRAM_SIZE (must be between 2 and MAX_NGRAM_SIZE)"); + + unsigned PrevLocSize; + if (ngram_size == 1) ngram_size = 0; + if (ngram_size) + PrevLocSize = ngram_size - 1; + else + PrevLocSize = 1; + uint64_t PrevLocVecSize = PowerOf2Ceil(PrevLocSize); + VectorType *PrevLocTy; + + if (ngram_size) PrevLocTy = VectorType::get(IntLocTy, PrevLocVecSize); + /* Get globals for the SHM region and the previous location. Note that __afl_prev_loc is thread-local. */ GlobalVariable *AFLMapPtr = new GlobalVariable(M, PointerType::get(Int8Ty, 0), false, GlobalValue::ExternalLinkage, 0, "__afl_area_ptr"); + GlobalVariable *AFLPrevLoc; + if (ngram_size) #ifdef __ANDROID__ - GlobalVariable *AFLPrevLoc = new GlobalVariable( - M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_loc"); + AFLPrevLoc = new GlobalVariable( + M, PrevLocTy, /* isConstant */ false, GlobalValue::ExternalLinkage, + /* Initializer */ nullptr, "__afl_prev_loc"); #else - GlobalVariable *AFLPrevLoc = new GlobalVariable( - M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_loc", 0, - GlobalVariable::GeneralDynamicTLSModel, 0, false); + AFLPrevLoc = new GlobalVariable( + M, PrevLocTy, /* isConstant */ false, GlobalValue::ExternalLinkage, + /* Initializer */ nullptr, "__afl_prev_loc", + /* InsertBefore */ nullptr, GlobalVariable::GeneralDynamicTLSModel, + /* AddressSpace */ 0, /* IsExternallyInitialized */ false); #endif + else +#ifdef __ANDROID__ + AFLPrevLoc = new GlobalVariable( + M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_loc"); +#else + AFLPrevLoc = new GlobalVariable( + M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_loc", 0, + GlobalVariable::GeneralDynamicTLSModel, 0, false); 
+#endif + + /* Create the vector shuffle mask for updating the previous block history. + Note that the first element of the vector will store cur_loc, so just set + it to undef to allow the optimizer to do its thing. */ + + SmallVector<Constant *, 32> PrevLocShuffle = {UndefValue::get(Int32Ty)}; + + for (unsigned I = 0; I < PrevLocSize - 1; ++I) + PrevLocShuffle.push_back(ConstantInt::get(Int32Ty, I)); + + for (unsigned I = PrevLocSize; I < PrevLocVecSize; ++I) + PrevLocShuffle.push_back(ConstantInt::get(Int32Ty, PrevLocSize)); + + Constant *PrevLocShuffleMask = ConstantVector::get(PrevLocShuffle); + + // other constants we need ConstantInt *Zero = ConstantInt::get(Int8Ty, 0); ConstantInt *One = ConstantInt::get(Int8Ty, 1); @@ -356,20 +417,41 @@ bool AFLCoverage::runOnModule(Module &M) { // fprintf(stderr, " == %d\n", more_than_one); if (more_than_one != 1) continue; #endif - ConstantInt *CurLoc = ConstantInt::get(Int32Ty, cur_loc); + + ConstantInt *CurLoc; + + if (ngram_size) + CurLoc = ConstantInt::get(IntLocTy, cur_loc); + else + CurLoc = ConstantInt::get(Int32Ty, cur_loc); /* Load prev_loc */ LoadInst *PrevLoc = IRB.CreateLoad(AFLPrevLoc); PrevLoc->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); - Value *PrevLocCasted = IRB.CreateZExt(PrevLoc, IRB.getInt32Ty()); + Value *PrevLocTrans; + + /* "For efficiency, we propose to hash the tuple as a key into the + hit_count map as (prev_block_trans << 1) ^ curr_block_trans, where + prev_block_trans = (block_trans_1 ^ ... 
^ block_trans_(n-1)" */ + + if (ngram_size) + PrevLocTrans = IRB.CreateXorReduce(PrevLoc); + else + PrevLocTrans = IRB.CreateZExt(PrevLoc, IRB.getInt32Ty()); /* Load SHM pointer */ LoadInst *MapPtr = IRB.CreateLoad(AFLMapPtr); MapPtr->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); - Value *MapPtrIdx = - IRB.CreateGEP(MapPtr, IRB.CreateXor(PrevLocCasted, CurLoc)); + + Value *MapPtrIdx; + if (ngram_size) + MapPtrIdx = IRB.CreateGEP( + MapPtr, + IRB.CreateZExt(IRB.CreateXor(PrevLocTrans, CurLoc), Int32Ty)); + else + MapPtrIdx = IRB.CreateGEP(MapPtr, IRB.CreateXor(PrevLocTrans, CurLoc)); /* Update bitmap */ @@ -449,11 +531,27 @@ bool AFLCoverage::runOnModule(Module &M) { IRB.CreateStore(Incr, MapPtrIdx) ->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); - /* Set prev_loc to cur_loc >> 1 */ + /* Update prev_loc history vector (by placing cur_loc at the head of the + vector and shuffle the other elements back by one) */ - StoreInst *Store = - IRB.CreateStore(ConstantInt::get(Int32Ty, cur_loc >> 1), AFLPrevLoc); - Store->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + StoreInst *Store; + + if (ngram_size) { + + Value *ShuffledPrevLoc = IRB.CreateShuffleVector( + PrevLoc, UndefValue::get(PrevLocTy), PrevLocShuffleMask); + Value *UpdatedPrevLoc = IRB.CreateInsertElement( + ShuffledPrevLoc, IRB.CreateLShr(CurLoc, (uint64_t)1), (uint64_t)0); + + Store = IRB.CreateStore(UpdatedPrevLoc, AFLPrevLoc); + Store->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + + } else { + + Store = IRB.CreateStore(ConstantInt::get(Int32Ty, cur_loc >> 1), + AFLPrevLoc); + + } inst_blocks++; diff --git a/llvm_mode/afl-llvm-rt.o.c b/llvm_mode/afl-llvm-rt.o.c index 5f9a5534..8fad0fbb 100644 --- a/llvm_mode/afl-llvm-rt.o.c +++ b/llvm_mode/afl-llvm-rt.o.c @@ -26,6 +26,7 @@ #include "config.h" #include "types.h" #include "cmplog.h" +#include "llvm-ngram-coverage.h" #include <stdio.h> #include <stdlib.h> @@ -62,11 +63,11 @@ u8 
__afl_area_initial[MAP_SIZE]; u8 *__afl_area_ptr = __afl_area_initial; #ifdef __ANDROID__ -u32 __afl_prev_loc; -u32 __afl_final_loc; +PREV_LOC_T __afl_prev_loc[MAX_NGRAM_SIZE]; +u32 __afl_final_loc; #else -__thread u32 __afl_prev_loc; -__thread u32 __afl_final_loc; +__thread PREV_LOC_T __afl_prev_loc[MAX_NGRAM_SIZE]; +__thread u32 __afl_final_loc; #endif struct cmp_map *__afl_cmp_map; @@ -281,7 +282,7 @@ int __afl_persistent_loop(unsigned int max_cnt) { memset(__afl_area_ptr, 0, MAP_SIZE); __afl_area_ptr[0] = 1; - __afl_prev_loc = 0; + memset(__afl_prev_loc, 0, MAX_NGRAM_SIZE * sizeof(PREV_LOC_T)); } @@ -298,7 +299,7 @@ int __afl_persistent_loop(unsigned int max_cnt) { raise(SIGSTOP); __afl_area_ptr[0] = 1; - __afl_prev_loc = 0; + memset(__afl_prev_loc, 0, MAX_NGRAM_SIZE * sizeof(PREV_LOC_T)); return 1; diff --git a/llvm_mode/llvm-ngram-coverage.h b/llvm_mode/llvm-ngram-coverage.h new file mode 100644 index 00000000..4459bcd7 --- /dev/null +++ b/llvm_mode/llvm-ngram-coverage.h @@ -0,0 +1,18 @@ +#ifndef AFL_NGRAM_CONFIG_H +#define AFL_NGRAM_CONFIG_H + +#include "../config.h" + +#if (MAP_SIZE_POW2 <= 16) +typedef u16 PREV_LOC_T; +#elif (MAP_SIZE_POW2 <= 32) +typedef u32 PREV_LOC_T; +#else +typedef u64 PREV_LOC_T; +#endif + +/* Maximum ngram size */ +#define MAX_NGRAM_SIZE 128 + +#endif + diff --git a/src/afl-common.c b/src/afl-common.c index 1aa15442..8c4d53e8 100644 --- a/src/afl-common.c +++ b/src/afl-common.c @@ -57,10 +57,10 @@ char * afl_environment_variables[] = { "AFL_LLVM_INSTRIM_LOOPHEAD", "AFL_LLVM_INSTRIM_SKIPSINGLEBLOCK", "AFL_LLVM_LAF_SPLIT_COMPARES", "AFL_LLVM_LAF_SPLIT_COMPARES_BITW", "AFL_LLVM_LAF_SPLIT_FLOATS", "AFL_LLVM_LAF_SPLIT_SWITCHES", - "AFL_LLVM_LAF_TRANSFORM_COMPARES", "AFL_LLVM_NOT_ZERO", - "AFL_LLVM_WHITELIST", "AFL_NO_AFFINITY", "AFL_LLVM_LTO_STARTID", - "AFL_LLVM_LTO_DONTWRITEID", "AFL_NO_ARITH", "AFL_NO_BUILTIN", - "AFL_NO_CPU_RED", "AFL_NO_FORKSRV", "AFL_NO_UI", + "AFL_LLVM_LAF_TRANSFORM_COMPARES", "AFL_LLVM_NGRAM_SIZE", 
"AFL_NGRAM_SIZE", + "AFL_LLVM_NOT_ZERO", "AFL_LLVM_WHITELIST", "AFL_NO_AFFINITY", + "AFL_LLVM_LTO_STARTID", "AFL_LLVM_LTO_DONTWRITEID", "AFL_NO_ARITH", + "AFL_NO_BUILTIN", "AFL_NO_CPU_RED", "AFL_NO_FORKSRV", "AFL_NO_UI", "AFL_NO_X86", // not really an env but we dont want to warn on it "AFL_PATH", "AFL_PERFORMANCE_FILE", //"AFL_PERSISTENT", // not implemented anymore, so warn additionally diff --git a/src/afl-fuzz-stats.c b/src/afl-fuzz-stats.c index b6e64841..34fdea25 100644 --- a/src/afl-fuzz-stats.c +++ b/src/afl-fuzz-stats.c @@ -105,7 +105,7 @@ void write_stats_file(afl_state_t *afl, double bitmap_cvg, double stability, afl->start_time / 1000, cur_time / 1000, (cur_time - afl->start_time) / 1000, getpid(), afl->queue_cycle ? (afl->queue_cycle - 1) : 0, afl->cycles_wo_finds, - afl->total_execs, /*eps,*/ + afl->total_execs, afl->total_execs / ((double)(get_cur_time() - afl->start_time) / 1000), afl->queued_paths, afl->queued_favored, afl->queued_discovered, afl->queued_imported, afl->max_depth, afl->current_entry, |