From a0e6b98ce8c67270b4a6d31121896fea47b6c2a7 Mon Sep 17 00:00:00 2001 From: Dominik Maier Date: Wed, 15 Jan 2020 02:38:45 +0100 Subject: persistent mode harness --- src/afl-fuzz-init.c | 6 ++--- unicorn_mode/samples/c/harness.c | 50 ++++++++++++++++++++++++---------------- 2 files changed, 33 insertions(+), 23 deletions(-) diff --git a/src/afl-fuzz-init.c b/src/afl-fuzz-init.c index 5fe3689e..48b0d8ac 100644 --- a/src/afl-fuzz-init.c +++ b/src/afl-fuzz-init.c @@ -1940,17 +1940,17 @@ void check_binary(u8* fname) { } - if ((qemu_mode || unicorn_mode) && + if ((qemu_mode) && memmem(f_data, f_len, SHM_ENV_VAR, strlen(SHM_ENV_VAR) + 1)) { SAYF("\n" cLRD "[-] " cRST "This program appears to be instrumented with afl-gcc, but is being " "run in\n" - " QEMU or Unicorn mode (-Q or -U). This is probably not what you " + " QEMU mode (-Q). This is probably not what you " "want -\n" " this setup will be slow and offer no practical benefits.\n"); - FATAL("Instrumentation found in -Q or -U mode"); + FATAL("Instrumentation found in -Q mode"); } diff --git a/unicorn_mode/samples/c/harness.c b/unicorn_mode/samples/c/harness.c index cc81ba7f..4239b222 100644 --- a/unicorn_mode/samples/c/harness.c +++ b/unicorn_mode/samples/c/harness.c @@ -33,21 +33,24 @@ // Memory map for the code to be tested // Arbitrary address where code to test will be loaded -#define BASE_ADDRESS (0x100000) -#define CODE_ADDRESS (0x101119) -#define END_ADDRESS (0x1011d7) +static const int64_t BASE_ADDRESS = 0x100000; +static const int64_t CODE_ADDRESS = 0x101119; +static const int64_t END_ADDRESS = 0x1011d7; // Address of the stack (Some random address again) -#define STACK_ADDRESS (((int64_t) 0x01) << 58) +static const int64_t STACK_ADDRESS = (((int64_t) 0x01) << 58); // Size of the stack (arbitrarily chosen, just make it big enough) -#define STACK_SIZE (0x10000) +static const int64_t STACK_SIZE = 0x10000; // Location where the input will be placed (make sure the emulated program knows this somehow, too ;) ) 
-#define INPUT_LOCATION (0x10000) +static const int64_t INPUT_LOCATION = 0x10000; // Inside the location, we have an ofset in our special case -#define INPUT_OFFSET (0x16) +static const int64_t INPUT_OFFSET = 0x16; // Maximum allowable size of mutated data from AFL -#define INPUT_SIZE_MAX (0x10000) +static const int64_t INPUT_SIZE_MAX = 0x10000; // Alignment for unicorn mappings (seems to be needed) -#define ALIGNMENT ((uint64_t) 0x1000) +static const int64_t ALIGNMENT = 0x1000; + +// In our special case, we emulate main(), so argc is needed. +static const uint64_t EMULATED_ARGC = 2; static void hook_block(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) { printf(">>> Tracing basic block at 0x%"PRIx64 ", block size = 0x%x\n", address, size); @@ -100,10 +103,22 @@ static bool place_input_callback( void *data ){ // printf("Placing input with len %ld to %x\n", input_len, DATA_ADDRESS); - if (input_len >= INPUT_SIZE_MAX - INPUT_OFFSET) { - // Test input too long, ignore this testcase + if (input_len < 1 || input_len >= INPUT_SIZE_MAX - INPUT_OFFSET) { + // Test input too short or too long, ignore this testcase return false; } + // We need a valid c string, make sure it never goes out of bounds. + input[input_len-1] = '\0'; + + // For persistent mode, we have to set up stack and memory each time. + uc_reg_write(uc, UC_X86_REG_RIP, &CODE_ADDRESS); // Set the instruction pointer back + // Set up the function parameters accordingly RSI, RDI (see calling convention/disassembly) + uc_reg_write(uc, UC_X86_REG_RSI, &INPUT_LOCATION); // argv + uc_reg_write(uc, UC_X86_REG_RDI, &EMULATED_ARGC); // argc == 2 + + // Make sure the input is 0 terminated. + //input[input_len-1] = '\0'; + // Write the testcase to unicorn. 
uc_mem_write(uc, INPUT_LOCATION + INPUT_OFFSET, input, input_len); return true; } @@ -188,12 +203,7 @@ int main(int argc, char **argv, char **envp) { uc_mem_write(uc, 0x10008, "\x16\x00\x01", 3); // little endian of 0x10016, see above - // Set up the function parameters accordingly RSI, RDI (see calling convention/disassembly) - uint64_t input_location = INPUT_LOCATION; - uc_reg_write(uc, UC_X86_REG_RSI, &input_location); // argv - uint64_t emulated_argc = 2; - uc_reg_write(uc, UC_X86_REG_RDI, &emulated_argc); // argc == 2 - + // If we want tracing output, set the callbacks here if (tracing) { // tracing all basic blocks with customized callback @@ -212,9 +222,9 @@ int main(int argc, char **argv, char **envp) { &end_address, // Where to exit (this is an array) 1, // Count of end addresses NULL, // Optional calback to run after each exec - false, - 1, // For persistent mode: How many rounds to run - NULL + false, // true, if the optional callback should be run also for non-crashes + 100, // For persistent mode: How many rounds to run + NULL // additional data pointer ); switch(afl_ret) { case UC_AFL_RET_ERROR: -- cgit 1.4.1 From 1ac31361ca61f71b6a419064de5063aef80203e5 Mon Sep 17 00:00:00 2001 From: hexcoder- Date: Fri, 17 Jan 2020 20:41:30 +0100 Subject: as suggested, added a comment, why NetBSD needs a higher memory limit --- include/config.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/config.h b/include/config.h index 69380282..4eac82e0 100644 --- a/include/config.h +++ b/include/config.h @@ -67,7 +67,7 @@ # else # define MEM_LIMIT 50 # endif /* ^!WORD_SIZE_64 */ -#else +#else /* NetBSD's kernel needs more space for stack, see discussion for issue #165 */ # define MEM_LIMIT 200 #endif /* Default memory limit when running in QEMU mode (MB): */ -- cgit 1.4.1 From 858b5da24e3b060e2ebf6ab48ded22fbdd7d3ceb Mon Sep 17 00:00:00 2001 From: David Carlier Date: Sat, 18 Jan 2020 14:28:31 +0000 Subject: libdislocator: reallocarray API 
introduction --- libdislocator/libdislocator.so.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/libdislocator/libdislocator.so.c b/libdislocator/libdislocator.so.c index 20649470..b9ba8967 100644 --- a/libdislocator/libdislocator.so.c +++ b/libdislocator/libdislocator.so.c @@ -397,6 +397,28 @@ void* aligned_alloc(size_t align, size_t len) { } +/* specific BSD api mainly checking possible overflow for the size */ + +void* reallocarray(void* ptr, size_t elem_len, size_t elem_cnt) { + + const size_t elem_lim = 1UL << (sizeof(size_t) * 4); + const size_t elem_tot = elem_len * elem_cnt; + void* ret = NULL; + + if ((elem_len >= elem_lim || elem_cnt >= elem_lim) && elem_len > 0 && + elem_cnt > (SIZE_MAX / elem_len)) { + + DEBUGF("reallocarray size overflow (%zu)", elem_tot); + + } else { + + ret = realloc(ptr, elem_tot); + + } + + return ret; +} + __attribute__((constructor)) void __dislocator_init(void) { u8* tmp = (u8*)getenv("AFL_LD_LIMIT_MB"); -- cgit 1.4.1 From 6b0950b03d8a9fd0c21b4be71fd4a4bd6ab68547 Mon Sep 17 00:00:00 2001 From: hexcoder- Date: Sat, 18 Jan 2020 16:13:57 +0100 Subject: fix some syntax errors regarding $(filter ...) 
--- Makefile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 703ed673..df5ad048 100644 --- a/Makefile +++ b/Makefile @@ -55,17 +55,17 @@ CFLAGS += -Wall -g -Wno-pointer-sign -I include/ \ AFL_FUZZ_FILES = $(wildcard src/afl-fuzz*.c) -ifneq "($filter %3.7m, $(shell python3.7m-config --includes 2>/dev/null)" "" +ifneq "$(filter %3.7m, $(shell python3.7m-config --includes 2>/dev/null))" "" PYTHON_INCLUDE ?= $(shell python3.7m-config --includes) PYTHON_LIB ?= $(shell python3.7m-config --ldflags) PYTHON_VERSION = 3.7m else - ifneq "($filter %3.7, $(shell python3.7-config --includes) 2> /dev/null" "" + ifneq "$(filter %3.7, $(shell python3.7-config --includes) 2> /dev/null)" "" PYTHON_INCLUDE ?= $(shell python3.7-config --includes) PYTHON_LIB ?= $(shell python3.7-config --ldflags) PYTHON_VERSION = 3.7 else - ifneq "($filter %2.7, $(shell python2.7-config --includes) 2> /dev/null" "" + ifneq "$(filter %2.7, $(shell python2.7-config --includes) 2> /dev/null)" "" PYTHON_INCLUDE ?= $(shell python2.7-config --includes) PYTHON_LIB ?= $(shell python2.7-config --ldflags) PYTHON_VERSION = 2.7 @@ -77,14 +77,14 @@ PYTHON_INCLUDE ?= $(shell test -e /usr/include/python3.7m && echo /usr/include/p PYTHON_INCLUDE ?= $(shell test -e /usr/include/python3.7 && echo /usr/include/python3.7) PYTHON_INCLUDE ?= $(shell test -e /usr/include/python2.7 && echo /usr/include/python2.7) -ifneq "($filter %3.7m, $(PYTHON_INCLUDE))" "" +ifneq "$(filter %3.7m, $(PYTHON_INCLUDE))" "" PYTHON_VERSION ?= 3.7m PYTHON_LIB ?= -lpython3.7m else - ifneq "($filter %3.7, $(PYTHON_INCLUDE))" "" + ifneq "$(filter %3.7, $(PYTHON_INCLUDE))" "" PYTHON_VERSION ?= 3.7 else - ifneq "($filter %2.7, $(PYTHON_INCLUDE))" "" + ifneq "$(filter %2.7, $(PYTHON_INCLUDE))" "" PYTHON_VERSION ?= 2.7 PYTHON_LIB ?= -lpython2.7 else -- cgit 1.4.1 From 00b1d16ac61e9f86cd0c1defec6299e0a5e3fdde Mon Sep 17 00:00:00 2001 From: hexcoder- Date: Sat, 18 Jan 2020 16:28:13 +0100 Subject: 
more fixes for python checks --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index df5ad048..dbb37feb 100644 --- a/Makefile +++ b/Makefile @@ -60,12 +60,12 @@ ifneq "$(filter %3.7m, $(shell python3.7m-config --includes 2>/dev/null))" "" PYTHON_LIB ?= $(shell python3.7m-config --ldflags) PYTHON_VERSION = 3.7m else - ifneq "$(filter %3.7, $(shell python3.7-config --includes) 2> /dev/null)" "" + ifneq "$(filter %3.7, $(shell python3.7-config --includes 2>/dev/null))" "" PYTHON_INCLUDE ?= $(shell python3.7-config --includes) PYTHON_LIB ?= $(shell python3.7-config --ldflags) PYTHON_VERSION = 3.7 else - ifneq "$(filter %2.7, $(shell python2.7-config --includes) 2> /dev/null)" "" + ifneq "$(filter %2.7, $(shell python2.7-config --includes 2>/dev/null))" "" PYTHON_INCLUDE ?= $(shell python2.7-config --includes) PYTHON_LIB ?= $(shell python2.7-config --ldflags) PYTHON_VERSION = 2.7 -- cgit 1.4.1 From db5d5017155a24cb04bef97a0cf97d45456e7901 Mon Sep 17 00:00:00 2001 From: hexcoder- Date: Sat, 18 Jan 2020 16:46:14 +0100 Subject: set AFL_CC for libradamsa test (needed on FreeBSD) --- test/test.sh | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/test/test.sh b/test/test.sh index 8f40773c..5bab0d7a 100755 --- a/test/test.sh +++ b/test/test.sh @@ -457,7 +457,13 @@ test -e ../libdislocator.so && { } rm -f test-compcov test -e ../libradamsa.so && { - test -e test-instr.plain || ../afl-clang-fast -o test-instr.plain ../test-instr.c > /dev/null 2>&1 + # on FreeBSD need to set AFL_CC + if which clang >/dev/null; then + export AFL_CC=`which clang` + else + export AFL_CC=`$LLVM_CONFIG --bindir`/clang + fi + test -e test-instr.plain || ../afl-clang-fast -o test-instr.plain ../test-instr.c test -e test-instr.plain || ../afl-gcc-fast -o test-instr.plain ../test-instr.c > /dev/null 2>&1 test -e test-instr.plain || ../${AFL_GCC} -o test-instr.plain ../test-instr.c > /dev/null 2>&1 test -e test-instr.plain && { -- 
cgit 1.4.1 From 08691fcc974a9fcf2df3e926959b21199df7e946 Mon Sep 17 00:00:00 2001 From: hexcoder- Date: Sat, 18 Jan 2020 16:58:20 +0100 Subject: add forgotten stderr redirect --- test/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test.sh b/test/test.sh index 5bab0d7a..c770c1b7 100755 --- a/test/test.sh +++ b/test/test.sh @@ -463,7 +463,7 @@ test -e ../libradamsa.so && { else export AFL_CC=`$LLVM_CONFIG --bindir`/clang fi - test -e test-instr.plain || ../afl-clang-fast -o test-instr.plain ../test-instr.c + test -e test-instr.plain || ../afl-clang-fast -o test-instr.plain ../test-instr.c > /dev/null 2>&1 test -e test-instr.plain || ../afl-gcc-fast -o test-instr.plain ../test-instr.c > /dev/null 2>&1 test -e test-instr.plain || ../${AFL_GCC} -o test-instr.plain ../test-instr.c > /dev/null 2>&1 test -e test-instr.plain && { -- cgit 1.4.1 From 0eec6221554c260b2d93de73e88c2279c4479753 Mon Sep 17 00:00:00 2001 From: hexcoder- Date: Sat, 18 Jan 2020 16:35:21 +0100 Subject: Intel test taken from lto branch, extended (as in test.sh), and tested on RaspberryPi --- Makefile | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Makefile b/Makefile index dbb37feb..7260ee47 100644 --- a/Makefile +++ b/Makefile @@ -48,6 +48,14 @@ ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -march=native -o .te CFLAGS_OPT = -march=native endif +ifneq "$(shell uname -m)" "x86_64" + ifneq "$(shell uname -m)" "i386" + ifneq "$(shell uname -m)" "amd64" + AFL_NO_X86=1 + endif + endif +endif + CFLAGS ?= -O3 -funroll-loops $(CFLAGS_OPT) CFLAGS += -Wall -g -Wno-pointer-sign -I include/ \ -DAFL_PATH=\"$(HELPER_PATH)\" -DBIN_PATH=\"$(BIN_PATH)\" \ -- cgit 1.4.1 From e7770a70023381bc7ff96b1d346b0ff9741f62de Mon Sep 17 00:00:00 2001 From: hexcoder- Date: Sun, 19 Jan 2020 12:25:32 +0100 Subject: make exporting AFL_CC FreeBSD specific, since it seems to harm the libradamsa test on travis/arm64 --- test/test.sh | 13 ++++++++----- 1 file changed, 8 
insertions(+), 5 deletions(-) diff --git a/test/test.sh b/test/test.sh index c770c1b7..43b278b4 100755 --- a/test/test.sh +++ b/test/test.sh @@ -458,11 +458,14 @@ test -e ../libdislocator.so && { rm -f test-compcov test -e ../libradamsa.so && { # on FreeBSD need to set AFL_CC - if which clang >/dev/null; then - export AFL_CC=`which clang` - else - export AFL_CC=`$LLVM_CONFIG --bindir`/clang - fi + + test `uname -s` = 'FreeBSD' && { + if which clang >/dev/null; then + export AFL_CC=`which clang` + else + export AFL_CC=`$LLVM_CONFIG --bindir`/clang + fi + } test -e test-instr.plain || ../afl-clang-fast -o test-instr.plain ../test-instr.c > /dev/null 2>&1 test -e test-instr.plain || ../afl-gcc-fast -o test-instr.plain ../test-instr.c > /dev/null 2>&1 test -e test-instr.plain || ../${AFL_GCC} -o test-instr.plain ../test-instr.c > /dev/null 2>&1 -- cgit 1.4.1 From f706e210ec07d8797850781ed82d2279df9a88b9 Mon Sep 17 00:00:00 2001 From: hexcoder- Date: Sun, 19 Jan 2020 21:20:51 +0100 Subject: add missing test cases for qemu_mode unsigaction library --- test/test.sh | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/test/test.sh b/test/test.sh index 43b278b4..97cc1511 100755 --- a/test/test.sh +++ b/test/test.sh @@ -569,8 +569,64 @@ test -e ../afl-qemu-trace && { CODE=1 exit 1 } - $ECHO "$YELLOW[-] we need a test case for qemu_mode unsigaction library" rm -rf in out errors + test -e ../qemu_mode/unsigaction/unsigaction32.so && { + ${AFL_CC} -o test-unsigaction32 -m32 test-unsigaction.c >> errors 2>&1 && { + ./test-unsigaction32 + RETVAL_NORMAL32=$? + LD_PRELOAD=../qemu_mode/unsigaction/unsigaction32.so ./test-unsigaction32 + RETVAL_LIBUNSIGACTION32=$? + test $RETVAL_NORMAL32 = "2" -a $RETVAL_LIBUNSIGACTION32 = "0" && { + $ECHO "$GREEN[+] qemu_mode unsigaction library (32 bit) ignores signals" + } || { + test $RETVAL_NORMAL32 != "2" && { + $ECHO "$RED[!] 
cannot trigger signal in test program (32 bit)" + } + test $RETVAL_LIBUNSIGACTION32 != "0" && { + $ECHO "$RED[!] signal in test program (32 bit) is not ignored with unsigaction" + } + CODE=1 + } + } || { + echo CUT------------------------------------------------------------------CUT + cat errors + echo CUT------------------------------------------------------------------CUT + $ECHO "$RED[!] cannot compile test program (32 bit) for unsigaction library" + CODE=1 + } + } || { + $ECHO "$YELLOW[-] we cannot test qemu_mode unsigaction library (32 bit) because it is not present" + INCOMPLETE=1 + } + test -e ../qemu_mode/unsigaction/unsigaction64.so && { + ${AFL_CC} -o test-unsigaction64 -m64 test-unsigaction.c >> errors 2>&1 && { + ./test-unsigaction64 + RETVAL_NORMAL64=$? + LD_PRELOAD=../qemu_mode/unsigaction/unsigaction64.so ./test-unsigaction64 + RETVAL_LIBUNSIGACTION64=$? + test $RETVAL_NORMAL64 = "2" -a $RETVAL_LIBUNSIGACTION64 = "0" && { + $ECHO "$GREEN[+] qemu_mode unsigaction library (64 bit) ignores signals" + } || { + test $RETVAL_NORMAL64 != "2" && { + $ECHO "$RED[!] cannot trigger signal in test program (64 bit)" + } + test $RETVAL_LIBUNSIGACTION64 != "0" && { + $ECHO "$RED[!] signal in test program (64 bit) is not ignored with unsigaction" + } + CODE=1 + } + } || { + echo CUT------------------------------------------------------------------CUT + cat errors + echo CUT------------------------------------------------------------------CUT + $ECHO "$RED[!] cannot compile test program (64 bit) for unsigaction library" + CODE=1 + } + } || { + $ECHO "$YELLOW[-] we cannot test qemu_mode unsigaction library (64 bit) because it is not present" + INCOMPLETE=1 + } + rm -rf errors test-unsigaction32 test-unsigaction64 } } || { $ECHO "$RED[!] gcc compilation of test targets failed - what is going on??" 
-- cgit 1.4.1 From 274c8d7d3cff7ad61f2a57c7f69914a3948711d2 Mon Sep 17 00:00:00 2001 From: hexcoder- Date: Sun, 19 Jan 2020 21:22:41 +0100 Subject: add missing test program (oops) --- test/test-unsigaction.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 test/test-unsigaction.c diff --git a/test/test-unsigaction.c b/test/test-unsigaction.c new file mode 100644 index 00000000..1a5e4b26 --- /dev/null +++ b/test/test-unsigaction.c @@ -0,0 +1,25 @@ +#include <signal.h> /* sigemptyset(), sigaction(), kill(), SIGUSR1 */ +#include <stdlib.h> /* exit() */ +#include <unistd.h> /* getpid() */ +#include <errno.h> /* errno */ +#include <stdio.h> /* fprintf() */ + +static void mysig_handler(int sig) +{ + exit(2); +} + +int main() +{ + /* setup sig handler */ + struct sigaction sa; + sa.sa_handler = mysig_handler; + sigemptyset(&sa.sa_mask); + sa.sa_flags = 0; + if (sigaction(SIGCHLD, &sa, NULL)) { + fprintf(stderr, "could not set signal handler %d, aborted\n", errno); + exit(1); + } + kill(getpid(), SIGCHLD); + return 0; +} -- cgit 1.4.1 From 72058fdcbcdc707824bd4211ce528237afc1140e Mon Sep 17 00:00:00 2001 From: van Hauser Date: Mon, 20 Jan 2020 12:56:55 +0100 Subject: another freebsd fix in test.sh --- test/test.sh | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/test/test.sh b/test/test.sh index 97cc1511..23d98278 100755 --- a/test/test.sh +++ b/test/test.sh @@ -179,11 +179,13 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" && { $ECHO "$BLUE[*] Testing: llvm_mode" test -e ../afl-clang-fast -a -e ../split-switches-pass.so && { # on FreeBSD need to set AFL_CC - if which clang >/dev/null; then - export AFL_CC=`which clang` - else - export AFL_CC=`$LLVM_CONFIG --bindir`/clang - fi + test `uname -s` = 'FreeBSD' && { + if which clang >/dev/null; then + export AFL_CC=`which clang` + else + export AFL_CC=`$LLVM_CONFIG --bindir`/clang + fi + } ../afl-clang-fast -o test-instr.plain ../test-instr.c > /dev/null 2>&1 AFL_HARDEN=1 ../afl-clang-fast -o 
test-compcov.harden test-compcov.c > /dev/null 2>&1 test -e test-instr.plain && { -- cgit 1.4.1 From 0d5a8f69e9785cdaec4c9b62e186050112a7cb8f Mon Sep 17 00:00:00 2001 From: van Hauser Date: Mon, 20 Jan 2020 19:21:44 +0100 Subject: fixed Heiko's global search-replace :) --- include/afl-as.h | 2 +- include/afl-fuzz.h | 2 +- include/alloc-inl.h | 2 +- include/android-ashmem.h | 2 +- include/common.h | 2 +- include/config.h | 17 +++++++++-------- include/debug.h | 2 +- include/forkserver.h | 2 +- include/sharedmem.h | 2 +- include/types.h | 2 +- libdislocator/libdislocator.so.c | 1 + src/afl-analyze.c | 2 +- src/afl-as.c | 2 +- src/afl-common.c | 2 +- src/afl-forkserver.c | 2 +- src/afl-fuzz-bitmap.c | 9 +++++++-- src/afl-fuzz-extras.c | 2 +- src/afl-fuzz-globals.c | 2 +- src/afl-fuzz-init.c | 2 +- src/afl-fuzz-misc.c | 2 +- src/afl-fuzz-one.c | 2 +- src/afl-fuzz-python.c | 2 +- src/afl-fuzz-queue.c | 2 +- src/afl-fuzz-run.c | 2 +- src/afl-fuzz-stats.c | 2 +- src/afl-fuzz.c | 2 +- src/afl-gcc.c | 2 +- src/afl-gotcpu.c | 2 +- src/afl-sharedmem.c | 2 +- src/afl-showmap.c | 2 +- src/afl-tmin.c | 2 +- 31 files changed, 45 insertions(+), 38 deletions(-) diff --git a/include/afl-as.h b/include/afl-as.h index 3af42205..bd5e734a 100644 --- a/include/afl-as.h +++ b/include/afl-as.h @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/include/afl-fuzz.h b/include/afl-fuzz.h index 9ecf1f29..00d29f76 100644 --- a/include/afl-fuzz.h +++ b/include/afl-fuzz.h @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/include/alloc-inl.h b/include/alloc-inl.h index 48598ed3..5592b295 100644 --- a/include/alloc-inl.h +++ b/include/alloc-inl.h @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc 
Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/include/android-ashmem.h b/include/android-ashmem.h index 35a5ba5e..adddc05f 100755 --- a/include/android-ashmem.h +++ b/include/android-ashmem.h @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/include/common.h b/include/common.h index 8ab78b41..3b953470 100644 --- a/include/common.h +++ b/include/common.h @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/include/config.h b/include/config.h index 4eac82e0..83fcb8f9 100644 --- a/include/config.h +++ b/include/config.h @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi @@ -62,13 +62,14 @@ /* Default memory limit for child process (MB): */ #ifndef __NetBSD__ -# ifndef WORD_SIZE_64 -# define MEM_LIMIT 25 -# else -# define MEM_LIMIT 50 -# endif /* ^!WORD_SIZE_64 */ -#else /* NetBSD's kernel needs more space for stack, see discussion for issue #165 */ -# define MEM_LIMIT 200 +#ifndef WORD_SIZE_64 +#define MEM_LIMIT 25 +#else +#define MEM_LIMIT 50 +#endif /* ^!WORD_SIZE_64 */ +#else /* NetBSD's kernel needs more space for stack, see discussion for issue \ + #165 */ +#define MEM_LIMIT 200 #endif /* Default memory limit when running in QEMU mode (MB): */ diff --git a/include/debug.h b/include/debug.h index 68109927..d6c04935 100644 --- a/include/debug.h +++ b/include/debug.h @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/include/forkserver.h b/include/forkserver.h index 17bc65af..0fdcba48 100644 --- a/include/forkserver.h +++ b/include/forkserver.h @@ -6,7 +6,7 @@ Forkserver design by Jann Horn - Now 
maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/include/sharedmem.h b/include/sharedmem.h index 69291330..7604d64c 100644 --- a/include/sharedmem.h +++ b/include/sharedmem.h @@ -6,7 +6,7 @@ Forkserver design by Jann Horn - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/include/types.h b/include/types.h index eba47be7..9e681e81 100644 --- a/include/types.h +++ b/include/types.h @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/libdislocator/libdislocator.so.c b/libdislocator/libdislocator.so.c index b9ba8967..221a629b 100644 --- a/libdislocator/libdislocator.so.c +++ b/libdislocator/libdislocator.so.c @@ -417,6 +417,7 @@ void* reallocarray(void* ptr, size_t elem_len, size_t elem_cnt) { } return ret; + } __attribute__((constructor)) void __dislocator_init(void) { diff --git a/src/afl-analyze.c b/src/afl-analyze.c index 3d4e636e..3de8c037 100644 --- a/src/afl-analyze.c +++ b/src/afl-analyze.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/src/afl-as.c b/src/afl-as.c index 77ac2f97..8d689385 100644 --- a/src/afl-as.c +++ b/src/afl-as.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/src/afl-common.c b/src/afl-common.c index 8c2f2b9a..6cb97cdf 100644 --- a/src/afl-common.c +++ b/src/afl-common.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/src/afl-forkserver.c b/src/afl-forkserver.c index de50c73c..77e1d648 100644 --- a/src/afl-forkserver.c +++ 
b/src/afl-forkserver.c @@ -6,7 +6,7 @@ Forkserver design by Jann Horn - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/src/afl-fuzz-bitmap.c b/src/afl-fuzz-bitmap.c index 515a7a79..3ffda284 100644 --- a/src/afl-fuzz-bitmap.c +++ b/src/afl-fuzz-bitmap.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi @@ -524,7 +524,12 @@ u8 save_if_interesting(char** argv, void* mem, u32 len, u8 fault) { struct queue_entry* q = queue; while (q) { - if (q->exec_cksum == cksum) { q->n_fuzz = q->n_fuzz + 1; break ; } + if (q->exec_cksum == cksum) { + + q->n_fuzz = q->n_fuzz + 1; + break; + + } q = q->next; diff --git a/src/afl-fuzz-extras.c b/src/afl-fuzz-extras.c index fcc7749d..6c6dc28c 100644 --- a/src/afl-fuzz-extras.c +++ b/src/afl-fuzz-extras.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/src/afl-fuzz-globals.c b/src/afl-fuzz-globals.c index b3476778..f0d98192 100644 --- a/src/afl-fuzz-globals.c +++ b/src/afl-fuzz-globals.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/src/afl-fuzz-init.c b/src/afl-fuzz-init.c index 5fe3689e..219be822 100644 --- a/src/afl-fuzz-init.c +++ b/src/afl-fuzz-init.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/src/afl-fuzz-misc.c b/src/afl-fuzz-misc.c index b8f376be..0da0cb0a 100644 --- a/src/afl-fuzz-misc.c +++ b/src/afl-fuzz-misc.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git 
a/src/afl-fuzz-one.c b/src/afl-fuzz-one.c index 74123300..b04683be 100644 --- a/src/afl-fuzz-one.c +++ b/src/afl-fuzz-one.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/src/afl-fuzz-python.c b/src/afl-fuzz-python.c index f1cdecde..f06c8e25 100644 --- a/src/afl-fuzz-python.c +++ b/src/afl-fuzz-python.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/src/afl-fuzz-queue.c b/src/afl-fuzz-queue.c index 1b51e3aa..0880de75 100644 --- a/src/afl-fuzz-queue.c +++ b/src/afl-fuzz-queue.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/src/afl-fuzz-run.c b/src/afl-fuzz-run.c index fa7a872a..a006194d 100644 --- a/src/afl-fuzz-run.c +++ b/src/afl-fuzz-run.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/src/afl-fuzz-stats.c b/src/afl-fuzz-stats.c index 7679403b..f2afb295 100644 --- a/src/afl-fuzz-stats.c +++ b/src/afl-fuzz-stats.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/src/afl-fuzz.c b/src/afl-fuzz.c index 0af8b35f..9a7495ef 100644 --- a/src/afl-fuzz.c +++ b/src/afl-fuzz.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/src/afl-gcc.c b/src/afl-gcc.c index 301e2034..e46fe5cd 100644 --- a/src/afl-gcc.c +++ b/src/afl-gcc.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , 
Heiko Eißfeldt and Andrea Fioraldi diff --git a/src/afl-gotcpu.c b/src/afl-gotcpu.c index 9a56159c..5be30238 100644 --- a/src/afl-gotcpu.c +++ b/src/afl-gotcpu.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/src/afl-sharedmem.c b/src/afl-sharedmem.c index 16eb14a7..04fcaa1c 100644 --- a/src/afl-sharedmem.c +++ b/src/afl-sharedmem.c @@ -6,7 +6,7 @@ Forkserver design by Jann Horn - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/src/afl-showmap.c b/src/afl-showmap.c index 8c899c9d..b9da3208 100644 --- a/src/afl-showmap.c +++ b/src/afl-showmap.c @@ -6,7 +6,7 @@ Forkserver design by Jann Horn - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi diff --git a/src/afl-tmin.c b/src/afl-tmin.c index 3e33b72f..7ce0ccaa 100644 --- a/src/afl-tmin.c +++ b/src/afl-tmin.c @@ -6,7 +6,7 @@ Forkserver design by Jann Horn - Now maintained by by Marc Heuse , + Now maintained by Marc Heuse , Heiko Eißfeldt and Andrea Fioraldi -- cgit 1.4.1 From 00d086f816d6b517a6817d6093a83ed8a65b18fa Mon Sep 17 00:00:00 2001 From: van Hauser Date: Tue, 21 Jan 2020 12:53:36 +0100 Subject: USE_TRACE_PC unnecessary, set env AFL_LLVM_USE_TRACE_PC instead --- docs/ChangeLog | 8 +++++--- llvm_mode/README.md | 19 +++++++++---------- llvm_mode/afl-clang-fast.c | 33 ++++++++++++++++++++++++--------- 3 files changed, 38 insertions(+), 22 deletions(-) diff --git a/docs/ChangeLog b/docs/ChangeLog index 5347d244..bb3537dd 100644 --- a/docs/ChangeLog +++ b/docs/ChangeLog @@ -21,9 +21,11 @@ Version ++2.60d (develop): - afl-fuzz: - now prints the real python version support compiled in - set stronger performance compile options and little tweaks - - afl-clang-fast now shows in the help output for which llvm version it - was compiled for - - added blacklisted function check in 
llvm_mode + - afl-clang-fast: + - show in the help output for which llvm version it was compiled for + - now does not need to be recompiled between trace-pc and pass + instrumentation. compile normally and set AFL_LLVM_USE_TRACE_PC :) + - added blacklisted function check in all modules of llvm_mode - added fix from Debian project to compile libdislocator and libtokencap diff --git a/llvm_mode/README.md b/llvm_mode/README.md index 5afa4dfd..150d1a17 100644 --- a/llvm_mode/README.md +++ b/llvm_mode/README.md @@ -198,24 +198,23 @@ PS. Because there are task switches still involved, the mode isn't as fast as faster than the normal fork() model, and compared to in-process fuzzing, should be a lot more robust. -## 8) Bonus feature #3: new 'trace-pc-guard' mode +## 8) Bonus feature #3: 'trace-pc-guard' mode -Recent versions of LLVM are shipping with a built-in execution tracing feature +LLVM is shipping with a built-in execution tracing feature that provides AFL with the necessary tracing data without the need to post-process the assembly or install any compiler plugins. See: http://clang.llvm.org/docs/SanitizerCoverage.html#tracing-pcs-with-guards -If you have a sufficiently recent compiler and want to give it a try, build -afl-clang-fast this way: +If you have not an outdated compiler and want to give it a try, build +targets this way: ``` - AFL_TRACE_PC=1 make clean all + libtarget-1.0 $ AFL_LLVM_USE_TRACE_PC=1 make ``` -Note that this mode is currently about 20% slower than "vanilla" afl-clang-fast, +Note that this mode is about 20% slower than "vanilla" afl-clang-fast, and about 5-10% slower than afl-clang. This is likely because the -instrumentation is not inlined, and instead involves a function call. On systems -that support it, compiling your target with -flto should help. - - +instrumentation is not inlined, and instead involves a function call. +On systems that support it, compiling your target with -flto can help +a bit. 
diff --git a/llvm_mode/afl-clang-fast.c b/llvm_mode/afl-clang-fast.c index b322b762..7da7c5a3 100644 --- a/llvm_mode/afl-clang-fast.c +++ b/llvm_mode/afl-clang-fast.c @@ -204,13 +204,24 @@ static void edit_params(u32 argc, char** argv) { // "-fsanitize-coverage=trace-cmp,trace-div,trace-gep"; // cc_params[cc_par_cnt++] = "-sanitizer-coverage-block-threshold=0"; #else - cc_params[cc_par_cnt++] = "-Xclang"; - cc_params[cc_par_cnt++] = "-load"; - cc_params[cc_par_cnt++] = "-Xclang"; - if (getenv("AFL_LLVM_INSTRIM") != NULL || getenv("INSTRIM_LIB") != NULL) - cc_params[cc_par_cnt++] = alloc_printf("%s/libLLVMInsTrim.so", obj_path); - else - cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-pass.so", obj_path); + if (getenv("USE_TRACE_PC") || getenv("AFL_USE_TRACE_PC") || + getenv("AFL_LLVM_USE_TRACE_PC") || getenv("AFL_TRACE_PC")) { + + cc_params[cc_par_cnt++] = + "-fsanitize-coverage=trace-pc-guard"; // edge coverage by default + + } else { + + cc_params[cc_par_cnt++] = "-Xclang"; + cc_params[cc_par_cnt++] = "-load"; + cc_params[cc_par_cnt++] = "-Xclang"; + if (getenv("AFL_LLVM_INSTRIM") != NULL || getenv("INSTRIM_LIB") != NULL) + cc_params[cc_par_cnt++] = alloc_printf("%s/libLLVMInsTrim.so", obj_path); + else + cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-pass.so", obj_path); + + } + #endif /* ^USE_TRACE_PC */ cc_params[cc_par_cnt++] = "-Qunused-arguments"; @@ -282,8 +293,10 @@ static void edit_params(u32 argc, char** argv) { #ifdef USE_TRACE_PC - if (getenv("AFL_INST_RATIO")) - FATAL("AFL_INST_RATIO not available at compile time with 'trace-pc'."); + if (getenv("USE_TRACE_PC") || getenv("AFL_USE_TRACE_PC") || + getenv("AFL_LLVM_USE_TRACE_PC") || getenv("AFL_TRACE_PC")) + if (getenv("AFL_INST_RATIO")) + FATAL("AFL_INST_RATIO not available at compile time with 'trace-pc'."); #endif /* USE_TRACE_PC */ @@ -455,6 +468,8 @@ int main(int argc, char** argv) { #ifdef USE_TRACE_PC SAYF(cCYA "afl-clang-fast" VERSION cRST " [tpcg] by \n"); +#warning \ + "You do not 
need to specifically compile with USE_TRACE_PC anymore, setting the environment variable AFL_LLVM_USE_TRACE_PC is enough." #else SAYF(cCYA "afl-clang-fast" VERSION cRST " by \n"); #endif /* ^USE_TRACE_PC */ -- cgit 1.4.1 From 8b92a40e19c1a90a31e7514de1c90f0cf558a62a Mon Sep 17 00:00:00 2001 From: Dominik Maier Date: Wed, 22 Jan 2020 02:08:30 +0100 Subject: c example now uses persistent mode --- unicorn_mode/samples/c/a.out | Bin 17184 -> 0 bytes unicorn_mode/samples/c/harness.c | 43 +++++++++++++++++++----- unicorn_mode/samples/c/persistent_target.c | 39 +++++++++++++++++++++ unicorn_mode/samples/c/persistent_target_x86_64 | Bin 0 -> 16544 bytes unicorn_mode/samples/c/simple_target.c | 34 ------------------- 5 files changed, 73 insertions(+), 43 deletions(-) delete mode 100644 unicorn_mode/samples/c/a.out create mode 100644 unicorn_mode/samples/c/persistent_target.c create mode 100644 unicorn_mode/samples/c/persistent_target_x86_64 delete mode 100644 unicorn_mode/samples/c/simple_target.c diff --git a/unicorn_mode/samples/c/a.out b/unicorn_mode/samples/c/a.out deleted file mode 100644 index 176c25e1..00000000 Binary files a/unicorn_mode/samples/c/a.out and /dev/null differ diff --git a/unicorn_mode/samples/c/harness.c b/unicorn_mode/samples/c/harness.c index 4239b222..2529c46e 100644 --- a/unicorn_mode/samples/c/harness.c +++ b/unicorn_mode/samples/c/harness.c @@ -29,13 +29,13 @@ #include // Path to the file containing the binary to emulate -#define BINARY_FILE ("simple_target_x86_64") +#define BINARY_FILE ("persistent_target_x86_64") // Memory map for the code to be tested // Arbitrary address where code to test will be loaded static const int64_t BASE_ADDRESS = 0x100000; -static const int64_t CODE_ADDRESS = 0x101119; -static const int64_t END_ADDRESS = 0x1011d7; +static const int64_t CODE_ADDRESS = 0x101139; +static const int64_t END_ADDRESS = 0x10120d; // Address of the stack (Some random address again) static const int64_t STACK_ADDRESS = (((int64_t) 0x01) << 
58); // Size of the stack (arbitrarily chosen, just make it big enough) @@ -52,15 +52,33 @@ static const int64_t ALIGNMENT = 0x1000; // In our special case, we emulate main(), so argc is needed. static const uint64_t EMULATED_ARGC = 2; +// The return from our fake strlen +static size_t current_input_len = 0; + static void hook_block(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) { printf(">>> Tracing basic block at 0x%"PRIx64 ", block size = 0x%x\n", address, size); } -static void hook_code(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) -{ +static void hook_code(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) { printf(">>> Tracing instruction at 0x%"PRIx64 ", instruction size = 0x%x\n", address, size); } +/* +The sample uses strlen, since we don't have a loader or libc, we'll fake it. +We know the strlen will return the lenght of argv[1] that we just planted. +It will be a lot faster than an actual strlen for this specific purpose. +*/ +static void hook_strlen(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) { + //Hook + //116b: e8 c0 fe ff ff call 1030 + // We place the return at RAX + //printf("Strlen hook at addr 0x%lx (size: 0x%x), result: %ld\n", address, size, current_input_len); + uc_reg_write(uc, UC_X86_REG_RAX, &current_input_len); + // We skip the actual call by updating RIP + uint64_t next_addr = address + size; + uc_reg_write(uc, UC_X86_REG_RIP, &next_addr); +} + /* Unicorn page needs to be 0x1000 aligned, apparently */ static uint64_t pad(uint64_t size) { if (size % ALIGNMENT == 0) return size; @@ -107,8 +125,6 @@ static bool place_input_callback( // Test input too short or too long, ignore this testcase return false; } - // We need a valid c string, make sure it never goes out of bounds. - input[input_len-1] = '\0'; // For persistent mode, we have to set up stack and memory each time. 
uc_reg_write(uc, UC_X86_REG_RIP, &CODE_ADDRESS); // Set the instruction pointer back @@ -116,10 +132,14 @@ static bool place_input_callback( uc_reg_write(uc, UC_X86_REG_RSI, &INPUT_LOCATION); // argv uc_reg_write(uc, UC_X86_REG_RDI, &EMULATED_ARGC); // argc == 2 - // Make sure the input is 0 terminated. - //input[input_len-1] = '\0'; + // We need a valid c string, make sure it never goes out of bounds. + input[input_len-1] = '\0'; // Write the testcase to unicorn. uc_mem_write(uc, INPUT_LOCATION + INPUT_OFFSET, input, input_len); + + // store input_len for the faux strlen hook + current_input_len = input_len; + return true; } @@ -211,6 +231,11 @@ int main(int argc, char **argv, char **envp) { uc_hook_add(uc, &hooks[1], UC_HOOK_CODE, hook_code, NULL, BASE_ADDRESS, BASE_ADDRESS + len - 1); } + // Add our strlen hook (for this specific testcase only) + int strlen_hook_pos = BASE_ADDRESS + 0x116b; + uc_hook strlen_hook; + uc_hook_add(uc, &strlen_hook, UC_HOOK_CODE, hook_strlen, NULL, strlen_hook_pos, strlen_hook_pos); + printf("Starting to fuzz :)\n"); fflush(stdout); diff --git a/unicorn_mode/samples/c/persistent_target.c b/unicorn_mode/samples/c/persistent_target.c new file mode 100644 index 00000000..5b866f86 --- /dev/null +++ b/unicorn_mode/samples/c/persistent_target.c @@ -0,0 +1,39 @@ +/* + * Sample target file to test afl-unicorn fuzzing capabilities. + * This is a very trivial example that will crash pretty easily + * in several different exciting ways. + * + * Input is assumed to come from a buffer located at DATA_ADDRESS + * (0x00300000), so make sure that your Unicorn emulation of this + * puts user data there. 
+ * + * Written by Nathan Voss + * Adapted by Lukas Seidel + */ +#include +#include + + +int main(int argc, char** argv) { + if (argc < 2) return -1; + + char *data_buf = argv[1]; + uint64_t data_len = strlen(data_buf); + if (data_len < 20) return -2; + + for (; data_len --> 0 ;) { + if (data_len >= 18) continue; + if (data_len > 2 && data_len < 18) { + ((char *)data_len)[(uint64_t)data_buf] = data_buf[data_len + 1]; + } else if (data_buf[9] == 0x90 && data_buf[10] != 0x00 && data_buf[11] == 0x90) { + // Cause a crash if data[10] is not zero, but [9] and [11] are zero + unsigned char invalid_read = *(unsigned char *) 0x00000000; + } + } + if (data_buf[0] > 0x10 && data_buf[0] < 0x20 && data_buf[1] > data_buf[2]) { + // Cause an 'invalid read' crash if (0x10 < data[0] < 0x20) and data[1] > data[2] + unsigned char invalid_read = *(unsigned char *) 0x00000000; + } + + return 0; +} diff --git a/unicorn_mode/samples/c/persistent_target_x86_64 b/unicorn_mode/samples/c/persistent_target_x86_64 new file mode 100644 index 00000000..22e04357 Binary files /dev/null and b/unicorn_mode/samples/c/persistent_target_x86_64 differ diff --git a/unicorn_mode/samples/c/simple_target.c b/unicorn_mode/samples/c/simple_target.c deleted file mode 100644 index dbf10911..00000000 --- a/unicorn_mode/samples/c/simple_target.c +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Sample target file to test afl-unicorn fuzzing capabilities. - * This is a very trivial example that will crash pretty easily - * in several different exciting ways. - * - * Input is assumed to come from a buffer located at DATA_ADDRESS - * (0x00300000), so make sure that your Unicorn emulation of this - * puts user data there. 
- * - * Written by Nathan Voss - * Adapted by Lukas Seidel - */ - - -int main(int argc, char** argv) { - if(argc < 2){ - return -1; - } - - char *data_buf = argv[1]; - - if (data_buf[20] != 0) { - // Cause an 'invalid read' crash if data[0..3] == '\x01\x02\x03\x04' - unsigned char invalid_read = *(unsigned char *) 0x00000000; - } else if (data_buf[0] > 0x10 && data_buf[0] < 0x20 && data_buf[1] > data_buf[2]) { - // Cause an 'invalid read' crash if (0x10 < data[0] < 0x20) and data[1] > data[2] - unsigned char invalid_read = *(unsigned char *) 0x00000000; - } else if (data_buf[9] == 0x00 && data_buf[10] != 0x00 && data_buf[11] == 0x00) { - // Cause a crash if data[10] is not zero, but [9] and [11] are zero - unsigned char invalid_read = *(unsigned char *) 0x00000000; - } - - return 0; -} -- cgit 1.4.1 From fb221db8ae4d640aa6261633ca249a86305292c4 Mon Sep 17 00:00:00 2001 From: van Hauser Date: Wed, 22 Jan 2020 08:35:41 +0100 Subject: clarify gcc plugin test case result --- test/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test.sh b/test/test.sh index 23d98278..0ae6fd09 100755 --- a/test/test.sh +++ b/test/test.sh @@ -336,7 +336,7 @@ test -e ../afl-gcc-fast -a -e ../afl-gcc-rt.o && { $ECHO "$GREEN[+] gcc_plugin run reported $TUPLES instrumented locations which is fine" } || { $ECHO "$RED[!] gcc_plugin instrumentation produces a weird number of instrumented locations: $TUPLES" - $ECHO "$YELLOW[-] the gcc_plugin instrumentation issue is not flagged as an error because travis builds would all fail otherwise :-(" + $ECHO "$YELLOW[-] this is a known issue in gcc, not afl++. 
It is not flagged as an error because travis builds would all fail otherwise :-(" #CODE=1 } } -- cgit 1.4.1 From 4fbcc37f8450136759913875b6234d2e3ab2f032 Mon Sep 17 00:00:00 2001 From: hexcoder- Date: Wed, 22 Jan 2020 09:26:54 +0100 Subject: awk version for portability, tested on linux and FreeBSD so far --- afl-cmin.awk | 440 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 440 insertions(+) create mode 100755 afl-cmin.awk diff --git a/afl-cmin.awk b/afl-cmin.awk new file mode 100755 index 00000000..021f7059 --- /dev/null +++ b/afl-cmin.awk @@ -0,0 +1,440 @@ +#!/usr/bin/awk -f + +# getopt.awk --- Do C library getopt(3) function in awk + +# External variables: +# Optind -- index in ARGV of first nonoption argument +# Optarg -- string value of argument to current option +# Opterr -- if nonzero, print our own diagnostic +# Optopt -- current option letter + +# Returns: +# -1 at end of options +# "?" for unrecognized option +# a character representing the current option + +# Private Data: +# _opti -- index in multiflag option, e.g., -abc + +function getopt(argc, argv, options, thisopt, i) +{ + if (length(options) == 0) # no options given + return -1 + + if (argv[Optind] == "--") { # all done + Optind++ + _opti = 0 + return -1 + } else if (argv[Optind] !~ /^-[^:[:space:]]/) { + _opti = 0 + return -1 + } + if (_opti == 0) + _opti = 2 + thisopt = substr(argv[Optind], _opti, 1) + Optopt = thisopt + i = index(options, thisopt) + if (i == 0) { + if (Opterr) + printf("%c -- invalid option\n", thisopt) > "/dev/stderr" + if (_opti >= length(argv[Optind])) { + Optind++ + _opti = 0 + } else + _opti++ + return "?" 
+ } + if (substr(options, i + 1, 1) == ":") { + # get option argument + if (length(substr(argv[Optind], _opti + 1)) > 0) + Optarg = substr(argv[Optind], _opti + 1) + else + Optarg = argv[++Optind] + _opti = 0 + } else + Optarg = "" + if (_opti == 0 || _opti >= length(argv[Optind])) { + Optind++ + _opti = 0 + } else + _opti++ + return thisopt +} + +BEGIN { + Opterr = 1 # default is to diagnose + Optind = 1 # skip ARGV[0] + + # test program + if (_getopt_test) { + while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1) + printf("c = <%c>, Optarg = <%s>\n", + _go_c, Optarg) + printf("non-option arguments:\n") + for (; Optind < ARGC; Optind++) + printf("\tARGV[%d] = <%s>\n", + Optind, ARGV[Optind]) + } +} + +function usage() { + print \ +"Usage: afl-cmin [ options ] -- /path/to/target_app [ ... ]\n" \ +"\n" \ +"Required parameters:\n" \ +"\n" \ +" -i dir - input directory with starting corpus\n" \ +" -o dir - output directory for minimized files\n" \ +"\n" \ +"Execution control settings:\n" \ +"\n" \ +" -f file - location read by the fuzzed program (stdin)\n" \ +" -m megs - memory limit for child process ("mem_limit" MB)\n" \ +" -t msec - run time limit for child process (none)\n" \ +" -Q - use binary-only instrumentation (QEMU mode)\n" \ +" -U - use unicorn-based instrumentation (unicorn mode)\n" \ +"\n" \ +"Minimization settings:\n" \ +" -C - keep crashing inputs, reject everything else\n" \ +" -e - solve for edge coverage only, ignore hit counts\n" \ +"\n" \ +"For additional tips, please consult docs/README.md\n" \ +"\n" \ + > "/dev/stderr" + exit 1 +} + +function exists_and_is_executable(binarypath) { + return 0 == system("test -f "binarypath" -a -x "binarypath) +} + +BEGIN { + print "corpus minimization tool for afl-fuzz++ (awk version)\n" +print "PATH="ENVIRON["PATH"] + + # defaults + extra_par = "" + # process options + Opterr = 1 # default is to diagnose + Optind = 1 # skip ARGV[0] + while ((_go_c = getopt(ARGC, ARGV, "hi:o:f:m:t:eCQU?")) != -1) { + if (_go_c == 
"i") { + if (!Optarg) usage() + if (in_dir) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} + in_dir = Optarg + continue + } else + if (_go_c == "o") { + if (!Optarg) usage() + if (out_dir) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} + out_dir = Optarg + continue + } else + if (_go_c == "f") { + if (!Optarg) usage() + if (stdin_file) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} + stdin_file = Optarg + continue + } else + if (_go_c == "m") { + if (!Optarg) usage() + if (mem_limit) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} + mem_limit = Optarg + mem_limit_given = 1 + continue + } else + if (_go_c == "t") { + if (!Optarg) usage() + if (timeout) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} + timeout = Optarg + continue + } else + if (_go_c == "C") { + ENVIRON["AFL_CMIN_CRASHES_ONLY"] = 1 + continue + } else + if (_go_c == "e") { + extra_par = extra_par " -e" + continue + } else + if (_go_c == "Q") { + if (qemu_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} + extra_par = extra_par " -Q" + if ( !mem_limit_given ) mem_limit = "250" + qemu_mode = 1 + continue + } else + if (_go_c == "U") { + if (unicorn_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} + extra_par = extra_par " -U" + if ( !mem_limit_given ) mem_limit = "250" + unicorn_mode = 1 + continue + } else + if (_go_c == "?") { + exit 1 + } else + usage() + } # while options + + if (!mem_limit) mem_limit = 200 + if (!timeout) timeout = "none" + + # get program args + i = 0 + prog_args_string = "" + for (; Optind < ARGC; Optind++) { + prog_args[i++] = ARGV[Optind] + if (i > 1) + prog_args_string = prog_args_string" "ARGV[Optind] + } + + # sanity checks + if (!prog_args[0] || !in_dir || !out_dir) usage() + + target_bin = prog_args[0] + + # Do a sanity check to discourage the use of /tmp, since we can't really + # handle this safely from an awk script. 
+ + if (!ENVIRON["AFL_ALLOW_TMP"]) { + dirlist[0] = in_dir + dirlist[1] = target_bin + dirlist[2] = out_dir + dirlist[3] = stdin_file + "pwd" | getline dirlist[4] # current directory + for (dirind in dirlist) { + dir = dirlist[dirind] + + if (dir ~ /^(\/var)?\/tmp/) { + print "[-] Error: do not use this script in /tmp or /var/tmp." > "/dev/stderr" + exit 1 + } + } + delete dirlist + } + + # If @@ is specified, but there's no -f, let's come up with a temporary input + # file name. + + trace_dir = out_dir "/.traces" + + if (!stdin_file) { + found_atat = 0 + for (prog_args_ind in prog_args) { + if ("@@" == prog_args[prog_args_ind]) { + found_atat = 1 + break + } + } + if (found_atat) { + stdin_file = trace_dir "/.cur_input" + } + } + + # Check for obvious errors. + + if (mem_limit && mem_limit != "none" && mem_limit < 5) { + print "[-] Error: dangerously low memory limit." > "/dev/stderr" + exit 1 + } + + if (timeout && timeout != "none" && timeout < 10) { + print "[-] Error: dangerously low timeout." > "/dev/stderr" + exit 1 + } + + if (target_bin && !exists_and_is_executable(target_bin)) { + + "which "target_bin" 2>/dev/null" | getline tnew + if (!tnew || !exists_and_is_executable(tnew)) { + print "[-] Error: binary '"target_bin"' not found or not executable." > "/dev/stderr" + exit 1 + } + target_bin = tnew + } + + if (!ENVIRON["AFL_SKIP_BIN_CHECK"] && !qemu_mode && !unicorn_mode) { + if (0 != system( "grep -q __AFL_SHM_ID "target_bin )) { + print "[-] Error: binary '"target_bin"' doesn't appear to be instrumented." > "/dev/stderr" + exit 1 + } + } + + if (0 != system( "test -d "in_dir )) { + print "[-] Error: directory '"in_dir"' not found." 
> "/dev/stderr" + exit 1 + } + + if (0 == system( "test -d "in_dir"/queue" )) { + in_dir = in_dir "/queue" + } + + system("rm -rf "trace_dir" 2>/dev/null"); + system("rm "out_dir"/id[:_]* 2>/dev/null") + + if (0 == system( "test -d "out_dir" -a -e "out_dir"/*" )) { + print "[-] Error: directory '"out_dir"' exists and is not empty - delete it first." > "/dev/stderr" + exit 1 + } + + if (stdin_file) { + # truncate input file + printf "" > stdin_file + close( stdin_file ) + } + + if (!ENVIRON["AFL_PATH"]) { + if (0 == system("test -f afl-cmin.awk")) { + path = "." + } else { + "which afl-showmap 2>/dev/null" | getline path + } + showmap = path + } else { + showmap = ENVIRON["AFL_PATH"] "/afl-showmap" + } + + if (!showmap || 0 != system("test -x "showmap )) { + print "[-] Error: can't find 'afl-showmap' - please set AFL_PATH." > "/dev/stderr" + exit 1 + } + + # get list of input filenames sorted by size + i = 0 + while ("find "in_dir" -type f -exec stat -f '%z %N' \{\} \; | sort -n | cut -d' ' -f2-" | getline) { + infilesSmallToBig[i++] = $0 + } + in_count = i + + first_file = infilesSmallToBig[0] + + # Make sure that we're not dealing with a directory. + + if (0 == system("test -d "in_dir"/"first_file)) { + print "[-] Error: The input directory contains subdirectories - please fix." > "/dev/stderr" + exit 1 + } + + # Check for the more efficient way to copy files... + if (0 != system("mkdir -p -m 0700 "trace_dir)) { + print "[-] Error: Cannot create directory "trace_dir > "/dev/stderr" + exit 1 + } + + if (0 == system("ln "in_dir"/"first_file" "trace_dir"/.link_test")) { + cp_tool = "ln" + } else { + cp_tool = "cp" + } + + # Make sure that we can actually get anything out of afl-showmap before we + # waste too much time. + + print "[*] Testing the target binary..." 
+ + if (!stdin_file) { + system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -- \""target_bin"\" "prog_args_string" <\""in_dir"/"first_file"\"") + } else { + system("cp "in_dir"/"first_file" "stdin_file) + system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" 0) { + ++first_count + } + + if (first_count) { + print "[+] OK, "first_count" tuples recorded." + } else { + print "[-] Error: no instrumentation output detected (perhaps crash or timeout)." > "/dev/stderr" + if (!ENVIRON["AFL_KEEP_TRACES"]) { + system("rm -rf "trace_dir" 2>/dev/null") + } + exit 1 + } + + # Let's roll! + + ############################# + # STEP 1: Collecting traces # + ############################# + + print "[*] Obtaining traces for "in_count" input files in '"in_dir"'." + + cur = 0; + if (!stdin_file) { + while (cur < in_count) { + fn = infilesSmallToBig[cur] + ++cur; + printf "\r Processing file "cur"/"in_count + system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/"fn"\" -Z "extra_par" -- \""target_bin"\" "prog_args_string" <\""in_dir"/"fn"\"") + } + } else { + while (cur < in_count) { + fn = infilesSmallToBig[cur] + ++cur + printf "\r Processing file "cur"/"in_count + system("cp "in_dir"/"fn" "stdin_file) + system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/"fn"\" -Z "extra_par" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" 0) { + key = line + if (!(key in key_count)) { + ++tuple_count + } + ++key_count[key] + if (! (key in best_file)) { + # this is the best file for this key + best_file[key] = fn + # copy file unless already done + if (! 
(fn in file_already_copied)) { + system(cp_tool" "in_dir"/"fn" "out_dir"/"fn) + file_already_copied[fn] = "" + ++out_count + } + } + } + close(tracefile_path) + } + + print "" + print "[+] Found "tuple_count" unique tuples across "in_count" files." + + if (out_count == 1) { + print "[!] WARNING: All test cases had the same traces, check syntax!" + } + print "[+] Narrowed down to "out_count" files, saved in '"out_dir"'." + + if (!ENVIRON["AFL_KEEP_TRACES"]) { + system("rm -rf "trace_dir" 2>/dev/null") + } + + exit 0 +} -- cgit 1.4.1 From 7ce627c92e9b0536e254422d5ef604c3f58e43ce Mon Sep 17 00:00:00 2001 From: hexcoder- Date: Wed, 22 Jan 2020 18:38:41 +0100 Subject: Oops, only this version works with FreeBSD, OpenBSD, NetBSD, MacOS, raspbian --- afl-cmin.awk | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/afl-cmin.awk b/afl-cmin.awk index 021f7059..28c460e8 100755 --- a/afl-cmin.awk +++ b/afl-cmin.awk @@ -110,7 +110,6 @@ function exists_and_is_executable(binarypath) { BEGIN { print "corpus minimization tool for afl-fuzz++ (awk version)\n" -print "PATH="ENVIRON["PATH"] # defaults extra_par = "" @@ -302,7 +301,13 @@ print "PATH="ENVIRON["PATH"] # get list of input filenames sorted by size i = 0 - while ("find "in_dir" -type f -exec stat -f '%z %N' \{\} \; | sort -n | cut -d' ' -f2-" | getline) { + # yuck, gnu stat is incompatible to bsd stat + if ("stat --version 2>/dev/null" !~ /GNU coreutils/) { + stat_format = "-f '%z %N'" + } else { + stat_format = "-c '%s %n'" + } + while ("cd "in_dir" && find . 
-type f -exec stat "stat_format" \{\} \\; | sort -n | cut -d' ' -f2-" | getline) { infilesSmallToBig[i++] = $0 } in_count = i -- cgit 1.4.1 From ce0b9dae5971f22cd0ae0b468322f78ee2a8a766 Mon Sep 17 00:00:00 2001 From: hexcoder- Date: Wed, 22 Jan 2020 19:07:02 +0100 Subject: final step: rename afl-cmin to afl-cmin.bash and add a wrapper afl-cmin for afl-cmin.awk --- afl-cmin | 474 +--------------------------------------------------------- afl-cmin.bash | 470 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 474 insertions(+), 470 deletions(-) create mode 100755 afl-cmin.bash diff --git a/afl-cmin b/afl-cmin index 1dd782d8..75dc63a7 100755 --- a/afl-cmin +++ b/afl-cmin @@ -1,470 +1,4 @@ -#!/usr/bin/env bash -# -# american fuzzy lop++ - corpus minimization tool -# --------------------------------------------- -# -# Originally written by Michal Zalewski -# -# Copyright 2014, 2015 Google Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# This tool tries to find the smallest subset of files in the input directory -# that still trigger the full range of instrumentation data points seen in -# the starting corpus. This has two uses: -# -# - Screening large corpora of input files before using them as a seed for -# afl-fuzz. The tool will remove functionally redundant files and likely -# leave you with a much smaller set. -# -# (In this case, you probably also want to consider running afl-tmin on -# the individual files later on to reduce their size.) -# -# - Minimizing the corpus generated organically by afl-fuzz, perhaps when -# planning to feed it to more resource-intensive tools. The tool achieves -# this by removing all entries that used to trigger unique behaviors in the -# past, but have been made obsolete by later finds. 
-# -# Note that the tool doesn't modify the files themselves. For that, you want -# afl-tmin. -# -# This script must use bash because other shells may have hardcoded limits on -# array sizes. -# - -echo "corpus minimization tool for afl-fuzz by Michal Zalewski" -echo - -######### -# SETUP # -######### - -# Process command-line options... - -MEM_LIMIT=200 -TIMEOUT=none - -unset IN_DIR OUT_DIR STDIN_FILE EXTRA_PAR MEM_LIMIT_GIVEN \ - AFL_CMIN_CRASHES_ONLY AFL_CMIN_ALLOW_ANY QEMU_MODE UNICORN_MODE - -while getopts "+i:o:f:m:t:eQUCh" opt; do - - case "$opt" in - - "h") - ;; - - "i") - IN_DIR="$OPTARG" - ;; - - "o") - OUT_DIR="$OPTARG" - ;; - "f") - STDIN_FILE="$OPTARG" - ;; - "m") - MEM_LIMIT="$OPTARG" - MEM_LIMIT_GIVEN=1 - ;; - "t") - TIMEOUT="$OPTARG" - ;; - "e") - EXTRA_PAR="$EXTRA_PAR -e" - ;; - "C") - export AFL_CMIN_CRASHES_ONLY=1 - ;; - "Q") - EXTRA_PAR="$EXTRA_PAR -Q" - test "$MEM_LIMIT_GIVEN" = "" && MEM_LIMIT=250 - QEMU_MODE=1 - ;; - "U") - EXTRA_PAR="$EXTRA_PAR -U" - test "$MEM_LIMIT_GIVEN" = "" && MEM_LIMIT=250 - UNICORN_MODE=1 - ;; - "?") - exit 1 - ;; - - esac - -done - -shift $((OPTIND-1)) - -TARGET_BIN="$1" - -if [ "$TARGET_BIN" = "" -o "$IN_DIR" = "" -o "$OUT_DIR" = "" ]; then - - cat 1>&2 <<_EOF_ -Usage: $0 [ options ] -- /path/to/target_app [ ... ] - -Required parameters: - - -i dir - input directory with the starting corpus - -o dir - output directory for minimized files - -Execution control settings: - - -f file - location read by the fuzzed program (stdin) - -m megs - memory limit for child process ($MEM_LIMIT MB) - -t msec - run time limit for child process (none) - -Q - use binary-only instrumentation (QEMU mode) - -U - use unicorn-based instrumentation (Unicorn mode) - -Minimization settings: - - -C - keep crashing inputs, reject everything else - -e - solve for edge coverage only, ignore hit counts - -For additional tips, please consult docs/README. 
- -_EOF_ - exit 1 -fi - -# Do a sanity check to discourage the use of /tmp, since we can't really -# handle this safely from a shell script. - -if [ "$AFL_ALLOW_TMP" = "" ]; then - - echo "$IN_DIR" | grep -qE '^(/var)?/tmp/' - T1="$?" - - echo "$TARGET_BIN" | grep -qE '^(/var)?/tmp/' - T2="$?" - - echo "$OUT_DIR" | grep -qE '^(/var)?/tmp/' - T3="$?" - - echo "$STDIN_FILE" | grep -qE '^(/var)?/tmp/' - T4="$?" - - echo "$PWD" | grep -qE '^(/var)?/tmp/' - T5="$?" - - if [ "$T1" = "0" -o "$T2" = "0" -o "$T3" = "0" -o "$T4" = "0" -o "$T5" = "0" ]; then - echo "[-] Error: do not use this script in /tmp or /var/tmp." 1>&2 - exit 1 - fi - -fi - -# If @@ is specified, but there's no -f, let's come up with a temporary input -# file name. - -TRACE_DIR="$OUT_DIR/.traces" - -if [ "$STDIN_FILE" = "" ]; then - - if echo "$*" | grep -qF '@@'; then - STDIN_FILE="$TRACE_DIR/.cur_input" - fi - -fi - -# Check for obvious errors. - -if [ ! "$MEM_LIMIT" = "none" ]; then - - if [ "$MEM_LIMIT" -lt "5" ]; then - echo "[-] Error: dangerously low memory limit." 1>&2 - exit 1 - fi - -fi - -if [ ! "$TIMEOUT" = "none" ]; then - - if [ "$TIMEOUT" -lt "10" ]; then - echo "[-] Error: dangerously low timeout." 1>&2 - exit 1 - fi - -fi - -if [ ! -f "$TARGET_BIN" -o ! -x "$TARGET_BIN" ]; then - - TNEW="`which "$TARGET_BIN" 2>/dev/null`" - - if [ ! -f "$TNEW" -o ! -x "$TNEW" ]; then - echo "[-] Error: binary '$TARGET_BIN' not found or not executable." 1>&2 - exit 1 - fi - - TARGET_BIN="$TNEW" - -fi - -if [ "$AFL_SKIP_BIN_CHECK" = "" -a "$QEMU_MODE" = "" -a "$UNICORN_MODE" = "" ]; then - - if ! grep -qF "__AFL_SHM_ID" "$TARGET_BIN"; then - echo "[-] Error: binary '$TARGET_BIN' doesn't appear to be instrumented." 1>&2 - exit 1 - fi - -fi - -if [ ! -d "$IN_DIR" ]; then - echo "[-] Error: directory '$IN_DIR' not found." 
1>&2 - exit 1 -fi - -test -d "$IN_DIR/queue" && IN_DIR="$IN_DIR/queue" - -find "$OUT_DIR" -name 'id[:_]*' -maxdepth 1 -exec rm -- {} \; 2>/dev/null -rm -rf "$TRACE_DIR" 2>/dev/null - -rmdir "$OUT_DIR" 2>/dev/null - -if [ -d "$OUT_DIR" ]; then - echo "[-] Error: directory '$OUT_DIR' exists and is not empty - delete it first." 1>&2 - exit 1 -fi - -mkdir -m 700 -p "$TRACE_DIR" || exit 1 - -if [ ! "$STDIN_FILE" = "" ]; then - rm -f "$STDIN_FILE" || exit 1 - touch "$STDIN_FILE" || exit 1 -fi - -if [ "$AFL_PATH" = "" ]; then - SHOWMAP="${0%/afl-cmin}/afl-showmap" -else - SHOWMAP="$AFL_PATH/afl-showmap" -fi - -if [ ! -x "$SHOWMAP" ]; then - echo "[-] Error: can't find 'afl-showmap' - please set AFL_PATH." 1>&2 - rm -rf "$TRACE_DIR" - exit 1 -fi - -IN_COUNT=$((`ls -- "$IN_DIR" 2>/dev/null | wc -l`)) - -if [ "$IN_COUNT" = "0" ]; then - echo "[+] Hmm, no inputs in the target directory. Nothing to be done." - rm -rf "$TRACE_DIR" - exit 1 -fi - -FIRST_FILE=`ls "$IN_DIR" | head -1` - -# Make sure that we're not dealing with a directory. - -if [ -d "$IN_DIR/$FIRST_FILE" ]; then - echo "[-] Error: The target directory contains subdirectories - please fix." 1>&2 - rm -rf "$TRACE_DIR" - exit 1 -fi - -# Check for the more efficient way to copy files... - -if ln "$IN_DIR/$FIRST_FILE" "$TRACE_DIR/.link_test" 2>/dev/null; then - CP_TOOL=ln -else - CP_TOOL=cp -fi - -# Make sure that we can actually get anything out of afl-showmap before we -# waste too much time. - -echo "[*] Testing the target binary..." - -if [ "$STDIN_FILE" = "" ]; then - - AFL_CMIN_ALLOW_ANY=1 "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/.run_test" -Z $EXTRA_PAR -- "$@" <"$IN_DIR/$FIRST_FILE" - -else - - cp "$IN_DIR/$FIRST_FILE" "$STDIN_FILE" - AFL_CMIN_ALLOW_ANY=1 "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/.run_test" -Z $EXTRA_PAR -A "$STDIN_FILE" -- "$@" &2 - test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR" - exit 1 - -fi - -# Let's roll! 
- -############################# -# STEP 1: COLLECTING TRACES # -############################# - -echo "[*] Obtaining traces for input files in '$IN_DIR'..." - -( - - CUR=0 - - if [ "$STDIN_FILE" = "" ]; then - - ls "$IN_DIR" | while read -r fn; do - - CUR=$((CUR+1)) - printf "\\r Processing file $CUR/$IN_COUNT... " - - "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/$fn" -Z $EXTRA_PAR -- "$@" <"$IN_DIR/$fn" - - done - - else - - ls "$IN_DIR" | while read -r fn; do - - CUR=$((CUR+1)) - printf "\\r Processing file $CUR/$IN_COUNT... " - - cp "$IN_DIR/$fn" "$STDIN_FILE" - - "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/$fn" -Z $EXTRA_PAR -A "$STDIN_FILE" -- "$@" "$TRACE_DIR/.all_uniq" - -TUPLE_COUNT=$((`grep -c . "$TRACE_DIR/.all_uniq"`)) - -echo "[+] Found $TUPLE_COUNT unique tuples across $IN_COUNT files." - -##################################### -# STEP 3: SELECTING CANDIDATE FILES # -##################################### - -# The next step is to find the best candidate for each tuple. The "best" -# part is understood simply as the smallest input that includes a particular -# tuple in its trace. Empirical evidence suggests that this produces smaller -# datasets than more involved algorithms that could be still pulled off in -# a shell script. - -echo "[*] Finding best candidates for each tuple..." - -CUR=0 - -ls -rS "$IN_DIR" | while read -r fn; do - - CUR=$((CUR+1)) - printf "\\r Processing file $CUR/$IN_COUNT... " - - sed "s#\$# $fn#" "$TRACE_DIR/$fn" >>"$TRACE_DIR/.candidate_list" - -done - -echo - -############################## -# STEP 4: LOADING CANDIDATES # -############################## - -# At this point, we have a file of tuple-file pairs, sorted by file size -# in ascending order (as a consequence of ls -rS). By doing sort keyed -# only by tuple (-k 1,1) and configured to output only the first line for -# every key (-s -u), we end up with the smallest file for each tuple. - -echo "[*] Sorting candidate list (be patient)..." 
- -sort -k1,1 -s -u "$TRACE_DIR/.candidate_list" | \ - sed 's/^/BEST_FILE[/;s/ /]="/;s/$/"/' >"$TRACE_DIR/.candidate_script" - -if [ ! -s "$TRACE_DIR/.candidate_script" ]; then - echo "[-] Error: no traces obtained from test cases, check syntax!" 1>&2 - test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR" - exit 1 -fi - -# The sed command converted the sorted list to a shell script that populates -# BEST_FILE[tuple]="fname". Let's load that! - -. "$TRACE_DIR/.candidate_script" - -########################## -# STEP 5: WRITING OUTPUT # -########################## - -# The final trick is to grab the top pick for each tuple, unless said tuple is -# already set due to the inclusion of an earlier candidate; and then put all -# tuples associated with the newly-added file to the "already have" list. The -# loop works from least popular tuples and toward the most common ones. - -echo "[*] Processing candidates and writing output files..." - -CUR=0 - -touch "$TRACE_DIR/.already_have" - -while read -r cnt tuple; do - - CUR=$((CUR+1)) - printf "\\r Processing tuple $CUR/$TUPLE_COUNT... " - - # If we already have this tuple, skip it. - - grep -q "^$tuple\$" "$TRACE_DIR/.already_have" && continue - - FN=${BEST_FILE[tuple]} - - $CP_TOOL "$IN_DIR/$FN" "$OUT_DIR/$FN" - - if [ "$((CUR % 5))" = "0" ]; then - sort -u "$TRACE_DIR/$FN" "$TRACE_DIR/.already_have" >"$TRACE_DIR/.tmp" - mv -f "$TRACE_DIR/.tmp" "$TRACE_DIR/.already_have" - else - cat "$TRACE_DIR/$FN" >>"$TRACE_DIR/.already_have" - fi - -done <"$TRACE_DIR/.all_uniq" - -echo - -OUT_COUNT=`ls -- "$OUT_DIR" | wc -l` - -if [ "$OUT_COUNT" = "1" ]; then - echo "[!] WARNING: All test cases had the same traces, check syntax!" -fi - -echo "[+] Narrowed down to $OUT_COUNT files, saved in '$OUT_DIR'." 
-echo - -test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR" - -exit 0 +#!/usr/bin/env sh +THISPATH=`dirname ${0}` +export PATH=${THISPATH}:$PATH +awk -f ${0}.awk -- ${@+"$@"} diff --git a/afl-cmin.bash b/afl-cmin.bash new file mode 100755 index 00000000..1dd782d8 --- /dev/null +++ b/afl-cmin.bash @@ -0,0 +1,470 @@ +#!/usr/bin/env bash +# +# american fuzzy lop++ - corpus minimization tool +# --------------------------------------------- +# +# Originally written by Michal Zalewski +# +# Copyright 2014, 2015 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# This tool tries to find the smallest subset of files in the input directory +# that still trigger the full range of instrumentation data points seen in +# the starting corpus. This has two uses: +# +# - Screening large corpora of input files before using them as a seed for +# afl-fuzz. The tool will remove functionally redundant files and likely +# leave you with a much smaller set. +# +# (In this case, you probably also want to consider running afl-tmin on +# the individual files later on to reduce their size.) +# +# - Minimizing the corpus generated organically by afl-fuzz, perhaps when +# planning to feed it to more resource-intensive tools. The tool achieves +# this by removing all entries that used to trigger unique behaviors in the +# past, but have been made obsolete by later finds. +# +# Note that the tool doesn't modify the files themselves. For that, you want +# afl-tmin. +# +# This script must use bash because other shells may have hardcoded limits on +# array sizes. +# + +echo "corpus minimization tool for afl-fuzz by Michal Zalewski" +echo + +######### +# SETUP # +######### + +# Process command-line options... 
+ +MEM_LIMIT=200 +TIMEOUT=none + +unset IN_DIR OUT_DIR STDIN_FILE EXTRA_PAR MEM_LIMIT_GIVEN \ + AFL_CMIN_CRASHES_ONLY AFL_CMIN_ALLOW_ANY QEMU_MODE UNICORN_MODE + +while getopts "+i:o:f:m:t:eQUCh" opt; do + + case "$opt" in + + "h") + ;; + + "i") + IN_DIR="$OPTARG" + ;; + + "o") + OUT_DIR="$OPTARG" + ;; + "f") + STDIN_FILE="$OPTARG" + ;; + "m") + MEM_LIMIT="$OPTARG" + MEM_LIMIT_GIVEN=1 + ;; + "t") + TIMEOUT="$OPTARG" + ;; + "e") + EXTRA_PAR="$EXTRA_PAR -e" + ;; + "C") + export AFL_CMIN_CRASHES_ONLY=1 + ;; + "Q") + EXTRA_PAR="$EXTRA_PAR -Q" + test "$MEM_LIMIT_GIVEN" = "" && MEM_LIMIT=250 + QEMU_MODE=1 + ;; + "U") + EXTRA_PAR="$EXTRA_PAR -U" + test "$MEM_LIMIT_GIVEN" = "" && MEM_LIMIT=250 + UNICORN_MODE=1 + ;; + "?") + exit 1 + ;; + + esac + +done + +shift $((OPTIND-1)) + +TARGET_BIN="$1" + +if [ "$TARGET_BIN" = "" -o "$IN_DIR" = "" -o "$OUT_DIR" = "" ]; then + + cat 1>&2 <<_EOF_ +Usage: $0 [ options ] -- /path/to/target_app [ ... ] + +Required parameters: + + -i dir - input directory with the starting corpus + -o dir - output directory for minimized files + +Execution control settings: + + -f file - location read by the fuzzed program (stdin) + -m megs - memory limit for child process ($MEM_LIMIT MB) + -t msec - run time limit for child process (none) + -Q - use binary-only instrumentation (QEMU mode) + -U - use unicorn-based instrumentation (Unicorn mode) + +Minimization settings: + + -C - keep crashing inputs, reject everything else + -e - solve for edge coverage only, ignore hit counts + +For additional tips, please consult docs/README. + +_EOF_ + exit 1 +fi + +# Do a sanity check to discourage the use of /tmp, since we can't really +# handle this safely from a shell script. + +if [ "$AFL_ALLOW_TMP" = "" ]; then + + echo "$IN_DIR" | grep -qE '^(/var)?/tmp/' + T1="$?" + + echo "$TARGET_BIN" | grep -qE '^(/var)?/tmp/' + T2="$?" + + echo "$OUT_DIR" | grep -qE '^(/var)?/tmp/' + T3="$?" + + echo "$STDIN_FILE" | grep -qE '^(/var)?/tmp/' + T4="$?" 
+ + echo "$PWD" | grep -qE '^(/var)?/tmp/' + T5="$?" + + if [ "$T1" = "0" -o "$T2" = "0" -o "$T3" = "0" -o "$T4" = "0" -o "$T5" = "0" ]; then + echo "[-] Error: do not use this script in /tmp or /var/tmp." 1>&2 + exit 1 + fi + +fi + +# If @@ is specified, but there's no -f, let's come up with a temporary input +# file name. + +TRACE_DIR="$OUT_DIR/.traces" + +if [ "$STDIN_FILE" = "" ]; then + + if echo "$*" | grep -qF '@@'; then + STDIN_FILE="$TRACE_DIR/.cur_input" + fi + +fi + +# Check for obvious errors. + +if [ ! "$MEM_LIMIT" = "none" ]; then + + if [ "$MEM_LIMIT" -lt "5" ]; then + echo "[-] Error: dangerously low memory limit." 1>&2 + exit 1 + fi + +fi + +if [ ! "$TIMEOUT" = "none" ]; then + + if [ "$TIMEOUT" -lt "10" ]; then + echo "[-] Error: dangerously low timeout." 1>&2 + exit 1 + fi + +fi + +if [ ! -f "$TARGET_BIN" -o ! -x "$TARGET_BIN" ]; then + + TNEW="`which "$TARGET_BIN" 2>/dev/null`" + + if [ ! -f "$TNEW" -o ! -x "$TNEW" ]; then + echo "[-] Error: binary '$TARGET_BIN' not found or not executable." 1>&2 + exit 1 + fi + + TARGET_BIN="$TNEW" + +fi + +if [ "$AFL_SKIP_BIN_CHECK" = "" -a "$QEMU_MODE" = "" -a "$UNICORN_MODE" = "" ]; then + + if ! grep -qF "__AFL_SHM_ID" "$TARGET_BIN"; then + echo "[-] Error: binary '$TARGET_BIN' doesn't appear to be instrumented." 1>&2 + exit 1 + fi + +fi + +if [ ! -d "$IN_DIR" ]; then + echo "[-] Error: directory '$IN_DIR' not found." 1>&2 + exit 1 +fi + +test -d "$IN_DIR/queue" && IN_DIR="$IN_DIR/queue" + +find "$OUT_DIR" -name 'id[:_]*' -maxdepth 1 -exec rm -- {} \; 2>/dev/null +rm -rf "$TRACE_DIR" 2>/dev/null + +rmdir "$OUT_DIR" 2>/dev/null + +if [ -d "$OUT_DIR" ]; then + echo "[-] Error: directory '$OUT_DIR' exists and is not empty - delete it first." 1>&2 + exit 1 +fi + +mkdir -m 700 -p "$TRACE_DIR" || exit 1 + +if [ ! 
"$STDIN_FILE" = "" ]; then + rm -f "$STDIN_FILE" || exit 1 + touch "$STDIN_FILE" || exit 1 +fi + +if [ "$AFL_PATH" = "" ]; then + SHOWMAP="${0%/afl-cmin}/afl-showmap" +else + SHOWMAP="$AFL_PATH/afl-showmap" +fi + +if [ ! -x "$SHOWMAP" ]; then + echo "[-] Error: can't find 'afl-showmap' - please set AFL_PATH." 1>&2 + rm -rf "$TRACE_DIR" + exit 1 +fi + +IN_COUNT=$((`ls -- "$IN_DIR" 2>/dev/null | wc -l`)) + +if [ "$IN_COUNT" = "0" ]; then + echo "[+] Hmm, no inputs in the target directory. Nothing to be done." + rm -rf "$TRACE_DIR" + exit 1 +fi + +FIRST_FILE=`ls "$IN_DIR" | head -1` + +# Make sure that we're not dealing with a directory. + +if [ -d "$IN_DIR/$FIRST_FILE" ]; then + echo "[-] Error: The target directory contains subdirectories - please fix." 1>&2 + rm -rf "$TRACE_DIR" + exit 1 +fi + +# Check for the more efficient way to copy files... + +if ln "$IN_DIR/$FIRST_FILE" "$TRACE_DIR/.link_test" 2>/dev/null; then + CP_TOOL=ln +else + CP_TOOL=cp +fi + +# Make sure that we can actually get anything out of afl-showmap before we +# waste too much time. + +echo "[*] Testing the target binary..." + +if [ "$STDIN_FILE" = "" ]; then + + AFL_CMIN_ALLOW_ANY=1 "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/.run_test" -Z $EXTRA_PAR -- "$@" <"$IN_DIR/$FIRST_FILE" + +else + + cp "$IN_DIR/$FIRST_FILE" "$STDIN_FILE" + AFL_CMIN_ALLOW_ANY=1 "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/.run_test" -Z $EXTRA_PAR -A "$STDIN_FILE" -- "$@" &2 + test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR" + exit 1 + +fi + +# Let's roll! + +############################# +# STEP 1: COLLECTING TRACES # +############################# + +echo "[*] Obtaining traces for input files in '$IN_DIR'..." + +( + + CUR=0 + + if [ "$STDIN_FILE" = "" ]; then + + ls "$IN_DIR" | while read -r fn; do + + CUR=$((CUR+1)) + printf "\\r Processing file $CUR/$IN_COUNT... 
" + + "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/$fn" -Z $EXTRA_PAR -- "$@" <"$IN_DIR/$fn" + + done + + else + + ls "$IN_DIR" | while read -r fn; do + + CUR=$((CUR+1)) + printf "\\r Processing file $CUR/$IN_COUNT... " + + cp "$IN_DIR/$fn" "$STDIN_FILE" + + "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/$fn" -Z $EXTRA_PAR -A "$STDIN_FILE" -- "$@" "$TRACE_DIR/.all_uniq" + +TUPLE_COUNT=$((`grep -c . "$TRACE_DIR/.all_uniq"`)) + +echo "[+] Found $TUPLE_COUNT unique tuples across $IN_COUNT files." + +##################################### +# STEP 3: SELECTING CANDIDATE FILES # +##################################### + +# The next step is to find the best candidate for each tuple. The "best" +# part is understood simply as the smallest input that includes a particular +# tuple in its trace. Empirical evidence suggests that this produces smaller +# datasets than more involved algorithms that could be still pulled off in +# a shell script. + +echo "[*] Finding best candidates for each tuple..." + +CUR=0 + +ls -rS "$IN_DIR" | while read -r fn; do + + CUR=$((CUR+1)) + printf "\\r Processing file $CUR/$IN_COUNT... " + + sed "s#\$# $fn#" "$TRACE_DIR/$fn" >>"$TRACE_DIR/.candidate_list" + +done + +echo + +############################## +# STEP 4: LOADING CANDIDATES # +############################## + +# At this point, we have a file of tuple-file pairs, sorted by file size +# in ascending order (as a consequence of ls -rS). By doing sort keyed +# only by tuple (-k 1,1) and configured to output only the first line for +# every key (-s -u), we end up with the smallest file for each tuple. + +echo "[*] Sorting candidate list (be patient)..." + +sort -k1,1 -s -u "$TRACE_DIR/.candidate_list" | \ + sed 's/^/BEST_FILE[/;s/ /]="/;s/$/"/' >"$TRACE_DIR/.candidate_script" + +if [ ! -s "$TRACE_DIR/.candidate_script" ]; then + echo "[-] Error: no traces obtained from test cases, check syntax!" 
1>&2 + test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR" + exit 1 +fi + +# The sed command converted the sorted list to a shell script that populates +# BEST_FILE[tuple]="fname". Let's load that! + +. "$TRACE_DIR/.candidate_script" + +########################## +# STEP 5: WRITING OUTPUT # +########################## + +# The final trick is to grab the top pick for each tuple, unless said tuple is +# already set due to the inclusion of an earlier candidate; and then put all +# tuples associated with the newly-added file to the "already have" list. The +# loop works from least popular tuples and toward the most common ones. + +echo "[*] Processing candidates and writing output files..." + +CUR=0 + +touch "$TRACE_DIR/.already_have" + +while read -r cnt tuple; do + + CUR=$((CUR+1)) + printf "\\r Processing tuple $CUR/$TUPLE_COUNT... " + + # If we already have this tuple, skip it. + + grep -q "^$tuple\$" "$TRACE_DIR/.already_have" && continue + + FN=${BEST_FILE[tuple]} + + $CP_TOOL "$IN_DIR/$FN" "$OUT_DIR/$FN" + + if [ "$((CUR % 5))" = "0" ]; then + sort -u "$TRACE_DIR/$FN" "$TRACE_DIR/.already_have" >"$TRACE_DIR/.tmp" + mv -f "$TRACE_DIR/.tmp" "$TRACE_DIR/.already_have" + else + cat "$TRACE_DIR/$FN" >>"$TRACE_DIR/.already_have" + fi + +done <"$TRACE_DIR/.all_uniq" + +echo + +OUT_COUNT=`ls -- "$OUT_DIR" | wc -l` + +if [ "$OUT_COUNT" = "1" ]; then + echo "[!] WARNING: All test cases had the same traces, check syntax!" +fi + +echo "[+] Narrowed down to $OUT_COUNT files, saved in '$OUT_DIR'." 
+echo + +test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR" + +exit 0 -- cgit 1.4.1 From 9da167dffdc14468d17ac3c1c942e483baf17433 Mon Sep 17 00:00:00 2001 From: van Hauser Date: Wed, 22 Jan 2020 21:08:47 +0100 Subject: fix for modern linux --- afl-cmin.awk | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/afl-cmin.awk b/afl-cmin.awk index 28c460e8..fcdfb71f 100755 --- a/afl-cmin.awk +++ b/afl-cmin.awk @@ -109,7 +109,7 @@ function exists_and_is_executable(binarypath) { } BEGIN { - print "corpus minimization tool for afl-fuzz++ (awk version)\n" + print "corpus minimization tool for afl++ (awk version)\n" # defaults extra_par = "" @@ -289,7 +289,7 @@ BEGIN { } else { "which afl-showmap 2>/dev/null" | getline path } - showmap = path + showmap = path "/afl-showmap" } else { showmap = ENVIRON["AFL_PATH"] "/afl-showmap" } @@ -303,11 +303,12 @@ BEGIN { i = 0 # yuck, gnu stat is incompatible to bsd stat if ("stat --version 2>/dev/null" !~ /GNU coreutils/) { - stat_format = "-f '%z %N'" - } else { + # I dont get it why this does not work, output is "stat (GNU coreutils) 8.30" and still it goes here ... stat_format = "-c '%s %n'" + } else { + stat_format = "-f '%z %N'" } - while ("cd "in_dir" && find . -type f -exec stat "stat_format" \{\} \\; | sort -n | cut -d' ' -f2-" | getline) { + while ("cd "in_dir" && find . 
-type f -exec stat "stat_format" \\{\\} \\; | sort -n | cut -d' ' -f2-" | getline) { infilesSmallToBig[i++] = $0 } in_count = i -- cgit 1.4.1 From c51f89b58e56338a5a430344548d1385432d173e Mon Sep 17 00:00:00 2001 From: hexcoder- Date: Wed, 22 Jan 2020 21:50:35 +0100 Subject: rectification of vanhauser's fix, made it a bit more robust, enabled error output for travis debugging --- afl-cmin.awk | 40 ++++++++++++++++++++++++++++++++-------- test/test.sh | 2 +- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/afl-cmin.awk b/afl-cmin.awk index fcdfb71f..967c4e87 100755 --- a/afl-cmin.awk +++ b/afl-cmin.awk @@ -1,5 +1,28 @@ #!/usr/bin/awk -f +# awk script to minimize a test corpus of input files +# +# based on afl-cmin bash script written by Michal Zalewski +# rewritten by Heiko Eißfeldt (hexcoder-) +# +# uses getopt.awk package from Arnold Robbins +# +# external tools used by this script: +# test +# grep +# rm +# mkdir +# ln +# cp +# pwd +# which +# cd +# find +# stat +# sort +# cut +# and afl-showmap from this project :-) + # getopt.awk --- Do C library getopt(3) function in awk # External variables: @@ -285,11 +308,10 @@ BEGIN { if (!ENVIRON["AFL_PATH"]) { if (0 == system("test -f afl-cmin.awk")) { - path = "." + showmap = "./afl-showmap" } else { - "which afl-showmap 2>/dev/null" | getline path + "which afl-showmap 2>/dev/null" | getline showmap } - showmap = path "/afl-showmap" } else { showmap = ENVIRON["AFL_PATH"] "/afl-showmap" } @@ -301,12 +323,14 @@ BEGIN { # get list of input filenames sorted by size i = 0 - # yuck, gnu stat is incompatible to bsd stat - if ("stat --version 2>/dev/null" !~ /GNU coreutils/) { - # I dont get it why this does not work, output is "stat (GNU coreutils) 8.30" and still it goes here ... 
- stat_format = "-c '%s %n'" + # yuck, gnu stat is option incompatible to bsd stat + # we use a heuristic to differentiate between + # GNU stat and other stats + "stat --version 2>/dev/null" | getline statversion + if (statversion ~ /GNU coreutils/) { + stat_format = "-c '%s %n'" # GNU } else { - stat_format = "-f '%z %N'" + stat_format = "-f '%z %N'" # *BSD, MacOS } while ("cd "in_dir" && find . -type f -exec stat "stat_format" \\{\\} \\; | sort -n | cut -d' ' -f2-" | getline) { infilesSmallToBig[i++] = $0 diff --git a/test/test.sh b/test/test.sh index 0ae6fd09..cc7fe224 100755 --- a/test/test.sh +++ b/test/test.sh @@ -150,7 +150,7 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" && { } echo 000000000000000000000000 > in/in2 mkdir -p in2 - ../afl-cmin -i in -o in2 -- ./test-instr.plain > /dev/null 2>&1 + ../afl-cmin -i in -o in2 -- ./test-instr.plain CNT=`ls in2/ | wc -l` case "$CNT" in 1| *1) $ECHO "$GREEN[+] afl-cmin correctly minimized testcase numbers" ;; -- cgit 1.4.1 From 7e7ab8f5415409fd1bb643f4dfef44c5a3935006 Mon Sep 17 00:00:00 2001 From: hexcoder Date: Wed, 22 Jan 2020 22:24:00 +0100 Subject: Update binaryonly_fuzzing.txt --- docs/binaryonly_fuzzing.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/binaryonly_fuzzing.txt b/docs/binaryonly_fuzzing.txt index 239fb4b0..f8d68cd8 100644 --- a/docs/binaryonly_fuzzing.txt +++ b/docs/binaryonly_fuzzing.txt @@ -5,10 +5,10 @@ Fuzzing binary-only programs with afl++ afl++, libfuzzer and others are great if you have the source code, and it allows for very fast and coverage guided fuzzing. -However, if there is only the binary program and not source code available, -then standard afl++ (dumb mode) is not effective. +However, if there is only the binary program and no source code available, +then standard `afl-fuzz -n` (dumb mode) is not effective. 
-The following is a description of how these can be fuzzed with afl++ +The following is a description of how these binaries can be fuzzed with afl++ !!!!! TL;DR: try DYNINST with afl-dyninst. If it produces too many crashes then @@ -28,7 +28,7 @@ As it is included in afl++ this needs no URL. WINE+QEMU --------- -Wine mode can run Win32 PE with the QEMU instrumentation. +Wine mode can run Win32 PE binaries with the QEMU instrumentation. It needs Wine, python3 and the pefile python package installed. UNICORN @@ -37,7 +37,7 @@ Unicorn is a fork of QEMU. The instrumentation is, therefore, very similar. In contrast to QEMU, Unicorn does not offer a full system or even userland emulation. Runtime environment and/or loaders have to be written from scratch, if needed. On top, block chaining has been removed. This means the speed boost introduced in -to the patched QEMU Mode of afl++ cannot simply be ported over to Unicorn. +the patched QEMU Mode of afl++ cannot simply be ported over to Unicorn. For further information, check out ./unicorn_mode.txt. 
-- cgit 1.4.1 From c490b9aa3694ba9c33ba0657ddd5e19dd979f2ed Mon Sep 17 00:00:00 2001 From: hexcoder Date: Thu, 23 Jan 2020 09:11:35 +0100 Subject: afl-cmin debugging is done now, so suppress stdout messages again (but not stderr) --- test/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test.sh b/test/test.sh index cc7fe224..3473155f 100755 --- a/test/test.sh +++ b/test/test.sh @@ -150,7 +150,7 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" && { } echo 000000000000000000000000 > in/in2 mkdir -p in2 - ../afl-cmin -i in -o in2 -- ./test-instr.plain + ../afl-cmin -i in -o in2 -- ./test-instr.plain > /dev/null CNT=`ls in2/ | wc -l` case "$CNT" in 1| *1) $ECHO "$GREEN[+] afl-cmin correctly minimized testcase numbers" ;; -- cgit 1.4.1 From 8b17cac71c5196bae11a5a7ee8f6a17bdb3917e0 Mon Sep 17 00:00:00 2001 From: hexcoder Date: Thu, 23 Jan 2020 09:46:07 +0100 Subject: add socket_fuzz description --- experimental/README.experiments | 3 +++ 1 file changed, 3 insertions(+) diff --git a/experimental/README.experiments b/experimental/README.experiments index af9739bd..5a505ad7 100644 --- a/experimental/README.experiments +++ b/experimental/README.experiments @@ -28,6 +28,9 @@ Here's a quick overview of the stuff you can find in this directory: mode to speed up certain fuzzing jobs. - post_library - an example of how to build postprocessors for AFL. + + - socketfuzz - a LD_PRELOAD library 'redirects' a socket to stdin + for fuzzing access with afl++ Note that the minimize_corpus.sh tool has graduated from the experimental/ directory and is now available as ../afl-cmin. 
The LLVM mode has likewise -- cgit 1.4.1 From a58800b90122f3d612a0badb243d2c1b6fc9c742 Mon Sep 17 00:00:00 2001 From: hexcoder Date: Thu, 23 Jan 2020 09:46:59 +0100 Subject: typo --- experimental/README.experiments | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experimental/README.experiments b/experimental/README.experiments index 5a505ad7..543c078c 100644 --- a/experimental/README.experiments +++ b/experimental/README.experiments @@ -29,7 +29,7 @@ Here's a quick overview of the stuff you can find in this directory: - post_library - an example of how to build postprocessors for AFL. - - socketfuzz - a LD_PRELOAD library 'redirects' a socket to stdin + - socket_fuzzing - a LD_PRELOAD library 'redirects' a socket to stdin for fuzzing access with afl++ Note that the minimize_corpus.sh tool has graduated from the experimental/ -- cgit 1.4.1 From e7c95ebf5a4828b662252b10052a89923dd25030 Mon Sep 17 00:00:00 2001 From: van Hauser Date: Thu, 23 Jan 2020 10:15:33 +0100 Subject: afl-cmin final touches --- Makefile | 2 +- afl-cmin | 473 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- afl-cmin.awk | 470 -------------------------------------------------------- docs/ChangeLog | 2 + test/test.sh | 2 +- 5 files changed, 476 insertions(+), 473 deletions(-) delete mode 100755 afl-cmin.awk diff --git a/Makefile b/Makefile index 7260ee47..459cae5f 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ VERSION = $(shell grep '^\#define VERSION ' ../config.h | cut -d '"' -f2) # PROGS intentionally omit afl-as, which gets installed elsewhere. 
PROGS = afl-gcc afl-fuzz afl-showmap afl-tmin afl-gotcpu afl-analyze -SH_PROGS = afl-plot afl-cmin afl-whatsup afl-system-config +SH_PROGS = afl-plot afl-cmin afl-cmin.bash afl-whatsup afl-system-config MANPAGES=$(foreach p, $(PROGS) $(SH_PROGS), $(p).8) ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -flto=full -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1" diff --git a/afl-cmin b/afl-cmin index 75dc63a7..a072a62a 100755 --- a/afl-cmin +++ b/afl-cmin @@ -1,4 +1,475 @@ #!/usr/bin/env sh THISPATH=`dirname ${0}` export PATH=${THISPATH}:$PATH -awk -f ${0}.awk -- ${@+"$@"} +awk -f - -- ${@+"$@"} <<'EOF' +#!/usr/bin/awk -f + +# awk script to minimize a test corpus of input files +# +# based on afl-cmin bash script written by Michal Zalewski +# rewritten by Heiko Eißfeldt (hexcoder-) +# +# uses getopt.awk package from Arnold Robbins +# +# external tools used by this script: +# test +# grep +# rm +# mkdir +# ln +# cp +# pwd +# which +# cd +# find +# stat +# sort +# cut +# and afl-showmap from this project :-) + +# getopt.awk --- Do C library getopt(3) function in awk + +# External variables: +# Optind -- index in ARGV of first nonoption argument +# Optarg -- string value of argument to current option +# Opterr -- if nonzero, print our own diagnostic +# Optopt -- current option letter + +# Returns: +# -1 at end of options +# "?" 
for unrecognized option +# a character representing the current option + +# Private Data: +# _opti -- index in multiflag option, e.g., -abc + +function getopt(argc, argv, options, thisopt, i) +{ + if (length(options) == 0) # no options given + return -1 + + if (argv[Optind] == "--") { # all done + Optind++ + _opti = 0 + return -1 + } else if (argv[Optind] !~ /^-[^:[:space:]]/) { + _opti = 0 + return -1 + } + if (_opti == 0) + _opti = 2 + thisopt = substr(argv[Optind], _opti, 1) + Optopt = thisopt + i = index(options, thisopt) + if (i == 0) { + if (Opterr) + printf("%c -- invalid option\n", thisopt) > "/dev/stderr" + if (_opti >= length(argv[Optind])) { + Optind++ + _opti = 0 + } else + _opti++ + return "?" + } + if (substr(options, i + 1, 1) == ":") { + # get option argument + if (length(substr(argv[Optind], _opti + 1)) > 0) + Optarg = substr(argv[Optind], _opti + 1) + else + Optarg = argv[++Optind] + _opti = 0 + } else + Optarg = "" + if (_opti == 0 || _opti >= length(argv[Optind])) { + Optind++ + _opti = 0 + } else + _opti++ + return thisopt +} + +BEGIN { + Opterr = 1 # default is to diagnose + Optind = 1 # skip ARGV[0] + + # test program + if (_getopt_test) { + while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1) + printf("c = <%c>, Optarg = <%s>\n", + _go_c, Optarg) + printf("non-option arguments:\n") + for (; Optind < ARGC; Optind++) + printf("\tARGV[%d] = <%s>\n", + Optind, ARGV[Optind]) + } +} + +function usage() { + print \ +"Usage: afl-cmin [ options ] -- /path/to/target_app [ ... 
]\n" \ +"\n" \ +"Required parameters:\n" \ +"\n" \ +" -i dir - input directory with starting corpus\n" \ +" -o dir - output directory for minimized files\n" \ +"\n" \ +"Execution control settings:\n" \ +"\n" \ +" -f file - location read by the fuzzed program (stdin)\n" \ +" -m megs - memory limit for child process ("mem_limit" MB)\n" \ +" -t msec - run time limit for child process (none)\n" \ +" -Q - use binary-only instrumentation (QEMU mode)\n" \ +" -U - use unicorn-based instrumentation (unicorn mode)\n" \ +"\n" \ +"Minimization settings:\n" \ +" -C - keep crashing inputs, reject everything else\n" \ +" -e - solve for edge coverage only, ignore hit counts\n" \ +"\n" \ +"For additional tips, please consult docs/README.md\n" \ +"\n" \ + > "/dev/stderr" + exit 1 +} + +function exists_and_is_executable(binarypath) { + return 0 == system("test -f "binarypath" -a -x "binarypath) +} + +BEGIN { + print "corpus minimization tool for afl++ (awk version)\n" + + # defaults + extra_par = "" + # process options + Opterr = 1 # default is to diagnose + Optind = 1 # skip ARGV[0] + while ((_go_c = getopt(ARGC, ARGV, "hi:o:f:m:t:eCQU?")) != -1) { + if (_go_c == "i") { + if (!Optarg) usage() + if (in_dir) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} + in_dir = Optarg + continue + } else + if (_go_c == "o") { + if (!Optarg) usage() + if (out_dir) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} + out_dir = Optarg + continue + } else + if (_go_c == "f") { + if (!Optarg) usage() + if (stdin_file) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} + stdin_file = Optarg + continue + } else + if (_go_c == "m") { + if (!Optarg) usage() + if (mem_limit) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} + mem_limit = Optarg + mem_limit_given = 1 + continue + } else + if (_go_c == "t") { + if (!Optarg) usage() + if (timeout) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} + timeout = Optarg + continue + } else + if 
(_go_c == "C") { + ENVIRON["AFL_CMIN_CRASHES_ONLY"] = 1 + continue + } else + if (_go_c == "e") { + extra_par = extra_par " -e" + continue + } else + if (_go_c == "Q") { + if (qemu_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} + extra_par = extra_par " -Q" + if ( !mem_limit_given ) mem_limit = "250" + qemu_mode = 1 + continue + } else + if (_go_c == "U") { + if (unicorn_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} + extra_par = extra_par " -U" + if ( !mem_limit_given ) mem_limit = "250" + unicorn_mode = 1 + continue + } else + if (_go_c == "?") { + exit 1 + } else + usage() + } # while options + + if (!mem_limit) mem_limit = 200 + if (!timeout) timeout = "none" + + # get program args + i = 0 + prog_args_string = "" + for (; Optind < ARGC; Optind++) { + prog_args[i++] = ARGV[Optind] + if (i > 1) + prog_args_string = prog_args_string" "ARGV[Optind] + } + + # sanity checks + if (!prog_args[0] || !in_dir || !out_dir) usage() + + target_bin = prog_args[0] + + # Do a sanity check to discourage the use of /tmp, since we can't really + # handle this safely from an awk script. + + if (!ENVIRON["AFL_ALLOW_TMP"]) { + dirlist[0] = in_dir + dirlist[1] = target_bin + dirlist[2] = out_dir + dirlist[3] = stdin_file + "pwd" | getline dirlist[4] # current directory + for (dirind in dirlist) { + dir = dirlist[dirind] + + if (dir ~ /^(\/var)?\/tmp/) { + print "[-] Error: do not use this script in /tmp or /var/tmp." > "/dev/stderr" + exit 1 + } + } + delete dirlist + } + + # If @@ is specified, but there's no -f, let's come up with a temporary input + # file name. + + trace_dir = out_dir "/.traces" + + if (!stdin_file) { + found_atat = 0 + for (prog_args_ind in prog_args) { + if ("@@" == prog_args[prog_args_ind]) { + found_atat = 1 + break + } + } + if (found_atat) { + stdin_file = trace_dir "/.cur_input" + } + } + + # Check for obvious errors. 
+ + if (mem_limit && mem_limit != "none" && mem_limit < 5) { + print "[-] Error: dangerously low memory limit." > "/dev/stderr" + exit 1 + } + + if (timeout && timeout != "none" && timeout < 10) { + print "[-] Error: dangerously low timeout." > "/dev/stderr" + exit 1 + } + + if (target_bin && !exists_and_is_executable(target_bin)) { + + "which "target_bin" 2>/dev/null" | getline tnew + if (!tnew || !exists_and_is_executable(tnew)) { + print "[-] Error: binary '"target_bin"' not found or not executable." > "/dev/stderr" + exit 1 + } + target_bin = tnew + } + + if (!ENVIRON["AFL_SKIP_BIN_CHECK"] && !qemu_mode && !unicorn_mode) { + if (0 != system( "grep -q __AFL_SHM_ID "target_bin )) { + print "[-] Error: binary '"target_bin"' doesn't appear to be instrumented." > "/dev/stderr" + exit 1 + } + } + + if (0 != system( "test -d "in_dir )) { + print "[-] Error: directory '"in_dir"' not found." > "/dev/stderr" + exit 1 + } + + if (0 == system( "test -d "in_dir"/queue" )) { + in_dir = in_dir "/queue" + } + + system("rm -rf "trace_dir" 2>/dev/null"); + system("rm "out_dir"/id[:_]* 2>/dev/null") + + if (0 == system( "test -d "out_dir" -a -e "out_dir"/*" )) { + print "[-] Error: directory '"out_dir"' exists and is not empty - delete it first." > "/dev/stderr" + exit 1 + } + + if (stdin_file) { + # truncate input file + printf "" > stdin_file + close( stdin_file ) + } + + if (!ENVIRON["AFL_PATH"]) { + if (0 == system("test -f afl-cmin.awk")) { + showmap = "./afl-showmap" + } else { + "which afl-showmap 2>/dev/null" | getline showmap + } + } else { + showmap = ENVIRON["AFL_PATH"] "/afl-showmap" + } + + if (!showmap || 0 != system("test -x "showmap )) { + print "[-] Error: can't find 'afl-showmap' - please set AFL_PATH." 
> "/dev/stderr" + exit 1 + } + + # get list of input filenames sorted by size + i = 0 + # yuck, gnu stat is option incompatible to bsd stat + # we use a heuristic to differentiate between + # GNU stat and other stats + "stat --version 2>/dev/null" | getline statversion + if (statversion ~ /GNU coreutils/) { + stat_format = "-c '%s %n'" # GNU + } else { + stat_format = "-f '%z %N'" # *BSD, MacOS + } + while ("cd "in_dir" && find . -type f -exec stat "stat_format" \\{\\} \\; | sort -n | cut -d' ' -f2-" | getline) { + infilesSmallToBig[i++] = $0 + } + in_count = i + + first_file = infilesSmallToBig[0] + + # Make sure that we're not dealing with a directory. + + if (0 == system("test -d "in_dir"/"first_file)) { + print "[-] Error: The input directory contains subdirectories - please fix." > "/dev/stderr" + exit 1 + } + + # Check for the more efficient way to copy files... + if (0 != system("mkdir -p -m 0700 "trace_dir)) { + print "[-] Error: Cannot create directory "trace_dir > "/dev/stderr" + exit 1 + } + + if (0 == system("ln "in_dir"/"first_file" "trace_dir"/.link_test")) { + cp_tool = "ln" + } else { + cp_tool = "cp" + } + + # Make sure that we can actually get anything out of afl-showmap before we + # waste too much time. + + print "[*] Testing the target binary..." + + if (!stdin_file) { + system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -- \""target_bin"\" "prog_args_string" <\""in_dir"/"first_file"\"") + } else { + system("cp "in_dir"/"first_file" "stdin_file) + system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" 0) { + ++first_count + } + + if (first_count) { + print "[+] OK, "first_count" tuples recorded." + } else { + print "[-] Error: no instrumentation output detected (perhaps crash or timeout)." 
> "/dev/stderr" + if (!ENVIRON["AFL_KEEP_TRACES"]) { + system("rm -rf "trace_dir" 2>/dev/null") + } + exit 1 + } + + # Let's roll! + + ############################# + # STEP 1: Collecting traces # + ############################# + + print "[*] Obtaining traces for "in_count" input files in '"in_dir"'." + + cur = 0; + if (!stdin_file) { + while (cur < in_count) { + fn = infilesSmallToBig[cur] + ++cur; + printf "\r Processing file "cur"/"in_count + system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/"fn"\" -Z "extra_par" -- \""target_bin"\" "prog_args_string" <\""in_dir"/"fn"\"") + } + } else { + while (cur < in_count) { + fn = infilesSmallToBig[cur] + ++cur + printf "\r Processing file "cur"/"in_count + system("cp "in_dir"/"fn" "stdin_file) + system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/"fn"\" -Z "extra_par" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" 0) { + key = line + if (!(key in key_count)) { + ++tuple_count + } + ++key_count[key] + if (! (key in best_file)) { + # this is the best file for this key + best_file[key] = fn + # copy file unless already done + if (! (fn in file_already_copied)) { + system(cp_tool" "in_dir"/"fn" "out_dir"/"fn) + file_already_copied[fn] = "" + ++out_count + } + } + } + close(tracefile_path) + } + + print "" + print "[+] Found "tuple_count" unique tuples across "in_count" files." + + if (out_count == 1) { + print "[!] WARNING: All test cases had the same traces, check syntax!" + } + print "[+] Narrowed down to "out_count" files, saved in '"out_dir"'." 
+ + if (!ENVIRON["AFL_KEEP_TRACES"]) { + system("rm -rf "trace_dir" 2>/dev/null") + } + + exit 0 +} +EOF diff --git a/afl-cmin.awk b/afl-cmin.awk deleted file mode 100755 index 967c4e87..00000000 --- a/afl-cmin.awk +++ /dev/null @@ -1,470 +0,0 @@ -#!/usr/bin/awk -f - -# awk script to minimize a test corpus of input files -# -# based on afl-cmin bash script written by Michal Zalewski -# rewritten by Heiko Eißfeldt (hexcoder-) -# -# uses getopt.awk package from Arnold Robbins -# -# external tools used by this script: -# test -# grep -# rm -# mkdir -# ln -# cp -# pwd -# which -# cd -# find -# stat -# sort -# cut -# and afl-showmap from this project :-) - -# getopt.awk --- Do C library getopt(3) function in awk - -# External variables: -# Optind -- index in ARGV of first nonoption argument -# Optarg -- string value of argument to current option -# Opterr -- if nonzero, print our own diagnostic -# Optopt -- current option letter - -# Returns: -# -1 at end of options -# "?" for unrecognized option -# a character representing the current option - -# Private Data: -# _opti -- index in multiflag option, e.g., -abc - -function getopt(argc, argv, options, thisopt, i) -{ - if (length(options) == 0) # no options given - return -1 - - if (argv[Optind] == "--") { # all done - Optind++ - _opti = 0 - return -1 - } else if (argv[Optind] !~ /^-[^:[:space:]]/) { - _opti = 0 - return -1 - } - if (_opti == 0) - _opti = 2 - thisopt = substr(argv[Optind], _opti, 1) - Optopt = thisopt - i = index(options, thisopt) - if (i == 0) { - if (Opterr) - printf("%c -- invalid option\n", thisopt) > "/dev/stderr" - if (_opti >= length(argv[Optind])) { - Optind++ - _opti = 0 - } else - _opti++ - return "?" 
- } - if (substr(options, i + 1, 1) == ":") { - # get option argument - if (length(substr(argv[Optind], _opti + 1)) > 0) - Optarg = substr(argv[Optind], _opti + 1) - else - Optarg = argv[++Optind] - _opti = 0 - } else - Optarg = "" - if (_opti == 0 || _opti >= length(argv[Optind])) { - Optind++ - _opti = 0 - } else - _opti++ - return thisopt -} - -BEGIN { - Opterr = 1 # default is to diagnose - Optind = 1 # skip ARGV[0] - - # test program - if (_getopt_test) { - while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1) - printf("c = <%c>, Optarg = <%s>\n", - _go_c, Optarg) - printf("non-option arguments:\n") - for (; Optind < ARGC; Optind++) - printf("\tARGV[%d] = <%s>\n", - Optind, ARGV[Optind]) - } -} - -function usage() { - print \ -"Usage: afl-cmin [ options ] -- /path/to/target_app [ ... ]\n" \ -"\n" \ -"Required parameters:\n" \ -"\n" \ -" -i dir - input directory with starting corpus\n" \ -" -o dir - output directory for minimized files\n" \ -"\n" \ -"Execution control settings:\n" \ -"\n" \ -" -f file - location read by the fuzzed program (stdin)\n" \ -" -m megs - memory limit for child process ("mem_limit" MB)\n" \ -" -t msec - run time limit for child process (none)\n" \ -" -Q - use binary-only instrumentation (QEMU mode)\n" \ -" -U - use unicorn-based instrumentation (unicorn mode)\n" \ -"\n" \ -"Minimization settings:\n" \ -" -C - keep crashing inputs, reject everything else\n" \ -" -e - solve for edge coverage only, ignore hit counts\n" \ -"\n" \ -"For additional tips, please consult docs/README.md\n" \ -"\n" \ - > "/dev/stderr" - exit 1 -} - -function exists_and_is_executable(binarypath) { - return 0 == system("test -f "binarypath" -a -x "binarypath) -} - -BEGIN { - print "corpus minimization tool for afl++ (awk version)\n" - - # defaults - extra_par = "" - # process options - Opterr = 1 # default is to diagnose - Optind = 1 # skip ARGV[0] - while ((_go_c = getopt(ARGC, ARGV, "hi:o:f:m:t:eCQU?")) != -1) { - if (_go_c == "i") { - if (!Optarg) usage() - if 
(in_dir) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} - in_dir = Optarg - continue - } else - if (_go_c == "o") { - if (!Optarg) usage() - if (out_dir) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} - out_dir = Optarg - continue - } else - if (_go_c == "f") { - if (!Optarg) usage() - if (stdin_file) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} - stdin_file = Optarg - continue - } else - if (_go_c == "m") { - if (!Optarg) usage() - if (mem_limit) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} - mem_limit = Optarg - mem_limit_given = 1 - continue - } else - if (_go_c == "t") { - if (!Optarg) usage() - if (timeout) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} - timeout = Optarg - continue - } else - if (_go_c == "C") { - ENVIRON["AFL_CMIN_CRASHES_ONLY"] = 1 - continue - } else - if (_go_c == "e") { - extra_par = extra_par " -e" - continue - } else - if (_go_c == "Q") { - if (qemu_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} - extra_par = extra_par " -Q" - if ( !mem_limit_given ) mem_limit = "250" - qemu_mode = 1 - continue - } else - if (_go_c == "U") { - if (unicorn_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} - extra_par = extra_par " -U" - if ( !mem_limit_given ) mem_limit = "250" - unicorn_mode = 1 - continue - } else - if (_go_c == "?") { - exit 1 - } else - usage() - } # while options - - if (!mem_limit) mem_limit = 200 - if (!timeout) timeout = "none" - - # get program args - i = 0 - prog_args_string = "" - for (; Optind < ARGC; Optind++) { - prog_args[i++] = ARGV[Optind] - if (i > 1) - prog_args_string = prog_args_string" "ARGV[Optind] - } - - # sanity checks - if (!prog_args[0] || !in_dir || !out_dir) usage() - - target_bin = prog_args[0] - - # Do a sanity check to discourage the use of /tmp, since we can't really - # handle this safely from an awk script. 
- - if (!ENVIRON["AFL_ALLOW_TMP"]) { - dirlist[0] = in_dir - dirlist[1] = target_bin - dirlist[2] = out_dir - dirlist[3] = stdin_file - "pwd" | getline dirlist[4] # current directory - for (dirind in dirlist) { - dir = dirlist[dirind] - - if (dir ~ /^(\/var)?\/tmp/) { - print "[-] Error: do not use this script in /tmp or /var/tmp." > "/dev/stderr" - exit 1 - } - } - delete dirlist - } - - # If @@ is specified, but there's no -f, let's come up with a temporary input - # file name. - - trace_dir = out_dir "/.traces" - - if (!stdin_file) { - found_atat = 0 - for (prog_args_ind in prog_args) { - if ("@@" == prog_args[prog_args_ind]) { - found_atat = 1 - break - } - } - if (found_atat) { - stdin_file = trace_dir "/.cur_input" - } - } - - # Check for obvious errors. - - if (mem_limit && mem_limit != "none" && mem_limit < 5) { - print "[-] Error: dangerously low memory limit." > "/dev/stderr" - exit 1 - } - - if (timeout && timeout != "none" && timeout < 10) { - print "[-] Error: dangerously low timeout." > "/dev/stderr" - exit 1 - } - - if (target_bin && !exists_and_is_executable(target_bin)) { - - "which "target_bin" 2>/dev/null" | getline tnew - if (!tnew || !exists_and_is_executable(tnew)) { - print "[-] Error: binary '"target_bin"' not found or not executable." > "/dev/stderr" - exit 1 - } - target_bin = tnew - } - - if (!ENVIRON["AFL_SKIP_BIN_CHECK"] && !qemu_mode && !unicorn_mode) { - if (0 != system( "grep -q __AFL_SHM_ID "target_bin )) { - print "[-] Error: binary '"target_bin"' doesn't appear to be instrumented." > "/dev/stderr" - exit 1 - } - } - - if (0 != system( "test -d "in_dir )) { - print "[-] Error: directory '"in_dir"' not found." 
> "/dev/stderr" - exit 1 - } - - if (0 == system( "test -d "in_dir"/queue" )) { - in_dir = in_dir "/queue" - } - - system("rm -rf "trace_dir" 2>/dev/null"); - system("rm "out_dir"/id[:_]* 2>/dev/null") - - if (0 == system( "test -d "out_dir" -a -e "out_dir"/*" )) { - print "[-] Error: directory '"out_dir"' exists and is not empty - delete it first." > "/dev/stderr" - exit 1 - } - - if (stdin_file) { - # truncate input file - printf "" > stdin_file - close( stdin_file ) - } - - if (!ENVIRON["AFL_PATH"]) { - if (0 == system("test -f afl-cmin.awk")) { - showmap = "./afl-showmap" - } else { - "which afl-showmap 2>/dev/null" | getline showmap - } - } else { - showmap = ENVIRON["AFL_PATH"] "/afl-showmap" - } - - if (!showmap || 0 != system("test -x "showmap )) { - print "[-] Error: can't find 'afl-showmap' - please set AFL_PATH." > "/dev/stderr" - exit 1 - } - - # get list of input filenames sorted by size - i = 0 - # yuck, gnu stat is option incompatible to bsd stat - # we use a heuristic to differentiate between - # GNU stat and other stats - "stat --version 2>/dev/null" | getline statversion - if (statversion ~ /GNU coreutils/) { - stat_format = "-c '%s %n'" # GNU - } else { - stat_format = "-f '%z %N'" # *BSD, MacOS - } - while ("cd "in_dir" && find . -type f -exec stat "stat_format" \\{\\} \\; | sort -n | cut -d' ' -f2-" | getline) { - infilesSmallToBig[i++] = $0 - } - in_count = i - - first_file = infilesSmallToBig[0] - - # Make sure that we're not dealing with a directory. - - if (0 == system("test -d "in_dir"/"first_file)) { - print "[-] Error: The input directory contains subdirectories - please fix." > "/dev/stderr" - exit 1 - } - - # Check for the more efficient way to copy files... 
- if (0 != system("mkdir -p -m 0700 "trace_dir)) { - print "[-] Error: Cannot create directory "trace_dir > "/dev/stderr" - exit 1 - } - - if (0 == system("ln "in_dir"/"first_file" "trace_dir"/.link_test")) { - cp_tool = "ln" - } else { - cp_tool = "cp" - } - - # Make sure that we can actually get anything out of afl-showmap before we - # waste too much time. - - print "[*] Testing the target binary..." - - if (!stdin_file) { - system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -- \""target_bin"\" "prog_args_string" <\""in_dir"/"first_file"\"") - } else { - system("cp "in_dir"/"first_file" "stdin_file) - system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" 0) { - ++first_count - } - - if (first_count) { - print "[+] OK, "first_count" tuples recorded." - } else { - print "[-] Error: no instrumentation output detected (perhaps crash or timeout)." > "/dev/stderr" - if (!ENVIRON["AFL_KEEP_TRACES"]) { - system("rm -rf "trace_dir" 2>/dev/null") - } - exit 1 - } - - # Let's roll! - - ############################# - # STEP 1: Collecting traces # - ############################# - - print "[*] Obtaining traces for "in_count" input files in '"in_dir"'." 
- - cur = 0; - if (!stdin_file) { - while (cur < in_count) { - fn = infilesSmallToBig[cur] - ++cur; - printf "\r Processing file "cur"/"in_count - system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/"fn"\" -Z "extra_par" -- \""target_bin"\" "prog_args_string" <\""in_dir"/"fn"\"") - } - } else { - while (cur < in_count) { - fn = infilesSmallToBig[cur] - ++cur - printf "\r Processing file "cur"/"in_count - system("cp "in_dir"/"fn" "stdin_file) - system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/"fn"\" -Z "extra_par" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" 0) { - key = line - if (!(key in key_count)) { - ++tuple_count - } - ++key_count[key] - if (! (key in best_file)) { - # this is the best file for this key - best_file[key] = fn - # copy file unless already done - if (! (fn in file_already_copied)) { - system(cp_tool" "in_dir"/"fn" "out_dir"/"fn) - file_already_copied[fn] = "" - ++out_count - } - } - } - close(tracefile_path) - } - - print "" - print "[+] Found "tuple_count" unique tuples across "in_count" files." - - if (out_count == 1) { - print "[!] WARNING: All test cases had the same traces, check syntax!" - } - print "[+] Narrowed down to "out_count" files, saved in '"out_dir"'." - - if (!ENVIRON["AFL_KEEP_TRACES"]) { - system("rm -rf "trace_dir" 2>/dev/null") - } - - exit 0 -} diff --git a/docs/ChangeLog b/docs/ChangeLog index bb3537dd..33c6f618 100644 --- a/docs/ChangeLog +++ b/docs/ChangeLog @@ -25,6 +25,8 @@ Version ++2.60d (develop): - show in the help output for which llvm version it was compiled for - now does not need to be recompiled between trace-pc and pass instrumentation. 
compile normally and set AFL_LLVM_USE_TRACE_PC :) + - afl-cmin is now a sh script (invoking awk) instead of bash for portability + the original script is still present as afl-cmin.bash - added blacklisted function check in all modules of llvm_mode - added fix from Debian project to compile libdislocator and libtokencap diff --git a/test/test.sh b/test/test.sh index 3473155f..0ae6fd09 100755 --- a/test/test.sh +++ b/test/test.sh @@ -150,7 +150,7 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" && { } echo 000000000000000000000000 > in/in2 mkdir -p in2 - ../afl-cmin -i in -o in2 -- ./test-instr.plain > /dev/null + ../afl-cmin -i in -o in2 -- ./test-instr.plain > /dev/null 2>&1 CNT=`ls in2/ | wc -l` case "$CNT" in 1| *1) $ECHO "$GREEN[+] afl-cmin correctly minimized testcase numbers" ;; -- cgit 1.4.1 From 436873a19abe5858e56555db02095f4eb7e6febd Mon Sep 17 00:00:00 2001 From: van Hauser Date: Thu, 23 Jan 2020 11:55:53 +0100 Subject: show stderr on afl-cmin test.sh --- test/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test.sh b/test/test.sh index 0ae6fd09..3473155f 100755 --- a/test/test.sh +++ b/test/test.sh @@ -150,7 +150,7 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" && { } echo 000000000000000000000000 > in/in2 mkdir -p in2 - ../afl-cmin -i in -o in2 -- ./test-instr.plain > /dev/null 2>&1 + ../afl-cmin -i in -o in2 -- ./test-instr.plain > /dev/null CNT=`ls in2/ | wc -l` case "$CNT" in 1| *1) $ECHO "$GREEN[+] afl-cmin correctly minimized testcase numbers" ;; -- cgit 1.4.1 From 6abe33030396c8f15f00b4fe3d083f3841de3212 Mon Sep 17 00:00:00 2001 From: hexcoder- Date: Fri, 24 Jan 2020 20:58:15 +0100 Subject: afl-cmin more awk portability (mawk), add afl-cmin/afl-tmin tests for non-x86 platforms --- afl-cmin | 27 ++++++++------------------- test/test.sh | 26 +++++++++++++++++++++++--- 2 files changed, 31 insertions(+), 22 deletions(-) diff --git a/afl-cmin b/afl-cmin index a072a62a..de5a66ed 100755 --- 
a/afl-cmin +++ b/afl-cmin @@ -8,6 +8,10 @@ awk -f - -- ${@+"$@"} <<'EOF' # # based on afl-cmin bash script written by Michal Zalewski # rewritten by Heiko Eißfeldt (hexcoder-) +# tested with: +# gnu awk (x86 Linux) +# bsd awk (x86 *BSD) +# mawk (arm32 raspbian) # # uses getopt.awk package from Arnold Robbins # @@ -52,7 +56,7 @@ function getopt(argc, argv, options, thisopt, i) Optind++ _opti = 0 return -1 - } else if (argv[Optind] !~ /^-[^:[:space:]]/) { + } else if (argv[Optind] !~ /^-[^:\t ]/) { _opti = 0 return -1 } @@ -88,22 +92,6 @@ function getopt(argc, argv, options, thisopt, i) return thisopt } -BEGIN { - Opterr = 1 # default is to diagnose - Optind = 1 # skip ARGV[0] - - # test program - if (_getopt_test) { - while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1) - printf("c = <%c>, Optarg = <%s>\n", - _go_c, Optarg) - printf("non-option arguments:\n") - for (; Optind < ARGC; Optind++) - printf("\tARGV[%d] = <%s>\n", - Optind, ARGV[Optind]) - } -} - function usage() { print \ "Usage: afl-cmin [ options ] -- /path/to/target_app [ ... ]\n" \ @@ -311,7 +299,7 @@ BEGIN { } if (!ENVIRON["AFL_PATH"]) { - if (0 == system("test -f afl-cmin.awk")) { + if (0 == system("test -f afl-cmin")) { showmap = "./afl-showmap" } else { "which afl-showmap 2>/dev/null" | getline showmap @@ -336,7 +324,8 @@ BEGIN { } else { stat_format = "-f '%z %N'" # *BSD, MacOS } - while ("cd "in_dir" && find . -type f -exec stat "stat_format" \\{\\} \\; | sort -n | cut -d' ' -f2-" | getline) { + cmdline = "cd "in_dir" && find . 
-type f -exec stat "stat_format" \\{\\} \\; | sort -n | cut -d' ' -f2-" + while (cmdline | getline) { infilesSmallToBig[i++] = $0 } in_count = i diff --git a/test/test.sh b/test/test.sh index 3473155f..93a4e008 100755 --- a/test/test.sh +++ b/test/test.sh @@ -153,8 +153,8 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" && { ../afl-cmin -i in -o in2 -- ./test-instr.plain > /dev/null CNT=`ls in2/ | wc -l` case "$CNT" in -1| *1) $ECHO "$GREEN[+] afl-cmin correctly minimized testcase numbers" ;; -*) $ECHO "$RED[!] afl-cmin did not correctly minimize testcase numbers" +1| *1) $ECHO "$GREEN[+] afl-cmin correctly minimized the number of testcases" ;; +*) $ECHO "$RED[!] afl-cmin did not correctly minimizethe number of testcases" CODE=1 ;; esac @@ -176,7 +176,7 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" && { $ECHO "$YELLOW[-] not an intel platform, cannot test afl-gcc" } -$ECHO "$BLUE[*] Testing: llvm_mode" +$ECHO "$BLUE[*] Testing: llvm_mode, afl-showmap, afl-fuzz, afl-cmin and afl-tmin" test -e ../afl-clang-fast -a -e ../split-switches-pass.so && { # on FreeBSD need to set AFL_CC test `uname -s` = 'FreeBSD' && { @@ -253,6 +253,26 @@ test -e ../afl-clang-fast -a -e ../split-switches-pass.so && { $ECHO "$RED[!] afl-fuzz is not working correctly with llvm_mode" CODE=1 } + test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" || { + echo 000000000000000000000000 > in/in2 + mkdir -p in2 + ../afl-cmin -i in -o in2 -- ./test-instr.plain > /dev/null + CNT=`ls in2/ | wc -l` + case "$CNT" in +1| *1) $ECHO "$GREEN[+] afl-cmin correctly minimized the number of testcases" ;; +*) $ECHO "$RED[!] afl-cmin did not correctly minimize the number of testcases" + CODE=1 + ;; + esac + ../afl-tmin -i in/in2 -o in2/in2 -- ./test-instr.plain > /dev/null 2>&1 + SIZE=`ls -l in2/in2 2> /dev/null | awk '{print$5}'` + test "$SIZE" = 1 && $ECHO "$GREEN[+] afl-tmin correctly minimized the testcase" + test "$SIZE" = 1 || { + $ECHO "$RED[!] 
afl-tmin did incorrectly minimize the testcase to $SIZE" + CODE=1 + } + rm -rf in2 + } rm -rf in out errors } rm -f test-instr.plain -- cgit 1.4.1 From 5d2330f04e45225588a11c64b26a7dbb1a2fbe1a Mon Sep 17 00:00:00 2001 From: van Hauser Date: Sat, 25 Jan 2020 05:27:10 +0100 Subject: nicer output for afl-system-config --- afl-system-config | 93 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 55 insertions(+), 38 deletions(-) diff --git a/afl-system-config b/afl-system-config index 2a7df17f..1e180d8b 100755 --- a/afl-system-config +++ b/afl-system-config @@ -1,6 +1,6 @@ #!/bin/sh test "$1" = "-h" && { - echo afl-system-config by Marc Heuse + echo 'afl-system-config by Marc Heuse ' echo echo $0 echo @@ -12,55 +12,72 @@ test "$1" = "-h" && { exit 1 } +DONE= PLATFORM=`uname -s` -echo This reconfigures the system to have a better fuzzing performance +echo This reconfigures the system to have a better fuzzing performance. if [ '!' "$EUID" = 0 ] && [ '!' `id -u` = 0 ] ; then - echo Error you need to be root to run this - exit 1 + echo "Warning: you need to be root to run this!" + # we do not exit as other mechanisms exist that allows to do this than + # being root. let the errors speak for themselves. 
fi if [ "$PLATFORM" = "Linux" ] ; then -sysctl -w kernel.core_pattern=core -sysctl -w kernel.randomize_va_space=0 -sysctl -w kernel.sched_child_runs_first=1 -sysctl -w kernel.sched_autogroup_enabled=1 -sysctl -w kernel.sched_migration_cost_ns=50000000 -sysctl -w kernel.sched_latency_ns=250000000 -echo never > /sys/kernel/mm/transparent_hugepage/enabled -test -e /sys/devices/system/cpu/cpufreq/scaling_governor && echo performance | tee /sys/devices/system/cpu/cpufreq/scaling_governor -test -e /sys/devices/system/cpu/cpufreq/policy0/scaling_governor && echo performance | tee /sys/devices/system/cpu/cpufreq/policy*/scaling_governor -test -e /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor && echo performance | tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor -test -e /sys/devices/system/cpu/intel_pstate/no_turbo && echo 0 > /sys/devices/system/cpu/intel_pstate/no_turbo -test -e /sys/devices/system/cpu/cpufreq/boost && echo 1 > /sys/devices/system/cpu/cpufreq/boost -echo -echo It is recommended to boot the kernel with lots of security off - if you are running a machine that is in a secured network - so set this: -echo '/etc/default/grub:GRUB_CMDLINE_LINUX_DEFAULT="ibpb=off ibrs=off kpti=off l1tf=off mds=off mitigations=off no_stf_barrier noibpb noibrs nopcid nopti nospec_store_bypass_disable nospectre_v1 nospectre_v2 pcid=off pti=off spec_store_bypass_disable=off spectre_v2=off stf_barrier=off"' +{ + sysctl -w kernel.core_pattern=core + sysctl -w kernel.randomize_va_space=0 + sysctl -w kernel.sched_child_runs_first=1 + sysctl -w kernel.sched_autogroup_enabled=1 + sysctl -w kernel.sched_migration_cost_ns=50000000 + sysctl -w kernel.sched_latency_ns=250000000 + echo never > /sys/kernel/mm/transparent_hugepage/enabled + test -e /sys/devices/system/cpu/cpufreq/scaling_governor && echo performance | tee /sys/devices/system/cpu/cpufreq/scaling_governor + test -e /sys/devices/system/cpu/cpufreq/policy0/scaling_governor && echo performance | tee 
/sys/devices/system/cpu/cpufreq/policy*/scaling_governor + test -e /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor && echo performance | tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor + test -e /sys/devices/system/cpu/intel_pstate/no_turbo && echo 0 > /sys/devices/system/cpu/intel_pstate/no_turbo + test -e /sys/devices/system/cpu/cpufreq/boost && echo 1 > /sys/devices/system/cpu/cpufreq/boost +} > /dev/null + echo Settings applied. + dmesg | egrep -q 'nospectre_v2|spectre_v2=off' || { + echo It is recommended to boot the kernel with lots of security off - if you are running a machine that is in a secured network - so set this: + echo ' /etc/default/grub:GRUB_CMDLINE_LINUX_DEFAULT="ibpb=off ibrs=off kpti=off l1tf=off mds=off mitigations=off no_stf_barrier noibpb noibrs nopcid nopti nospec_store_bypass_disable nospectre_v1 nospectre_v2 pcid=off pti=off spec_store_bypass_disable=off spectre_v2=off stf_barrier=off"' + } + DONE=1 fi if [ "$PLATFORM" = "FreeBSD" ] ; then -sysctl kern.elf32.aslr.enable=0 -sysctl kern.elf64.aslr.enable=0 -echo -echo It is recommended to boot the kernel with lots of security off - if you are running a machine that is in a secured network - so set this: -echo 'sysctl hw.ibrs_disable=1' -echo -echo 'Setting kern.pmap.pg_ps_enabled=0 into /boot/loader.conf might be helpful too.' +{ + sysctl kern.elf32.aslr.enable=0 + sysctl kern.elf64.aslr.enable=0 +} > /dev/null + echo Settings applied. + echo It is recommended to boot the kernel with lots of security off - if you are running a machine that is in a secured network - so set this: + echo ' sysctl hw.ibrs_disable=1' + echo 'Setting kern.pmap.pg_ps_enabled=0 into /boot/loader.conf might be helpful too.' + DONE=1 fi if [ "$PLATFORM" = "OpenBSD" ] ; then -echo -echo 'System security features cannot be disabled on OpenBSD.' + echo + echo 'System security features cannot be disabled on OpenBSD.' 
+ DONE=1 fi if [ "$PLATFORM" = "NetBSD" ] ; then -echo -echo It is recommended to enable unprivileged users to set cpu affinity -echo to be able to use afl-gotcpu meaningfully. -/sbin/sysctl -w security.models.extensions.user_set_cpu_affinity=1 +{ + #echo It is recommended to enable unprivileged users to set cpu affinity + #echo to be able to use afl-gotcpu meaningfully. + /sbin/sysctl -w security.models.extensions.user_set_cpu_affinity=1 +} > /dev/null + echo Settings applied. + DONE=1 fi if [ "$PLATFORM" = "Darwin" ] ; then if [ $(launchctl list 2>/dev/null | grep -q '\.ReportCrash$') ] ; then -echo We unload the default crash reporter here -SL=/System/Library; PL=com.apple.ReportCrash -launchctl unload -w ${SL}/LaunchAgents/${PL}.plist -sudo launchctl unload -w ${SL}/LaunchDaemons/${PL}.Root.plist + echo We unload the default crash reporter here + SL=/System/Library; PL=com.apple.ReportCrash + launchctl unload -w ${SL}/LaunchAgents/${PL}.plist + sudo launchctl unload -w ${SL}/LaunchDaemons/${PL}.Root.plist + echo Settings applied. + else + echo Nothing to do. 
fi + DONE=1 fi -echo -echo Also use AFL_TMPDIR to use a tmpfs for the input file +test -z "$DONE" && echo Error: Unknown platform: $PLATFORM +test -z "$AFL_TMPDIR" && echo Also use AFL_TMPDIR and point it to a tmpfs for the input file caching -- cgit 1.4.1 From 2c6847bfa0b57f3330b1aab9b91d935757db51b7 Mon Sep 17 00:00:00 2001 From: van Hauser Date: Sat, 25 Jan 2020 16:11:42 +0100 Subject: added whitelist+blacklist to all llvm_mode passes --- docs/ChangeLog | 2 +- llvm_mode/LLVMInsTrim.so.cc | 29 +------- llvm_mode/MarkNodes.cc | 19 ++---- llvm_mode/compare-transform-pass.so.cc | 94 ++++++++++++++++++++++++++ llvm_mode/split-compares-pass.so.cc | 118 +++++++++++++++++++++++++++++++++ llvm_mode/split-switches-pass.so.cc | 113 +++++++++++++++++++++++++++++++ test/test.sh | 16 ++--- 7 files changed, 343 insertions(+), 48 deletions(-) diff --git a/docs/ChangeLog b/docs/ChangeLog index 33c6f618..c1d53379 100644 --- a/docs/ChangeLog +++ b/docs/ChangeLog @@ -27,7 +27,7 @@ Version ++2.60d (develop): instrumentation. 
compile normally and set AFL_LLVM_USE_TRACE_PC :) - afl-cmin is now a sh script (invoking awk) instead of bash for portability the original script is still present as afl-cmin.bash - - added blacklisted function check in all modules of llvm_mode + - added blacklist and whitelisting function check in all modules of llvm_mode - added fix from Debian project to compile libdislocator and libtokencap diff --git a/llvm_mode/LLVMInsTrim.so.cc b/llvm_mode/LLVMInsTrim.so.cc index 11451b43..24df6d42 100644 --- a/llvm_mode/LLVMInsTrim.so.cc +++ b/llvm_mode/LLVMInsTrim.so.cc @@ -144,19 +144,6 @@ struct InsTrim : public ModulePass { // this is our default MarkSetOpt = true; - /* // I dont think this makes sense to port into LLVMInsTrim - char* inst_ratio_str = getenv("AFL_INST_RATIO"); - unsigned int inst_ratio = 100; - if (inst_ratio_str) { - - if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || !inst_ratio || - inst_ratio > 100) FATAL("Bad value of AFL_INST_RATIO (must be between 1 - and 100)"); - - } - - */ - LLVMContext &C = M.getContext(); IntegerType *Int8Ty = IntegerType::getInt8Ty(C); IntegerType *Int32Ty = IntegerType::getInt32Ty(C); @@ -203,8 +190,7 @@ struct InsTrim : public ModulePass { if (instFilename.str().empty()) { - /* If the original location is empty, try using the inlined location - */ + /* If the original location is empty, try using the inlined location */ DILocation *oDILoc = cDILoc->getInlinedAt(); if (oDILoc) { @@ -432,28 +418,19 @@ struct InsTrim : public ModulePass { IRB.CreateStore(Incr, MapPtrIdx) ->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); - /* Set prev_loc to cur_loc >> 1 */ - /* - StoreInst *Store = IRB.CreateStore(ConstantInt::get(Int32Ty, L >> 1), - OldPrev); Store->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, - None)); - */ - total_instr++; } } - OKF("Instrumented %u locations (%llu, %llu) (%s mode)\n" /*", ratio - %u%%)."*/ - , + OKF("Instrumented %u locations (%llu, %llu) (%s mode)\n", total_instr, 
total_rs, total_hs, getenv("AFL_HARDEN") ? "hardened" : ((getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN")) ? "ASAN/MSAN" - : "non-hardened") /*, inst_ratio*/); + : "non-hardened")); return false; } diff --git a/llvm_mode/MarkNodes.cc b/llvm_mode/MarkNodes.cc index 2aeeda8d..caa8cede 100644 --- a/llvm_mode/MarkNodes.cc +++ b/llvm_mode/MarkNodes.cc @@ -65,16 +65,11 @@ void buildCFG(Function *F) { } - // uint32_t FakeID = 0; for (auto S = F->begin(), E = F->end(); S != E; ++S) { BasicBlock *BB = &*S; uint32_t MyID = LMap[BB]; - // if (succ_begin(BB) == succ_end(BB)) { - // Succs[MyID].push_back(FakeID); - // Marked.insert(MyID); - //} for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { Succs[MyID].push_back(LMap[*I]); @@ -113,7 +108,7 @@ void DFStree(size_t now_id) { } -void turnCFGintoDAG(Function *F) { +void turnCFGintoDAG() { tSuccs = Succs; tag.resize(Blocks.size()); @@ -176,7 +171,7 @@ void DFS(uint32_t now) { } -void DominatorTree(Function *F) { +void DominatorTree() { if (Blocks.empty()) return; uint32_t s = start_point; @@ -390,7 +385,7 @@ void MarkSubGraph(uint32_t ss, uint32_t tt) { } -void MarkVertice(Function *F) { +void MarkVertice() { uint32_t s = start_point; @@ -411,8 +406,6 @@ void MarkVertice(Function *F) { timeStamp = 0; uint32_t t = 0; - // MarkSubGraph(s, t); - // return; while (s != t) { @@ -432,9 +425,9 @@ std::pair, std::vector > markNodes( reset(); labelEachBlock(F); buildCFG(F); - turnCFGintoDAG(F); - DominatorTree::DominatorTree(F); - MarkVertice(F); + turnCFGintoDAG(); + DominatorTree::DominatorTree(); + MarkVertice(); std::vector Result, ResultAbove; for (uint32_t x : Markabove) { diff --git a/llvm_mode/compare-transform-pass.so.cc b/llvm_mode/compare-transform-pass.so.cc index 0ccce875..5d924b63 100644 --- a/llvm_mode/compare-transform-pass.so.cc +++ b/llvm_mode/compare-transform-pass.so.cc @@ -18,7 +18,13 @@ #include #include +#include +#include +#include +#include + #include "llvm/ADT/Statistic.h" +#include 
"llvm/IR/DebugInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" @@ -42,6 +48,23 @@ class CompareTransform : public ModulePass { static char ID; CompareTransform() : ModulePass(ID) { + char *instWhiteListFilename = getenv("AFL_LLVM_WHITELIST"); + if (instWhiteListFilename) { + + std::string line; + std::ifstream fileStream; + fileStream.open(instWhiteListFilename); + if (!fileStream) report_fatal_error("Unable to open AFL_LLVM_WHITELIST"); + getline(fileStream, line); + while (fileStream) { + + myWhitelist.push_back(line); + getline(fileStream, line); + + } + + } + } bool runOnModule(Module &M) override; @@ -57,6 +80,9 @@ class CompareTransform : public ModulePass { } + protected: + std::list myWhitelist; + private: bool transformCmps(Module &M, const bool processStrcmp, const bool processMemcmp, const bool processStrncmp, @@ -104,6 +130,74 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, for (auto &BB : F) { + if (!myWhitelist.empty()) { + + BasicBlock::iterator IP = BB.getFirstInsertionPt(); + + bool instrumentBlock = false; + + /* Get the current location using debug information. + * For now, just instrument the block if we are not able + * to determine our location. 
*/ + DebugLoc Loc = IP->getDebugLoc(); + if (Loc) { + + DILocation *cDILoc = dyn_cast(Loc.getAsMDNode()); + + unsigned int instLine = cDILoc->getLine(); + StringRef instFilename = cDILoc->getFilename(); + + if (instFilename.str().empty()) { + + /* If the original location is empty, try using the inlined location + */ + DILocation *oDILoc = cDILoc->getInlinedAt(); + if (oDILoc) { + + instFilename = oDILoc->getFilename(); + instLine = oDILoc->getLine(); + + } + + } + + (void)instLine; + + /* Continue only if we know where we actually are */ + if (!instFilename.str().empty()) { + + for (std::list::iterator it = myWhitelist.begin(); + it != myWhitelist.end(); ++it) { + + /* We don't check for filename equality here because + * filenames might actually be full paths. Instead we + * check that the actual filename ends in the filename + * specified in the list. */ + if (instFilename.str().length() >= it->length()) { + + if (instFilename.str().compare( + instFilename.str().length() - it->length(), + it->length(), *it) == 0) { + + instrumentBlock = true; + break; + + } + + } + + } + + } + + } + + /* Either we couldn't figure out our location or the location is + * not whitelisted, so we skip instrumentation. */ + if (!instrumentBlock) continue; + + } + for (auto &IN : BB) { CallInst *callInst = nullptr; diff --git a/llvm_mode/split-compares-pass.so.cc b/llvm_mode/split-compares-pass.so.cc index eeac4a55..bc25b322 100644 --- a/llvm_mode/split-compares-pass.so.cc +++ b/llvm_mode/split-compares-pass.so.cc @@ -15,7 +15,17 @@ * limitations under the License. 
*/ +#include +#include +#include + +#include +#include +#include +#include + #include "llvm/Pass.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/Support/raw_ostream.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" @@ -35,6 +45,41 @@ class SplitComparesTransform : public ModulePass { static char ID; SplitComparesTransform() : ModulePass(ID) { + char *instWhiteListFilename = getenv("AFL_LLVM_WHITELIST"); + if (instWhiteListFilename) { + + std::string line; + std::ifstream fileStream; + fileStream.open(instWhiteListFilename); + if (!fileStream) report_fatal_error("Unable to open AFL_LLVM_WHITELIST"); + getline(fileStream, line); + while (fileStream) { + + myWhitelist.push_back(line); + getline(fileStream, line); + + } + + } + + } + + static bool isBlacklisted(const Function *F) { + + static const SmallVector Blacklist = { + + "asan.", "llvm.", "sancov.", "__ubsan_handle_", "ign." + + }; + + for (auto const &BlacklistFunc : Blacklist) { + + if (F->getName().startswith(BlacklistFunc)) { return true; } + + } + + return false; + } bool runOnModule(Module &M) override; @@ -49,6 +94,9 @@ class SplitComparesTransform : public ModulePass { } + protected: + std::list myWhitelist; + private: int enableFPSplit; @@ -77,8 +125,78 @@ bool SplitComparesTransform::simplifyCompares(Module &M) { * all integer comparisons with >= and <= predicates to the icomps vector */ for (auto &F : M) { + if (isBlacklisted(&F)) continue; + for (auto &BB : F) { + if (!myWhitelist.empty()) { + + bool instrumentBlock = false; + + BasicBlock::iterator IP = BB.getFirstInsertionPt(); + + /* Get the current location using debug information. + * For now, just instrument the block if we are not able + * to determine our location. 
*/ + DebugLoc Loc = IP->getDebugLoc(); + if (Loc) { + + DILocation *cDILoc = dyn_cast(Loc.getAsMDNode()); + + unsigned int instLine = cDILoc->getLine(); + StringRef instFilename = cDILoc->getFilename(); + + if (instFilename.str().empty()) { + + /* If the original location is empty, try using the inlined location + */ + DILocation *oDILoc = cDILoc->getInlinedAt(); + if (oDILoc) { + + instFilename = oDILoc->getFilename(); + instLine = oDILoc->getLine(); + + } + + } + + (void)instLine; + + /* Continue only if we know where we actually are */ + if (!instFilename.str().empty()) { + + for (std::list::iterator it = myWhitelist.begin(); + it != myWhitelist.end(); ++it) { + + /* We don't check for filename equality here because + * filenames might actually be full paths. Instead we + * check that the actual filename ends in the filename + * specified in the list. */ + if (instFilename.str().length() >= it->length()) { + + if (instFilename.str().compare( + instFilename.str().length() - it->length(), + it->length(), *it) == 0) { + + instrumentBlock = true; + break; + + } + + } + + } + + } + + } + + /* Either we couldn't figure out our location or the location is + * not whitelisted, so we skip instrumentation. 
*/ + if (!instrumentBlock) continue; + + } + for (auto &IN : BB) { CmpInst *selectcmpInst = nullptr; diff --git a/llvm_mode/split-switches-pass.so.cc b/llvm_mode/split-switches-pass.so.cc index 2743a71a..3a2838c0 100644 --- a/llvm_mode/split-switches-pass.so.cc +++ b/llvm_mode/split-switches-pass.so.cc @@ -18,7 +18,13 @@ #include #include +#include +#include +#include +#include + #include "llvm/ADT/Statistic.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" @@ -42,6 +48,41 @@ class SplitSwitchesTransform : public ModulePass { static char ID; SplitSwitchesTransform() : ModulePass(ID) { + char *instWhiteListFilename = getenv("AFL_LLVM_WHITELIST"); + if (instWhiteListFilename) { + + std::string line; + std::ifstream fileStream; + fileStream.open(instWhiteListFilename); + if (!fileStream) report_fatal_error("Unable to open AFL_LLVM_WHITELIST"); + getline(fileStream, line); + while (fileStream) { + + myWhitelist.push_back(line); + getline(fileStream, line); + + } + + } + + } + + static bool isBlacklisted(const Function *F) { + + static const SmallVector Blacklist = { + + "asan.", "llvm.", "sancov.", "__ubsan_handle_", "ign." 
+ + }; + + for (auto const &BlacklistFunc : Blacklist) { + + if (F->getName().startswith(BlacklistFunc)) { return true; } + + } + + return false; + } bool runOnModule(Module &M) override; @@ -71,6 +112,9 @@ class SplitSwitchesTransform : public ModulePass { typedef std::vector CaseVector; + protected: + std::list myWhitelist; + private: bool splitSwitches(Module &M); bool transformCmps(Module &M, const bool processStrcmp, @@ -268,10 +312,79 @@ bool SplitSwitchesTransform::splitSwitches(Module &M) { * all switches to switches vector for later processing */ for (auto &F : M) { + if (isBlacklisted(&F)) continue; + for (auto &BB : F) { SwitchInst *switchInst = nullptr; + if (!myWhitelist.empty()) { + + bool instrumentBlock = false; + BasicBlock::iterator IP = BB.getFirstInsertionPt(); + + /* Get the current location using debug information. + * For now, just instrument the block if we are not able + * to determine our location. */ + DebugLoc Loc = IP->getDebugLoc(); + if (Loc) { + + DILocation *cDILoc = dyn_cast(Loc.getAsMDNode()); + + unsigned int instLine = cDILoc->getLine(); + StringRef instFilename = cDILoc->getFilename(); + + if (instFilename.str().empty()) { + + /* If the original location is empty, try using the inlined location + */ + DILocation *oDILoc = cDILoc->getInlinedAt(); + if (oDILoc) { + + instFilename = oDILoc->getFilename(); + instLine = oDILoc->getLine(); + + } + + } + + (void)instLine; + + /* Continue only if we know where we actually are */ + if (!instFilename.str().empty()) { + + for (std::list::iterator it = myWhitelist.begin(); + it != myWhitelist.end(); ++it) { + + /* We don't check for filename equality here because + * filenames might actually be full paths. Instead we + * check that the actual filename ends in the filename + * specified in the list. 
*/ + if (instFilename.str().length() >= it->length()) { + + if (instFilename.str().compare( + instFilename.str().length() - it->length(), + it->length(), *it) == 0) { + + instrumentBlock = true; + break; + + } + + } + + } + + } + + } + + /* Either we couldn't figure out our location or the location is + * not whitelisted, so we skip instrumentation. */ + if (!instrumentBlock) continue; + + } + if ((switchInst = dyn_cast(BB.getTerminator()))) { if (switchInst->getNumCases() < 1) continue; diff --git a/test/test.sh b/test/test.sh index 93a4e008..9676d22d 100755 --- a/test/test.sh +++ b/test/test.sh @@ -153,10 +153,10 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" && { ../afl-cmin -i in -o in2 -- ./test-instr.plain > /dev/null CNT=`ls in2/ | wc -l` case "$CNT" in -1| *1) $ECHO "$GREEN[+] afl-cmin correctly minimized the number of testcases" ;; -*) $ECHO "$RED[!] afl-cmin did not correctly minimizethe number of testcases" - CODE=1 - ;; + *1) $ECHO "$GREEN[+] afl-cmin correctly minimized the number of testcases" ;; + *) $ECHO "$RED[!] afl-cmin did not correctly minimize the number of testcases" + CODE=1 + ;; esac ../afl-tmin -i in/in2 -o in2/in2 -- ./test-instr.plain > /dev/null 2>&1 SIZE=`ls -l in2/in2 2> /dev/null | awk '{print$5}'` @@ -259,10 +259,10 @@ test -e ../afl-clang-fast -a -e ../split-switches-pass.so && { ../afl-cmin -i in -o in2 -- ./test-instr.plain > /dev/null CNT=`ls in2/ | wc -l` case "$CNT" in -1| *1) $ECHO "$GREEN[+] afl-cmin correctly minimized the number of testcases" ;; -*) $ECHO "$RED[!] afl-cmin did not correctly minimize the number of testcases" - CODE=1 - ;; + *1) $ECHO "$GREEN[+] afl-cmin correctly minimized the number of testcases" ;; + *) $ECHO "$RED[!] 
afl-cmin did not correctly minimize the number of testcases" + CODE=1 + ;; esac ../afl-tmin -i in/in2 -o in2/in2 -- ./test-instr.plain > /dev/null 2>&1 SIZE=`ls -l in2/in2 2> /dev/null | awk '{print$5}'` -- cgit 1.4.1 From 3561a1b775989a0cf37221f810eec601cdb14bcf Mon Sep 17 00:00:00 2001 From: van Hauser Date: Mon, 27 Jan 2020 00:19:59 +0100 Subject: dockerfile update --- Dockerfile | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index 1947f211..7bb60610 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,6 +9,9 @@ RUN apt-get update && apt-get install -y \ clang \ clang-9 \ flex \ + git \ + python3.7 \ + python3.7-dev \ gcc-9 \ gcc-9-plugin-dev \ gcc-9-multilib \ @@ -23,10 +26,12 @@ RUN apt-get update && apt-get install -y \ ca-certificates \ libpixman-1-dev \ && rm -rf /var/lib/apt/lists/* + ARG CC=gcc-9 ARG CXX=g++-9 ARG LLVM_CONFIG=llvm-config-9 -COPY . /app -RUN cd /app && make clean && make distrib && \ - make install && cd .. && rm -rf /app -WORKDIR /work + +RUN git clone https://github.com/vanhauser-thc/AFLplusplus + +RUN cd AFLplusplus && make clean && make distrib && \ + make install && cd .. && rm -rf AFLplusplus -- cgit 1.4.1 From fa64c0d4a5a6eb1eddd13071e3b326778bf6db5a Mon Sep 17 00:00:00 2001 From: van Hauser Date: Mon, 27 Jan 2020 10:40:13 +0100 Subject: important fixes for afl-cmin --- afl-cmin | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/afl-cmin b/afl-cmin index de5a66ed..865809e1 100755 --- a/afl-cmin +++ b/afl-cmin @@ -292,6 +292,24 @@ BEGIN { exit 1 } + # Check for the more efficient way to copy files... 
+ if (0 != system("mkdir -p -m 0700 "trace_dir)) { + print "[-] Error: Cannot create directory "trace_dir > "/dev/stderr" + exit 1 + } + if (0 != system("mkdir -p -m 0700 "trace_dir"/.state/auto_extras")) { + print "[-] Error: Cannot create directory "trace_dir"/.state/auto_extras" > "/dev/stderr" + exit 1 + } + if (0 != system("mkdir -p -m 0700 "trace_dir"/.state/redundant_edges")) { + print "[-] Error: Cannot create directory "trace_dir"/.state/redundant_edges" > "/dev/stderr" + exit 1 + } + if (0 != system("mkdir -p -m 0700 "trace_dir"/.state/deterministic_done")) { + print "[-] Error: Cannot create directory "trace_dir"/.state/deterministic_done" > "/dev/stderr" + exit 1 + } + if (stdin_file) { # truncate input file printf "" > stdin_file @@ -339,12 +357,6 @@ BEGIN { exit 1 } - # Check for the more efficient way to copy files... - if (0 != system("mkdir -p -m 0700 "trace_dir)) { - print "[-] Error: Cannot create directory "trace_dir > "/dev/stderr" - exit 1 - } - if (0 == system("ln "in_dir"/"first_file" "trace_dir"/.link_test")) { cp_tool = "ln" } else { @@ -360,7 +372,7 @@ BEGIN { system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -- \""target_bin"\" "prog_args_string" <\""in_dir"/"first_file"\"") } else { system("cp "in_dir"/"first_file" "stdin_file) - system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" Date: Mon, 27 Jan 2020 11:47:39 +0100 Subject: nearing afl-cmin perfection :-) --- afl-cmin | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/afl-cmin b/afl-cmin index 865809e1..f6e76263 100755 --- a/afl-cmin +++ b/afl-cmin @@ -297,18 +297,6 @@ BEGIN { print "[-] Error: Cannot create directory "trace_dir > "/dev/stderr" exit 1 } - if (0 != system("mkdir -p -m 0700 "trace_dir"/.state/auto_extras")) { - print "[-] Error: Cannot create directory 
"trace_dir"/.state/auto_extras" > "/dev/stderr" - exit 1 - } - if (0 != system("mkdir -p -m 0700 "trace_dir"/.state/redundant_edges")) { - print "[-] Error: Cannot create directory "trace_dir"/.state/redundant_edges" > "/dev/stderr" - exit 1 - } - if (0 != system("mkdir -p -m 0700 "trace_dir"/.state/deterministic_done")) { - print "[-] Error: Cannot create directory "trace_dir"/.state/deterministic_done" > "/dev/stderr" - exit 1 - } if (stdin_file) { # truncate input file @@ -342,7 +330,7 @@ BEGIN { } else { stat_format = "-f '%z %N'" # *BSD, MacOS } - cmdline = "cd "in_dir" && find . -type f -exec stat "stat_format" \\{\\} \\; | sort -n | cut -d' ' -f2-" + cmdline = "cd "in_dir" && find . -maxdepth 1 -type f -exec stat "stat_format" \\{\\} \\; | sort -n | cut -d' ' -f2-" while (cmdline | getline) { infilesSmallToBig[i++] = $0 } -- cgit 1.4.1 From 3374ada561e5dcfe052c41837fc15bd29287b285 Mon Sep 17 00:00:00 2001 From: van Hauser Date: Mon, 27 Jan 2020 11:48:49 +0100 Subject: nearing afl-cmin perfection :-) --- afl-cmin | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/afl-cmin b/afl-cmin index f6e76263..e9d713aa 100755 --- a/afl-cmin +++ b/afl-cmin @@ -360,7 +360,7 @@ BEGIN { system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -- \""target_bin"\" "prog_args_string" <\""in_dir"/"first_file"\"") } else { system("cp "in_dir"/"first_file" "stdin_file) - system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" < /dev/null") + system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" Date: Mon, 27 Jan 2020 13:06:00 +0100 Subject: updated binary_fuzzing document --- docs/binaryonly_fuzzing.md | 153 ++++++++++++++++++++++++++++++++++++++++++++ 
docs/binaryonly_fuzzing.txt | 144 ----------------------------------------- 2 files changed, 153 insertions(+), 144 deletions(-) create mode 100644 docs/binaryonly_fuzzing.md delete mode 100644 docs/binaryonly_fuzzing.txt diff --git a/docs/binaryonly_fuzzing.md b/docs/binaryonly_fuzzing.md new file mode 100644 index 00000000..d22e4ce2 --- /dev/null +++ b/docs/binaryonly_fuzzing.md @@ -0,0 +1,153 @@ +#Fuzzing binary-only programs with afl++ + +afl++, libfuzzer and others are great if you have the source code, and +it allows for very fast and coverage guided fuzzing. + +However, if there is only the binary program and no source code available, +then standard `afl-fuzz -n` (dumb mode) is not effective. + +The following is a description of how these binaries can be fuzzed with afl++ + +!!!!! +TL;DR: try DYNINST with afl-dyninst. If it produces too many crashes then + use afl -Q qemu_mode, or better: use both in parallel. +!!!!! + + +##QEMU +Qemu is the "native" solution to the program. +It is available in the ./qemu_mode/ directory and once compiled it can +be accessed by the afl-fuzz -Q command line option. +The speed decrease is at about 50% +It is the easiest to use alternative and even works for cross-platform binaries. + +Note that there is also honggfuzz: [https://github.com/google/honggfuzz](https://github.com/google/honggfuzz) +which now has a qemu_mode, but its performance is just 1.5%! + +As it is included in afl++ this needs no URL. + + +##WINE+QEMU +Wine mode can run Win32 PE binaries with the QEMU instrumentation. +It needs Wine, python3 and the pefile python package installed. + +As it is included in afl++ this needs no URL. + + +##UNICORN +Unicorn is a fork of QEMU. The instrumentation is, therefore, very similar. +In contrast to QEMU, Unicorn does not offer a full system or even userland +emulation. Runtime environment and/or loaders have to be written from scratch, +if needed. On top, block chaining has been removed. 
This means the speed boost +introduced in the patched QEMU Mode of afl++ cannot simply be ported over to +Unicorn. For further information, check out ./unicorn_mode.txt. + +As it is included in afl++ this needs no URL. + + +##DYNINST +Dyninst is a binary instrumentation framework similar to Pintool and Dynamorio +(see far below). However whereas Pintool and Dynamorio work at runtime, dyninst +instruments the target at load time, and then let it run - or save the +binary with the changes. +This is great for some things, e.g. fuzzing, and not so effective for others, +e.g. malware analysis. + +So what we can do with dyninst is taking every basic block, and put afl's +instrumention code in there - and then save the binary. +Afterwards we can just fuzz the newly saved target binary with afl-fuzz. +Sounds great? It is. The issue though - it is a non-trivial problem to +insert instructions, which change addresses in the process space, so that +everything is still working afterwards. Hence more often than not binaries +crash when they are run. + +The speed decrease is about 15-35%, depending on the optimization options +used with afl-dyninst. + +So if dyninst works, it is the best option available. Otherwise it just doesn't +work well. + +[https://github.com/vanhauser-thc/afl-dyninst](https://github.com/vanhauser-thc/afl-dyninst) + + +##INTEL-PT +If you have a newer Intel CPU, you can make use of Intels processor trace. +The big issue with Intel's PT is the small buffer size and the complex +encoding of the debug information collected through PT. +This makes the decoding very CPU intensive and hence slow. +As a result, the overall speed decrease is about 70-90% (depending on +the implementation and other factors). + +There are two afl intel-pt implementations: + +1. [https://github.com/junxzm1990/afl-pt](https://github.com/junxzm1990/afl-pt) + => this needs Ubuntu 14.04.05 without any updates and the 4.4 kernel. + +2. 
[https://github.com/hunter-ht-2018/ptfuzzer](https://github.com/hunter-ht-2018/ptfuzzer) + => this needs a 4.14 or 4.15 kernel. the "nopti" kernel boot option must + be used. This one is faster than the other. + +Note that there is also honggfuzz: https://github.com/google/honggfuzz +But its IPT performance is just 6%! + + +##CORESIGHT +Coresight is ARM's answer to Intel's PT. +There is no implementation so far which handle coresight and getting +it working on an ARM Linux is very difficult due to custom kernel building +on embedded systems is difficult. And finding one that has coresight in +the ARM chip is difficult too. +My guess is that it is slower than Qemu, but faster than Intel PT. + +If anyone finds any coresight implementation for afl please ping me: +vh@thc.org + + +##FRIDA +Frida is a dynamic instrumentation engine like Pintool, Dyninst and Dynamorio. +What is special is that it is written Python, and scripted with Javascript. +It is mostly used to reverse binaries on mobile phones however can be used +everywhere. + +There is a WIP fuzzer available at [https://github.com/andreafioraldi/frida-fuzzer](https://github.com/andreafioraldi/frida-fuzzer) + + +##PIN & DYNAMORIO +Pintool and Dynamorio are dynamic instrumentation engines, and they can be +used for getting basic block information at runtime. +Pintool is only available for Intel x32/x64 on Linux, Mac OS and Windows +whereas Dynamorio is additionally available for ARM and AARCH64. +Dynamorio is also 10x faster than Pintool. + +The big issue with Dynamorio (and therefore Pintool too) is speed. +Dynamorio has a speed decrease of 98-99% +Pintool has a speed decrease of 99.5% + +Hence Dynamorio is the option to go for if everything fails, and Pintool +only if Dynamorio fails too. 
+ +Dynamorio solutions: + * [https://github.com/vanhauser-thc/afl-dynamorio](https://github.com/vanhauser-thc/afl-dynamorio) + * [https://github.com/mxmssh/drAFL](https://github.com/mxmssh/drAFL) + * [https://github.com/googleprojectzero/winafl/](https://github.com/googleprojectzero/winafl/) <= very good but windows only + +Pintool solutions: + * [https://github.com/vanhauser-thc/afl-pin](https://github.com/vanhauser-thc/afl-pin) + * [https://github.com/mothran/aflpin](https://github.com/mothran/aflpin) + * [https://github.com/spinpx/afl_pin_mode](https://github.com/spinpx/afl_pin_mode) <= only old Pintool version supported + + +##Non-AFL solutions +There are many binary-only fuzzing frameworks. +Some are great for CTFs but don't work with large binaries, others are very +slow but have good path discovery, some are very hard to set-up ... + +* QSYM: [https://github.com/sslab-gatech/qsym](https://github.com/sslab-gatech/qsym) +* Manticore: [https://github.com/trailofbits/manticore](https://github.com/trailofbits/manticore) +* S2E: [https://github.com/S2E](https://github.com/S2E) +* + + +## Closing words + +That's it! News, corrections, updates? Send an email to vh@thc.org diff --git a/docs/binaryonly_fuzzing.txt b/docs/binaryonly_fuzzing.txt deleted file mode 100644 index f8d68cd8..00000000 --- a/docs/binaryonly_fuzzing.txt +++ /dev/null @@ -1,144 +0,0 @@ - -Fuzzing binary-only programs with afl++ -======================================= - -afl++, libfuzzer and others are great if you have the source code, and -it allows for very fast and coverage guided fuzzing. - -However, if there is only the binary program and no source code available, -then standard `afl-fuzz -n` (dumb mode) is not effective. - -The following is a description of how these binaries can be fuzzed with afl++ - -!!!!! -TL;DR: try DYNINST with afl-dyninst. If it produces too many crashes then - use afl -Q qemu_mode, or better: use both in parallel. -!!!!! 
- - -QEMU ----- -Qemu is the "native" solution to the program. -It is available in the ./qemu_mode/ directory and once compiled it can -be accessed by the afl-fuzz -Q command line option. -The speed decrease is at about 50% -It is the easiest to use alternative and even works for cross-platform binaries. - -As it is included in afl++ this needs no URL. - -WINE+QEMU ---------- -Wine mode can run Win32 PE binaries with the QEMU instrumentation. -It needs Wine, python3 and the pefile python package installed. - -UNICORN -------- -Unicorn is a fork of QEMU. The instrumentation is, therefore, very similar. -In contrast to QEMU, Unicorn does not offer a full system or even userland emulation. -Runtime environment and/or loaders have to be written from scratch, if needed. -On top, block chaining has been removed. This means the speed boost introduced in -the patched QEMU Mode of afl++ cannot simply be ported over to Unicorn. -For further information, check out ./unicorn_mode.txt. - - -DYNINST -------- -Dyninst is a binary instrumentation framework similar to Pintool and Dynamorio -(see far below). However whereas Pintool and Dynamorio work at runtime, dyninst -instruments the target at load time, and then let it run. -This is great for some things, e.g. fuzzing, and not so effective for others, -e.g. malware analysis. - -So what we can do with dyninst is taking every basic block, and put afl's -instrumention code in there - and then save the binary. -Afterwards we can just fuzz the newly saved target binary with afl-fuzz. -Sounds great? It is. The issue though - it is a non-trivial problem to -insert instructions, which change addresses in the process space, so -everything is still working afterwards. Hence more often than not binaries -crash when they are run (because of instrumentation). - -The speed decrease is about 15-35%, depending on the optimization options -used with afl-dyninst. - -So if dyninst works, it is the best option available. 
Otherwise it just doesn't -work well. - -https://github.com/vanhauser-thc/afl-dyninst - - -INTEL-PT --------- -If you have a newer Intel CPU, you can make use of Intels processor trace. -The big issue with Intel's PT is the small buffer size and the complex -encoding of the debug information collected through PT. -This makes the decoding very CPU intensive and hence slow. -As a result, the overall speed decrease is about 70-90% (depending on -the implementation and other factors). - -There are two afl intel-pt implementations: - -1. https://github.com/junxzm1990/afl-pt - => this needs Ubuntu 14.04.05 without any updates and the 4.4 kernel. - -2. https://github.com/hunter-ht-2018/ptfuzzer - => this needs a 4.14 or 4.15 kernel. the "nopti" kernel boot option must - be used. This one is faster than the other. - - -CORESIGHT ---------- - -Coresight is ARM's answer to Intel's PT. -There is no implementation so far which handle coresight and getting -it working on an ARM Linux is very difficult due to custom kernel building -on embedded systems is difficult. And finding one that has coresight in -the ARM chip is difficult too. -My guess is that it is slower than Qemu, but faster than Intel PT. -If anyone finds any coresight implementation for afl please ping me: -vh@thc.org - - -PIN & DYNAMORIO ---------------- - -Pintool and Dynamorio are dynamic instrumentation engines, and they can be -used for getting basic block information at runtime. -Pintool is only available for Intel x32/x64 on Linux, Mac OS and Windows -whereas Dynamorio is additionally available for ARM and AARCH64. -Dynamorio is also 10x faster than Pintool. - -The big issue with Dynamorio (and therefore Pintool too) is speed. -Dynamorio has a speed decrease of 98-99% -Pintool has a speed decrease of 99.5% - -Hence Dynamorio is the option to go for if everything fails, and Pintool -only if Dynamorio fails too. 
- -Dynamorio solutions: - https://github.com/vanhauser-thc/afl-dynamorio - https://github.com/mxmssh/drAFL - https://github.com/googleprojectzero/winafl/ <= very good but windows only - -Pintool solutions: - https://github.com/vanhauser-thc/afl-pin - https://github.com/mothran/aflpin - https://github.com/spinpx/afl_pin_mode <= only old Pintool version supported - - -Non-AFL solutions ------------------ - -There are many binary-only fuzzing frameworks. Some are great for CTFs but don't -work with large binaries, others are very slow but have good path discovery, -some are very hard to set-up ... - -QSYM: https://github.com/sslab-gatech/qsym -Manticore: https://github.com/trailofbits/manticore -S2E: https://github.com/S2E - - - - -That's it! -News, corrections, updates? -Email vh@thc.org -- cgit 1.4.1 From 38232979587b6c37b024f22849b311d7e6962edf Mon Sep 17 00:00:00 2001 From: Dominik Maier Date: Mon, 27 Jan 2020 13:29:22 +0100 Subject: Added persistent mode sample --- unicorn_mode/samples/persistent/.gitignore | 3 + unicorn_mode/samples/persistent/COMPILE.md | 24 ++ unicorn_mode/samples/persistent/Makefile | 42 ++++ unicorn_mode/samples/persistent/harness.c | 269 +++++++++++++++++++++ .../samples/persistent/persistent_target.c | 39 +++ .../samples/persistent/persistent_target_x86_64 | Bin 0 -> 16544 bytes unicorn_mode/samples/persistent/sample_all.sh | 18 ++ .../samples/persistent/sample_inputs/sample1.bin | 1 + .../samples/persistent/sample_inputs/sample2.bin | Bin 0 -> 1 bytes .../samples/persistent/sample_inputs/sample3.bin | 1 + .../samples/persistent/sample_inputs/sample4.bin | 1 + .../samples/persistent/sample_inputs/sample5.bin | 1 + .../samples/persistent/simple_target_noncrashing.c | 33 +++ .../samples/persistent/simple_target_x86_64 | Bin 0 -> 17624 bytes 14 files changed, 432 insertions(+) create mode 100644 unicorn_mode/samples/persistent/.gitignore create mode 100644 unicorn_mode/samples/persistent/COMPILE.md create mode 100644 
unicorn_mode/samples/persistent/Makefile create mode 100644 unicorn_mode/samples/persistent/harness.c create mode 100644 unicorn_mode/samples/persistent/persistent_target.c create mode 100644 unicorn_mode/samples/persistent/persistent_target_x86_64 create mode 100644 unicorn_mode/samples/persistent/sample_all.sh create mode 100644 unicorn_mode/samples/persistent/sample_inputs/sample1.bin create mode 100644 unicorn_mode/samples/persistent/sample_inputs/sample2.bin create mode 100644 unicorn_mode/samples/persistent/sample_inputs/sample3.bin create mode 100644 unicorn_mode/samples/persistent/sample_inputs/sample4.bin create mode 100644 unicorn_mode/samples/persistent/sample_inputs/sample5.bin create mode 100644 unicorn_mode/samples/persistent/simple_target_noncrashing.c create mode 100644 unicorn_mode/samples/persistent/simple_target_x86_64 diff --git a/unicorn_mode/samples/persistent/.gitignore b/unicorn_mode/samples/persistent/.gitignore new file mode 100644 index 00000000..3e446132 --- /dev/null +++ b/unicorn_mode/samples/persistent/.gitignore @@ -0,0 +1,3 @@ +harness +harness-debug +out diff --git a/unicorn_mode/samples/persistent/COMPILE.md b/unicorn_mode/samples/persistent/COMPILE.md new file mode 100644 index 00000000..781f15c0 --- /dev/null +++ b/unicorn_mode/samples/persistent/COMPILE.md @@ -0,0 +1,24 @@ +# C Sample + +This shows a simple persistent harness for unicornafl in C +In contrast to the normal c harness, this harness manually resets the unicorn state on each new input. +Thanks to this, we can rerun the testcase in unicorn multiple times, without the need to fork again. + +## Compiling sample.c + +The target can be built using the `make` command. +Just make sure you have built unicorn support first: +```bash +cd /path/to/afl/unicorn_mode +./build_unicorn_support.sh +``` + +## Compiling persistent_target.c + +You don't need to compile persistent_target.c since a X86_64 binary version is +pre-built and shipped in this sample folder. 
This file documents how the binary +was built in case you want to rebuild it or recompile it for any reason. + +The pre-built binary (persistent_target_x86_64.bin) was built using -g -O0 in gcc. + +We then load the binary we execute the main function directly. diff --git a/unicorn_mode/samples/persistent/Makefile b/unicorn_mode/samples/persistent/Makefile new file mode 100644 index 00000000..fe100490 --- /dev/null +++ b/unicorn_mode/samples/persistent/Makefile @@ -0,0 +1,42 @@ +# UnicornAFL Usage +# Original Unicorn Example Makefile by Nguyen Anh Quynh , 2015 +# Adapted for AFL++ by domenukk , 2020 + +UNAME_S := $(shell uname -s) + +LIBDIR = ../../unicornafl +BIN_EXT = +AR_EXT = a + +# Verbose output? +V ?= 0 + +CFLAGS += -Wall -Werror -I../../unicornafl/include + +LDFLAGS += -L$(LIBDIR) -lpthread -lm +ifeq ($(UNAME_S), Linux) +LDFLAGS += -lrt +endif + +ifneq ($(CROSS),) +CC = $(CROSS)gcc +endif + +.PHONY: all clean + +all: harness + +clean: + rm -rf *.o harness harness-debug + +harness.o: harness.c ../../unicornafl/include/unicorn/*.h + ${CC} ${CFLAGS} -O3 -c $< + +harness-debug.o: harness.c ../../unicornafl/include/unicorn/*.h + ${CC} ${CFLAGS} -g -c $< -o $@ + +harness: harness.o + ${CC} -L${LIBDIR} $< ../../unicornafl/libunicornafl.a $(LDFLAGS) -o $@ + +debug: harness-debug.o + ${CC} -L${LIBDIR} $< ../../unicornafl/libunicornafl.a $(LDFLAGS) -o harness-debug diff --git a/unicorn_mode/samples/persistent/harness.c b/unicorn_mode/samples/persistent/harness.c new file mode 100644 index 00000000..d8ebffbc --- /dev/null +++ b/unicorn_mode/samples/persistent/harness.c @@ -0,0 +1,269 @@ +/* + Persistent test harness for AFL++'s unicornafl c mode. + + This loads the persistent_target.bin binary (precompiled as X86_64 code) into + Unicorn's memory map for emulation, places the specified input into + the argv buffer (handed in as first parameter), and executes 'main()'. + Any crashes during emulation will automatically be handled by the afl-fuzz() function. 
+ + Run under AFL as follows: + + $ cd /unicorn_mode/samples/persistent/ + $ make + $ ../../../afl-fuzz -m none -i sample_inputs -o out -- ./harness @@ + + (Re)run a simgle input with block tracing using: + + $ ./harness -t [inputfile] +*/ + +// This is not your everyday Unicorn. +#define UNICORN_AFL + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +// Path to the file containing the binary to emulate +#define BINARY_FILE ("persistent_target_x86_64") + +// Memory map for the code to be tested +// Arbitrary address where code to test will be loaded +static const int64_t BASE_ADDRESS = 0x100000; +static const int64_t CODE_ADDRESS = 0x101139; +static const int64_t END_ADDRESS = 0x10120d; +// Address of the stack (Some random address again) +static const int64_t STACK_ADDRESS = (((int64_t) 0x01) << 58); +// Size of the stack (arbitrarily chosen, just make it big enough) +static const int64_t STACK_SIZE = 0x10000; +// Location where the input will be placed (make sure the emulated program knows this somehow, too ;) ) +static const int64_t INPUT_LOCATION = 0x10000; +// Inside the location, we have an ofset in our special case +static const int64_t INPUT_OFFSET = 0x16; +// Maximum allowable size of mutated data from AFL +static const int64_t INPUT_SIZE_MAX = 0x10000; +// Alignment for unicorn mappings (seems to be needed) +static const int64_t ALIGNMENT = 0x1000; + +// In our special case, we emulate main(), so argc is needed. 
+static const uint64_t EMULATED_ARGC = 2; + +// The return from our fake strlen +static size_t current_input_len = 0; + +static void hook_block(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) { + printf(">>> Tracing basic block at 0x%"PRIx64 ", block size = 0x%x\n", address, size); +} + +static void hook_code(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) { + printf(">>> Tracing instruction at 0x%"PRIx64 ", instruction size = 0x%x\n", address, size); +} + +/* +The sample uses strlen, since we don't have a loader or libc, we'll fake it. +We know the strlen will return the length of argv[1] that we just planted. +It will be a lot faster than an actual strlen for this specific purpose. +*/ +static void hook_strlen(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) { + //Hook + //116b: e8 c0 fe ff ff call 1030 + // We place the return at RAX + uc_reg_write(uc, UC_X86_REG_RAX, &current_input_len); + // We skip the actual call by updating RIP + //printf("Strlen hook at addr 0x%lx (size: 0x%x), result: %ld\n", address, size, current_input_len); + uint64_t next_addr = address + size; + uc_reg_write(uc, UC_X86_REG_RIP, &next_addr); +} + +/* Unicorn page needs to be 0x1000 aligned, apparently */ +static uint64_t pad(uint64_t size) { + if (size % ALIGNMENT == 0) return size; + return ((size / ALIGNMENT) + 1) * ALIGNMENT; +} + +/* returns the filesize in bytes, -1 on error. */ +static off_t afl_mmap_file(char *filename, char **buf_ptr) { + + off_t ret = -1; + + int fd = open(filename, O_RDONLY); + + struct stat st = {0}; + if (fstat(fd, &st)) goto exit; + + off_t in_len = st.st_size; + if (in_len == -1) { + /* This can only ever happen on 32 bit if the file is exactly 4gb.
*/ + fprintf(stderr, "Filesize of %s too large", filename); + goto exit; + } + + *buf_ptr = mmap(0, in_len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + + if (*buf_ptr != MAP_FAILED) ret = in_len; + +exit: + close(fd); + return ret; + +} + +/* Place the input at the right spot inside unicorn */ +static bool place_input_callback( + uc_engine *uc, + char *input, + size_t input_len, + uint32_t persistent_round, + void *data +){ + // printf("Placing input with len %ld to %x\n", input_len, DATA_ADDRESS); + if (input_len < 1 || input_len >= INPUT_SIZE_MAX - INPUT_OFFSET) { + // Test input too short or too long, ignore this testcase + return false; + } + + // For persistent mode, we have to set up stack and memory each time. + uc_reg_write(uc, UC_X86_REG_RIP, &CODE_ADDRESS); // Set the instruction pointer back + // Set up the function parameters accordingly RSI, RDI (see calling convention/disassembly) + uc_reg_write(uc, UC_X86_REG_RSI, &INPUT_LOCATION); // argv + uc_reg_write(uc, UC_X86_REG_RDI, &EMULATED_ARGC); // argc == 2 + + // We need a valid c string, make sure it never goes out of bounds. + input[input_len-1] = '\0'; + // Write the testcase to unicorn. + uc_mem_write(uc, INPUT_LOCATION + INPUT_OFFSET, input, input_len); + + // store input_len for the faux strlen hook + current_input_len = input_len; + + return true; +} + +static void mem_map_checked(uc_engine *uc, uint64_t addr, size_t size, uint32_t mode) { + size = pad(size); + //printf("SIZE %lx, align: %lx\n", size, ALIGNMENT); + uc_err err = uc_mem_map(uc, addr, size, mode); + if (err != UC_ERR_OK) { + printf("Error mapping %ld bytes at 0x%lx: %s (mode: %d)\n", size, addr, uc_strerror(err), mode); + exit(1); + } +} + +int main(int argc, char **argv, char **envp) { + if (argc == 1) { + printf("Test harness for simple_target.bin. 
Usage: harness [-t] \n"); + exit(1); + } + bool tracing = false; + char *filename = argv[1]; + if (argc > 2 && !strcmp(argv[1], "-t")) { + tracing = true; + filename = argv[2]; + } + + uc_engine *uc; + uc_err err; + uc_hook hooks[2]; + char *file_contents; + + // Initialize emulator in X86_64 mode + err = uc_open(UC_ARCH_X86, UC_MODE_64, &uc); + if (err) { + printf("Failed on uc_open() with error returned: %u (%s)\n", + err, uc_strerror(err)); + return -1; + } + + printf("Loading data input from %s\n", BINARY_FILE); + off_t len = afl_mmap_file(BINARY_FILE, &file_contents); + if (len < 0) { + perror("Could not read binary to emulate"); + return -2; + } + if (len == 0) { + fprintf(stderr, "File at '%s' is empty\n", BINARY_FILE); + return -3; + } + + // Map memory. + mem_map_checked(uc, BASE_ADDRESS, len, UC_PROT_ALL); + printf("Len: %lx", len); + fflush(stdout); + + // write machine code to be emulated to memory + if (uc_mem_write(uc, BASE_ADDRESS, file_contents, len) != UC_ERR_OK) { + printf("Error writing to CODE"); + } + + // Release copied contents + munmap(file_contents, len); + + // Set the program counter to the start of the code + uint64_t start_address = CODE_ADDRESS; // address of entry point of main() + uint64_t end_address = END_ADDRESS; // Address of last instruction in main() + uc_reg_write(uc, UC_X86_REG_RIP, &start_address); // address of entry point of main() + + // Setup the Stack + mem_map_checked(uc, STACK_ADDRESS - STACK_SIZE, STACK_SIZE, UC_PROT_READ | UC_PROT_WRITE); + uint64_t stack_val = STACK_ADDRESS; + printf("%ld", stack_val); + uc_reg_write(uc, UC_X86_REG_RSP, &stack_val); + + // reserve some space for our input data + mem_map_checked(uc, INPUT_LOCATION, INPUT_SIZE_MAX, UC_PROT_READ); + + // build a "dummy" argv with length 2 at 0x10000: + // 0x10000 argv[0] NULL + // 0x10008 argv[1] (char *)0x10016 --. points to the next offset. + // 0x10016 argv[1][0], ... <-^ contains the actual input data.
(INPUT_LOCATION + INPUT_OFFSET) + + uc_mem_write(uc, 0x10008, "\x16\x00\x01", 3); // little endian of 0x10016, see above + + + // If we want tracing output, set the callbacks here + if (tracing) { + // tracing all basic blocks with customized callback + uc_hook_add(uc, &hooks[0], UC_HOOK_BLOCK, hook_block, NULL, 1, 0); + uc_hook_add(uc, &hooks[1], UC_HOOK_CODE, hook_code, NULL, BASE_ADDRESS, BASE_ADDRESS + len - 1); + } + + // Add our strlen hook (for this specific testcase only) + int strlen_hook_pos = BASE_ADDRESS + 0x116b; + uc_hook strlen_hook; + uc_hook_add(uc, &strlen_hook, UC_HOOK_CODE, hook_strlen, NULL, strlen_hook_pos, strlen_hook_pos); + + printf("Starting to fuzz :)\n"); + fflush(stdout); + + // let's gooo + uc_afl_ret afl_ret = uc_afl_fuzz( + uc, // The unicorn instance we prepared + filename, // Filename of the input to process. In AFL this is usually the '@@' placeholder, outside it's any input file. + place_input_callback, // Callback that places the input (automatically loaded from the file at filename) in the unicorn instance + &end_address, // Where to exit (this is an array) + 1, // Count of end addresses + NULL, // Optional callback to run after each exec + false, // true, if the optional callback should be run also for non-crashes + 1000, // For persistent mode: How many rounds to run + NULL // additional data pointer + ); + switch(afl_ret) { + case UC_AFL_RET_ERROR: + printf("Error starting to fuzz"); + return -3; + break; + case UC_AFL_RET_NO_AFL: + printf("No AFL attached - We are done with a single run."); + break; + default: + break; + } + return 0; +} diff --git a/unicorn_mode/samples/persistent/persistent_target.c b/unicorn_mode/samples/persistent/persistent_target.c new file mode 100644 index 00000000..5b866f86 --- /dev/null +++ b/unicorn_mode/samples/persistent/persistent_target.c @@ -0,0 +1,39 @@ +/* + * Sample target file to test afl-unicorn fuzzing capabilities.
+ * This is a very trivial example that will crash pretty easily + * in several different exciting ways. + * + * Input is read as a C string from argv[1], so make sure that + * your Unicorn emulation sets up argv and places the user data + * where argv[1] points. + * + * Written by Nathan Voss + * Adapted by Lukas Seidel + */ +#include +#include + + +int main(int argc, char** argv) { + if (argc < 2) return -1; + + char *data_buf = argv[1]; + uint64_t data_len = strlen(data_buf); + if (data_len < 20) return -2; + + for (; data_len --> 0 ;) { + if (data_len >= 18) continue; + if (data_len > 2 && data_len < 18) { + ((char *)data_len)[(uint64_t)data_buf] = data_buf[data_len + 1]; + } else if (data_buf[9] == 0x90 && data_buf[10] != 0x00 && data_buf[11] == 0x90) { + // Cause a crash if data[10] is not zero, but [9] and [11] are 0x90 + unsigned char invalid_read = *(unsigned char *) 0x00000000; + } + } + if (data_buf[0] > 0x10 && data_buf[0] < 0x20 && data_buf[1] > data_buf[2]) { + // Cause an 'invalid read' crash if (0x10 < data[0] < 0x20) and data[1] > data[2] + unsigned char invalid_read = *(unsigned char *) 0x00000000; + } + + return 0; +} diff --git a/unicorn_mode/samples/persistent/persistent_target_x86_64 b/unicorn_mode/samples/persistent/persistent_target_x86_64 new file mode 100644 index 00000000..22e04357 Binary files /dev/null and b/unicorn_mode/samples/persistent/persistent_target_x86_64 differ diff --git a/unicorn_mode/samples/persistent/sample_all.sh b/unicorn_mode/samples/persistent/sample_all.sh new file mode 100644 index 00000000..01daf365 --- /dev/null +++ b/unicorn_mode/samples/persistent/sample_all.sh @@ -0,0 +1,18 @@ +#!/bin/sh + +[ -z "${UNAME}" ] && UNAME=$(uname) + +DIR=`dirname $0` + +if [ "$UNAME" = Darwin ]; then + export DYLD_LIBRARY_PATH=../../unicorn +else + export LD_LIBRARY_PATH=../../unicorn +fi + + + +if [ ! test -e $DIR/harness]; then + echo "[!]
harness not found in $DIR" + exit 1 +fi \ No newline at end of file diff --git a/unicorn_mode/samples/persistent/sample_inputs/sample1.bin b/unicorn_mode/samples/persistent/sample_inputs/sample1.bin new file mode 100644 index 00000000..85df5078 --- /dev/null +++ b/unicorn_mode/samples/persistent/sample_inputs/sample1.bin @@ -0,0 +1 @@ +abcd \ No newline at end of file diff --git a/unicorn_mode/samples/persistent/sample_inputs/sample2.bin b/unicorn_mode/samples/persistent/sample_inputs/sample2.bin new file mode 100644 index 00000000..f76dd238 Binary files /dev/null and b/unicorn_mode/samples/persistent/sample_inputs/sample2.bin differ diff --git a/unicorn_mode/samples/persistent/sample_inputs/sample3.bin b/unicorn_mode/samples/persistent/sample_inputs/sample3.bin new file mode 100644 index 00000000..6b2aaa76 --- /dev/null +++ b/unicorn_mode/samples/persistent/sample_inputs/sample3.bin @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/unicorn_mode/samples/persistent/sample_inputs/sample4.bin b/unicorn_mode/samples/persistent/sample_inputs/sample4.bin new file mode 100644 index 00000000..71bd63e6 --- /dev/null +++ b/unicorn_mode/samples/persistent/sample_inputs/sample4.bin @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/unicorn_mode/samples/persistent/sample_inputs/sample5.bin b/unicorn_mode/samples/persistent/sample_inputs/sample5.bin new file mode 100644 index 00000000..aed2973e --- /dev/null +++ b/unicorn_mode/samples/persistent/sample_inputs/sample5.bin @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/unicorn_mode/samples/persistent/simple_target_noncrashing.c b/unicorn_mode/samples/persistent/simple_target_noncrashing.c new file mode 100644 index 00000000..00764473 --- /dev/null +++ b/unicorn_mode/samples/persistent/simple_target_noncrashing.c @@ -0,0 +1,33 @@ +/* + * Sample target file to test afl-unicorn fuzzing capabilities. + * This is a very trivial example that will crash pretty easily + * in several different exciting ways. 
+ * + * Input is assumed to come from a buffer located at DATA_ADDRESS + * (0x00300000), so make sure that your Unicorn emulation of this + * puts user data there. + * + * Written by Nathan Voss + * Adapted by Lukas Seidel + */ + + +int main(int argc, char** argv) { + if(argc < 2){ + return -1; + } + + char *data_buf = argv[1]; + + if len(data_buf < 20) { + if (data_buf[20] != 0) { + printf("Not crashing"); + } else if (data_buf[0] > 0x10 && data_buf[0] < 0x20 && data_buf[1] > data_buf[2]) { + printf("Also not crashing with databuf[0] == %c", data_buf[0]) + } else if (data_buf[9] == 0x00 && data_buf[10] != 0x00 && data_buf[11] == 0x00) { + // Cause a crash if data[10] is not zero, but [9] and [11] are zero + unsigned char invalid_read = *(unsigned char *) 0x00000000; + } + + return 0; +} diff --git a/unicorn_mode/samples/persistent/simple_target_x86_64 b/unicorn_mode/samples/persistent/simple_target_x86_64 new file mode 100644 index 00000000..560264fd Binary files /dev/null and b/unicorn_mode/samples/persistent/simple_target_x86_64 differ -- cgit 1.4.1 From 83481f9460f684883a66fdb38b55f6240a687f85 Mon Sep 17 00:00:00 2001 From: van Hauser Date: Mon, 27 Jan 2020 13:34:59 +0100 Subject: update binary_fuzzing doc --- docs/binaryonly_fuzzing.md | 212 +++++++++++++++++++++++---------------------- 1 file changed, 110 insertions(+), 102 deletions(-) diff --git a/docs/binaryonly_fuzzing.md b/docs/binaryonly_fuzzing.md index d22e4ce2..6eff30d7 100644 --- a/docs/binaryonly_fuzzing.md +++ b/docs/binaryonly_fuzzing.md @@ -1,153 +1,161 @@ -#Fuzzing binary-only programs with afl++ +# Fuzzing binary-only programs with afl++ -afl++, libfuzzer and others are great if you have the source code, and -it allows for very fast and coverage guided fuzzing. + afl++, libfuzzer and others are great if you have the source code, and + it allows for very fast and coverage guided fuzzing. 
-However, if there is only the binary program and no source code available, -then standard `afl-fuzz -n` (dumb mode) is not effective. + However, if there is only the binary program and no source code available, + then standard `afl-fuzz -n` (dumb mode) is not effective. -The following is a description of how these binaries can be fuzzed with afl++ + The following is a description of how these binaries can be fuzzed with afl++ -!!!!! -TL;DR: try DYNINST with afl-dyninst. If it produces too many crashes then - use afl -Q qemu_mode, or better: use both in parallel. -!!!!! + !!!!! + TL;DR: try DYNINST with afl-dyninst. If it produces too many crashes then + use afl -Q qemu_mode, or better: use both in parallel. + !!!!! -##QEMU -Qemu is the "native" solution to the program. -It is available in the ./qemu_mode/ directory and once compiled it can -be accessed by the afl-fuzz -Q command line option. -The speed decrease is at about 50% -It is the easiest to use alternative and even works for cross-platform binaries. +## QEMU -Note that there is also honggfuzz: [https://github.com/google/honggfuzz](https://github.com/google/honggfuzz) -which now has a qemu_mode, but its performance is just 1.5%! + Qemu is the "native" solution to the program. + It is available in the ./qemu_mode/ directory and once compiled it can + be accessed by the afl-fuzz -Q command line option. + The speed decrease is at about 50%. + It is the easiest to use alternative and even works for cross-platform binaries. -As it is included in afl++ this needs no URL. + Note that there is also honggfuzz: [https://github.com/google/honggfuzz](https://github.com/google/honggfuzz) + which now has a qemu_mode, but its performance is just 1.5%! + As it is included in afl++ this needs no URL. -##WINE+QEMU -Wine mode can run Win32 PE binaries with the QEMU instrumentation. -It needs Wine, python3 and the pefile python package installed. -As it is included in afl++ this needs no URL. 
+## WINE+QEMU + Wine mode can run Win32 PE binaries with the QEMU instrumentation. + It needs Wine, python3 and the pefile python package installed. -##UNICORN -Unicorn is a fork of QEMU. The instrumentation is, therefore, very similar. -In contrast to QEMU, Unicorn does not offer a full system or even userland -emulation. Runtime environment and/or loaders have to be written from scratch, -if needed. On top, block chaining has been removed. This means the speed boost -introduced in the patched QEMU Mode of afl++ cannot simply be ported over to -Unicorn. For further information, check out ./unicorn_mode.txt. + As it is included in afl++ this needs no URL. -As it is included in afl++ this needs no URL. +## UNICORN -##DYNINST -Dyninst is a binary instrumentation framework similar to Pintool and Dynamorio -(see far below). However whereas Pintool and Dynamorio work at runtime, dyninst -instruments the target at load time, and then let it run - or save the -binary with the changes. -This is great for some things, e.g. fuzzing, and not so effective for others, -e.g. malware analysis. + Unicorn is a fork of QEMU. The instrumentation is, therefore, very similar. + In contrast to QEMU, Unicorn does not offer a full system or even userland + emulation. Runtime environment and/or loaders have to be written from scratch, + if needed. On top, block chaining has been removed. This means the speed boost + introduced in the patched QEMU Mode of afl++ cannot simply be ported over to + Unicorn. For further information, check out ./unicorn_mode.txt. -So what we can do with dyninst is taking every basic block, and put afl's -instrumention code in there - and then save the binary. -Afterwards we can just fuzz the newly saved target binary with afl-fuzz. -Sounds great? It is. The issue though - it is a non-trivial problem to -insert instructions, which change addresses in the process space, so that -everything is still working afterwards. 
Hence more often than not binaries -crash when they are run. + As it is included in afl++ this needs no URL. -The speed decrease is about 15-35%, depending on the optimization options -used with afl-dyninst. -So if dyninst works, it is the best option available. Otherwise it just doesn't -work well. +## DYNINST -[https://github.com/vanhauser-thc/afl-dyninst](https://github.com/vanhauser-thc/afl-dyninst) + Dyninst is a binary instrumentation framework similar to Pintool and + Dynamorio (see far below). However whereas Pintool and Dynamorio work at + runtime, dyninst instruments the target at load time, and then let it run - + or save the binary with the changes. + This is great for some things, e.g. fuzzing, and not so effective for others, + e.g. malware analysis. + So what we can do with dyninst is taking every basic block, and put afl's + instrumention code in there - and then save the binary. + Afterwards we can just fuzz the newly saved target binary with afl-fuzz. + Sounds great? It is. The issue though - it is a non-trivial problem to + insert instructions, which change addresses in the process space, so that + everything is still working afterwards. Hence more often than not binaries + crash when they are run. -##INTEL-PT -If you have a newer Intel CPU, you can make use of Intels processor trace. -The big issue with Intel's PT is the small buffer size and the complex -encoding of the debug information collected through PT. -This makes the decoding very CPU intensive and hence slow. -As a result, the overall speed decrease is about 70-90% (depending on -the implementation and other factors). + The speed decrease is about 15-35%, depending on the optimization options + used with afl-dyninst. -There are two afl intel-pt implementations: + So if Dyninst works, it is the best option available. Otherwise it just + doesn't work well. -1. 
[https://github.com/junxzm1990/afl-pt](https://github.com/junxzm1990/afl-pt) - => this needs Ubuntu 14.04.05 without any updates and the 4.4 kernel. + [https://github.com/vanhauser-thc/afl-dyninst](https://github.com/vanhauser-thc/afl-dyninst) -2. [https://github.com/hunter-ht-2018/ptfuzzer](https://github.com/hunter-ht-2018/ptfuzzer) - => this needs a 4.14 or 4.15 kernel. the "nopti" kernel boot option must - be used. This one is faster than the other. -Note that there is also honggfuzz: https://github.com/google/honggfuzz -But its IPT performance is just 6%! +## INTEL-PT + If you have a newer Intel CPU, you can make use of Intels processor trace. + The big issue with Intel's PT is the small buffer size and the complex + encoding of the debug information collected through PT. + This makes the decoding very CPU intensive and hence slow. + As a result, the overall speed decrease is about 70-90% (depending on + the implementation and other factors). -##CORESIGHT -Coresight is ARM's answer to Intel's PT. -There is no implementation so far which handle coresight and getting -it working on an ARM Linux is very difficult due to custom kernel building -on embedded systems is difficult. And finding one that has coresight in -the ARM chip is difficult too. -My guess is that it is slower than Qemu, but faster than Intel PT. + There are two afl intel-pt implementations: -If anyone finds any coresight implementation for afl please ping me: -vh@thc.org + 1. [https://github.com/junxzm1990/afl-pt](https://github.com/junxzm1990/afl-pt) + => this needs Ubuntu 14.04.05 without any updates and the 4.4 kernel. + 2. [https://github.com/hunter-ht-2018/ptfuzzer](https://github.com/hunter-ht-2018/ptfuzzer) + => this needs a 4.14 or 4.15 kernel. the "nopti" kernel boot option must + be used. This one is faster than the other. -##FRIDA -Frida is a dynamic instrumentation engine like Pintool, Dyninst and Dynamorio. -What is special is that it is written Python, and scripted with Javascript. 
-It is mostly used to reverse binaries on mobile phones however can be used -everywhere. + Note that there is also honggfuzz: https://github.com/google/honggfuzz + But its IPT performance is just 6%! -There is a WIP fuzzer available at [https://github.com/andreafioraldi/frida-fuzzer](https://github.com/andreafioraldi/frida-fuzzer) +## CORESIGHT -##PIN & DYNAMORIO -Pintool and Dynamorio are dynamic instrumentation engines, and they can be -used for getting basic block information at runtime. -Pintool is only available for Intel x32/x64 on Linux, Mac OS and Windows -whereas Dynamorio is additionally available for ARM and AARCH64. -Dynamorio is also 10x faster than Pintool. + Coresight is ARM's answer to Intel's PT. + There is no implementation so far which handle coresight and getting + it working on an ARM Linux is very difficult due to custom kernel building + on embedded systems is difficult. And finding one that has coresight in + the ARM chip is difficult too. + My guess is that it is slower than Qemu, but faster than Intel PT. -The big issue with Dynamorio (and therefore Pintool too) is speed. -Dynamorio has a speed decrease of 98-99% -Pintool has a speed decrease of 99.5% + If anyone finds any coresight implementation for afl please ping me: vh@thc.org -Hence Dynamorio is the option to go for if everything fails, and Pintool -only if Dynamorio fails too. -Dynamorio solutions: +## FRIDA + + Frida is a dynamic instrumentation engine like Pintool, Dyninst and Dynamorio. + What is special is that it is written Python, and scripted with Javascript. + It is mostly used to reverse binaries on mobile phones however can be used + everywhere. + + There is a WIP fuzzer available at [https://github.com/andreafioraldi/frida-fuzzer](https://github.com/andreafioraldi/frida-fuzzer) + + +## PIN & DYNAMORIO + + Pintool and Dynamorio are dynamic instrumentation engines, and they can be + used for getting basic block information at runtime. 
+ Pintool is only available for Intel x32/x64 on Linux, Mac OS and Windows + whereas Dynamorio is additionally available for ARM and AARCH64. + Dynamorio is also 10x faster than Pintool. + + The big issue with Dynamorio (and therefore Pintool too) is speed. + Dynamorio has a speed decrease of 98-99% + Pintool has a speed decrease of 99.5% + + Hence Dynamorio is the option to go for if everything fails, and Pintool + only if Dynamorio fails too. + + Dynamorio solutions: * [https://github.com/vanhauser-thc/afl-dynamorio](https://github.com/vanhauser-thc/afl-dynamorio) * [https://github.com/mxmssh/drAFL](https://github.com/mxmssh/drAFL) * [https://github.com/googleprojectzero/winafl/](https://github.com/googleprojectzero/winafl/) <= very good but windows only -Pintool solutions: + Pintool solutions: * [https://github.com/vanhauser-thc/afl-pin](https://github.com/vanhauser-thc/afl-pin) * [https://github.com/mothran/aflpin](https://github.com/mothran/aflpin) * [https://github.com/spinpx/afl_pin_mode](https://github.com/spinpx/afl_pin_mode) <= only old Pintool version supported -##Non-AFL solutions -There are many binary-only fuzzing frameworks. -Some are great for CTFs but don't work with large binaries, others are very -slow but have good path discovery, some are very hard to set-up ... +## Non-AFL solutions + + There are many binary-only fuzzing frameworks. + Some are great for CTFs but don't work with large binaries, others are very + slow but have good path discovery, some are very hard to set-up ... 
-* QSYM: [https://github.com/sslab-gatech/qsym](https://github.com/sslab-gatech/qsym) -* Manticore: [https://github.com/trailofbits/manticore](https://github.com/trailofbits/manticore) -* S2E: [https://github.com/S2E](https://github.com/S2E) -* + * QSYM: [https://github.com/sslab-gatech/qsym](https://github.com/sslab-gatech/qsym) + * Manticore: [https://github.com/trailofbits/manticore](https://github.com/trailofbits/manticore) + * S2E: [https://github.com/S2E](https://github.com/S2E) + * ... please send me any missing that are good ## Closing words -That's it! News, corrections, updates? Send an email to vh@thc.org + That's it! News, corrections, updates? Send an email to vh@thc.org -- cgit 1.4.1 From d3dcc352da80929d2dfffc853a4aecd313175cb8 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Tue, 28 Jan 2020 09:17:55 +0000 Subject: First tests with LLVM 11 --- llvm_mode/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm_mode/Makefile b/llvm_mode/Makefile index ebe6b9de..e952e5fb 100644 --- a/llvm_mode/Makefile +++ b/llvm_mode/Makefile @@ -29,14 +29,14 @@ ifeq "$(shell uname)" "OpenBSD" LLVM_CONFIG ?= $(BIN_PATH)/llvm-config HAS_OPT = $(shell test -x $(BIN_PATH)/opt && echo 0 || echo 1) ifeq "$(HAS_OPT)" "1" - $(error llvm_mode needs a complete llvm installation (versions 3.8.0 up to 10) -> e.g. "pkg_add llvm-7.0.1p9") + $(error llvm_mode needs a complete llvm installation (versions 3.8.0 up to 11) -> e.g. 
"pkg_add llvm-7.0.1p9") endif else LLVM_CONFIG ?= llvm-config endif LLVMVER = $(shell $(LLVM_CONFIG) --version 2>/dev/null ) -LLVM_UNSUPPORTED = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^3\.[0-7]|^1[1-9]' && echo 1 || echo 0 ) +LLVM_UNSUPPORTED = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^3\.[0-7]|^1[2-9]' && echo 1 || echo 0 ) LLVM_NEW_API = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^1[0-9]' && echo 1 || echo 0 ) LLVM_MAJOR = $(shell $(LLVM_CONFIG) --version 2>/dev/null | sed 's/\..*//') LLVM_BINDIR = $(shell $(LLVM_CONFIG) --bindir 2>/dev/null) @@ -48,7 +48,7 @@ ifeq "$(LLVMVER)" "" endif ifeq "$(LLVM_UNSUPPORTED)" "1" - $(warning llvm_mode only supports llvm versions 3.8.0 up to 10) + $(warning llvm_mode only supports llvm versions 3.8.0 up to 11) endif ifeq "$(LLVM_MAJOR)" "9" -- cgit 1.4.1 From 465033b04a4ebfb7693925303620613a8d4a223e Mon Sep 17 00:00:00 2001 From: van Hauser Date: Tue, 28 Jan 2020 11:00:51 +0100 Subject: bump llvm version --- README.md | 8 ++++---- llvm_mode/README.md | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 2edca8af..dc43d5d2 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ get any feature improvements since November 2017. Among other changes afl++ has a more performant llvm_mode, supports - llvm up to version 10, QEMU 3.1, more speed and crashfixes for QEMU, + llvm up to version 11, QEMU 3.1, more speed and crashfixes for QEMU, better *BSD and Android support and much, much more. Additionally the following features and patches have been integrated: @@ -204,7 +204,7 @@ superior to blind fuzzing or coverage-only tools. PLEASE NOTE: llvm_mode compilation with afl-clang-fast/afl-clang-fast++ instead of afl-gcc/afl-g++ is much faster and has a few cool features. See llvm_mode/ - however few code does not compile with llvm. -We support llvm versions 3.8.0 to 10. +We support llvm versions 3.8.0 to 11. 
When source code is available, instrumentation can be injected by a companion tool that works as a drop-in replacement for gcc or clang in any standard build @@ -227,7 +227,7 @@ For C++ programs, you'd would also want to set `CXX=/path/to/afl/afl-g++`. The clang wrappers (afl-clang and afl-clang++) can be used in the same way; clang users may also opt to leverage a higher-performance instrumentation mode, as described in [llvm_mode/README.md](llvm_mode/README.md). -Clang/LLVM has a much better performance and works with LLVM version 3.8.0 to 10. +Clang/LLVM has a much better performance and works with LLVM version 3.8.0 to 11. Using the LAF Intel performance enhancements are also recommended, see [llvm_mode/README.laf-intel.md](llvm_mode/README.laf-intel.md) @@ -272,7 +272,7 @@ $ ./build_qemu_support.sh For additional instructions and caveats, see [qemu_mode/README.md](qemu_mode/README.md). The mode is approximately 2-5x slower than compile-time instrumentation, is -less conductive to parallelization, and may have some other quirks. +less conducive to parallelization, and may have some other quirks. If [afl-dyninst](https://github.com/vanhauser-thc/afl-dyninst) works for your binary, then you can use afl-fuzz normally and it will have twice diff --git a/llvm_mode/README.md b/llvm_mode/README.md index 150d1a17..54788aba 100644 --- a/llvm_mode/README.md +++ b/llvm_mode/README.md @@ -5,7 +5,7 @@ ## 1) Introduction -! llvm_mode works with llvm versions 3.8.0 up to 10 ! +! llvm_mode works with llvm versions 3.8.0 up to 11 ! 
The code in this directory allows you to instrument programs for AFL using true compiler-level instrumentation, instead of the more crude -- cgit 1.4.1 From bb88d98ff8f8f1b1a434643ccd30dcd48b529a64 Mon Sep 17 00:00:00 2001 From: van Hauser Date: Tue, 28 Jan 2020 19:23:04 +0100 Subject: android: prefer bigcores --- docs/ChangeLog | 2 ++ llvm_mode/LLVMInsTrim.so.cc | 7 ++++--- src/afl-fuzz-init.c | 38 ++++++++++++++++++++++++++++---------- 3 files changed, 34 insertions(+), 13 deletions(-) diff --git a/docs/ChangeLog b/docs/ChangeLog index c1d53379..5017a803 100644 --- a/docs/ChangeLog +++ b/docs/ChangeLog @@ -21,10 +21,12 @@ Version ++2.60d (develop): - afl-fuzz: - now prints the real python version support compiled in - set stronger performance compile options and little tweaks + - Android: prefer bigcores when selecting a CPU - afl-clang-fast: - show in the help output for which llvm version it was compiled for - now does not need to be recompiled between trace-pc and pass instrumentation. 
compile normally and set AFL_LLVM_USE_TRACE_PC :) + - llvm 11 is supported - afl-cmin is now a sh script (invoking awk) instead of bash for portability the original script is still present as afl-cmin.bash - added blacklist and whitelisting function check in all modules of llvm_mode diff --git a/llvm_mode/LLVMInsTrim.so.cc b/llvm_mode/LLVMInsTrim.so.cc index 24df6d42..39b2dedd 100644 --- a/llvm_mode/LLVMInsTrim.so.cc +++ b/llvm_mode/LLVMInsTrim.so.cc @@ -190,7 +190,8 @@ struct InsTrim : public ModulePass { if (instFilename.str().empty()) { - /* If the original location is empty, try using the inlined location */ + /* If the original location is empty, try using the inlined location + */ DILocation *oDILoc = cDILoc->getInlinedAt(); if (oDILoc) { @@ -424,8 +425,8 @@ struct InsTrim : public ModulePass { } - OKF("Instrumented %u locations (%llu, %llu) (%s mode)\n", - total_instr, total_rs, total_hs, + OKF("Instrumented %u locations (%llu, %llu) (%s mode)\n", total_instr, + total_rs, total_hs, getenv("AFL_HARDEN") ? "hardened" : ((getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN")) diff --git a/src/afl-fuzz-init.c b/src/afl-fuzz-init.c index 2ef2c4e7..6efa6227 100644 --- a/src/afl-fuzz-init.c +++ b/src/afl-fuzz-init.c @@ -184,11 +184,21 @@ void bind_to_free_cpu(void) { "For this platform we do not have free CPU binding code yet. If possible, please supply a PR to https://github.com/vanhauser-thc/AFLplusplus" #endif - for (i = 0; i < cpu_core_count; ++i) - if (!cpu_used[i]) break; + size_t cpu_start = 0; + try: +#ifndef __ANDROID__ + for (i = cpu_start; i < cpu_core_count; i++) + if (!cpu_used[i]) break; if (i == cpu_core_count) { +#else + for (i = cpu_core_count - cpu_start - 1; i > -1; i--) + if (!cpu_used[i]) break; + if (i == -1) { + +#endif + SAYF("\n" cLRD "[-] " cRST "Uh-oh, looks like all %d CPU cores on your system are allocated to\n" " other instances of afl-fuzz (or similar CPU-locked tasks). 
" @@ -197,12 +207,11 @@ void bind_to_free_cpu(void) { "you are\n" " absolutely sure, you can set AFL_NO_AFFINITY and try again.\n", cpu_core_count); - FATAL("No more free CPU cores"); } - OKF("Found a free CPU core, binding to #%u.", i); + OKF("Found a free CPU core, try binding to #%u.", i); cpu_aff = i; @@ -212,22 +221,31 @@ void bind_to_free_cpu(void) { #elif defined(__NetBSD__) c = cpuset_create(); if (c == NULL) PFATAL("cpuset_create failed"); - cpuset_set(i, c); #endif #if defined(__linux__) - if (sched_setaffinity(0, sizeof(c), &c)) PFATAL("sched_setaffinity failed"); + if (sched_setaffinity(0, sizeof(c), &c)) { + + if (cpu_start == cpu_core_count) + PFATAL("sched_setaffinity failed for CPU %d, exit", i); + WARNF("sched_setaffinity failed to CPU %d, trying next CPU", i); + cpu_start++; + goto try + ; + + } + #elif defined(__FreeBSD__) || defined(__DragonFly__) if (pthread_setaffinity_np(pthread_self(), sizeof(c), &c)) PFATAL("pthread_setaffinity failed"); #elif defined(__NetBSD__) - if (pthread_setaffinity_np(pthread_self(), cpuset_size(c), c)) - PFATAL("pthread_setaffinity failed"); +if (pthread_setaffinity_np(pthread_self(), cpuset_size(c), c)) + PFATAL("pthread_setaffinity failed"); - cpuset_destroy(c); +cpuset_destroy(c); #else - // this will need something for other platforms +// this will need something for other platforms #endif } -- cgit 1.4.1 From b13bb64c3b0fb938e7807ab999cbb79906a8c2a4 Mon Sep 17 00:00:00 2001 From: hexcoder Date: Tue, 28 Jan 2020 23:15:06 +0100 Subject: replace -maxdepth with posix -prune (portability) --- afl-cmin | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/afl-cmin b/afl-cmin index e9d713aa..9179628e 100755 --- a/afl-cmin +++ b/afl-cmin @@ -330,7 +330,7 @@ BEGIN { } else { stat_format = "-f '%z %N'" # *BSD, MacOS } - cmdline = "cd "in_dir" && find . -maxdepth 1 -type f -exec stat "stat_format" \\{\\} \\; | sort -n | cut -d' ' -f2-" + cmdline = "cd "in_dir" && find . \\( ! -name . 
-a -type d -prune \\) -o -type f -exec stat "stat_format" \\{\\} \\; | sort -n | cut -d' ' -f2-" while (cmdline | getline) { infilesSmallToBig[i++] = $0 } -- cgit 1.4.1 From ceed66930ef15922cd25e70a4770eaa31309e0ce Mon Sep 17 00:00:00 2001 From: hexcoder- Date: Thu, 30 Jan 2020 21:32:08 +0100 Subject: lower requirements for lower llvm/clang versions 3.7.1 works with the exception of InsTrim, 3.8.1 and above is ok --- llvm_mode/LLVMInsTrim.so.cc | 61 ++++++++++++- llvm_mode/Makefile | 4 +- llvm_mode/MarkNodes.cc | 11 +++ llvm_mode/afl-llvm-pass.so.cc | 71 +++++++++++++-- llvm_mode/compare-transform-pass.so.cc | 58 +++++++++++- llvm_mode/split-compares-pass.so.cc | 156 ++++++++++++++++++++++++--------- llvm_mode/split-switches-pass.so.cc | 90 +++++++++++++++++-- 7 files changed, 387 insertions(+), 64 deletions(-) diff --git a/llvm_mode/LLVMInsTrim.so.cc b/llvm_mode/LLVMInsTrim.so.cc index 39b2dedd..5b7b79e1 100644 --- a/llvm_mode/LLVMInsTrim.so.cc +++ b/llvm_mode/LLVMInsTrim.so.cc @@ -3,10 +3,23 @@ #include #include +#include "llvm/Config/llvm-config.h" +#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 5 +typedef long double max_align_t; +#endif + #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) #include "llvm/IR/CFG.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/DebugInfo.h" +#else +#include "llvm/Support/CFG.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/DebugInfo.h" +#endif #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LegacyPassManager.h" @@ -16,9 +29,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CFG.h" #include #include #include @@ -97,7 +108,7 @@ struct InsTrim : public ModulePass { // ripped from aflgo static bool 
isBlacklisted(const Function *F) { - static const SmallVector Blacklist = { + static const char *Blacklist[] = { "asan.", "llvm.", @@ -173,6 +184,8 @@ struct InsTrim : public ModulePass { StringRef instFilename; unsigned int instLine = 0; +#if LLVM_VERSION_MAJOR >= 4 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7) for (auto &BB : F) { BasicBlock::iterator IP = BB.getFirstInsertionPt(); @@ -227,6 +240,48 @@ struct InsTrim : public ModulePass { } +#else + for (auto &BB : F) { + + BasicBlock::iterator IP = BB.getFirstInsertionPt(); + IRBuilder<> IRB(&(*IP)); + if (Loc.isUnknown()) Loc = IP->getDebugLoc(); + + } + + if (!Loc.isUnknown()) { + + DILocation cDILoc(Loc.getAsMDNode(C)); + + instLine = cDILoc.getLineNumber(); + instFilename = cDILoc.getFilename(); + + /* Continue only if we know where we actually are */ + if (!instFilename.str().empty()) { + + for (std::list::iterator it = myWhitelist.begin(); + it != myWhitelist.end(); ++it) { + + if (instFilename.str().length() >= it->length()) { + + if (instFilename.str().compare( + instFilename.str().length() - it->length(), + it->length(), *it) == 0) { + + instrumentBlock = true; + break; + + } + + } + + } + + } + + } + +#endif /* Either we couldn't figure out our location or the location is * not whitelisted, so we skip instrumentation. 
*/ if (!instrumentBlock) { diff --git a/llvm_mode/Makefile b/llvm_mode/Makefile index e952e5fb..50b1d48c 100644 --- a/llvm_mode/Makefile +++ b/llvm_mode/Makefile @@ -36,7 +36,7 @@ else endif LLVMVER = $(shell $(LLVM_CONFIG) --version 2>/dev/null ) -LLVM_UNSUPPORTED = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^3\.[0-7]|^1[2-9]' && echo 1 || echo 0 ) +LLVM_UNSUPPORTED = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^3\.[0-3]|^1[2-9]' && echo 1 || echo 0 ) LLVM_NEW_API = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^1[0-9]' && echo 1 || echo 0 ) LLVM_MAJOR = $(shell $(LLVM_CONFIG) --version 2>/dev/null | sed 's/\..*//') LLVM_BINDIR = $(shell $(LLVM_CONFIG) --bindir 2>/dev/null) @@ -201,7 +201,7 @@ endif ln -sf afl-clang-fast ../afl-clang-fast++ ../libLLVMInsTrim.so: LLVMInsTrim.so.cc MarkNodes.cc | test_deps - $(CXX) $(CLANG_CFL) -DLLVMInsTrim_EXPORTS -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< MarkNodes.cc -o $@ $(CLANG_LFL) + -$(CXX) $(CLANG_CFL) -DLLVMInsTrim_EXPORTS -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< MarkNodes.cc -o $@ $(CLANG_LFL) ../afl-llvm-pass.so: afl-llvm-pass.so.cc | test_deps $(CXX) $(CLANG_CFL) -DLLVMInsTrim_EXPORTS -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< -o $@ $(CLANG_LFL) diff --git a/llvm_mode/MarkNodes.cc b/llvm_mode/MarkNodes.cc index caa8cede..7b22bac0 100644 --- a/llvm_mode/MarkNodes.cc +++ b/llvm_mode/MarkNodes.cc @@ -3,11 +3,22 @@ #include #include #include + +#include "llvm/Config/llvm-config.h" +#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 5 +typedef long double max_align_t; +#endif + #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/BasicBlock.h" +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) #include "llvm/IR/CFG.h" +#else +#include "llvm/Support/CFG.h" +#endif #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" diff --git 
a/llvm_mode/afl-llvm-pass.so.cc b/llvm_mode/afl-llvm-pass.so.cc index 15cc6127..2cd23adf 100644 --- a/llvm_mode/afl-llvm-pass.so.cc +++ b/llvm_mode/afl-llvm-pass.so.cc @@ -37,14 +37,26 @@ #include #include -#include "llvm/IR/DebugInfo.h" -#include "llvm/IR/BasicBlock.h" +#include "llvm/Config/llvm-config.h" +#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 5 +typedef long double max_align_t; +#endif + #include "llvm/IR/IRBuilder.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" + +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/CFG.h" +#else +#include "llvm/DebugInfo.h" +#include "llvm/Support/CFG.h" +#endif using namespace llvm; @@ -78,7 +90,7 @@ class AFLCoverage : public ModulePass { // ripped from aflgo static bool isBlacklisted(const Function *F) { - static const SmallVector Blacklist = { + static const char *Blacklist[] = { "asan.", "llvm.", @@ -197,6 +209,8 @@ bool AFLCoverage::runOnModule(Module &M) { * For now, just instrument the block if we are not able * to determine our location. */ DebugLoc Loc = IP->getDebugLoc(); +#if LLVM_VERSION_MAJOR >= 4 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7) if (Loc) { DILocation *cDILoc = dyn_cast(Loc.getAsMDNode()); @@ -249,6 +263,47 @@ bool AFLCoverage::runOnModule(Module &M) { } +#else + if (!Loc.isUnknown()) { + + DILocation cDILoc(Loc.getAsMDNode(C)); + + unsigned int instLine = cDILoc.getLineNumber(); + StringRef instFilename = cDILoc.getFilename(); + + (void)instLine; + + /* Continue only if we know where we actually are */ + if (!instFilename.str().empty()) { + + for (std::list::iterator it = myWhitelist.begin(); + it != myWhitelist.end(); ++it) { + + /* We don't check for filename equality here because + * filenames might actually be full paths. 
Instead we + * check that the actual filename ends in the filename + * specified in the list. */ + if (instFilename.str().length() >= it->length()) { + + if (instFilename.str().compare( + instFilename.str().length() - it->length(), + it->length(), *it) == 0) { + + instrumentBlock = true; + break; + + } + + } + + } + + } + + } + +#endif + /* Either we couldn't figure out our location or the location is * not whitelisted, so we skip instrumentation. */ if (!instrumentBlock) continue; @@ -273,13 +328,19 @@ bool AFLCoverage::runOnModule(Module &M) { // result: a little more speed and less map pollution int more_than_one = -1; // fprintf(stderr, "BB %u: ", cur_loc); - for (BasicBlock *Pred : predecessors(&BB)) { + for (pred_iterator PI = pred_begin(&BB), E = pred_end(&BB); PI != E; + ++PI) { + + BasicBlock *Pred = *PI; int count = 0; if (more_than_one == -1) more_than_one = 0; // fprintf(stderr, " %p=>", Pred); - for (BasicBlock *Succ : successors(Pred)) { + for (succ_iterator SI = succ_begin(Pred), E = succ_end(Pred); SI != E; + ++SI) { + + BasicBlock *Succ = *SI; // if (count > 0) // fprintf(stderr, "|"); diff --git a/llvm_mode/compare-transform-pass.so.cc b/llvm_mode/compare-transform-pass.so.cc index 5d924b63..e1332a9d 100644 --- a/llvm_mode/compare-transform-pass.so.cc +++ b/llvm_mode/compare-transform-pass.so.cc @@ -22,9 +22,9 @@ #include #include #include +#include "llvm/Config/llvm-config.h" #include "llvm/ADT/Statistic.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" @@ -32,10 +32,19 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/IR/Verifier.h" #include "llvm/Pass.h" #include "llvm/Analysis/ValueTracking.h" +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) +#include "llvm/IR/Verifier.h" +#include "llvm/IR/DebugInfo.h" +#else 
+#include "llvm/Analysis/Verifier.h" +#include "llvm/DebugInfo.h" +#define nullptr 0 +#endif + #include using namespace llvm; @@ -115,7 +124,7 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, c = M.getOrInsertFunction("tolower", Int32Ty, Int32Ty #if LLVM_VERSION_MAJOR < 5 , - nullptr + NULL #endif ); #if LLVM_VERSION_MAJOR < 9 @@ -140,6 +149,8 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, * For now, just instrument the block if we are not able * to determine our location. */ DebugLoc Loc = IP->getDebugLoc(); +#if LLVM_VERSION_MAJOR >= 4 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7) if (Loc) { DILocation *cDILoc = dyn_cast(Loc.getAsMDNode()); @@ -192,6 +203,47 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, } +#else + if (!Loc.isUnknown()) { + + DILocation cDILoc(Loc.getAsMDNode(C)); + + unsigned int instLine = cDILoc.getLineNumber(); + StringRef instFilename = cDILoc.getFilename(); + + (void)instLine; + + /* Continue only if we know where we actually are */ + if (!instFilename.str().empty()) { + + for (std::list::iterator it = myWhitelist.begin(); + it != myWhitelist.end(); ++it) { + + /* We don't check for filename equality here because + * filenames might actually be full paths. Instead we + * check that the actual filename ends in the filename + * specified in the list. */ + if (instFilename.str().length() >= it->length()) { + + if (instFilename.str().compare( + instFilename.str().length() - it->length(), + it->length(), *it) == 0) { + + instrumentBlock = true; + break; + + } + + } + + } + + } + + } + +#endif + /* Either we couldn't figure out our location or the location is * not whitelisted, so we skip instrumentation. 
*/ if (!instrumentBlock) continue; diff --git a/llvm_mode/split-compares-pass.so.cc b/llvm_mode/split-compares-pass.so.cc index bc25b322..e16993d6 100644 --- a/llvm_mode/split-compares-pass.so.cc +++ b/llvm_mode/split-compares-pass.so.cc @@ -24,16 +24,25 @@ #include #include +#include "llvm/Config/llvm-config.h" + #include "llvm/Pass.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/Support/raw_ostream.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/IR/Verifier.h" #include "llvm/IR/Module.h" #include "llvm/IR/IRBuilder.h" +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) +#include "llvm/IR/Verifier.h" +#include "llvm/IR/DebugInfo.h" +#else +#include "llvm/Analysis/Verifier.h" +#include "llvm/DebugInfo.h" +#define nullptr 0 +#endif using namespace llvm; @@ -66,7 +75,7 @@ class SplitComparesTransform : public ModulePass { static bool isBlacklisted(const Function *F) { - static const SmallVector Blacklist = { + static const char *Blacklist[] = { "asan.", "llvm.", "sancov.", "__ubsan_handle_", "ign." @@ -139,6 +148,8 @@ bool SplitComparesTransform::simplifyCompares(Module &M) { * For now, just instrument the block if we are not able * to determine our location. 
*/ DebugLoc Loc = IP->getDebugLoc(); +#if LLVM_VERSION_MAJOR >= 4 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7) if (Loc) { DILocation *cDILoc = dyn_cast(Loc.getAsMDNode()); @@ -191,6 +202,47 @@ bool SplitComparesTransform::simplifyCompares(Module &M) { } +#else + if (!Loc.isUnknown()) { + + DILocation cDILoc(Loc.getAsMDNode(C)); + + unsigned int instLine = cDILoc.getLineNumber(); + StringRef instFilename = cDILoc.getFilename(); + + (void)instLine; + + /* Continue only if we know where we actually are */ + if (!instFilename.str().empty()) { + + for (std::list::iterator it = myWhitelist.begin(); + it != myWhitelist.end(); ++it) { + + /* We don't check for filename equality here because + * filenames might actually be full paths. Instead we + * check that the actual filename ends in the filename + * specified in the list. */ + if (instFilename.str().length() >= it->length()) { + + if (instFilename.str().compare( + instFilename.str().length() - it->length(), + it->length(), *it) == 0) { + + instrumentBlock = true; + break; + + } + + } + + } + + } + + } + +#endif + /* Either we couldn't figure out our location or the location is * not whitelisted, so we skip instrumentation. 
*/ if (!instrumentBlock) continue; @@ -283,7 +335,8 @@ bool SplitComparesTransform::simplifyCompares(Module &M) { * block bb it is now at the position where the old IcmpInst was */ Instruction *icmp_np; icmp_np = CmpInst::Create(Instruction::ICmp, new_pred, op0, op1); - bb->getInstList().insert(bb->getTerminator()->getIterator(), icmp_np); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), + icmp_np); /* create a new basic block which holds the new EQ icmp */ Instruction *icmp_eq; @@ -348,7 +401,8 @@ bool SplitComparesTransform::simplifyCompares(Module &M) { * block bb it is now at the position where the old IcmpInst was */ Instruction *fcmp_np; fcmp_np = CmpInst::Create(Instruction::FCmp, new_pred, op0, op1); - bb->getInstList().insert(bb->getTerminator()->getIterator(), fcmp_np); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), + fcmp_np); /* create a new basic block which holds the new EQ fcmp */ Instruction *fcmp_eq; @@ -469,20 +523,21 @@ bool SplitComparesTransform::simplifyIntSignedness(Module &M) { s_op0 = BinaryOperator::Create(Instruction::LShr, op0, ConstantInt::get(IntType, bitw - 1)); - bb->getInstList().insert(bb->getTerminator()->getIterator(), s_op0); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op0); t_op0 = new TruncInst(s_op0, Int1Ty); - bb->getInstList().insert(bb->getTerminator()->getIterator(), t_op0); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), t_op0); s_op1 = BinaryOperator::Create(Instruction::LShr, op1, ConstantInt::get(IntType, bitw - 1)); - bb->getInstList().insert(bb->getTerminator()->getIterator(), s_op1); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op1); t_op1 = new TruncInst(s_op1, Int1Ty); - bb->getInstList().insert(bb->getTerminator()->getIterator(), t_op1); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), t_op1); /* compare of the sign bits */ icmp_sign_bit = 
CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, t_op0, t_op1); - bb->getInstList().insert(bb->getTerminator()->getIterator(), icmp_sign_bit); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), + icmp_sign_bit); /* create a new basic block which is executed if the signedness bit is * different */ @@ -557,6 +612,8 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { LLVMContext &C = M.getContext(); +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 7) const DataLayout &dl = M.getDataLayout(); /* define unions with floating point and (sign, exponent, mantissa) triples @@ -571,6 +628,8 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { } +#endif + std::vector fcomps; /* get all EQ, NE, GT, and LT fcmps. if the other two @@ -669,11 +728,11 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { Instruction *b_op0, *b_op1; b_op0 = CastInst::Create(Instruction::BitCast, op0, IntegerType::get(C, op_size)); - bb->getInstList().insert(bb->getTerminator()->getIterator(), b_op0); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), b_op0); b_op1 = CastInst::Create(Instruction::BitCast, op1, IntegerType::get(C, op_size)); - bb->getInstList().insert(bb->getTerminator()->getIterator(), b_op1); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), b_op1); /* isolate signs of value of floating point type */ @@ -684,21 +743,22 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { s_s0 = BinaryOperator::Create(Instruction::LShr, b_op0, ConstantInt::get(b_op0->getType(), op_size - 1)); - bb->getInstList().insert(bb->getTerminator()->getIterator(), s_s0); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_s0); t_s0 = new TruncInst(s_s0, Int1Ty); - bb->getInstList().insert(bb->getTerminator()->getIterator(), t_s0); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), t_s0); s_s1 = BinaryOperator::Create(Instruction::LShr, 
b_op1, ConstantInt::get(b_op1->getType(), op_size - 1)); - bb->getInstList().insert(bb->getTerminator()->getIterator(), s_s1); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_s1); t_s1 = new TruncInst(s_s1, Int1Ty); - bb->getInstList().insert(bb->getTerminator()->getIterator(), t_s1); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), t_s1); /* compare of the sign bits */ icmp_sign_bit = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, t_s0, t_s1); - bb->getInstList().insert(bb->getTerminator()->getIterator(), icmp_sign_bit); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), + icmp_sign_bit); /* create a new basic block which is executed if the signedness bits are * equal */ @@ -730,16 +790,16 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { Instruction::LShr, b_op1, ConstantInt::get(b_op1->getType(), shiftR_exponent)); signequal_bb->getInstList().insert( - signequal_bb->getTerminator()->getIterator(), s_e0); + BasicBlock::iterator(signequal_bb->getTerminator()), s_e0); signequal_bb->getInstList().insert( - signequal_bb->getTerminator()->getIterator(), s_e1); + BasicBlock::iterator(signequal_bb->getTerminator()), s_e1); t_e0 = new TruncInst(s_e0, IntExponentTy); t_e1 = new TruncInst(s_e1, IntExponentTy); signequal_bb->getInstList().insert( - signequal_bb->getTerminator()->getIterator(), t_e0); + BasicBlock::iterator(signequal_bb->getTerminator()), t_e0); signequal_bb->getInstList().insert( - signequal_bb->getTerminator()->getIterator(), t_e1); + BasicBlock::iterator(signequal_bb->getTerminator()), t_e1); if (sizeInBits - precision < exTySizeBytes * 8) { @@ -750,9 +810,9 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { Instruction::And, t_e1, ConstantInt::get(t_e1->getType(), mask_exponent)); signequal_bb->getInstList().insert( - signequal_bb->getTerminator()->getIterator(), m_e0); + BasicBlock::iterator(signequal_bb->getTerminator()), m_e0); 
signequal_bb->getInstList().insert( - signequal_bb->getTerminator()->getIterator(), m_e1); + BasicBlock::iterator(signequal_bb->getTerminator()), m_e1); } else { @@ -780,7 +840,7 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { icmp_exponent = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_UGT, m_e0, m_e1); signequal_bb->getInstList().insert( - signequal_bb->getTerminator()->getIterator(), icmp_exponent); + BasicBlock::iterator(signequal_bb->getTerminator()), icmp_exponent); icmp_exponent_result = BinaryOperator::Create(Instruction::Xor, icmp_exponent, t_s0); break; @@ -789,7 +849,7 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { icmp_exponent = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, m_e0, m_e1); signequal_bb->getInstList().insert( - signequal_bb->getTerminator()->getIterator(), icmp_exponent); + BasicBlock::iterator(signequal_bb->getTerminator()), icmp_exponent); icmp_exponent_result = BinaryOperator::Create(Instruction::Xor, icmp_exponent, t_s0); break; @@ -798,7 +858,8 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { } signequal_bb->getInstList().insert( - signequal_bb->getTerminator()->getIterator(), icmp_exponent_result); + BasicBlock::iterator(signequal_bb->getTerminator()), + icmp_exponent_result); { @@ -822,19 +883,19 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { m_f1 = BinaryOperator::Create( Instruction::And, b_op1, ConstantInt::get(b_op1->getType(), mask_fraction)); - middle_bb->getInstList().insert(middle_bb->getTerminator()->getIterator(), - m_f0); - middle_bb->getInstList().insert(middle_bb->getTerminator()->getIterator(), - m_f1); + middle_bb->getInstList().insert( + BasicBlock::iterator(middle_bb->getTerminator()), m_f0); + middle_bb->getInstList().insert( + BasicBlock::iterator(middle_bb->getTerminator()), m_f1); if (needTrunc) { t_f0 = new TruncInst(m_f0, IntFractionTy); t_f1 = new TruncInst(m_f1, IntFractionTy); middle_bb->getInstList().insert( - 
middle_bb->getTerminator()->getIterator(), t_f0); + BasicBlock::iterator(middle_bb->getTerminator()), t_f0); middle_bb->getInstList().insert( - middle_bb->getTerminator()->getIterator(), t_f1); + BasicBlock::iterator(middle_bb->getTerminator()), t_f1); } else { @@ -850,9 +911,9 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { t_f0 = new TruncInst(b_op0, IntFractionTy); t_f1 = new TruncInst(b_op1, IntFractionTy); middle_bb->getInstList().insert( - middle_bb->getTerminator()->getIterator(), t_f0); + BasicBlock::iterator(middle_bb->getTerminator()), t_f0); middle_bb->getInstList().insert( - middle_bb->getTerminator()->getIterator(), t_f1); + BasicBlock::iterator(middle_bb->getTerminator()), t_f1); } else { @@ -882,7 +943,7 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { icmp_fraction = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_UGT, t_f0, t_f1); middle_bb->getInstList().insert( - middle_bb->getTerminator()->getIterator(), icmp_fraction); + BasicBlock::iterator(middle_bb->getTerminator()), icmp_fraction); icmp_fraction_result = BinaryOperator::Create(Instruction::Xor, icmp_fraction, t_s0); break; @@ -891,7 +952,7 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { icmp_fraction = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, t_f0, t_f1); middle_bb->getInstList().insert( - middle_bb->getTerminator()->getIterator(), icmp_fraction); + BasicBlock::iterator(middle_bb->getTerminator()), icmp_fraction); icmp_fraction_result = BinaryOperator::Create(Instruction::Xor, icmp_fraction, t_s0); break; @@ -899,8 +960,8 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { } - middle_bb->getInstList().insert(middle_bb->getTerminator()->getIterator(), - icmp_fraction_result); + middle_bb->getInstList().insert( + BasicBlock::iterator(middle_bb->getTerminator()), icmp_fraction_result); PHINode *PN = PHINode::Create(Int1Ty, 3, ""); @@ -1037,18 +1098,21 @@ size_t SplitComparesTransform::splitIntCompares(Module &M, unsigned 
bitw) { s_op0 = BinaryOperator::Create(Instruction::LShr, op0, ConstantInt::get(OldIntType, bitw / 2)); - bb->getInstList().insert(bb->getTerminator()->getIterator(), s_op0); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op0); op0_high = new TruncInst(s_op0, NewIntType); - bb->getInstList().insert(bb->getTerminator()->getIterator(), op0_high); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), + op0_high); s_op1 = BinaryOperator::Create(Instruction::LShr, op1, ConstantInt::get(OldIntType, bitw / 2)); - bb->getInstList().insert(bb->getTerminator()->getIterator(), s_op1); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op1); op1_high = new TruncInst(s_op1, NewIntType); - bb->getInstList().insert(bb->getTerminator()->getIterator(), op1_high); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), + op1_high); icmp_high = CmpInst::Create(Instruction::ICmp, pred, op0_high, op1_high); - bb->getInstList().insert(bb->getTerminator()->getIterator(), icmp_high); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), + icmp_high); /* now we have to destinguish between == != and > < */ if (pred == CmpInst::ICMP_EQ || pred == CmpInst::ICMP_NE) { @@ -1194,13 +1258,19 @@ bool SplitComparesTransform::runOnModule(Module &M) { << "bit: " << splitIntCompares(M, bitw) << " splitted\n"; bitw >>= 1; +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 7) [[clang::fallthrough]]; /*FALLTHRU*/ /* FALLTHROUGH */ +#endif case 32: errs() << "Split-integer-compare-pass " << bitw << "bit: " << splitIntCompares(M, bitw) << " splitted\n"; bitw >>= 1; +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 7) [[clang::fallthrough]]; /*FALLTHRU*/ /* FALLTHROUGH */ +#endif case 16: errs() << "Split-integer-compare-pass " << bitw << "bit: " << splitIntCompares(M, bitw) << " splitted\n"; diff --git a/llvm_mode/split-switches-pass.so.cc 
b/llvm_mode/split-switches-pass.so.cc index 3a2838c0..9101dc26 100644 --- a/llvm_mode/split-switches-pass.so.cc +++ b/llvm_mode/split-switches-pass.so.cc @@ -23,8 +23,9 @@ #include #include +#include "llvm/Config/llvm-config.h" + #include "llvm/ADT/Statistic.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" @@ -32,10 +33,20 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/IR/Verifier.h" #include "llvm/Pass.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/IRBuilder.h" +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) +#include "llvm/IR/Verifier.h" +#include "llvm/IR/DebugInfo.h" +#else +#include "llvm/Analysis/Verifier.h" +#include "llvm/DebugInfo.h" +#define nullptr 0 +#endif + #include using namespace llvm; @@ -69,7 +80,7 @@ class SplitSwitchesTransform : public ModulePass { static bool isBlacklisted(const Function *F) { - static const SmallVector Blacklist = { + static const char *Blacklist[] = { "asan.", "llvm.", "sancov.", "__ubsan_handle_", "ign." 
@@ -140,7 +151,7 @@ BasicBlock *SplitSwitchesTransform::switchConvert( IntegerType * ByteType = IntegerType::get(OrigBlock->getContext(), 8); unsigned BytesInValue = bytesChecked.size(); std::vector setSizes; - std::vector> byteSets(BytesInValue, std::set()); + std::vector > byteSets(BytesInValue, std::set()); assert(ValTypeBitWidth >= 8 && ValTypeBitWidth <= 64); @@ -213,8 +224,25 @@ BasicBlock *SplitSwitchesTransform::switchConvert( NewNode->getInstList().push_back(Comp); bytesChecked[smallestIndex] = true; - if (std::all_of(bytesChecked.begin(), bytesChecked.end(), - [](bool b) { return b; })) { + bool allBytesAreChecked = true; + + for (std::vector::iterator BCI = bytesChecked.begin(), + E = bytesChecked.end(); + BCI != E; ++BCI) { + + if (!*BCI) { + + allBytesAreChecked = false; + break; + + } + + } + + // if (std::all_of(bytesChecked.begin(), bytesChecked.end(), + // [](bool b) { return b; })) { + + if (allBytesAreChecked) { assert(Cases.size() == 1); BranchInst::Create(Cases[0].BB, NewDefault, Comp, NewNode); @@ -306,6 +334,10 @@ BasicBlock *SplitSwitchesTransform::switchConvert( bool SplitSwitchesTransform::splitSwitches(Module &M) { +#if (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 7) + LLVMContext &C = M.getContext(); +#endif + std::vector switches; /* iterate over all functions, bbs and instruction and add @@ -327,6 +359,8 @@ bool SplitSwitchesTransform::splitSwitches(Module &M) { * For now, just instrument the block if we are not able * to determine our location. 
*/ DebugLoc Loc = IP->getDebugLoc(); +#if LLVM_VERSION_MAJOR >= 4 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7) if (Loc) { DILocation *cDILoc = dyn_cast(Loc.getAsMDNode()); @@ -379,6 +413,47 @@ bool SplitSwitchesTransform::splitSwitches(Module &M) { } +#else + if (!Loc.isUnknown()) { + + DILocation cDILoc(Loc.getAsMDNode(C)); + + unsigned int instLine = cDILoc.getLineNumber(); + StringRef instFilename = cDILoc.getFilename(); + + (void)instLine; + + /* Continue only if we know where we actually are */ + if (!instFilename.str().empty()) { + + for (std::list::iterator it = myWhitelist.begin(); + it != myWhitelist.end(); ++it) { + + /* We don't check for filename equality here because + * filenames might actually be full paths. Instead we + * check that the actual filename ends in the filename + * specified in the list. */ + if (instFilename.str().length() >= it->length()) { + + if (instFilename.str().compare( + instFilename.str().length() - it->length(), + it->length(), *it) == 0) { + + instrumentBlock = true; + break; + + } + + } + + } + + } + + } + +#endif + /* Either we couldn't figure out our location or the location is * not whitelisted, so we skip instrumentation. */ if (!instrumentBlock) continue; @@ -426,8 +501,7 @@ bool SplitSwitchesTransform::splitSwitches(Module &M) { * if the default block is set as an unreachable we avoid creating one * because will never be a valid target.*/ BasicBlock *NewDefault = nullptr; - NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault"); - NewDefault->insertInto(F, Default); + NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault", F, Default); BranchInst::Create(Default, NewDefault); /* Prepare cases vector. 
*/ -- cgit 1.4.1 From b050c1158398dd07e25a6cd65234da84e5656fa6 Mon Sep 17 00:00:00 2001 From: hexcoder- Date: Thu, 30 Jan 2020 21:50:57 +0100 Subject: for partial functionality ignore the LLVMInsTrim build result --- llvm_mode/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm_mode/Makefile b/llvm_mode/Makefile index 50b1d48c..5f65b55e 100644 --- a/llvm_mode/Makefile +++ b/llvm_mode/Makefile @@ -36,7 +36,7 @@ else endif LLVMVER = $(shell $(LLVM_CONFIG) --version 2>/dev/null ) -LLVM_UNSUPPORTED = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^3\.[0-3]|^1[2-9]' && echo 1 || echo 0 ) +LLVM_UNSUPPORTED = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^3\.[0-7]|^1[2-9]' && echo 1 || echo 0 ) LLVM_NEW_API = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^1[0-9]' && echo 1 || echo 0 ) LLVM_MAJOR = $(shell $(LLVM_CONFIG) --version 2>/dev/null | sed 's/\..*//') LLVM_BINDIR = $(shell $(LLVM_CONFIG) --bindir 2>/dev/null) -- cgit 1.4.1