From d6beac5235118b251deb18a6579aab85271eaa7b Mon Sep 17 00:00:00 2001 From: Andrea Fioraldi Date: Mon, 29 Jul 2019 16:09:28 +0200 Subject: compcov levels to enable the instrumentation of only immediates --- qemu_mode/libcompcov/README.compcov | 8 ++++-- qemu_mode/libcompcov/libcompcov.so.c | 54 ++++++++++++++++++++++++++++++++---- 2 files changed, 55 insertions(+), 7 deletions(-) (limited to 'qemu_mode/libcompcov') diff --git a/qemu_mode/libcompcov/README.compcov b/qemu_mode/libcompcov/README.compcov index 2a4a0ee5..9be13d88 100644 --- a/qemu_mode/libcompcov/README.compcov +++ b/qemu_mode/libcompcov/README.compcov @@ -18,15 +18,19 @@ For optimized binaries this is an issue, those functions are often inlined and this module is not capable to log the coverage in this case. If you have the source code of the fuzzing target you should nto use this -library and QEMU but build ot with afl-clang-fast and the laf-intel options. +library and QEMU but build it with afl-clang-fast and the laf-intel options. To use this library make sure to preload it with AFL_PRELOAD. export AFL_PRELOAD=/path/to/libcompcov.so - export AFL_QEMU_COMPCOV=1 + export AFL_COMPCOV_LEVEL=1 afl-fuzz -Q -i input -o output -- +The AFL_COMPCOV_LEVEL tells to QEMU and libcompcov how to log comaprisons. +Level 1 logs just comparison with immediates / read-only memory and level 2 +logs all the comparisons. + The library make use of https://github.com/ouadev/proc_maps_parser and so it is Linux specific. However this is not a strict dependency, other UNIX operating systems can be supported simply replacing the code related to the diff --git a/qemu_mode/libcompcov/libcompcov.so.c b/qemu_mode/libcompcov/libcompcov.so.c index 582230db..92e4dbaa 100644 --- a/qemu_mode/libcompcov/libcompcov.so.c +++ b/qemu_mode/libcompcov/libcompcov.so.c @@ -45,6 +45,8 @@ static void *__compcov_code_start, static u8 *__compcov_afl_map; +static u32 __compcov_level; + static int (*__libc_strcmp)(const char*, const char*); static int (*__libc_strncmp)(const char*, const char*, size_t); static int (*__libc_strcasecmp)(const char*, const char*); @@ -54,6 +56,28 @@ static int (*__libc_memcmp)(const void*, const void*, size_t); static int debug_fd = -1; +#define MAX_MAPPINGS 1024 + +static struct mapping { + void *st, *en; +} __compcov_ro[MAX_MAPPINGS]; + +static u32 __compcov_ro_cnt; + + +/* Check an address against the list of read-only mappings. */ + +static u8 __compcov_is_ro(const void* ptr) { + + u32 i; + + for (i = 0; i < __compcov_ro_cnt; i++) + if (ptr >= __compcov_ro[i].st && ptr <= __compcov_ro[i].en) return 1; + + return 0; +} + + static size_t __strlen2(const char *s1, const char *s2, size_t max_length) { // from https://github.com/googleprojectzero/CompareCoverage @@ -71,6 +95,15 @@ static void __compcov_load(void) { __libc_strcasecmp = dlsym(RTLD_NEXT, "strcasecmp"); __libc_strncasecmp = dlsym(RTLD_NEXT, "strncasecmp"); __libc_memcmp = dlsym(RTLD_NEXT, "memcmp"); + + if (getenv("AFL_QEMU_COMPCOV")) { + + __compcov_level = 1; + } + if (getenv("AFL_COMPCOV_LEVEL")) { + + __compcov_level = atoi(getenv("AFL_COMPCOV_LEVEL")); + } char *id_str = getenv(SHM_ENV_VAR); int shm_id; @@ -110,6 +143,12 @@ static void __compcov_load(void) { __compcov_code_end = maps_tmp->addr_end; } } + + if ((maps_tmp->is_w && !maps_tmp->is_r) || __compcov_ro_cnt == MAX_MAPPINGS) + continue; + + __compcov_ro[__compcov_ro_cnt].st = maps_tmp->addr_start; + __compcov_ro[__compcov_ro_cnt].en = maps_tmp->addr_end; } pmparser_free(maps); @@ -149,7 +188,8 @@ int strcmp(const char* str1, const char* str2) { void* retaddr = __builtin_return_address(0); - if (__compcov_is_in_bound(retaddr)) { + if (__compcov_is_in_bound(retaddr) && !(__compcov_level < 2 && + !__compcov_is_ro(str1) && !__compcov_is_ro(str2))) { size_t n = __strlen2(str1, str2, MAX_CMP_LENGTH +1); @@ -173,7 +213,8 @@ int strncmp(const char* str1, const char* str2, size_t len) { void* retaddr = __builtin_return_address(0); - if (__compcov_is_in_bound(retaddr)) { + if (__compcov_is_in_bound(retaddr) && !(__compcov_level < 2 && + !__compcov_is_ro(str1) && !__compcov_is_ro(str2))) { size_t n = __strlen2(str1, str2, MAX_CMP_LENGTH +1); n = MIN(n, len); @@ -198,7 +239,8 @@ int strcasecmp(const char* str1, const char* str2) { void* retaddr = __builtin_return_address(0); - if (__compcov_is_in_bound(retaddr)) { + if (__compcov_is_in_bound(retaddr) && !(__compcov_level < 2 && + !__compcov_is_ro(str1) && !__compcov_is_ro(str2))) { /* Fallback to strcmp, maybe improve in future */ size_t n = __strlen2(str1, str2, MAX_CMP_LENGTH +1); @@ -223,7 +265,8 @@ int strncasecmp(const char* str1, const char* str2, size_t len) { void* retaddr = __builtin_return_address(0); - if (__compcov_is_in_bound(retaddr)) { + if (__compcov_is_in_bound(retaddr) && !(__compcov_level < 2 && + !__compcov_is_ro(str1) && !__compcov_is_ro(str2))) { /* Fallback to strncmp, maybe improve in future */ size_t n = __strlen2(str1, str2, MAX_CMP_LENGTH +1); @@ -249,7 +292,8 @@ int memcmp(const void* mem1, const void* mem2, size_t len) { void* retaddr = __builtin_return_address(0); - if (__compcov_is_in_bound(retaddr)) { + if (__compcov_is_in_bound(retaddr) && !(__compcov_level < 2 && + !__compcov_is_ro(mem1) && !__compcov_is_ro(mem2))) { size_t n = len; -- cgit 1.4.1 From 7b36afd5f16894257c92695d200e59eb51d08e1c Mon Sep 17 00:00:00 2001 From: Andrea Fioraldi Date: Fri, 30 Aug 2019 11:38:33 +0200 Subject: modernize docs and readme for qemu and unicorn --- docs/unicorn_mode.txt | 109 --------------- qemu_mode/README.md | 137 +++++++++++++++++++ qemu_mode/README.qemu | 146 --------------------- qemu_mode/build_qemu_support.sh | 11 +- qemu_mode/libcompcov/libcompcov.so.c | 2 +- qemu_mode/patches/afl-qemu-common.h | 15 +-- qemu_mode/patches/afl-qemu-cpu-inl.h | 15 +-- qemu_mode/patches/afl-qemu-cpu-translate-inl.h | 17 ++- qemu_mode/patches/afl-qemu-tcg-inl.h | 15 +-- qemu_mode/patches/afl-qemu-translate-inl.h | 15 +-- unicorn_mode/README.md | 130 +++++++++++++++--- unicorn_mode/build_unicorn_support.sh | 12 +- unicorn_mode/patches/afl-unicorn-common.h | 18 +-- unicorn_mode/patches/afl-unicorn-cpu-inl.h | 18 +-- .../patches/afl-unicorn-cpu-translate-inl.h | 18 +-- unicorn_mode/patches/afl-unicorn-tcg-op-inl.h | 18 +-- unicorn_mode/patches/afl-unicorn-tcg-runtime-inl.h | 18 +-- unicorn_mode/samples/compcov_x64/COMPILE.md | 3 +- unicorn_mode/samples/compcov_x64/compcov_target.c | 2 +- unicorn_mode/samples/simple/COMPILE.md | 5 +- 20 files changed, 353 insertions(+), 371 deletions(-) delete mode 100644 docs/unicorn_mode.txt create mode 100644 qemu_mode/README.md delete mode 100644 qemu_mode/README.qemu (limited to 'qemu_mode/libcompcov') diff --git a/docs/unicorn_mode.txt b/docs/unicorn_mode.txt deleted file mode 100644 index b691fff8..00000000 --- a/docs/unicorn_mode.txt +++ /dev/null @@ -1,109 +0,0 @@ -========================================================= -Unicorn-based binary-only instrumentation for afl-fuzz -========================================================= - -1) Introduction ---------------- - -The code in ./unicorn_mode allows you to build a standalone feature that -leverages the Unicorn Engine and allows callers to obtain instrumentation -output for black-box, closed-source binary code snippets. This mechanism -can be then used by afl-fuzz to stress-test targets that couldn't be built -with afl-gcc or used in QEMU mode, or with other extensions such as -TriforceAFL. - -There is a significant performance penalty compared to native AFL, -but at least we're able to use AFL on these binaries, right? - -The idea and much of the implementation comes from Nathan Voss . - -2) How to use -------------- - -Requirements: you need an installed python2 environment. - -*** Building AFL's Unicorn Mode *** - -First, make afl as usual. -Once that completes successfully you need to build and add in the Unicorn Mode -features: - - $ cd unicorn_mode - $ ./build_unicorn_support.sh - -NOTE: This script downloads a recent Unicorn Engine commit that has been tested -and is stable-ish from the Unicorn github page. If you are offline, you'll need -to hack up this script a little bit and supply your own copy of Unicorn's latest -stable release. It's not very hard, just check out the beginning of the -build_unicorn_support.sh script and adjust as necessary. - -Building Unicorn will take a little bit (~5-10 minutes). Once it completes -it automatically compiles a sample application and verify that it works. - -*** Fuzzing with Unicorn Mode *** - -To really use unicorn-mode effectively you need to prepare the following: - - * Relevant binary code to be fuzzed - * Knowledge of the memory map and good starting state - * Folder containing sample inputs to start fuzzing with - - Same ideas as any other AFL inputs - - Quality/speed of results will depend greatly on quality of starting - samples - - See AFL's guidance on how to create a sample corpus - * Unicorn-based test harness which: - - Adds memory map regions - - Loads binary code into memory - - Emulates at least one instruction* - - Yeah, this is lame. See 'Gotchas' section below for more info - - Loads and verifies data to fuzz from a command-line specified file - - AFL will provide mutated inputs by changing the file passed to - the test harness - - Presumably the data to be fuzzed is at a fixed buffer address - - If input constraints (size, invalid bytes, etc.) are known they - should be checked after the file is loaded. If a constraint - fails, just exit the test harness. AFL will treat the input as - 'uninteresting' and move on. - - Sets up registers and memory state for beginning of test - - Emulates the interested code from beginning to end - - If a crash is detected, the test harness must 'crash' by - throwing a signal (SIGSEGV, SIGKILL, SIGABORT, etc.) - -Once you have all those things ready to go you just need to run afl-fuzz in -'unicorn-mode' by passing in the '-U' flag: - - $ afl-fuzz -U -m none -i /path/to/inputs -o /path/to/results -- ./test_harness @@ - -The normal afl-fuzz command line format applies to everything here. Refer to -AFL's main documentation for more info about how to use afl-fuzz effectively. - -For a much clearer vision of what all of this looks like, please refer to the -sample provided in the 'unicorn_mode/samples' directory. There is also a blog -post that goes over the basics at: - -https://medium.com/@njvoss299/afl-unicorn-fuzzing-arbitrary-binary-code-563ca28936bf - -The 'helper_scripts' directory also contains several helper scripts that allow you -to dump context from a running process, load it, and hook heap allocations. For details -on how to use this check out the follow-up blog post to the one linked above. - -A example use of AFL-Unicorn mode is discussed in the Paper Unicorefuzz: -https://www.usenix.org/conference/woot19/presentation/maier - -3) Gotchas, feedback, bugs --------------------------- - -To make sure that AFL's fork server starts up correctly the Unicorn test -harness script must emulate at least one instruction before loading the -data that will be fuzzed from the input file. It doesn't matter what the -instruction is, nor if it is valid. This is an artifact of how the fork-server -is started and could likely be fixed with some clever re-arranging of the -patches applied to Unicorn. - -Running the build script builds Unicorn and its python bindings and installs -them on your system. This installation will supersede any existing Unicorn -installation with the patched afl-unicorn version. - -Refer to the unicorn_mode/samples/arm_example/arm_tester.c for an example -of how to do this properly! If you don't get this right, AFL will not -load any mutated inputs and your fuzzing will be useless! diff --git a/qemu_mode/README.md b/qemu_mode/README.md new file mode 100644 index 00000000..610f6860 --- /dev/null +++ b/qemu_mode/README.md @@ -0,0 +1,137 @@ +# High-performance binary-only instrumentation for afl-fuzz + + (See ../docs/README for the general instruction manual.) + +## 1) Introduction + +The code in this directory allows you to build a standalone feature that +leverages the QEMU "user emulation" mode and allows callers to obtain +instrumentation output for black-box, closed-source binaries. This mechanism +can be then used by afl-fuzz to stress-test targets that couldn't be built +with afl-gcc. + +The usual performance cost is 2-5x, which is considerably better than +seen so far in experiments with tools such as DynamoRIO and PIN. + +The idea and much of the initial implementation comes from Andrew Griffiths. +The actual implementation on QEMU 3 (shipped with afl++) is from +Andrea Fioraldi. Special thanks to abiondo that re-enabled TCG chaining. + +## 2) How to use + +The feature is implemented with a patch to QEMU 3.1.0. The simplest way +to build it is to run ./build_qemu_support.sh. The script will download, +configure, and compile the QEMU binary for you. + +QEMU is a big project, so this will take a while, and you may have to +resolve a couple of dependencies (most notably, you will definitely need +libtool and glib2-devel). + +Once the binaries are compiled, you can leverage the QEMU tool by calling +afl-fuzz and all the related utilities with -Q in the command line. + +Note that QEMU requires a generous memory limit to run; somewhere around +200 MB is a good starting point, but considerably more may be needed for +more complex programs. The default -m limit will be automatically bumped up +to 200 MB when specifying -Q to afl-fuzz; be careful when overriding this. + +In principle, if you set CPU_TARGET before calling ./build_qemu_support.sh, +you should get a build capable of running non-native binaries (say, you +can try CPU_TARGET=arm). This is also necessary for running 32-bit binaries +on a 64-bit system (CPU_TARGET=i386). + +Note: if you want the QEMU helper to be installed on your system for all +users, you need to build it before issuing 'make install' in the parent +directory. + +## 3) Options + +There is ./libcompcov/ which implements laf-intel (splitting memcmp, +strncmp, etc. to make these conditions easier solvable by afl-fuzz). +Highly recommended. + +The option that enables QEMU CompareCoverage is AFL_COMPCOV_LEVEL. +AFL_COMPCOV_LEVEL=1 is to instrument comparisons with only immediate +values / read-only memory. AFL_COMPCOV_LEVEL=2 instruments all +comparison instructions and memory comparison functions when libcompcov +is preloaded. Comparison instructions are currently instrumented only +on the x86 and x86_64 targets. + +Another option is the environment variable AFL_ENTRYPOINT which allows +move the forkserver to a different part, e.g. just before the file is +opened (e.g. way after command line parsing and config file loading, etc) +which can be a huge speed improvement. Note that the specified address +must be an address of a basic block. + +## 4) Notes on linking + +The feature is supported only on Linux. Supporting BSD may amount to porting +the changes made to linux-user/elfload.c and applying them to +bsd-user/elfload.c, but I have not looked into this yet. + +The instrumentation follows only the .text section of the first ELF binary +encountered in the linking process. It does not trace shared libraries. In +practice, this means two things: + + - Any libraries you want to analyze *must* be linked statically into the + executed ELF file (this will usually be the case for closed-source + apps). + + - Standard C libraries and other stuff that is wasteful to instrument + should be linked dynamically - otherwise, AFL will have no way to avoid + peeking into them. + +Setting AFL_INST_LIBS=1 can be used to circumvent the .text detection logic +and instrument every basic block encountered. + +## 5) Benchmarking + +If you want to compare the performance of the QEMU instrumentation with that of +afl-gcc compiled code against the same target, you need to build the +non-instrumented binary with the same optimization flags that are normally +injected by afl-gcc, and make sure that the bits to be tested are statically +linked into the binary. A common way to do this would be: + +$ CFLAGS="-O3 -funroll-loops" ./configure --disable-shared +$ make clean all + +Comparative measurements of execution speed or instrumentation coverage will be +fairly meaningless if the optimization levels or instrumentation scopes don't +match. + +## 6) Gotchas, feedback, bugs + +If you need to fix up checksums or do other cleanup on mutated test cases, see +experimental/post_library/ for a viable solution. + +Do not mix QEMU mode with ASAN, MSAN, or the likes; QEMU doesn't appreciate +the "shadow VM" trick employed by the sanitizers and will probably just +run out of memory. + +Compared to fully-fledged virtualization, the user emulation mode is *NOT* a +security boundary. The binaries can freely interact with the host OS. If you +somehow need to fuzz an untrusted binary, put everything in a sandbox first. + +QEMU does not necessarily support all CPU or hardware features that your +target program may be utilizing. In particular, it does not appear to have +full support for AVX2 / FMA3. Using binaries for older CPUs, or recompiling them +with -march=core2, can help. + +Beyond that, this is an early-stage mechanism, so fields reports are welcome. +You can send them to . + +## 7) Alternatives: static rewriting + +Statically rewriting binaries just once, instead of attempting to translate +them at run time, can be a faster alternative. That said, static rewriting is +fraught with peril, because it depends on being able to properly and fully model +program control flow without actually executing each and every code path. + +The best implementation is this one: + + https://github.com/vanhauser-thc/afl-dyninst + +The issue however is Dyninst which is not rewriting the binaries so that +they run stable. a lot of crashes happen, especially in C++ programs that +use throw/catch. Try it first, and if it works for you be happy as it is +2-3x as fast as qemu_mode. diff --git a/qemu_mode/README.qemu b/qemu_mode/README.qemu deleted file mode 100644 index cd8559ad..00000000 --- a/qemu_mode/README.qemu +++ /dev/null @@ -1,146 +0,0 @@ -========================================================= -High-performance binary-only instrumentation for afl-fuzz -========================================================= - - (See ../docs/README for the general instruction manual.) - -1) Introduction ---------------- - -The code in this directory allows you to build a standalone feature that -leverages the QEMU "user emulation" mode and allows callers to obtain -instrumentation output for black-box, closed-source binaries. This mechanism -can be then used by afl-fuzz to stress-test targets that couldn't be built -with afl-gcc. - -The usual performance cost is 2-5x, which is considerably better than -seen so far in experiments with tools such as DynamoRIO and PIN. - -The idea and much of the initial implementation comes from Andrew Griffiths. -The actual implementation on QEMU 3 (shipped with afl++) is from -Andrea Fioraldi. Special thanks to abiondo that re-enabled TCG chaining. - -2) How to use -------------- - -The feature is implemented with a patch to QEMU 3.1.0. The simplest way -to build it is to run ./build_qemu_support.sh. The script will download, -configure, and compile the QEMU binary for you. - -QEMU is a big project, so this will take a while, and you may have to -resolve a couple of dependencies (most notably, you will definitely need -libtool and glib2-devel). - -Once the binaries are compiled, you can leverage the QEMU tool by calling -afl-fuzz and all the related utilities with -Q in the command line. - -Note that QEMU requires a generous memory limit to run; somewhere around -200 MB is a good starting point, but considerably more may be needed for -more complex programs. The default -m limit will be automatically bumped up -to 200 MB when specifying -Q to afl-fuzz; be careful when overriding this. - -In principle, if you set CPU_TARGET before calling ./build_qemu_support.sh, -you should get a build capable of running non-native binaries (say, you -can try CPU_TARGET=arm). This is also necessary for running 32-bit binaries -on a 64-bit system (CPU_TARGET=i386). - -Note: if you want the QEMU helper to be installed on your system for all -users, you need to build it before issuing 'make install' in the parent -directory. - -3) Options ----------- - -There is ./libcompcov/ which implements laf-intel (splitting memcmp, -strncmp, etc. to make these conditions easier solvable by afl-fuzz). -Highly recommended. - -The option that enables QEMU CompareCoverage is QEMU_COMPCOV_LEVEL. -QEMU_COMPCOV_LEVEL=1 is to instrument comparisons with only immediate -values / read-only memory. QEMU_COMPCOV_LEVEL=2 instruments all -comparison instructions and memory comparison functions when libcompcov -is preloaded. Comparison instructions are currently instrumented only -on the x86 and x86_64 targets. - -Another option is the environment variable AFL_ENTRYPOINT which allows -move the forkserver to a different part, e.g. just before the file is -opened (e.g. way after command line parsing and config file loading, etc) -which can be a huge speed improvement. Note that the specified address -must be an address of a basic block. - -4) Notes on linking -------------------- - -The feature is supported only on Linux. Supporting BSD may amount to porting -the changes made to linux-user/elfload.c and applying them to -bsd-user/elfload.c, but I have not looked into this yet. - -The instrumentation follows only the .text section of the first ELF binary -encountered in the linking process. It does not trace shared libraries. In -practice, this means two things: - - - Any libraries you want to analyze *must* be linked statically into the - executed ELF file (this will usually be the case for closed-source - apps). - - - Standard C libraries and other stuff that is wasteful to instrument - should be linked dynamically - otherwise, AFL will have no way to avoid - peeking into them. - -Setting AFL_INST_LIBS=1 can be used to circumvent the .text detection logic -and instrument every basic block encountered. - -5) Benchmarking ---------------- - -If you want to compare the performance of the QEMU instrumentation with that of -afl-gcc compiled code against the same target, you need to build the -non-instrumented binary with the same optimization flags that are normally -injected by afl-gcc, and make sure that the bits to be tested are statically -linked into the binary. A common way to do this would be: - -$ CFLAGS="-O3 -funroll-loops" ./configure --disable-shared -$ make clean all - -Comparative measurements of execution speed or instrumentation coverage will be -fairly meaningless if the optimization levels or instrumentation scopes don't -match. - -6) Gotchas, feedback, bugs --------------------------- - -If you need to fix up checksums or do other cleanup on mutated test cases, see -experimental/post_library/ for a viable solution. - -Do not mix QEMU mode with ASAN, MSAN, or the likes; QEMU doesn't appreciate -the "shadow VM" trick employed by the sanitizers and will probably just -run out of memory. - -Compared to fully-fledged virtualization, the user emulation mode is *NOT* a -security boundary. The binaries can freely interact with the host OS. If you -somehow need to fuzz an untrusted binary, put everything in a sandbox first. - -QEMU does not necessarily support all CPU or hardware features that your -target program may be utilizing. In particular, it does not appear to have -full support for AVX2 / FMA3. Using binaries for older CPUs, or recompiling them -with -march=core2, can help. - -Beyond that, this is an early-stage mechanism, so fields reports are welcome. -You can send them to . - -7) Alternatives: static rewriting ---------------------------------- - -Statically rewriting binaries just once, instead of attempting to translate -them at run time, can be a faster alternative. That said, static rewriting is -fraught with peril, because it depends on being able to properly and fully model -program control flow without actually executing each and every code path. - -The best implementation is this one: - - https://github.com/vanhauser-thc/afl-dyninst - -The issue however is Dyninst which is not rewriting the binaries so that -they run stable. a lot of crashes happen, especially in C++ programs that -use throw/catch. Try it first, and if it works for you be happy as it is -2-3x as fast as qemu_mode. diff --git a/qemu_mode/build_qemu_support.sh b/qemu_mode/build_qemu_support.sh index 78ad5680..35f5b8ca 100755 --- a/qemu_mode/build_qemu_support.sh +++ b/qemu_mode/build_qemu_support.sh @@ -3,10 +3,17 @@ # american fuzzy lop - QEMU build script # -------------------------------------- # -# Written by Andrew Griffiths and -# Michal Zalewski +# Originally written by Andrew Griffiths and +# Michal Zalewski +# +# TCG instrumentation and block chaining support by Andrea Biondo +# +# +# QEMU 3.1.0 port, TCG thread-safety, CompareCoverage and NeverZero +# counters by Andrea Fioraldi # # Copyright 2015, 2016, 2017 Google Inc. All rights reserved. +# Copyright 2019 AFLplusplus Project. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/qemu_mode/libcompcov/libcompcov.so.c b/qemu_mode/libcompcov/libcompcov.so.c index 92e4dbaa..0ccda927 100644 --- a/qemu_mode/libcompcov/libcompcov.so.c +++ b/qemu_mode/libcompcov/libcompcov.so.c @@ -5,7 +5,7 @@ Written and maintained by Andrea Fioraldi - Copyright 2019 Andrea Fioraldi. All rights reserved. + Copyright 2019 AFLplusplus Project. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/qemu_mode/patches/afl-qemu-common.h b/qemu_mode/patches/afl-qemu-common.h index 8013800d..c475cb58 100644 --- a/qemu_mode/patches/afl-qemu-common.h +++ b/qemu_mode/patches/afl-qemu-common.h @@ -1,19 +1,18 @@ /* - american fuzzy lop - high-performance binary-only instrumentation - ----------------------------------------------------------------- + american fuzzy lop++ - high-performance binary-only instrumentation + ------------------------------------------------------------------- - Written by Andrew Griffiths and - Michal Zalewski - - Idea & design very much by Andrew Griffiths. + Originally written by Andrew Griffiths and + Michal Zalewski TCG instrumentation and block chaining support by Andrea Biondo - QEMU 3.1.0 port, TCG thread-safety and CompareCoverage by Andrea Fioraldi - + QEMU 3.1.0 port, TCG thread-safety, CompareCoverage and NeverZero + counters by Andrea Fioraldi Copyright 2015, 2016, 2017 Google Inc. All rights reserved. + Copyright 2019 AFLplusplus Project. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/qemu_mode/patches/afl-qemu-cpu-inl.h b/qemu_mode/patches/afl-qemu-cpu-inl.h index 04d9007d..4ad31b60 100644 --- a/qemu_mode/patches/afl-qemu-cpu-inl.h +++ b/qemu_mode/patches/afl-qemu-cpu-inl.h @@ -1,19 +1,18 @@ /* - american fuzzy lop - high-performance binary-only instrumentation - ----------------------------------------------------------------- + american fuzzy lop++ - high-performance binary-only instrumentation + ------------------------------------------------------------------- - Written by Andrew Griffiths and - Michal Zalewski - - Idea & design very much by Andrew Griffiths. + Originally written by Andrew Griffiths and + Michal Zalewski TCG instrumentation and block chaining support by Andrea Biondo - QEMU 3.1.0 port, TCG thread-safety and CompareCoverage by Andrea Fioraldi - + QEMU 3.1.0 port, TCG thread-safety, CompareCoverage and NeverZero + counters by Andrea Fioraldi Copyright 2015, 2016, 2017 Google Inc. All rights reserved. + Copyright 2019 AFLplusplus Project. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/qemu_mode/patches/afl-qemu-cpu-translate-inl.h b/qemu_mode/patches/afl-qemu-cpu-translate-inl.h index fc78e652..09ecb9d2 100644 --- a/qemu_mode/patches/afl-qemu-cpu-translate-inl.h +++ b/qemu_mode/patches/afl-qemu-cpu-translate-inl.h @@ -1,19 +1,18 @@ /* - american fuzzy lop - high-performance binary-only instrumentation - ----------------------------------------------------------------- + american fuzzy lop++ - high-performance binary-only instrumentation + ------------------------------------------------------------------- - Written by Andrew Griffiths and - Michal Zalewski - - Idea & design very much by Andrew Griffiths. + Originally written by Andrew Griffiths and + Michal Zalewski TCG instrumentation and block chaining support by Andrea Biondo - - QEMU 3.1.0 port, TCG thread-safety and CompareCoverage by Andrea Fioraldi - + + QEMU 3.1.0 port, TCG thread-safety, CompareCoverage and NeverZero + counters by Andrea Fioraldi Copyright 2015, 2016, 2017 Google Inc. All rights reserved. + Copyright 2019 AFLplusplus Project. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/qemu_mode/patches/afl-qemu-tcg-inl.h b/qemu_mode/patches/afl-qemu-tcg-inl.h index ff90d1b9..a9c53b8c 100644 --- a/qemu_mode/patches/afl-qemu-tcg-inl.h +++ b/qemu_mode/patches/afl-qemu-tcg-inl.h @@ -1,19 +1,18 @@ /* - american fuzzy lop - high-performance binary-only instrumentation - ----------------------------------------------------------------- + american fuzzy lop++ - high-performance binary-only instrumentation + ------------------------------------------------------------------- - Written by Andrew Griffiths and - Michal Zalewski - - Idea & design very much by Andrew Griffiths. + Originally written by Andrew Griffiths and + Michal Zalewski TCG instrumentation and block chaining support by Andrea Biondo - QEMU 3.1.0 port, TCG thread-safety and CompareCoverage by Andrea Fioraldi - + QEMU 3.1.0 port, TCG thread-safety, CompareCoverage and NeverZero + counters by Andrea Fioraldi Copyright 2015, 2016, 2017 Google Inc. All rights reserved. + Copyright 2019 AFLplusplus Project. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/qemu_mode/patches/afl-qemu-translate-inl.h b/qemu_mode/patches/afl-qemu-translate-inl.h index d63c5167..ffe43dba 100644 --- a/qemu_mode/patches/afl-qemu-translate-inl.h +++ b/qemu_mode/patches/afl-qemu-translate-inl.h @@ -1,19 +1,18 @@ /* - american fuzzy lop - high-performance binary-only instrumentation - ----------------------------------------------------------------- + american fuzzy lop++ - high-performance binary-only instrumentation + ------------------------------------------------------------------- - Written by Andrew Griffiths and - Michal Zalewski - - Idea & design very much by Andrew Griffiths. + Originally written by Andrew Griffiths and + Michal Zalewski TCG instrumentation and block chaining support by Andrea Biondo - QEMU 3.1.0 port, TCG thread-safety and CompareCoverage by Andrea Fioraldi - + QEMU 3.1.0 port, TCG thread-safety, CompareCoverage and NeverZero + counters by Andrea Fioraldi Copyright 2015, 2016, 2017 Google Inc. All rights reserved. + Copyright 2019 AFLplusplus Project. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/unicorn_mode/README.md b/unicorn_mode/README.md index 9ee975ef..ea3e3c9b 100644 --- a/unicorn_mode/README.md +++ b/unicorn_mode/README.md @@ -1,23 +1,119 @@ -``` - __ _ _ - __ _ / _| | _ _ _ __ (_) ___ ___ _ __ _ __ - / _` | |_| |___| | | | '_ \| |/ __/ _ \| '__| '_ \ -| (_| | _| |___| |_| | | | | | (_| (_) | | | | | | - \__,_|_| |_| \__,_|_| |_|_|\___\___/|_| |_| |_| - -``` +# Unicorn-based binary-only instrumentation for afl-fuzz -afl-unicorn lets you fuzz any piece of binary that can be emulated by -[Unicorn Engine](http://www.unicorn-engine.org/). +The idea and much of the original implementation comes from Nathan Voss . -Requirements: Python2 +The port to afl++ if by Dominik Maier . -For the full readme please see docs/unicorn_mode.txt +The CompareCoverage and NeverZero counters features by Andrea Fioraldi . -For an in-depth description of what this is, how to install it, and how to use -it check out this [blog post](https://medium.com/@njvoss299/afl-unicorn-fuzzing-arbitrary-binary-code-563ca28936bf). +## 1) Introduction -For general help with AFL, please refer to the documents in the ./docs/ directory. +The code in ./unicorn_mode allows you to build a standalone feature that +leverages the Unicorn Engine and allows callers to obtain instrumentation +output for black-box, closed-source binary code snippets. This mechanism +can be then used by afl-fuzz to stress-test targets that couldn't be built +with afl-gcc or used in QEMU mode, or with other extensions such as +TriforceAFL. -Created by Nathan Voss, originally funded by -[Battelle](https://www.battelle.org/cyber). +There is a significant performance penalty compared to native AFL, +but at least we're able to use AFL on these binaries, right? + +## 2) How to use + +Requirements: you need an installed python2 environment. + +### Building AFL's Unicorn Mode + +First, make afl++ as usual. +Once that completes successfully you need to build and add in the Unicorn Mode +features: + + $ cd unicorn_mode + $ ./build_unicorn_support.sh + +NOTE: This script downloads a Unicorn Engine commit that has been tested +and is stable-ish from the Unicorn github page. If you are offline, you'll need +to hack up this script a little bit and supply your own copy of Unicorn's latest +stable release. It's not very hard, just check out the beginning of the +build_unicorn_support.sh script and adjust as necessary. + +Building Unicorn will take a little bit (~5-10 minutes). Once it completes +it automatically compiles a sample application and verify that it works. + +### Fuzzing with Unicorn Mode + +To really use unicorn-mode effectively you need to prepare the following: + + * Relevant binary code to be fuzzed + * Knowledge of the memory map and good starting state + * Folder containing sample inputs to start fuzzing with + + Same ideas as any other AFL inputs + + Quality/speed of results will depend greatly on quality of starting + samples + + See AFL's guidance on how to create a sample corpus + * Unicorn-based test harness which: + + Adds memory map regions + + Loads binary code into memory + + Emulates at least one instruction* + + Yeah, this is lame. See 'Gotchas' section below for more info + + Loads and verifies data to fuzz from a command-line specified file + + AFL will provide mutated inputs by changing the file passed to + the test harness + + Presumably the data to be fuzzed is at a fixed buffer address + + If input constraints (size, invalid bytes, etc.) are known they + should be checked after the file is loaded. If a constraint + fails, just exit the test harness. AFL will treat the input as + 'uninteresting' and move on. + + Sets up registers and memory state for beginning of test + + Emulates the interested code from beginning to end + + If a crash is detected, the test harness must 'crash' by + throwing a signal (SIGSEGV, SIGKILL, SIGABORT, etc.) + +Once you have all those things ready to go you just need to run afl-fuzz in +'unicorn-mode' by passing in the '-U' flag: + + $ afl-fuzz -U -m none -i /path/to/inputs -o /path/to/results -- ./test_harness @@ + +The normal afl-fuzz command line format applies to everything here. Refer to +AFL's main documentation for more info about how to use afl-fuzz effectively. + +For a much clearer vision of what all of this looks like, please refer to the +sample provided in the 'unicorn_mode/samples' directory. There is also a blog +post that goes over the basics at: + +https://medium.com/@njvoss299/afl-unicorn-fuzzing-arbitrary-binary-code-563ca28936bf + +The 'helper_scripts' directory also contains several helper scripts that allow you +to dump context from a running process, load it, and hook heap allocations. For details +on how to use this check out the follow-up blog post to the one linked above. + +A example use of AFL-Unicorn mode is discussed in the Paper Unicorefuzz: +https://www.usenix.org/conference/woot19/presentation/maier + +## 3) Options + +As for the QEMU-based instrumentation, the afl-unicorn twist of afl++ +comes with a sub-instruction based instrumentation similar in purpose to laf-intel. + +The options that enables Unicorn CompareCoverage are the same used for QEMU. +AFL_COMPCOV_LEVEL=1 is to instrument comparisons with only immediate +values. QEMU_COMPCOV_LEVEL=2 instruments all +comparison instructions. Comparison instructions are currently instrumented only +on the x86 and x86_64 targets. + +## 4) Gotchas, feedback, bugs + +To make sure that AFL's fork server starts up correctly the Unicorn test +harness script must emulate at least one instruction before loading the +data that will be fuzzed from the input file. It doesn't matter what the +instruction is, nor if it is valid. This is an artifact of how the fork-server +is started and could likely be fixed with some clever re-arranging of the +patches applied to Unicorn. + +Running the build script builds Unicorn and its python bindings and installs +them on your system. This installation will supersede any existing Unicorn +installation with the patched afl-unicorn version. + +Refer to the unicorn_mode/samples/arm_example/arm_tester.c for an example +of how to do this properly! If you don't get this right, AFL will not +load any mutated inputs and your fuzzing will be useless! diff --git a/unicorn_mode/build_unicorn_support.sh b/unicorn_mode/build_unicorn_support.sh index 2c0fe4b1..1575f66c 100755 --- a/unicorn_mode/build_unicorn_support.sh +++ b/unicorn_mode/build_unicorn_support.sh @@ -1,16 +1,20 @@ #!/bin/sh # -# american fuzzy lop - Unicorn-Mode build script -# -------------------------------------- +# american fuzzy lop++ - unicorn mode build script +# ------------------------------------------------ # -# Written by Nathan Voss +# Originally written by Nathan Voss # # Adapted from code by Andrew Griffiths and # Michal Zalewski # -# Adapted for Afl++ by Dominik Maier +# Adapted for AFLplusplus by Dominik Maier +# +# CompareCoverage and NeverZero counters by Andrea Fioraldi +# # # Copyright 2017 Battelle Memorial Institute. All rights reserved. +# Copyright 2019 AFLplusplus Project. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/unicorn_mode/patches/afl-unicorn-common.h b/unicorn_mode/patches/afl-unicorn-common.h index 9a1b2a6c..6798832c 100644 --- a/unicorn_mode/patches/afl-unicorn-common.h +++ b/unicorn_mode/patches/afl-unicorn-common.h @@ -1,17 +1,17 @@ /* - american fuzzy lop - high-performance binary-only instrumentation - ----------------------------------------------------------------- + american fuzzy lop++ - unicorn instrumentation + ---------------------------------------------- - Written by Andrew Griffiths and - Michal Zalewski + Originally written by Andrew Griffiths and + Michal Zalewski - TCG instrumentation and block chaining support by Andrea Biondo - Adapted for afl-unicorn by Dominik Maier - Idea & design very much by Andrew Griffiths. + CompareCoverage and NeverZero counters by Andrea Fioraldi + - Copyright 2015, 2016 Google Inc. All rights reserved. + Copyright 2015, 2016, 2017 Google Inc. All rights reserved. + Copyright 2019 AFLplusplus Project. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -24,7 +24,7 @@ to implement AFL-style instrumentation and to take care of the remaining parts of the AFL fork server logic. - The resulting QEMU binary is essentially a standalone instrumentation + The resulting libunicorn binary is essentially a standalone instrumentation tool; for an example of how to leverage it for other purposes, you can have a look at afl-showmap.c. diff --git a/unicorn_mode/patches/afl-unicorn-cpu-inl.h b/unicorn_mode/patches/afl-unicorn-cpu-inl.h index 90937a17..a713e4ca 100644 --- a/unicorn_mode/patches/afl-unicorn-cpu-inl.h +++ b/unicorn_mode/patches/afl-unicorn-cpu-inl.h @@ -1,17 +1,17 @@ /* - american fuzzy lop - high-performance binary-only instrumentation - ----------------------------------------------------------------- + american fuzzy lop++ - unicorn instrumentation + ---------------------------------------------- - Written by Andrew Griffiths and - Michal Zalewski + Originally written by Andrew Griffiths and + Michal Zalewski - TCG instrumentation and block chaining support by Andrea Biondo - Adapted for afl-unicorn by Dominik Maier - Idea & design very much by Andrew Griffiths. + CompareCoverage and NeverZero counters by Andrea Fioraldi + - Copyright 2015, 2016 Google Inc. All rights reserved. + Copyright 2015, 2016, 2017 Google Inc. All rights reserved. + Copyright 2019 AFLplusplus Project. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -24,7 +24,7 @@ to implement AFL-style instrumentation and to take care of the remaining parts of the AFL fork server logic. - The resulting QEMU binary is essentially a standalone instrumentation + The resulting libunicorn binary is essentially a standalone instrumentation tool; for an example of how to leverage it for other purposes, you can have a look at afl-showmap.c. diff --git a/unicorn_mode/patches/afl-unicorn-cpu-translate-inl.h b/unicorn_mode/patches/afl-unicorn-cpu-translate-inl.h index 7e8f47c9..69877c6b 100644 --- a/unicorn_mode/patches/afl-unicorn-cpu-translate-inl.h +++ b/unicorn_mode/patches/afl-unicorn-cpu-translate-inl.h @@ -1,17 +1,17 @@ /* - american fuzzy lop - high-performance binary-only instrumentation - ----------------------------------------------------------------- + american fuzzy lop++ - unicorn instrumentation + ---------------------------------------------- - Written by Andrew Griffiths and - Michal Zalewski + Originally written by Andrew Griffiths and + Michal Zalewski - TCG instrumentation and block chaining support by Andrea Biondo - Adapted for afl-unicorn by Dominik Maier - Idea & design very much by Andrew Griffiths. + CompareCoverage and NeverZero counters by Andrea Fioraldi + - Copyright 2015, 2016 Google Inc. All rights reserved. + Copyright 2015, 2016, 2017 Google Inc. All rights reserved. + Copyright 2019 AFLplusplus Project. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -24,7 +24,7 @@ to implement AFL-style instrumentation and to take care of the remaining parts of the AFL fork server logic. - The resulting QEMU binary is essentially a standalone instrumentation + The resulting libunicorn binary is essentially a standalone instrumentation tool; for an example of how to leverage it for other purposes, you can have a look at afl-showmap.c. diff --git a/unicorn_mode/patches/afl-unicorn-tcg-op-inl.h b/unicorn_mode/patches/afl-unicorn-tcg-op-inl.h index d5a29cce..fa4974d6 100644 --- a/unicorn_mode/patches/afl-unicorn-tcg-op-inl.h +++ b/unicorn_mode/patches/afl-unicorn-tcg-op-inl.h @@ -1,17 +1,17 @@ /* - american fuzzy lop - high-performance binary-only instrumentation - ----------------------------------------------------------------- + american fuzzy lop++ - unicorn instrumentation + ---------------------------------------------- - Written by Andrew Griffiths and - Michal Zalewski + Originally written by Andrew Griffiths and + Michal Zalewski - TCG instrumentation and block chaining support by Andrea Biondo - Adapted for afl-unicorn by Dominik Maier - Idea & design very much by Andrew Griffiths. + CompareCoverage and NeverZero counters by Andrea Fioraldi + - Copyright 2015, 2016 Google Inc. All rights reserved. + Copyright 2015, 2016, 2017 Google Inc. All rights reserved. + Copyright 2019 AFLplusplus Project. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -24,7 +24,7 @@ to implement AFL-style instrumentation and to take care of the remaining parts of the AFL fork server logic. - The resulting QEMU binary is essentially a standalone instrumentation + The resulting libunicorn binary is essentially a standalone instrumentation tool; for an example of how to leverage it for other purposes, you can have a look at afl-showmap.c. diff --git a/unicorn_mode/patches/afl-unicorn-tcg-runtime-inl.h b/unicorn_mode/patches/afl-unicorn-tcg-runtime-inl.h index 0019bbfa..1f0667ce 100644 --- a/unicorn_mode/patches/afl-unicorn-tcg-runtime-inl.h +++ b/unicorn_mode/patches/afl-unicorn-tcg-runtime-inl.h @@ -1,17 +1,17 @@ /* - american fuzzy lop - high-performance binary-only instrumentation - ----------------------------------------------------------------- + american fuzzy lop++ - unicorn instrumentation + ---------------------------------------------- - Written by Andrew Griffiths and - Michal Zalewski + Originally written by Andrew Griffiths and + Michal Zalewski - TCG instrumentation and block chaining support by Andrea Biondo - Adapted for afl-unicorn by Dominik Maier - Idea & design very much by Andrew Griffiths. + CompareCoverage and NeverZero counters by Andrea Fioraldi + - Copyright 2015, 2016 Google Inc. All rights reserved. + Copyright 2015, 2016, 2017 Google Inc. All rights reserved. + Copyright 2019 AFLplusplus Project. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -24,7 +24,7 @@ to implement AFL-style instrumentation and to take care of the remaining parts of the AFL fork server logic. - The resulting QEMU binary is essentially a standalone instrumentation + The resulting libunicorn binary is essentially a standalone instrumentation tool; for an example of how to leverage it for other purposes, you can have a look at afl-showmap.c. diff --git a/unicorn_mode/samples/compcov_x64/COMPILE.md b/unicorn_mode/samples/compcov_x64/COMPILE.md index db488d30..35de7ad8 100644 --- a/unicorn_mode/samples/compcov_x64/COMPILE.md +++ b/unicorn_mode/samples/compcov_x64/COMPILE.md @@ -1,5 +1,4 @@ -Compiling compcov_target.c -========================== +# Compiling compcov_target.c compcov_target.c was compiled without optimization, position-independent, and without standard libraries using the following command line: diff --git a/unicorn_mode/samples/compcov_x64/compcov_target.c b/unicorn_mode/samples/compcov_x64/compcov_target.c index 71b4cb0e..eb1205b1 100644 --- a/unicorn_mode/samples/compcov_x64/compcov_target.c +++ b/unicorn_mode/samples/compcov_x64/compcov_target.c @@ -7,7 +7,7 @@ * (0x00300000), so make sure that your Unicorn emulation of this * puts user data there. * - * Written by Nathan Voss + * Written by Andrea Fioraldi */ // Magic address where mutated data will be placed diff --git a/unicorn_mode/samples/simple/COMPILE.md b/unicorn_mode/samples/simple/COMPILE.md index bd4a66c6..f7bf5b50 100644 --- a/unicorn_mode/samples/simple/COMPILE.md +++ b/unicorn_mode/samples/simple/COMPILE.md @@ -1,5 +1,4 @@ -Compiling simple_target.c -========================== +# Compiling simple_target.c You shouldn't need to compile simple_target.c since a MIPS binary version is pre-built and shipped with afl-unicorn. This file documents how the binary @@ -38,4 +37,4 @@ mips-linux-gnu-gcc -o simple_target.elf simple_target.c -fPIC -O0 -nostdlib Note that the output of this is padded with nulls for 16-byte alignment. This is important when emulating it, as NOPs will be added after the return of main() -as necessary. \ No newline at end of file +as necessary. -- cgit 1.4.1