diff options
author | Khaled Yakdan <yakdan@code-intelligence.de> | 2019-09-04 23:20:18 +0200 |
---|---|---|
committer | Khaled Yakdan <yakdan@code-intelligence.de> | 2019-09-04 23:20:18 +0200 |
commit | b31dff6beec6a7aa17da6f7f8a2eef198c263ccc (patch) | |
tree | c039aeed3572b171c2b7108cd650a0ee53c1b0f6 /unicorn_mode | |
parent | 1b3f9713309d27c49b153f9b3af12d208076e93c (diff) | |
parent | abf61ecc8f1b4ea3de59f818d859139637b29f32 (diff) | |
download | afl++-b31dff6beec6a7aa17da6f7f8a2eef198c263ccc.tar.gz |
Merge branch 'master-upstream' into custom_mutator_docs
# Conflicts: # afl-fuzz.c
Diffstat (limited to 'unicorn_mode')
-rw-r--r-- | unicorn_mode/README.md | 130 | ||||
-rwxr-xr-x[-rw-r--r--] | unicorn_mode/build_unicorn_support.sh | 21 | ||||
-rw-r--r-- | unicorn_mode/patches/afl-unicorn-common.h | 48 | ||||
-rw-r--r-- | unicorn_mode/patches/afl-unicorn-cpu-inl.h | 141 | ||||
-rw-r--r-- | unicorn_mode/patches/afl-unicorn-cpu-translate-inl.h | 57 | ||||
-rw-r--r-- | unicorn_mode/patches/afl-unicorn-tcg-op-inl.h | 59 | ||||
-rw-r--r-- | unicorn_mode/patches/afl-unicorn-tcg-runtime-inl.h | 106 | ||||
-rw-r--r-- | unicorn_mode/patches/compcov.diff | 113 | ||||
-rw-r--r-- | unicorn_mode/samples/compcov_x64/COMPILE.md | 19 | ||||
-rw-r--r-- | unicorn_mode/samples/compcov_x64/compcov_target.bin | bin | 0 -> 86 bytes | |||
-rw-r--r-- | unicorn_mode/samples/compcov_x64/compcov_target.c | 28 | ||||
-rwxr-xr-x | unicorn_mode/samples/compcov_x64/compcov_target.elf | bin | 0 -> 5728 bytes | |||
-rw-r--r-- | unicorn_mode/samples/compcov_x64/compcov_test_harness.py | 170 | ||||
-rw-r--r-- | unicorn_mode/samples/compcov_x64/sample_inputs/sample1.bin | 1 | ||||
-rw-r--r-- | unicorn_mode/samples/simple/COMPILE.md | 5 |
15 files changed, 801 insertions, 97 deletions
diff --git a/unicorn_mode/README.md b/unicorn_mode/README.md index 9ee975ef..ea3e3c9b 100644 --- a/unicorn_mode/README.md +++ b/unicorn_mode/README.md @@ -1,23 +1,119 @@ -``` - __ _ _ - __ _ / _| | _ _ _ __ (_) ___ ___ _ __ _ __ - / _` | |_| |___| | | | '_ \| |/ __/ _ \| '__| '_ \ -| (_| | _| |___| |_| | | | | | (_| (_) | | | | | | - \__,_|_| |_| \__,_|_| |_|_|\___\___/|_| |_| |_| - -``` +# Unicorn-based binary-only instrumentation for afl-fuzz -afl-unicorn lets you fuzz any piece of binary that can be emulated by -[Unicorn Engine](http://www.unicorn-engine.org/). +The idea and much of the original implementation come from Nathan Voss <njvoss299@gmail.com>. -Requirements: Python2 +The port to afl++ is by Dominik Maier <mail@dmnk.co>. -For the full readme please see docs/unicorn_mode.txt +The CompareCoverage and NeverZero counter features are by Andrea Fioraldi <andreafioraldi@gmail.com>. -For an in-depth description of what this is, how to install it, and how to use -it check out this [blog post](https://medium.com/@njvoss299/afl-unicorn-fuzzing-arbitrary-binary-code-563ca28936bf). +## 1) Introduction -For general help with AFL, please refer to the documents in the ./docs/ directory. +The code in ./unicorn_mode allows you to build a standalone feature that +leverages the Unicorn Engine and allows callers to obtain instrumentation +output for black-box, closed-source binary code snippets. This mechanism +can then be used by afl-fuzz to stress-test targets that couldn't be built +with afl-gcc or used in QEMU mode, or with other extensions such as +TriforceAFL. -Created by Nathan Voss, originally funded by -[Battelle](https://www.battelle.org/cyber). +There is a significant performance penalty compared to native AFL, +but at least we're able to use AFL on these binaries, right? + +## 2) How to use + +Requirements: you need an installed python2 environment. + +### Building AFL's Unicorn Mode + +First, make afl++ as usual. 
+Once that completes successfully you need to build and add in the Unicorn Mode +features: + + $ cd unicorn_mode + $ ./build_unicorn_support.sh + +NOTE: This script downloads a Unicorn Engine commit that has been tested +and is stable-ish from the Unicorn github page. If you are offline, you'll need +to hack up this script a little bit and supply your own copy of Unicorn's latest +stable release. It's not very hard, just check out the beginning of the +build_unicorn_support.sh script and adjust as necessary. + +Building Unicorn will take a little bit (~5-10 minutes). Once it completes +it automatically compiles a sample application and verifies that it works. + +### Fuzzing with Unicorn Mode + +To really use unicorn-mode effectively you need to prepare the following: + + * Relevant binary code to be fuzzed + * Knowledge of the memory map and good starting state + * Folder containing sample inputs to start fuzzing with + + Same ideas as any other AFL inputs + + Quality/speed of results will depend greatly on quality of starting + samples + + See AFL's guidance on how to create a sample corpus + * Unicorn-based test harness which: + + Adds memory map regions + + Loads binary code into memory + + Emulates at least one instruction* + + Yeah, this is lame. See 'Gotchas' section below for more info + + Loads and verifies data to fuzz from a command-line specified file + + AFL will provide mutated inputs by changing the file passed to + the test harness + + Presumably the data to be fuzzed is at a fixed buffer address + + If input constraints (size, invalid bytes, etc.) are known they + should be checked after the file is loaded. If a constraint + fails, just exit the test harness. AFL will treat the input as + 'uninteresting' and move on. + + Sets up registers and memory state for beginning of test + + Emulates the code of interest from beginning to end + + If a crash is detected, the test harness must 'crash' by + throwing a signal (SIGSEGV, SIGKILL, SIGABRT, etc.) 
+ +Once you have all those things ready to go you just need to run afl-fuzz in +'unicorn-mode' by passing in the '-U' flag: + + $ afl-fuzz -U -m none -i /path/to/inputs -o /path/to/results -- ./test_harness @@ + +The normal afl-fuzz command line format applies to everything here. Refer to +AFL's main documentation for more info about how to use afl-fuzz effectively. + +For a much clearer vision of what all of this looks like, please refer to the +sample provided in the 'unicorn_mode/samples' directory. There is also a blog +post that goes over the basics at: + +https://medium.com/@njvoss299/afl-unicorn-fuzzing-arbitrary-binary-code-563ca28936bf + +The 'helper_scripts' directory also contains several helper scripts that allow you +to dump context from a running process, load it, and hook heap allocations. For details +on how to use this check out the follow-up blog post to the one linked above. + +An example use of AFL-Unicorn mode is discussed in the paper Unicorefuzz: +https://www.usenix.org/conference/woot19/presentation/maier + +## 3) Options + +As for the QEMU-based instrumentation, the afl-unicorn twist of afl++ +comes with a sub-instruction based instrumentation similar in purpose to laf-intel. + +The options that enable Unicorn CompareCoverage are the same as those used for QEMU. +AFL_COMPCOV_LEVEL=1 instruments comparisons with only immediate +values. AFL_COMPCOV_LEVEL=2 instruments all +comparison instructions. Comparison instructions are currently instrumented only +on the x86 and x86_64 targets. + +## 4) Gotchas, feedback, bugs + +To make sure that AFL's fork server starts up correctly the Unicorn test +harness script must emulate at least one instruction before loading the +data that will be fuzzed from the input file. It doesn't matter what the +instruction is, nor if it is valid. This is an artifact of how the fork-server +is started and could likely be fixed with some clever re-arranging of the +patches applied to Unicorn. 
+ +Running the build script builds Unicorn and its python bindings and installs +them on your system. This installation will supersede any existing Unicorn +installation with the patched afl-unicorn version. + +Refer to the unicorn_mode/samples/arm_example/arm_tester.c for an example +of how to do this properly! If you don't get this right, AFL will not +load any mutated inputs and your fuzzing will be useless! diff --git a/unicorn_mode/build_unicorn_support.sh b/unicorn_mode/build_unicorn_support.sh index 9dcf6773..1575f66c 100644..100755 --- a/unicorn_mode/build_unicorn_support.sh +++ b/unicorn_mode/build_unicorn_support.sh @@ -1,16 +1,20 @@ #!/bin/sh # -# american fuzzy lop - Unicorn-Mode build script -# -------------------------------------- +# american fuzzy lop++ - unicorn mode build script +# ------------------------------------------------ # -# Written by Nathan Voss <njvoss99@gmail.com> +# Originally written by Nathan Voss <njvoss99@gmail.com> # # Adapted from code by Andrew Griffiths <agriffiths@google.com> and # Michal Zalewski <lcamtuf@google.com> # -# Adapted for Afl++ by Dominik Maier <mail@dmnk.co> +# Adapted for AFLplusplus by Dominik Maier <mail@dmnk.co> +# +# CompareCoverage and NeverZero counters by Andrea Fioraldi +# <andreafioraldi@gmail.com> # # Copyright 2017 Battelle Memorial Institute. All rights reserved. +# Copyright 2019 AFLplusplus Project. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -127,12 +131,13 @@ tar xzf "$ARCHIVE" -C ./unicorn --strip-components=1 || exit 1 echo "[+] Unpacking successful." -rm -rf "$ARCHIVE" || exit 1 +#rm -rf "$ARCHIVE" || exit 1 echo "[*] Applying patches..." 
-cp patches/afl-unicorn-cpu-inl.h unicorn || exit 1 -patch -p1 --directory unicorn <patches/patches.diff || exit 1 +cp patches/*.h unicorn || exit 1 +patch -p1 --directory unicorn < patches/patches.diff || exit 1 +patch -p1 --directory unicorn < patches/compcov.diff || exit 1 echo "[+] Patching done." @@ -144,7 +149,7 @@ echo "[+] Configuration complete." echo "[*] Attempting to build Unicorn (fingers crossed!)..." -UNICORN_QEMU_FLAGS='--python=python2' make || exit 1 +UNICORN_QEMU_FLAGS='--python=python2' make -j `nproc` || exit 1 echo "[+] Build process successful!" diff --git a/unicorn_mode/patches/afl-unicorn-common.h b/unicorn_mode/patches/afl-unicorn-common.h new file mode 100644 index 00000000..fd88e21b --- /dev/null +++ b/unicorn_mode/patches/afl-unicorn-common.h @@ -0,0 +1,48 @@ +/* + american fuzzy lop++ - unicorn instrumentation + ---------------------------------------------- + + Originally written by Andrew Griffiths <agriffiths@google.com> and + Michal Zalewski <lcamtuf@google.com> + + Adapted for afl-unicorn by Dominik Maier <mail@dmnk.co> + + CompareCoverage and NeverZero counters by Andrea Fioraldi + <andreafioraldi@gmail.com> + + Copyright 2015, 2016, 2017 Google Inc. All rights reserved. + Copyright 2019 AFLplusplus Project. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at: + + http://www.apache.org/licenses/LICENSE-2.0 + + This code is a shim patched into the separately-distributed source + code of Unicorn 1.0.1. It leverages the built-in QEMU tracing functionality + to implement AFL-style instrumentation and to take care of the remaining + parts of the AFL fork server logic. + + The resulting libunicorn binary is essentially a standalone instrumentation + tool; for an example of how to leverage it for other purposes, you can + have a look at afl-showmap.c. 
+ + */ + +#include "../../config.h" + +/* NeverZero */ + +#if (defined(__x86_64__) || defined(__i386__)) && defined(AFL_QEMU_NOT_ZERO) +#define INC_AFL_AREA(loc) \ + asm volatile( \ + "incb (%0, %1, 1)\n" \ + "adcb $0, (%0, %1, 1)\n" \ + : /* no out */ \ + : "r"(afl_area_ptr), "r"(loc) \ + : "memory", "eax") +#else +#define INC_AFL_AREA(loc) afl_area_ptr[loc]++ +#endif + diff --git a/unicorn_mode/patches/afl-unicorn-cpu-inl.h b/unicorn_mode/patches/afl-unicorn-cpu-inl.h index 892c3f72..082d6d68 100644 --- a/unicorn_mode/patches/afl-unicorn-cpu-inl.h +++ b/unicorn_mode/patches/afl-unicorn-cpu-inl.h @@ -1,17 +1,17 @@ /* - american fuzzy lop - high-performance binary-only instrumentation - ----------------------------------------------------------------- + american fuzzy lop++ - unicorn instrumentation + ---------------------------------------------- - Written by Andrew Griffiths <agriffiths@google.com> and - Michal Zalewski <lcamtuf@google.com> + Originally written by Andrew Griffiths <agriffiths@google.com> and + Michal Zalewski <lcamtuf@google.com> - TCG instrumentation and block chaining support by Andrea Biondo - <andrea.biondo965@gmail.com> Adapted for afl-unicorn by Dominik Maier <mail@dmnk.co> - Idea & design very much by Andrew Griffiths. + CompareCoverage and NeverZero counters by Andrea Fioraldi + <andreafioraldi@gmail.com> - Copyright 2015, 2016 Google Inc. All rights reserved. + Copyright 2015, 2016, 2017 Google Inc. All rights reserved. + Copyright 2019 AFLplusplus Project. All rights reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -24,7 +24,7 @@ to implement AFL-style instrumentation and to take care of the remaining parts of the AFL fork server logic. 
- The resulting QEMU binary is essentially a standalone instrumentation + The resulting libunicorn binary is essentially a standalone instrumentation tool; for an example of how to leverage it for other purposes, you can have a look at afl-showmap.c. @@ -33,7 +33,7 @@ #include <sys/shm.h> #include <sys/types.h> #include <sys/wait.h> -#include "../../config.h" +#include "afl-unicorn-common.h" /*************************** * VARIOUS AUXILIARY STUFF * @@ -44,21 +44,29 @@ it to translate within its own context, too (this avoids translation overhead in the next forked-off copy). */ -#define AFL_UNICORN_CPU_SNIPPET1 do { \ +#define AFL_UNICORN_CPU_SNIPPET1 \ + do { \ + \ afl_request_tsl(pc, cs_base, flags); \ + \ } while (0) /* This snippet kicks in when the instruction pointer is positioned at _start and does the usual forkserver stuff, not very different from regular instrumentation injected via afl-as.h. */ -#define AFL_UNICORN_CPU_SNIPPET2 do { \ - if(unlikely(afl_first_instr == 0)) { \ - afl_setup(); \ - afl_forkserver(env); \ - afl_first_instr = 1; \ - } \ - afl_maybe_log(tb->pc); \ +#define AFL_UNICORN_CPU_SNIPPET2 \ + do { \ + \ + if (unlikely(afl_first_instr == 0)) { \ + \ + afl_setup(env->uc); \ + afl_forkserver(env); \ + afl_first_instr = 1; \ + \ + } \ + afl_maybe_log(env->uc, tb->pc); \ + \ } while (0) /* We use one additional file descriptor to relay "needs translation" @@ -66,37 +74,31 @@ #define TSL_FD (FORKSRV_FD - 1) -/* This is equivalent to afl-as.h: */ - -static unsigned char *afl_area_ptr; - /* Set in the child process in forkserver mode: */ static unsigned char afl_fork_child; -static unsigned int afl_forksrv_pid; - -/* Instrumentation ratio: */ - -static unsigned int afl_inst_rms = MAP_SIZE; +static unsigned int afl_forksrv_pid; /* Function declarations. 
*/ -static void afl_setup(void); -static void afl_forkserver(CPUArchState*); -static inline void afl_maybe_log(unsigned long); +static void afl_setup(struct uc_struct* uc); +static void afl_forkserver(CPUArchState*); +static inline void afl_maybe_log(struct uc_struct* uc, unsigned long); static void afl_wait_tsl(CPUArchState*, int); static void afl_request_tsl(target_ulong, target_ulong, uint64_t); -static TranslationBlock *tb_find_slow(CPUArchState*, target_ulong, - target_ulong, uint64_t); +static TranslationBlock* tb_find_slow(CPUArchState*, target_ulong, target_ulong, + uint64_t); /* Data structure passed around by the translate handlers: */ struct afl_tsl { + target_ulong pc; target_ulong cs_base; - uint64_t flags; + uint64_t flags; + }; /************************* @@ -105,10 +107,9 @@ struct afl_tsl { /* Set up SHM region and initialize other stuff. */ -static void afl_setup(void) { +static void afl_setup(struct uc_struct* uc) { - char *id_str = getenv(SHM_ENV_VAR), - *inst_r = getenv("AFL_INST_RATIO"); + char *id_str = getenv(SHM_ENV_VAR), *inst_r = getenv("AFL_INST_RATIO"); int shm_id; @@ -121,31 +122,45 @@ static void afl_setup(void) { if (r > 100) r = 100; if (!r) r = 1; - afl_inst_rms = MAP_SIZE * r / 100; + uc->afl_inst_rms = MAP_SIZE * r / 100; + + } else { + + uc->afl_inst_rms = MAP_SIZE; } if (id_str) { shm_id = atoi(id_str); - afl_area_ptr = shmat(shm_id, NULL, 0); + uc->afl_area_ptr = shmat(shm_id, NULL, 0); - if (afl_area_ptr == (void*)-1) exit(1); + if (uc->afl_area_ptr == (void*)-1) exit(1); /* With AFL_INST_RATIO set to a low value, we want to touch the bitmap so that the parent doesn't give up on us. 
*/ - if (inst_r) afl_area_ptr[0] = 1; + if (inst_r) uc->afl_area_ptr[0] = 1; + } + + /* Maintain for compatibility */ + if (getenv("AFL_QEMU_COMPCOV")) { uc->afl_compcov_level = 1; } + if (getenv("AFL_COMPCOV_LEVEL")) { + + uc->afl_compcov_level = atoi(getenv("AFL_COMPCOV_LEVEL")); + + } + } /* Fork server logic, invoked once we hit first emulated instruction. */ -static void afl_forkserver(CPUArchState *env) { +static void afl_forkserver(CPUArchState* env) { static unsigned char tmp[4]; - if (!afl_area_ptr) return; + if (!env->uc->afl_area_ptr) return; /* Tell the parent that we're alive. If the parent doesn't want to talk, assume that we're not running in forkserver mode. */ @@ -159,13 +174,13 @@ static void afl_forkserver(CPUArchState *env) { while (1) { pid_t child_pid; - int status, t_fd[2]; + int status, t_fd[2]; /* Whoops, parent dead? */ if (read(FORKSRV_FD, tmp, 4) != 4) exit(2); - /* Establish a channel with child to grab translation commands. We'll + /* Establish a channel with child to grab translation commands. We'll read from t_fd[0], child will write to TSL_FD. */ if (pipe(t_fd) || dup2(t_fd[1], TSL_FD) < 0) exit(3); @@ -205,48 +220,36 @@ static void afl_forkserver(CPUArchState *env) { } - /* The equivalent of the tuple logging routine from afl-as.h. */ -static inline void afl_maybe_log(unsigned long cur_loc) { +static inline void afl_maybe_log(struct uc_struct* uc, unsigned long cur_loc) { static __thread unsigned long prev_loc; - // DEBUG - //printf("IN AFL_MAYBE_LOG 0x%lx\n", cur_loc); - - // MODIFIED FOR UNICORN MODE -> We want to log all addresses, - // so the checks for 'start < addr < end' are removed - if(!afl_area_ptr) - return; + u8* afl_area_ptr = uc->afl_area_ptr; - // DEBUG - //printf("afl_area_ptr = %p\n", afl_area_ptr); + if (!afl_area_ptr) return; /* Looks like QEMU always maps to fixed locations, so ASAN is not a concern. Phew. But instruction addresses may be aligned. Let's mangle the value to get something quasi-uniform. 
*/ - cur_loc = (cur_loc >> 4) ^ (cur_loc << 8); + cur_loc = (cur_loc >> 4) ^ (cur_loc << 8); cur_loc &= MAP_SIZE - 1; /* Implement probabilistic instrumentation by looking at scrambled block address. This keeps the instrumented locations stable across runs. */ - // DEBUG - //printf("afl_inst_rms = 0x%lx\n", afl_inst_rms); + if (cur_loc >= uc->afl_inst_rms) return; - if (cur_loc >= afl_inst_rms) return; + register uintptr_t afl_idx = cur_loc ^ prev_loc; - // DEBUG - //printf("cur_loc = 0x%lx\n", cur_loc); + INC_AFL_AREA(afl_idx); - afl_area_ptr[cur_loc ^ prev_loc]++; prev_loc = cur_loc >> 1; } - /* This code is invoked whenever QEMU decides that it doesn't have a translation of a particular block and needs to compute it. When this happens, we tell the parent to mirror the operation, so that the next fork() has a @@ -258,20 +261,19 @@ static void afl_request_tsl(target_ulong pc, target_ulong cb, uint64_t flags) { if (!afl_fork_child) return; - t.pc = pc; + t.pc = pc; t.cs_base = cb; - t.flags = flags; + t.flags = flags; if (write(TSL_FD, &t, sizeof(struct afl_tsl)) != sizeof(struct afl_tsl)) return; } - /* This is the other side of the same channel. Since timeouts are handled by afl-fuzz simply killing the child, we can just wait until the pipe breaks. */ -static void afl_wait_tsl(CPUArchState *env, int fd) { +static void afl_wait_tsl(CPUArchState* env, int fd) { struct afl_tsl t; @@ -279,12 +281,13 @@ static void afl_wait_tsl(CPUArchState *env, int fd) { /* Broken pipe means it's time to return to the fork server routine. 
*/ - if (read(fd, &t, sizeof(struct afl_tsl)) != sizeof(struct afl_tsl)) - break; + if (read(fd, &t, sizeof(struct afl_tsl)) != sizeof(struct afl_tsl)) break; tb_find_slow(env, t.pc, t.cs_base, t.flags); + } close(fd); + } diff --git a/unicorn_mode/patches/afl-unicorn-cpu-translate-inl.h b/unicorn_mode/patches/afl-unicorn-cpu-translate-inl.h new file mode 100644 index 00000000..7c84058f --- /dev/null +++ b/unicorn_mode/patches/afl-unicorn-cpu-translate-inl.h @@ -0,0 +1,57 @@ +/* + american fuzzy lop++ - unicorn instrumentation + ---------------------------------------------- + + Originally written by Andrew Griffiths <agriffiths@google.com> and + Michal Zalewski <lcamtuf@google.com> + + Adapted for afl-unicorn by Dominik Maier <mail@dmnk.co> + + CompareCoverage and NeverZero counters by Andrea Fioraldi + <andreafioraldi@gmail.com> + + Copyright 2015, 2016, 2017 Google Inc. All rights reserved. + Copyright 2019 AFLplusplus Project. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at: + + http://www.apache.org/licenses/LICENSE-2.0 + + This code is a shim patched into the separately-distributed source + code of Unicorn 1.0.1. It leverages the built-in QEMU tracing functionality + to implement AFL-style instrumentation and to take care of the remaining + parts of the AFL fork server logic. + + The resulting libunicorn binary is essentially a standalone instrumentation + tool; for an example of how to leverage it for other purposes, you can + have a look at afl-showmap.c. 
+ + */ + +#include "../../config.h" + +static void afl_gen_compcov(TCGContext *s, uint64_t cur_loc, TCGv_i64 arg1, + TCGv_i64 arg2, TCGMemOp ot, int is_imm) { + + if (!s->uc->afl_compcov_level || !s->uc->afl_area_ptr) return; + + if (!is_imm && s->uc->afl_compcov_level < 2) return; + + cur_loc = (cur_loc >> 4) ^ (cur_loc << 8); + cur_loc &= MAP_SIZE - 7; + + if (cur_loc >= s->uc->afl_inst_rms) return; + + switch (ot) { + + case MO_64: gen_afl_compcov_log_64(s, cur_loc, arg1, arg2); break; + case MO_32: gen_afl_compcov_log_32(s, cur_loc, arg1, arg2); break; + case MO_16: gen_afl_compcov_log_16(s, cur_loc, arg1, arg2); break; + default: return; + + } + +} + diff --git a/unicorn_mode/patches/afl-unicorn-tcg-op-inl.h b/unicorn_mode/patches/afl-unicorn-tcg-op-inl.h new file mode 100644 index 00000000..d21bbcc7 --- /dev/null +++ b/unicorn_mode/patches/afl-unicorn-tcg-op-inl.h @@ -0,0 +1,59 @@ +/* + american fuzzy lop++ - unicorn instrumentation + ---------------------------------------------- + + Originally written by Andrew Griffiths <agriffiths@google.com> and + Michal Zalewski <lcamtuf@google.com> + + Adapted for afl-unicorn by Dominik Maier <mail@dmnk.co> + + CompareCoverage and NeverZero counters by Andrea Fioraldi + <andreafioraldi@gmail.com> + + Copyright 2015, 2016, 2017 Google Inc. All rights reserved. + Copyright 2019 AFLplusplus Project. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at: + + http://www.apache.org/licenses/LICENSE-2.0 + + This code is a shim patched into the separately-distributed source + code of Unicorn 1.0.1. It leverages the built-in QEMU tracing functionality + to implement AFL-style instrumentation and to take care of the remaining + parts of the AFL fork server logic. 
+ + The resulting libunicorn binary is essentially a standalone instrumentation + tool; for an example of how to leverage it for other purposes, you can + have a look at afl-showmap.c. + + */ + +static inline void gen_afl_compcov_log_16(TCGContext *tcg_ctx, uint64_t cur_loc, + TCGv_i64 arg1, TCGv_i64 arg2) { + + TCGv_ptr tuc = tcg_const_ptr(tcg_ctx, tcg_ctx->uc); + TCGv_i64 tcur_loc = tcg_const_i64(tcg_ctx, cur_loc); + gen_helper_afl_compcov_log_16(tcg_ctx, tuc, tcur_loc, arg1, arg2); + +} + +static inline void gen_afl_compcov_log_32(TCGContext *tcg_ctx, uint64_t cur_loc, + TCGv_i64 arg1, TCGv_i64 arg2) { + + TCGv_ptr tuc = tcg_const_ptr(tcg_ctx, tcg_ctx->uc); + TCGv_i64 tcur_loc = tcg_const_i64(tcg_ctx, cur_loc); + gen_helper_afl_compcov_log_32(tcg_ctx, tuc, tcur_loc, arg1, arg2); + +} + +static inline void gen_afl_compcov_log_64(TCGContext *tcg_ctx, uint64_t cur_loc, + TCGv_i64 arg1, TCGv_i64 arg2) { + + TCGv_ptr tuc = tcg_const_ptr(tcg_ctx, tcg_ctx->uc); + TCGv_i64 tcur_loc = tcg_const_i64(tcg_ctx, cur_loc); + gen_helper_afl_compcov_log_64(tcg_ctx, tuc, tcur_loc, arg1, arg2); + +} + diff --git a/unicorn_mode/patches/afl-unicorn-tcg-runtime-inl.h b/unicorn_mode/patches/afl-unicorn-tcg-runtime-inl.h new file mode 100644 index 00000000..95e68302 --- /dev/null +++ b/unicorn_mode/patches/afl-unicorn-tcg-runtime-inl.h @@ -0,0 +1,106 @@ +/* + american fuzzy lop++ - unicorn instrumentation + ---------------------------------------------- + + Originally written by Andrew Griffiths <agriffiths@google.com> and + Michal Zalewski <lcamtuf@google.com> + + Adapted for afl-unicorn by Dominik Maier <mail@dmnk.co> + + CompareCoverage and NeverZero counters by Andrea Fioraldi + <andreafioraldi@gmail.com> + + Copyright 2015, 2016, 2017 Google Inc. All rights reserved. + Copyright 2019 AFLplusplus Project. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at: + + http://www.apache.org/licenses/LICENSE-2.0 + + This code is a shim patched into the separately-distributed source + code of Unicorn 1.0.1. It leverages the built-in QEMU tracing functionality + to implement AFL-style instrumentation and to take care of the remaining + parts of the AFL fork server logic. + + The resulting libunicorn binary is essentially a standalone instrumentation + tool; for an example of how to leverage it for other purposes, you can + have a look at afl-showmap.c. + + */ + +#include "uc_priv.h" +#include "afl-unicorn-common.h" + +void HELPER(afl_compcov_log_16)(void* uc_ptr, uint64_t cur_loc, uint64_t arg1, + uint64_t arg2) { + + u8* afl_area_ptr = ((struct uc_struct*)uc_ptr)->afl_area_ptr; + + if ((arg1 & 0xff) == (arg2 & 0xff)) { INC_AFL_AREA(cur_loc); } + +} + +void HELPER(afl_compcov_log_32)(void* uc_ptr, uint64_t cur_loc, uint64_t arg1, + uint64_t arg2) { + + u8* afl_area_ptr = ((struct uc_struct*)uc_ptr)->afl_area_ptr; + + if ((arg1 & 0xff) == (arg2 & 0xff)) { + + INC_AFL_AREA(cur_loc); + if ((arg1 & 0xffff) == (arg2 & 0xffff)) { + + INC_AFL_AREA(cur_loc + 1); + if ((arg1 & 0xffffff) == (arg2 & 0xffffff)) { INC_AFL_AREA(cur_loc + 2); } + + } + + } + +} + +void HELPER(afl_compcov_log_64)(void* uc_ptr, uint64_t cur_loc, uint64_t arg1, + uint64_t arg2) { + + u8* afl_area_ptr = ((struct uc_struct*)uc_ptr)->afl_area_ptr; + + if ((arg1 & 0xff) == (arg2 & 0xff)) { + + INC_AFL_AREA(cur_loc); + if ((arg1 & 0xffff) == (arg2 & 0xffff)) { + + INC_AFL_AREA(cur_loc + 1); + if ((arg1 & 0xffffff) == (arg2 & 0xffffff)) { + + INC_AFL_AREA(cur_loc + 2); + if ((arg1 & 0xffffffff) == (arg2 & 0xffffffff)) { + + INC_AFL_AREA(cur_loc + 3); + if ((arg1 & 0xffffffffff) == (arg2 & 0xffffffffff)) { + + INC_AFL_AREA(cur_loc + 4); + if ((arg1 & 0xffffffffffff) == (arg2 & 0xffffffffffff)) { + + INC_AFL_AREA(cur_loc + 5); + if ((arg1 & 0xffffffffffffff) == (arg2 & 0xffffffffffffff)) { + + INC_AFL_AREA(cur_loc + 6); + + } + 
+ } + + } + + } + + } + + } + + } + +} + diff --git a/unicorn_mode/patches/compcov.diff b/unicorn_mode/patches/compcov.diff new file mode 100644 index 00000000..8ec867d1 --- /dev/null +++ b/unicorn_mode/patches/compcov.diff @@ -0,0 +1,113 @@ +diff --git a/include/uc_priv.h b/include/uc_priv.h +index 22f494e..1aa7b3a 100644 +--- a/include/uc_priv.h ++++ b/include/uc_priv.h +@@ -245,6 +245,12 @@ struct uc_struct { + uint32_t target_page_align; + uint64_t next_pc; // save next PC for some special cases + bool hook_insert; // insert new hook at begin of the hook list (append by default) ++ ++#ifdef UNICORN_AFL ++ unsigned char *afl_area_ptr; ++ int afl_compcov_level; ++ unsigned int afl_inst_rms; ++#endif + }; + + // Metadata stub for the variable-size cpu context used with uc_context_*() +diff --git a/qemu/target-i386/translate.c b/qemu/target-i386/translate.c +index 36fae09..196d346 100644 +--- a/qemu/target-i386/translate.c ++++ b/qemu/target-i386/translate.c +@@ -33,6 +33,12 @@ + + #include "uc_priv.h" + ++#if defined(UNICORN_AFL) ++#include "../../afl-unicorn-cpu-translate-inl.h" ++#else ++#define afl_gen_compcov(a,b,c,d,e,f) do {} while (0) ++#endif ++ + #define PREFIX_REPZ 0x01 + #define PREFIX_REPNZ 0x02 + #define PREFIX_LOCK 0x04 +@@ -1555,6 +1561,7 @@ static void gen_op(DisasContext *s, int op, TCGMemOp ot, int d) + case OP_SUBL: + tcg_gen_mov_tl(tcg_ctx, cpu_cc_srcT, *cpu_T[0]); + tcg_gen_sub_tl(tcg_ctx, *cpu_T[0], *cpu_T[0], *cpu_T[1]); ++ afl_gen_compcov(tcg_ctx, s->pc, *cpu_T[0], *cpu_T[1], ot, d == OR_EAX); + gen_op_st_rm_T0_A0(s, ot, d); + gen_op_update2_cc(tcg_ctx); + set_cc_op(s, CC_OP_SUBB + ot); +@@ -1582,6 +1589,7 @@ static void gen_op(DisasContext *s, int op, TCGMemOp ot, int d) + tcg_gen_mov_tl(tcg_ctx, cpu_cc_src, *cpu_T[1]); + tcg_gen_mov_tl(tcg_ctx, cpu_cc_srcT, *cpu_T[0]); + tcg_gen_sub_tl(tcg_ctx, cpu_cc_dst, *cpu_T[0], *cpu_T[1]); ++ afl_gen_compcov(tcg_ctx, s->pc, *cpu_T[0], *cpu_T[1], ot, d == OR_EAX); + set_cc_op(s, CC_OP_SUBB + ot); + 
break; + } +diff --git a/qemu/tcg-runtime.c b/qemu/tcg-runtime.c +index 21b022a..14d7891 100644 +--- a/qemu/tcg-runtime.c ++++ b/qemu/tcg-runtime.c +@@ -31,9 +31,14 @@ + + #define DEF_HELPER_FLAGS_2(name, flags, ret, t1, t2) \ + dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2)); ++#define DEF_HELPER_FLAGS_4(name, flags, ret, t1, t2, t3, t4) \ ++ dh_ctype(ret) HELPER(name) (dh_ctype(t1), dh_ctype(t2), dh_ctype(t3), dh_ctype(t4)); + + #include "tcg-runtime.h" + ++#ifdef UNICORN_AFL ++#include "../afl-unicorn-tcg-runtime-inl.h" ++#endif + + /* 32-bit helpers */ + +diff --git a/qemu/tcg/tcg-op.h b/qemu/tcg/tcg-op.h +index 38b7dd9..c5a9af9 100644 +--- a/qemu/tcg/tcg-op.h ++++ b/qemu/tcg/tcg-op.h +@@ -27,6 +27,10 @@ + + int gen_new_label(TCGContext *); + ++#ifdef UNICORN_AFL ++#include "../../afl-unicorn-tcg-op-inl.h" ++#endif ++ + static inline void gen_uc_tracecode(TCGContext *tcg_ctx, int32_t size, int32_t type, void *uc, uint64_t pc) + { + TCGv_i32 tsize = tcg_const_i32(tcg_ctx, size); +diff --git a/qemu/tcg/tcg-runtime.h b/qemu/tcg/tcg-runtime.h +index 23a0c37..90b993c 100644 +--- a/qemu/tcg/tcg-runtime.h ++++ b/qemu/tcg/tcg-runtime.h +@@ -14,3 +14,9 @@ DEF_HELPER_FLAGS_2(sar_i64, TCG_CALL_NO_RWG_SE, s64, s64, s64) + + DEF_HELPER_FLAGS_2(mulsh_i64, TCG_CALL_NO_RWG_SE, s64, s64, s64) + DEF_HELPER_FLAGS_2(muluh_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64) ++ ++#ifdef UNICORN_AFL ++DEF_HELPER_FLAGS_4(afl_compcov_log_16, 0, void, ptr, i64, i64, i64) ++DEF_HELPER_FLAGS_4(afl_compcov_log_32, 0, void, ptr, i64, i64, i64) ++DEF_HELPER_FLAGS_4(afl_compcov_log_64, 0, void, ptr, i64, i64, i64) ++#endif +diff --git a/qemu/unicorn_common.h b/qemu/unicorn_common.h +index 8dcbb3e..11e18b4 100644 +--- a/qemu/unicorn_common.h ++++ b/qemu/unicorn_common.h +@@ -84,6 +84,10 @@ static inline void uc_common_init(struct uc_struct* uc) + + if (!uc->release) + uc->release = release_common; ++ ++#ifdef UNICORN_AFL ++ uc->afl_area_ptr = 0; ++#endif + } + + #endif diff --git 
a/unicorn_mode/samples/compcov_x64/COMPILE.md b/unicorn_mode/samples/compcov_x64/COMPILE.md new file mode 100644 index 00000000..35de7ad8 --- /dev/null +++ b/unicorn_mode/samples/compcov_x64/COMPILE.md @@ -0,0 +1,19 @@ +# Compiling compcov_target.c + +compcov_target.c was compiled without optimization, position-independent, +and without standard libraries using the following command line: + +``` +gcc -o compcov_target.elf compcov_target.c -fPIC -O0 -nostdlib +``` + +The .text section from the resulting ELF binary was then extracted to create +the raw binary blob that is loaded and emulated by compcov_test_harness.py: + +``` +objcopy -O binary --only-section=.text compcov_target.elf compcov_target.bin +``` + +Note that the output of this is padded with nulls for 16-byte alignment. This is +important when emulating it, as NOPs will be added after the return of main() +as necessary. diff --git a/unicorn_mode/samples/compcov_x64/compcov_target.bin b/unicorn_mode/samples/compcov_x64/compcov_target.bin new file mode 100644 index 00000000..091bf1db --- /dev/null +++ b/unicorn_mode/samples/compcov_x64/compcov_target.bin Binary files differdiff --git a/unicorn_mode/samples/compcov_x64/compcov_target.c b/unicorn_mode/samples/compcov_x64/compcov_target.c new file mode 100644 index 00000000..eb1205b1 --- /dev/null +++ b/unicorn_mode/samples/compcov_x64/compcov_target.c @@ -0,0 +1,28 @@ +/* + * Sample target file to test afl-unicorn fuzzing capabilities. + * This is a very trivial example that will crash pretty easily + * in several different exciting ways. + * + * Input is assumed to come from a buffer located at DATA_ADDRESS + * (0x00300000), so make sure that your Unicorn emulation of this + * puts user data there. 
+ * + * Written by Andrea Fioraldi + */ + +// Magic address where mutated data will be placed +#define DATA_ADDRESS 0x00300000 + +int main(void) { + unsigned int *data_buf = (unsigned int *) DATA_ADDRESS; + + if (data_buf[0] == 0xabadcafe) { + // Cause an 'invalid read' crash if data_buf[0] == 0xabadcafe + unsigned char invalid_read = *(unsigned char *) 0x00000000; + } else if (data_buf[1] == data_buf[2] + 0x4141) { + // Cause an 'invalid read' crash if data_buf[1] == data_buf[2] + 0x4141 + unsigned char invalid_read = *(unsigned char *) 0x00000000; + } + + return 0; +} diff --git a/unicorn_mode/samples/compcov_x64/compcov_target.elf b/unicorn_mode/samples/compcov_x64/compcov_target.elf new file mode 100755 index 00000000..7015fb46 --- /dev/null +++ b/unicorn_mode/samples/compcov_x64/compcov_target.elf Binary files differdiff --git a/unicorn_mode/samples/compcov_x64/compcov_test_harness.py b/unicorn_mode/samples/compcov_x64/compcov_test_harness.py new file mode 100644 index 00000000..5698cbc8 --- /dev/null +++ b/unicorn_mode/samples/compcov_x64/compcov_test_harness.py @@ -0,0 +1,170 @@ +""" + Simple test harness for AFL's Unicorn Mode. + + This loads the compcov_target.bin binary (precompiled as x86-64 code) into + Unicorn's memory map for emulation, places the specified input into + compcov_target's buffer (hardcoded to be at 0x300000), and executes 'main()'. + If any crashes occur during emulation, this script throws a matching signal + to tell AFL that a crash occurred.
+ + Run under AFL as follows: + + $ cd <afl_path>/unicorn_mode/samples/compcov_x64/ + $ ../../../afl-fuzz -U -m none -i ./sample_inputs -o ./output -- python compcov_test_harness.py @@ +""" + +import argparse +import os +import signal + +from unicorn import * +from unicorn.x86_const import * + +# Path to the file containing the binary to emulate +BINARY_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'compcov_target.bin') + +# Memory map for the code to be tested +CODE_ADDRESS = 0x00100000 # Arbitrary address where code to test will be loaded +CODE_SIZE_MAX = 0x00010000 # Max size for the code (64kb) +STACK_ADDRESS = 0x00200000 # Address of the stack (arbitrarily chosen) +STACK_SIZE = 0x00010000 # Size of the stack (arbitrarily chosen) +DATA_ADDRESS = 0x00300000 # Address where mutated data will be placed +DATA_SIZE_MAX = 0x00010000 # Maximum allowable size of mutated data + +try: + # If Capstone is installed then we'll dump disassembly, otherwise just dump the binary. + from capstone import * + cs = Cs(CS_ARCH_X86, CS_MODE_64) + def unicorn_debug_instruction(uc, address, size, user_data): + mem = uc.mem_read(address, size) + for (cs_address, cs_size, cs_mnemonic, cs_opstr) in cs.disasm_lite(bytes(mem), size): + print(" Instr: {:#016x}:\t{}\t{}".format(address, cs_mnemonic, cs_opstr)) +except ImportError: + def unicorn_debug_instruction(uc, address, size, user_data): + print(" Instr: addr=0x{0:016x}, size=0x{1:016x}".format(address, size)) + +def unicorn_debug_block(uc, address, size, user_data): + print("Basic Block: addr=0x{0:016x}, size=0x{1:016x}".format(address, size)) + +def unicorn_debug_mem_access(uc, access, address, size, value, user_data): + if access == UC_MEM_WRITE: + print(" >>> Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(address, size, value)) + else: + print(" >>> Read: addr=0x{0:016x} size={1}".format(address, size)) + +def unicorn_debug_mem_invalid_access(uc, access, address, size, value, user_data): + if access ==
UC_MEM_WRITE_UNMAPPED: + print(" >>> INVALID Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(address, size, value)) + else: + print(" >>> INVALID Read: addr=0x{0:016x} size={1}".format(address, size)) + +def force_crash(uc_error): + # This function should be called to indicate to AFL that a crash occurred during emulation. + # Pass in the exception received from Uc.emu_start() + mem_errors = [ + UC_ERR_READ_UNMAPPED, UC_ERR_READ_PROT, UC_ERR_READ_UNALIGNED, + UC_ERR_WRITE_UNMAPPED, UC_ERR_WRITE_PROT, UC_ERR_WRITE_UNALIGNED, + UC_ERR_FETCH_UNMAPPED, UC_ERR_FETCH_PROT, UC_ERR_FETCH_UNALIGNED, + ] + if uc_error.errno in mem_errors: + # Memory error - throw SIGSEGV + os.kill(os.getpid(), signal.SIGSEGV) + elif uc_error.errno == UC_ERR_INSN_INVALID: + # Invalid instruction - throw SIGILL + os.kill(os.getpid(), signal.SIGILL) + else: + # Not sure what happened - throw SIGABRT + os.kill(os.getpid(), signal.SIGABRT) + +def main(): + + parser = argparse.ArgumentParser(description="Test harness for compcov_target.bin") + parser.add_argument('input_file', type=str, help="Path to the file containing the mutated input to load") + parser.add_argument('-d', '--debug', default=False, action="store_true", help="Enables debug tracing") + args = parser.parse_args() + + # Instantiate an x86-64 Unicorn Engine instance + uc = Uc(UC_ARCH_X86, UC_MODE_64) + + if args.debug: + uc.hook_add(UC_HOOK_BLOCK, unicorn_debug_block) + uc.hook_add(UC_HOOK_CODE, unicorn_debug_instruction) + uc.hook_add(UC_HOOK_MEM_WRITE | UC_HOOK_MEM_READ, unicorn_debug_mem_access) + uc.hook_add(UC_HOOK_MEM_WRITE_UNMAPPED | UC_HOOK_MEM_READ_INVALID, unicorn_debug_mem_invalid_access) + + #--------------------------------------------------- + # Load the binary to emulate and map it into memory + + print("Loading data input from {}".format(args.input_file)) + binary_file = open(BINARY_FILE, 'rb') + binary_code = binary_file.read() + binary_file.close() + + # Apply constraints to the binary code + if
len(binary_code) > CODE_SIZE_MAX: + print("Binary code is too large (> {} bytes)".format(CODE_SIZE_MAX)) + return + + # Map the code region and write the binary code into it + uc.mem_map(CODE_ADDRESS, CODE_SIZE_MAX) + uc.mem_write(CODE_ADDRESS, binary_code) + + # Set the program counter to the start of the code + start_address = CODE_ADDRESS # Address of entry point of main() + end_address = CODE_ADDRESS + 0x55 # Address of last instruction in main() + uc.reg_write(UC_X86_REG_RIP, start_address) + + #----------------- + # Setup the stack + + uc.mem_map(STACK_ADDRESS, STACK_SIZE) + uc.reg_write(UC_X86_REG_RSP, STACK_ADDRESS + STACK_SIZE) + + #----------------------------------------------------- + # Emulate 1 instruction to kick off AFL's fork server + # THIS MUST BE DONE BEFORE LOADING USER DATA! + # If this isn't done every single run, the AFL fork server + # will not be started appropriately and you'll get erratic results! + # It doesn't matter what this returns with, it just has to execute at + # least one instruction in order to get the fork server started.
+ + # Execute 1 instruction just to startup the forkserver + print("Starting the AFL forkserver by executing 1 instruction") + try: + uc.emu_start(uc.reg_read(UC_X86_REG_RIP), 0, 0, count=1) + except UcError as e: + print("ERROR: Failed to execute a single instruction (error: {})!".format(e)) + return + + #----------------------------------------------- + # Load the mutated input and map it into memory + + # Load the mutated input from disk + print("Loading data input from {}".format(args.input_file)) + input_file = open(args.input_file, 'rb') + input = input_file.read() + input_file.close() + + # Apply constraints to the mutated input + if len(input) > DATA_SIZE_MAX: + print("Test input is too long (> {} bytes)".format(DATA_SIZE_MAX)) + return + + # Write the mutated command into the data buffer + uc.mem_map(DATA_ADDRESS, DATA_SIZE_MAX) + uc.mem_write(DATA_ADDRESS, input) + + #------------------------------------------------------------ + # Emulate the code, allowing it to process the mutated input + + print("Executing until a crash or execution reaches 0x{0:016x}".format(end_address)) + try: + result = uc.emu_start(uc.reg_read(UC_X86_REG_RIP), end_address, timeout=0, count=0) + except UcError as e: + print("Execution failed with error: {}".format(e)) + force_crash(e) + + print("Done.") + +if __name__ == "__main__": + main() diff --git a/unicorn_mode/samples/compcov_x64/sample_inputs/sample1.bin b/unicorn_mode/samples/compcov_x64/sample_inputs/sample1.bin new file mode 100644 index 00000000..445c7245 --- /dev/null +++ b/unicorn_mode/samples/compcov_x64/sample_inputs/sample1.bin @@ -0,0 +1 @@ +00000000000000000000000000000000 \ No newline at end of file diff --git a/unicorn_mode/samples/simple/COMPILE.md b/unicorn_mode/samples/simple/COMPILE.md index bd4a66c6..f7bf5b50 100644 --- a/unicorn_mode/samples/simple/COMPILE.md +++ b/unicorn_mode/samples/simple/COMPILE.md @@ -1,5 +1,4 @@ -Compiling simple_target.c -========================== +# Compiling simple_target.c 
You shouldn't need to compile simple_target.c since a MIPS binary version is pre-built and shipped with afl-unicorn. This file documents how the binary @@ -38,4 +37,4 @@ mips-linux-gnu-gcc -o simple_target.elf simple_target.c -fPIC -O0 -nostdlib Note that the output of this is padded with nulls for 16-byte alignment. This is important when emulating it, as NOPs will be added after the return of main() -as necessary. \ No newline at end of file +as necessary. |