diff options
Diffstat (limited to 'llvm_mode')
-rw-r--r-- | llvm_mode/GNUmakefile | 16 | ||||
-rw-r--r-- | llvm_mode/LLVMInsTrim.so.cc | 18 | ||||
-rw-r--r-- | llvm_mode/Makefile | 2 | ||||
-rw-r--r-- | llvm_mode/README.instrim.md | 1 | ||||
-rw-r--r-- | llvm_mode/README.laf-intel.md | 3 | ||||
-rw-r--r-- | llvm_mode/README.lto.md | 65 | ||||
-rw-r--r-- | llvm_mode/README.md | 108 | ||||
-rw-r--r-- | llvm_mode/README.persistent_mode.md | 167 | ||||
-rw-r--r-- | llvm_mode/afl-clang-fast.c | 124 | ||||
-rw-r--r-- | llvm_mode/afl-llvm-lto-instrim.so.cc | 11 | ||||
-rw-r--r-- | llvm_mode/afl-llvm-lto-instrumentation.so.cc | 11 | ||||
-rw-r--r-- | llvm_mode/afl-llvm-lto-whitelist.so.cc | 125 | ||||
-rw-r--r-- | llvm_mode/afl-llvm-pass.so.cc | 20 | ||||
-rw-r--r-- | llvm_mode/afl-llvm-rt.o.c | 109 | ||||
-rw-r--r-- | llvm_mode/cmplog-routines-pass.cc | 15 | ||||
-rw-r--r-- | llvm_mode/compare-transform-pass.so.cc | 152 |
16 files changed, 617 insertions, 330 deletions
diff --git a/llvm_mode/GNUmakefile b/llvm_mode/GNUmakefile index 01c83787..50a6be2b 100644 --- a/llvm_mode/GNUmakefile +++ b/llvm_mode/GNUmakefile @@ -32,7 +32,7 @@ ifeq "$(shell uname)" "OpenBSD" LLVM_CONFIG ?= $(BIN_PATH)/llvm-config HAS_OPT = $(shell test -x $(BIN_PATH)/opt && echo 0 || echo 1) ifeq "$(HAS_OPT)" "1" - $(error llvm_mode needs a complete llvm installation (versions 3.8.0 up to 11) -> e.g. "pkg_add llvm-7.0.1p9") + $(error llvm_mode needs a complete llvm installation (versions 3.4 up to 11) -> e.g. "pkg_add llvm-7.0.1p9") endif else LLVM_CONFIG ?= llvm-config @@ -53,7 +53,7 @@ ifeq "$(LLVMVER)" "" endif ifeq "$(LLVM_UNSUPPORTED)" "1" - $(warning llvm_mode only supports llvm versions 3.8.0 up to 11) + $(warning llvm_mode only supports llvm versions 3.4 up to 11) endif ifeq "$(LLVM_MAJOR)" "9" @@ -160,9 +160,9 @@ endif # After we set CC/CXX we can start makefile magic tests -ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -march=native -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1" - CFLAGS_OPT = -march=native -endif +#ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -march=native -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1" +# CFLAGS_OPT = -march=native +#endif ifeq "$(shell echo 'int main() {return 0; }' | $(CLANG_BIN) -x c - -flto=full -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1" AFL_CLANG_FLTO ?= -flto=full @@ -355,15 +355,15 @@ endif $(CXX) $(CLANG_CFL) -shared $< -o $@ $(CLANG_LFL) afl-llvm-common.o ../afl-llvm-rt.o: afl-llvm-rt.o.c | test_deps - $(CC) $(CFLAGS) -Wno-unused-result -fPIC -c $< -o $@ + $(CLANG_BIN) $(CFLAGS) -Wno-unused-result -fPIC -c $< -o $@ ../afl-llvm-rt-32.o: afl-llvm-rt.o.c | test_deps @printf "[*] Building 32-bit variant of the runtime (-m32)... " - @$(CC) $(CFLAGS) -Wno-unused-result -m32 -fPIC -c $< -o $@ 2>/dev/null; if [ "$$?" 
= "0" ]; then echo "success!"; else echo "failed (that's fine)"; fi + @$(CC_SAVE) $(CFLAGS) -Wno-unused-result -m32 -fPIC -c $< -o $@ 2>/dev/null; if [ "$$?" = "0" ]; then echo "success!"; else echo "failed (that's fine)"; fi ../afl-llvm-rt-64.o: afl-llvm-rt.o.c | test_deps @printf "[*] Building 64-bit variant of the runtime (-m64)... " - @$(CC) $(CFLAGS) -Wno-unused-result -m64 -fPIC -c $< -o $@ 2>/dev/null; if [ "$$?" = "0" ]; then echo "success!"; else echo "failed (that's fine)"; fi + @$(CC_SAVE) $(CFLAGS) -Wno-unused-result -m64 -fPIC -c $< -o $@ 2>/dev/null; if [ "$$?" = "0" ]; then echo "success!"; else echo "failed (that's fine)"; fi test_build: $(PROGS) @echo "[*] Testing the CC wrapper and instrumentation output..." diff --git a/llvm_mode/LLVMInsTrim.so.cc b/llvm_mode/LLVMInsTrim.so.cc index 7dc96bc3..ced1f383 100644 --- a/llvm_mode/LLVMInsTrim.so.cc +++ b/llvm_mode/LLVMInsTrim.so.cc @@ -160,21 +160,23 @@ struct InsTrim : public ModulePass { else #else if (ngram_size_str) -#ifdef LLVM_VERSION_STRING + #ifdef LLVM_VERSION_STRING FATAL( "Sorry, NGRAM branch coverage is not supported with llvm version %s!", LLVM_VERSION_STRING); -#else -#ifndef LLVM_VERSION_PATCH + #else + #ifndef LLVM_VERSION_PATCH FATAL( - "Sorry, NGRAM branch coverage is not supported with llvm version %d.%d.%d!", + "Sorry, NGRAM branch coverage is not supported with llvm version " + "%d.%d.%d!", LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, 0); -#else + #else FATAL( - "Sorry, NGRAM branch coverage is not supported with llvm version %d.%d.%d!", + "Sorry, NGRAM branch coverage is not supported with llvm version " + "%d.%d.%d!", LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, LLVM_VERISON_PATCH); -#endif -#endif + #endif + #endif #endif PrevLocSize = 1; diff --git a/llvm_mode/Makefile b/llvm_mode/Makefile index 0b306dde..3666a74d 100644 --- a/llvm_mode/Makefile +++ b/llvm_mode/Makefile @@ -1,2 +1,2 @@ all: - @echo please use GNU make, thanks! 
+ @gmake all || echo please install GNUmake diff --git a/llvm_mode/README.instrim.md b/llvm_mode/README.instrim.md index b905af11..53a518a9 100644 --- a/llvm_mode/README.instrim.md +++ b/llvm_mode/README.instrim.md @@ -6,6 +6,7 @@ InsTrim: Lightweight Instrumentation for Coverage-guided Fuzzing InsTrim uses CFG and markers to instrument just what is necessary in the binary in llvm_mode. It is about 10-15% faster without disadvantages. +It requires at least llvm version 3.8.0. ## Usage diff --git a/llvm_mode/README.laf-intel.md b/llvm_mode/README.laf-intel.md index 462c7bac..2fa4bc26 100644 --- a/llvm_mode/README.laf-intel.md +++ b/llvm_mode/README.laf-intel.md @@ -37,3 +37,6 @@ series of sign, exponent and mantissa comparisons followed by splitting each of them into 8 bit comparisons when necessary. It is activated with the `AFL_LLVM_LAF_SPLIT_FLOATS` setting, available only when `AFL_LLVM_LAF_SPLIT_COMPARES` is set. + +You can also set `AFL_LLVM_LAF_ALL` and have all of the above enabled :-) + diff --git a/llvm_mode/README.lto.md b/llvm_mode/README.lto.md index fa5b8665..48c587eb 100644 --- a/llvm_mode/README.lto.md +++ b/llvm_mode/README.lto.md @@ -6,8 +6,6 @@ This version requires a current llvm 11 compiled from the github master. 1. Use afl-clang-lto/afl-clang-lto++ because it is faster and gives better coverage than anything else that is out there in the AFL world - 1a. Set AFL_LLVM_INSTRUMENT=CFG if you want the InsTrimLTO version - (recommended) 2. You can use it together with llvm_mode: laf-intel and whitelisting features and can be combined with cmplog/Redqueen @@ -20,7 +18,6 @@ This version requires a current llvm 11 compiled from the github master. 
note that if that target uses _init functions or early constructors then also set `AFL_LLVM_MAP_DYNAMIC=1` as your target will crash otherwise - ## Introduction and problem description A big issue with how afl/afl++ works is that the basic block IDs that are @@ -50,7 +47,8 @@ and many dead ends until we got to this: The result: * 10-25% speed gain compared to llvm_mode * guaranteed non-colliding edge coverage :-) - * The compile time especially for libraries can be longer + * The compile time especially for binaries to an instrumented library can be + much longer Example build output from a libtiff build: ``` @@ -61,24 +59,46 @@ AUTODICTIONARY: 11 strings found [+] Instrumented 12071 locations with no collisions (on average 1046 collisions would be in afl-gcc/afl-clang-fast) (non-hardened mode). ``` -## Building llvm 11 +## Getting llvm 11 + +### Installing llvm 11 +Installing the llvm snapshot builds is easy and mostly painless: + +In the following line, change `NAME` to your Debian or Ubuntu release name +(e.g. buster, focal, eon, etc.): +``` +echo deb http://apt.llvm.org/NAME/ llvm-toolchain-NAME NAME >> /etc/apt/sources.list +``` +then add the pgp key of llvm and install the packages: +``` +wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - +apt-get update && apt-get upgrade -y +apt-get install -y clang-11 clang-tools-11 libc++1-11 libc++-11-dev \ + libc++abi1-11 libc++abi-11-dev libclang1-11 libclang-11-dev \ + libclang-common-11-dev libclang-cpp11 libclang-cpp11-dev liblld-11 \ + liblld-11-dev liblldb-11 liblldb-11-dev libllvm11 libomp-11-dev \ + libomp5-11 lld-11 lldb-11 llvm-11 llvm-11-dev llvm-11-runtime llvm-11-tools +``` + +### Building llvm 11 +Building llvm from github takes quite a long time and is not painless: ``` -$ sudo apt install binutils-dev # this is *essential*! 
-$ git clone https://github.com/llvm/llvm-project -$ cd llvm-project -$ mkdir build -$ cd build -$ cmake -DLLVM_ENABLE_PROJECTS='clang;clang-tools-extra;compiler-rt;libclc;libcxx;libcxxabi;libunwind;lld' -DCMAKE_BUILD_TYPE=Release -DLLVM_BINUTILS_INCDIR=/usr/include/ ../llvm/ -$ make -j $(nproc) -$ export PATH=`pwd`/bin:$PATH -$ export LLVM_CONFIG=`pwd`/bin/llvm-config -$ cd /path/to/AFLplusplus/ -$ make -$ cd llvm_mode -$ make -$ cd .. -$ make install +sudo apt install binutils-dev # this is *essential*! +git clone https://github.com/llvm/llvm-project +cd llvm-project +mkdir build +cd build +cmake -DLLVM_ENABLE_PROJECTS='clang;clang-tools-extra;compiler-rt;libclc;libcxx;libcxxabi;libunwind;lld' -DCMAKE_BUILD_TYPE=Release -DLLVM_BINUTILS_INCDIR=/usr/include/ ../llvm/ +make -j $(nproc) +export PATH=`pwd`/bin:$PATH +export LLVM_CONFIG=`pwd`/bin/llvm-config +cd /path/to/AFLplusplus/ +make +cd llvm_mode +make +cd .. +make install ``` ## How to use afl-clang-lto @@ -160,11 +180,6 @@ target will likely crash when started. This can be avoided by compiling with This can e.g. happen with OpenSSL. -## Upcoming Work - -1. Currently the LTO whitelist feature does not allow to instrument main, - start and init functions - ## History This was originally envisioned by hexcoder- in Summer 2019, however we saw no diff --git a/llvm_mode/README.md b/llvm_mode/README.md index 0bff1ff1..c24aef49 100644 --- a/llvm_mode/README.md +++ b/llvm_mode/README.md @@ -6,7 +6,7 @@ ## 1) Introduction -! llvm_mode works with llvm versions 3.8.0 up to 11 ! +! llvm_mode works with llvm versions 3.4 up to 11 ! The code in this directory allows you to instrument programs for AFL using true compiler-level instrumentation, instead of the more crude @@ -35,7 +35,7 @@ Once this implementation is shown to be sufficiently robust and portable, it will probably replace afl-clang. For now, it can be built separately and co-exists with the original code. 
-The idea and much of the implementation comes from Laszlo Szekeres. +The idea and much of the initial implementation came from Laszlo Szekeres. ## 2a) How to use this - short @@ -56,6 +56,8 @@ LLVM_CONFIG=llvm-config-7 REAL_CC=gcc REAL_CXX=g++ make It is highly recommended to use the newest clang version you can put your hands on :) +Then look at [README.persistent_mode.md](README.persistent_mode.md). + ## 2b) How to use this - long In order to leverage this mechanism, you need to have clang installed on your @@ -159,96 +161,13 @@ See [README.snapshot](README.snapshot.md) This is an early-stage mechanism, so field reports are welcome. You can send bug reports to <afl-users@googlegroups.com>. -## 6) Bonus feature #1: deferred initialization - -AFL tries to optimize performance by executing the targeted binary just once, -stopping it just before main(), and then cloning this "master" process to get -a steady supply of targets to fuzz. - -Although this approach eliminates much of the OS-, linker- and libc-level -costs of executing the program, it does not always help with binaries that -perform other time-consuming initialization steps - say, parsing a large config -file before getting to the fuzzed data. - -In such cases, it's beneficial to initialize the forkserver a bit later, once -most of the initialization work is already done, but before the binary attempts -to read the fuzzed input and parse it; in some cases, this can offer a 10x+ -performance gain. You can implement delayed initialization in LLVM mode in a -fairly simple way. - -First, find a suitable location in the code where the delayed cloning can -take place. This needs to be done with *extreme* care to avoid breaking the -binary. In particular, the program will probably malfunction if you select -a location after: - - - The creation of any vital threads or child processes - since the forkserver - can't clone them easily. - - - The initialization of timers via setitimer() or equivalent calls. 
- - - The creation of temporary files, network sockets, offset-sensitive file - descriptors, and similar shared-state resources - but only provided that - their state meaningfully influences the behavior of the program later on. - - - Any access to the fuzzed input, including reading the metadata about its - size. - -With the location selected, add this code in the appropriate spot: - -```c -#ifdef __AFL_HAVE_MANUAL_CONTROL - __AFL_INIT(); -#endif -``` - -You don't need the #ifdef guards, but including them ensures that the program -will keep working normally when compiled with a tool other than afl-clang-fast. - -Finally, recompile the program with afl-clang-fast (afl-gcc or afl-clang will -*not* generate a deferred-initialization binary) - and you should be all set! - -## 7) Bonus feature #2: persistent mode - -Some libraries provide APIs that are stateless, or whose state can be reset in -between processing different input files. When such a reset is performed, a -single long-lived process can be reused to try out multiple test cases, -eliminating the need for repeated fork() calls and the associated OS overhead. - -The basic structure of the program that does this would be: - -```c - while (__AFL_LOOP(1000)) { - - /* Read input data. */ - /* Call library code to be fuzzed. */ - /* Reset state. */ - - } - - /* Exit normally */ -``` - -The numerical value specified within the loop controls the maximum number -of iterations before AFL will restart the process from scratch. This minimizes -the impact of memory leaks and similar glitches; 1000 is a good starting point, -and going much higher increases the likelihood of hiccups without giving you -any real performance benefits. - -A more detailed template is shown in ../examples/persistent_demo/. -Similarly to the previous mode, the feature works only with afl-clang-fast; #ifdef -guards can be used to suppress it when using other compilers. 
- -Note that as with the previous mode, the feature is easy to misuse; if you -do not fully reset the critical state, you may end up with false positives or -waste a whole lot of CPU power doing nothing useful at all. Be particularly -wary of memory leaks and of the state of file descriptors. +## 6) deferred initialization, persistent mode, shared memory fuzzing -PS. Because there are task switches still involved, the mode isn't as fast as -"pure" in-process fuzzing offered, say, by LLVM's LibFuzzer; but it is a lot -faster than the normal fork() model, and compared to in-process fuzzing, -should be a lot more robust. +This is the most powerful and effective fuzzing you can do. +Please see [README.persistent_mode.md](README.persistent_mode.md) for a +full explanation. -## 8) Bonus feature #3: 'trace-pc-guard' mode +## 7) Bonus feature: 'trace-pc-guard' mode LLVM is shipping with a built-in execution tracing feature that provides AFL with the necessary tracing data without the need to @@ -260,11 +179,8 @@ If you have not an outdated compiler and want to give it a try, build targets this way: ``` - libtarget-1.0 $ AFL_LLVM_USE_TRACE_PC=1 make +AFL_LLVM_INSTRUMENT=PCGUARD make ``` -Note that this mode is about 20% slower than "vanilla" afl-clang-fast, -and about 5-10% slower than afl-clang. This is likely because the -instrumentation is not inlined, and instead involves a function call. -On systems that support it, compiling your target with -flto can help -a bit. +Note that this is currently the default, as it is the best mode. +If you have llvm 11 and compiled afl-clang-lto - this is the only better mode. 
diff --git a/llvm_mode/README.persistent_mode.md b/llvm_mode/README.persistent_mode.md new file mode 100644 index 00000000..7aae8faa --- /dev/null +++ b/llvm_mode/README.persistent_mode.md @@ -0,0 +1,167 @@ +# llvm_mode persistent mode + +## 1) Introduction + +The most effective way is to fuzz in persistent mode, as the speed can easily +be x10 or x20 times faster without any disadvantages. +*All professional fuzzing is using this mode.* + +This requires that the target can be called in a (or several) function(s), +and that the state can be reset so that multiple calls can be performed +without leaking memory and without former runs having any impact on following runs +(this can be seen by the `stability` indicator in the `afl-fuzz` UI). + +Examples can be found in [examples/persistent_mode](../examples/persistent_mode). + +## 2) TLDR; + +Example `fuzz_target.c`: +``` +#include "what_you_need_for_your_target.h" + +__AFL_FUZZ_INIT(); + +main() { + +#ifdef __AFL_HAVE_MANUAL_CONTROL + __AFL_INIT(); +#endif + + unsigned char *buf = __AFL_FUZZ_TESTCASE_BUF; // must be after __AFL_INIT + + while (__AFL_LOOP(10000)) { + + int len = __AFL_FUZZ_TESTCASE_LEN; + if (len < 8) continue; // check for a required/useful minimum input length + + /* Setup function call, e.g. struct target *tmp = libtarget_init() */ + /* Call function to be fuzzed, e.g.: */ + target_function(buf, len); + /* Reset state. e.g. libtarget_free(tmp) */ + + } + + return 0; + +} +``` +And then compile: +``` +afl-clang-fast -o fuzz_target fuzz_target.c -lwhat_you_need_for_your_target +``` +And that is it! +The speed increase is usually x10 to x20. + +## 3) deferred initialization + +AFL tries to optimize performance by executing the targeted binary just once, +stopping it just before main(), and then cloning this "master" process to get +a steady supply of targets to fuzz. 
+ +Although this approach eliminates much of the OS-, linker- and libc-level +costs of executing the program, it does not always help with binaries that +perform other time-consuming initialization steps - say, parsing a large config +file before getting to the fuzzed data. + +In such cases, it's beneficial to initialize the forkserver a bit later, once +most of the initialization work is already done, but before the binary attempts +to read the fuzzed input and parse it; in some cases, this can offer a 10x+ +performance gain. You can implement delayed initialization in LLVM mode in a +fairly simple way. + +First, find a suitable location in the code where the delayed cloning can +take place. This needs to be done with *extreme* care to avoid breaking the +binary. In particular, the program will probably malfunction if you select +a location after: + + - The creation of any vital threads or child processes - since the forkserver + can't clone them easily. + + - The initialization of timers via setitimer() or equivalent calls. + + - The creation of temporary files, network sockets, offset-sensitive file + descriptors, and similar shared-state resources - but only provided that + their state meaningfully influences the behavior of the program later on. + + - Any access to the fuzzed input, including reading the metadata about its + size. + +With the location selected, add this code in the appropriate spot: + +```c +#ifdef __AFL_HAVE_MANUAL_CONTROL + __AFL_INIT(); +#endif +``` + +You don't need the #ifdef guards, but including them ensures that the program +will keep working normally when compiled with a tool other than afl-clang-fast. + +Finally, recompile the program with afl-clang-fast (afl-gcc or afl-clang will +*not* generate a deferred-initialization binary) - and you should be all set! + +## 4) persistent mode + +Some libraries provide APIs that are stateless, or whose state can be reset in +between processing different input files. 
When such a reset is performed, a +single long-lived process can be reused to try out multiple test cases, +eliminating the need for repeated fork() calls and the associated OS overhead. + +The basic structure of the program that does this would be: + +```c + while (__AFL_LOOP(1000)) { + + /* Read input data. */ + /* Call library code to be fuzzed. */ + /* Reset state. */ + + } + + /* Exit normally */ +``` + +The numerical value specified within the loop controls the maximum number +of iterations before AFL will restart the process from scratch. This minimizes +the impact of memory leaks and similar glitches; 1000 is a good starting point, +and going much higher increases the likelihood of hiccups without giving you +any real performance benefits. + +A more detailed template is shown in ../examples/persistent_demo/. +Similarly to the previous mode, the feature works only with afl-clang-fast; #ifdef +guards can be used to suppress it when using other compilers. + +Note that as with the previous mode, the feature is easy to misuse; if you +do not fully reset the critical state, you may end up with false positives or +waste a whole lot of CPU power doing nothing useful at all. Be particularly +wary of memory leaks and of the state of file descriptors. + +PS. Because there are task switches still involved, the mode isn't as fast as +"pure" in-process fuzzing offered, say, by LLVM's LibFuzzer; but it is a lot +faster than the normal fork() model, and compared to in-process fuzzing, +should be a lot more robust. + +## 5) shared memory fuzzing + +You can speed up the fuzzing process even more by receiving the fuzzing data +via shared memory instead of stdin or files. +This is a further speed multiplier of about 2x. + +Setting this up is very easy: + +After the includes set the following macro: + +``` +__AFL_FUZZ_INIT(); +``` +Directly at the start of main - or if you are using the deferred forkserver +with `__AFL_INIT()` then *after* `__AFL_INIT? 
: +``` + unsigned char *buf = __AFL_FUZZ_TESTCASE_BUF; +``` + +Then as first line after the `__AFL_LOOP` while loop: +``` + int len = __AFL_FUZZ_TESTCASE_LEN; +``` +and that is all! diff --git a/llvm_mode/afl-clang-fast.c b/llvm_mode/afl-clang-fast.c index 49dc6c1c..0b081ae6 100644 --- a/llvm_mode/afl-clang-fast.c +++ b/llvm_mode/afl-clang-fast.c @@ -45,30 +45,30 @@ static u32 cc_par_cnt = 1; /* Param count, including argv0 */ static u8 llvm_fullpath[PATH_MAX]; static u8 instrument_mode, instrument_opt_mode, ngram_size, lto_mode, cpp_mode; static u8 *lto_flag = AFL_CLANG_FLTO; -static u8 *march_opt = CFLAGS_OPT; static u8 debug; static u8 cwd[4096]; static u8 cmplog_mode; u8 use_stdin = 0; /* dummy */ +// static u8 *march_opt = CFLAGS_OPT; enum { - INSTRUMENT_CLASSIC = 0, - INSTRUMENT_AFL = 0, - INSTRUMENT_DEFAULT = 0, - INSTRUMENT_PCGUARD = 1, - INSTRUMENT_INSTRIM = 2, - INSTRUMENT_CFG = 2, - INSTRUMENT_LTO = 3, - INSTRUMENT_OPT_CTX = 4, - INSTRUMENT_OPT_NGRAM = 8 + INSTURMENT_DEFAULT = 0, + INSTRUMENT_CLASSIC = 1, + INSTRUMENT_AFL = 1, + INSTRUMENT_PCGUARD = 2, + INSTRUMENT_INSTRIM = 3, + INSTRUMENT_CFG = 3, + INSTRUMENT_LTO = 4, + INSTRUMENT_OPT_CTX = 8, + INSTRUMENT_OPT_NGRAM = 16 }; -char instrument_mode_string[10][16] = { +char instrument_mode_string[18][18] = { - "CLASSIC", "PCGUARD", "CFG", "LTO", "CTX", "", - "", "", "NGRAM", "" + "DEFAULT", "CLASSIC", "PCGUARD", "CFG", "LTO", "", "", "", "CTX", "", + "", "", "", "", "", "", "NGRAM", "" }; @@ -206,6 +206,8 @@ static void edit_params(u32 argc, char **argv, char **envp) { } + cc_params[cc_par_cnt++] = "-Wno-unused-command-line-argument"; + if (lto_mode && cpp_mode) cc_params[cc_par_cnt++] = "-lc++"; // needed by fuzzbench, early @@ -220,6 +222,20 @@ static void edit_params(u32 argc, char **argv, char **envp) { afl-clang-lto(++) */ + if (lto_mode) { + + if (getenv("AFL_LLVM_WHITELIST") != NULL) { + + cc_params[cc_par_cnt++] = "-Xclang"; + cc_params[cc_par_cnt++] = "-load"; + cc_params[cc_par_cnt++] = 
"-Xclang"; + cc_params[cc_par_cnt++] = + alloc_printf("%s/afl-llvm-lto-whitelist.so", obj_path); + + } + + } + // laf if (getenv("LAF_SPLIT_SWITCHES") || getenv("AFL_LLVM_LAF_SPLIT_SWITCHES")) { @@ -289,16 +305,6 @@ static void edit_params(u32 argc, char **argv, char **envp) { if (lto_mode) { - if (getenv("AFL_LLVM_WHITELIST") != NULL) { - - cc_params[cc_par_cnt++] = "-Xclang"; - cc_params[cc_par_cnt++] = "-load"; - cc_params[cc_par_cnt++] = "-Xclang"; - cc_params[cc_par_cnt++] = - alloc_printf("%s/afl-llvm-lto-whitelist.so", obj_path); - - } - cc_params[cc_par_cnt++] = alloc_printf("-fuse-ld=%s", AFL_REAL_LD); cc_params[cc_par_cnt++] = "-Wl,--allow-multiple-definition"; if (instrument_mode == INSTRUMENT_CFG) @@ -331,7 +337,7 @@ static void edit_params(u32 argc, char **argv, char **envp) { } - cc_params[cc_par_cnt++] = "-Qunused-arguments"; + // cc_params[cc_par_cnt++] = "-Qunused-arguments"; // in case LLVM is installed not via a package manager or "make install" // e.g. compiled download or compiled from github then it's ./lib directory @@ -436,8 +442,8 @@ static void edit_params(u32 argc, char **argv, char **envp) { cc_params[cc_par_cnt++] = "-g"; cc_params[cc_par_cnt++] = "-O3"; cc_params[cc_par_cnt++] = "-funroll-loops"; - if (strlen(march_opt) > 1 && march_opt[0] == '-') - cc_params[cc_par_cnt++] = march_opt; + // if (strlen(march_opt) > 1 && march_opt[0] == '-') + // cc_params[cc_par_cnt++] = march_opt; } @@ -486,6 +492,19 @@ static void edit_params(u32 argc, char **argv, char **envp) { */ cc_params[cc_par_cnt++] = + "-D__AFL_FUZZ_INIT()=" + "int __afl_sharedmem_fuzzing = 1;" + "extern unsigned int __afl_fuzz_len;" + "extern unsigned char *__afl_fuzz_ptr;" + "unsigned char *__afl_fuzz_alt_ptr;"; + cc_params[cc_par_cnt++] = + "-D__AFL_FUZZ_TESTCASE_BUF=(__afl_fuzz_ptr ? __afl_fuzz_ptr : " + "(__afl_fuzz_alt_ptr = malloc(1 * 1024 * 1024)))"; + cc_params[cc_par_cnt++] = + "-D__AFL_FUZZ_TESTCASE_LEN=(__afl_fuzz_ptr ? 
__afl_fuzz_len : read(0, " + "__afl_fuzz_alt_ptr, 1 * 1024 * 1024))"; + + cc_params[cc_par_cnt++] = "-D__AFL_LOOP(_A)=" "({ static volatile char *_B __attribute__((used)); " " _B = (char*)\"" PERSIST_SIG @@ -584,10 +603,6 @@ int main(int argc, char **argv, char **envp) { be_quiet = 1; -#ifdef USE_TRACE_PC - instrument_mode = INSTRUMENT_PCGUARD; -#endif - if (getenv("USE_TRACE_PC") || getenv("AFL_USE_TRACE_PC") || getenv("AFL_LLVM_USE_TRACE_PC") || getenv("AFL_TRACE_PC")) { @@ -629,12 +644,11 @@ int main(int argc, char **argv, char **envp) { while (ptr) { - if (strncasecmp(ptr, "default", strlen("default")) == 0 || - strncasecmp(ptr, "afl", strlen("afl")) == 0 || + if (strncasecmp(ptr, "afl", strlen("afl")) == 0 || strncasecmp(ptr, "classic", strlen("classic")) == 0) { - if (!instrument_mode || instrument_mode == INSTRUMENT_DEFAULT) - instrument_mode = INSTRUMENT_DEFAULT; + if (!instrument_mode || instrument_mode == INSTRUMENT_AFL) + instrument_mode = INSTRUMENT_AFL; else FATAL("main instrumentation mode already set with %s", instrument_mode_string[instrument_mode]); @@ -740,6 +754,17 @@ int main(int argc, char **argv, char **envp) { } + if (instrument_mode == 0) { + +#ifndef USE_TRACE_PC + if (getenv("AFL_LLVM_WHITELIST")) + instrument_mode = INSTRUMENT_AFL; + else +#endif + instrument_mode = INSTRUMENT_PCGUARD; + + } + if (instrument_opt_mode && lto_mode) FATAL( "CTX and NGRAM can not be used in LTO mode (and would make LTO " @@ -780,6 +805,9 @@ int main(int argc, char **argv, char **envp) { "AFL_LLVM_NOT_ZERO and AFL_LLVM_SKIP_NEVERZERO can not be set " "together"); + if (instrument_mode == INSTRUMENT_PCGUARD && getenv("AFL_LLVM_WHITELIST")) + WARNF("Instrumentation type PCGUARD does not support AFL_LLVM_WHITELIST!"); + if (argc < 2 || strcmp(argv[1], "-h") == 0) { if (!lto_mode) @@ -820,14 +848,14 @@ int main(int argc, char **argv, char **envp) { "AFL_LLVM_NOT_ZERO: use cycling trace counters that skip zero\n" "AFL_LLVM_SKIP_NEVERZERO: do not skip zero on trace 
counters\n" "AFL_LLVM_LAF_SPLIT_COMPARES: enable cascaded comparisons\n" - "AFL_LLVM_LAF_SPLIT_FLOATS: transform floating point comp. to " - "cascaded " - "comp.\n" + "AFL_LLVM_LAF_SPLIT_COMPARES_BITW: size limit (default 8)\n" "AFL_LLVM_LAF_SPLIT_SWITCHES: casc. comp. in 'switch'\n" " to cascaded comparisons\n" + "AFL_LLVM_LAF_SPLIT_FLOATS: transform floating point comp. to " + "cascaded comp.\n" "AFL_LLVM_LAF_TRANSFORM_COMPARES: transform library comparison " "function calls\n" - "AFL_LLVM_LAF_SPLIT_COMPARES_BITW: size limit (default 8)\n" + "AFL_LLVM_LAF_ALL: enables all LAF splits/transforms\n" "AFL_LLVM_WHITELIST: enable whitelisting (selective " "instrumentation)\n" "AFL_NO_BUILTIN: compile for use with libtokencap.so\n" @@ -843,12 +871,13 @@ int main(int argc, char **argv, char **envp) { SAYF( "\nafl-clang-fast specific environment variables:\n" "AFL_LLVM_CMPLOG: log operands of comparisons (RedQueen mutator)\n" - "AFL_LLVM_INSTRUMENT: set instrumentation mode: DEFAULT, CFG " - "(INSTRIM), PCGUARD, LTO, CTX, NGRAM-2 ... NGRAM-16\n" - " You can also use the old environment variables instead:" - " AFL_LLVM_USE_TRACE_PC: use LLVM trace-pc-guard instrumentation\n" + "AFL_LLVM_INSTRUMENT: set instrumentation mode: AFL, CFG " + "(INSTRIM), PCGUARD [DEFAULT], LTO, CTX, NGRAM-2 ... 
NGRAM-16\n" + " You can also use the old environment variables instead:\n" + " AFL_LLVM_USE_TRACE_PC: use LLVM trace-pc-guard instrumentation " + "[DEFAULT]\n" " AFL_LLVM_INSTRIM: use light weight instrumentation InsTrim\n" - " AFL_LLVM_INSTRIM_LOOPHEAD: optimize loop tracing for speed (sub " + " AFL_LLVM_INSTRIM_LOOPHEAD: optimize loop tracing for speed (" "option to INSTRIM)\n" " AFL_LLVM_CTX: use context sensitive coverage\n" " AFL_LLVM_NGRAM_SIZE: use ngram prev_loc count coverage\n"); @@ -915,6 +944,15 @@ int main(int argc, char **argv, char **envp) { check_environment_vars(envp); + if (getenv("AFL_LLVM_LAF_ALL")) { + + setenv("AFL_LLVM_LAF_SPLIT_SWITCHES", "1", 1); + setenv("AFL_LLVM_LAF_SPLIT_COMPARES", "1", 1); + setenv("AFL_LLVM_LAF_SPLIT_FLOATS", "1", 1); + setenv("AFL_LLVM_LAF_TRANSFORM_COMPARES", "1", 1); + + } + cmplog_mode = getenv("AFL_CMPLOG") || getenv("AFL_LLVM_CMPLOG"); if (!be_quiet && cmplog_mode) printf("CmpLog mode by <andreafioraldi@gmail.com>\n"); diff --git a/llvm_mode/afl-llvm-lto-instrim.so.cc b/llvm_mode/afl-llvm-lto-instrim.so.cc index a7d9b756..27504e8d 100644 --- a/llvm_mode/afl-llvm-lto-instrim.so.cc +++ b/llvm_mode/afl-llvm-lto-instrim.so.cc @@ -561,6 +561,17 @@ struct InsTrimLTO : public ModulePass { if (F.size() < function_minimum_size) continue; if (isBlacklisted(&F)) continue; + // whitelist check + AttributeList Attrs = F.getAttributes(); + if (Attrs.hasAttribute(-1, StringRef("skipinstrument"))) { + + if (debug) + fprintf(stderr, "DEBUG: Function %s is not whitelisted\n", + F.getName().str().c_str()); + continue; + + } + std::unordered_set<BasicBlock *> MS; if (!MarkSetOpt) { diff --git a/llvm_mode/afl-llvm-lto-instrumentation.so.cc b/llvm_mode/afl-llvm-lto-instrumentation.so.cc index f44b336e..cbe68171 100644 --- a/llvm_mode/afl-llvm-lto-instrumentation.so.cc +++ b/llvm_mode/afl-llvm-lto-instrumentation.so.cc @@ -197,6 +197,17 @@ bool AFLLTOPass::runOnModule(Module &M) { if (F.size() < function_minimum_size) continue; if 
(isBlacklisted(&F)) continue; + // whitelist check + AttributeList Attrs = F.getAttributes(); + if (Attrs.hasAttribute(-1, StringRef("skipinstrument"))) { + + if (debug) + fprintf(stderr, "DEBUG: Function %s is not whitelisted\n", + F.getName().str().c_str()); + continue; + + } + std::vector<BasicBlock *> InsBlocks; if (autodictionary) { diff --git a/llvm_mode/afl-llvm-lto-whitelist.so.cc b/llvm_mode/afl-llvm-lto-whitelist.so.cc index a116c4ea..8856ce21 100644 --- a/llvm_mode/afl-llvm-lto-whitelist.so.cc +++ b/llvm_mode/afl-llvm-lto-whitelist.so.cc @@ -122,64 +122,65 @@ bool AFLwhitelist::runOnModule(Module &M) { for (auto &F : M) { + if (F.size() < 1) continue; + // fprintf(stderr, "F:%s\n", F.getName().str().c_str()); if (isBlacklisted(&F)) continue; - for (auto &BB : F) { + BasicBlock::iterator IP = F.getEntryBlock().getFirstInsertionPt(); + IRBuilder<> IRB(&(*IP)); - BasicBlock::iterator IP = BB.getFirstInsertionPt(); - IRBuilder<> IRB(&(*IP)); + if (!myWhitelist.empty()) { - if (!myWhitelist.empty()) { + bool instrumentFunction = false; - bool instrumentBlock = false; + /* Get the current location using debug information. + * For now, just instrument the block if we are not able + * to determine our location. */ + DebugLoc Loc = IP->getDebugLoc(); + if (Loc) { - /* Get the current location using debug information. - * For now, just instrument the block if we are not able - * to determine our location. 
*/ - DebugLoc Loc = IP->getDebugLoc(); - if (Loc) { + DILocation *cDILoc = dyn_cast<DILocation>(Loc.getAsMDNode()); - DILocation *cDILoc = dyn_cast<DILocation>(Loc.getAsMDNode()); + unsigned int instLine = cDILoc->getLine(); + StringRef instFilename = cDILoc->getFilename(); - unsigned int instLine = cDILoc->getLine(); - StringRef instFilename = cDILoc->getFilename(); + if (instFilename.str().empty()) { - if (instFilename.str().empty()) { + /* If the original location is empty, try using the inlined location + */ + DILocation *oDILoc = cDILoc->getInlinedAt(); + if (oDILoc) { - /* If the original location is empty, try using the inlined location - */ - DILocation *oDILoc = cDILoc->getInlinedAt(); - if (oDILoc) { - - instFilename = oDILoc->getFilename(); - instLine = oDILoc->getLine(); - - } + instFilename = oDILoc->getFilename(); + instLine = oDILoc->getLine(); } - (void)instLine; + } - /* Continue only if we know where we actually are */ - if (!instFilename.str().empty()) { + (void)instLine; - for (std::list<std::string>::iterator it = myWhitelist.begin(); - it != myWhitelist.end(); ++it) { + if (debug) + SAYF(cMGN "[D] " cRST "function %s is in file %s\n", + F.getName().str().c_str(), instFilename.str().c_str()); + /* Continue only if we know where we actually are */ + if (!instFilename.str().empty()) { - /* We don't check for filename equality here because - * filenames might actually be full paths. Instead we - * check that the actual filename ends in the filename - * specified in the list. */ - if (instFilename.str().length() >= it->length()) { + for (std::list<std::string>::iterator it = myWhitelist.begin(); + it != myWhitelist.end(); ++it) { - if (instFilename.str().compare( - instFilename.str().length() - it->length(), - it->length(), *it) == 0) { + /* We don't check for filename equality here because + * filenames might actually be full paths. Instead we + * check that the actual filename ends in the filename + * specified in the list. 
*/ + if (instFilename.str().length() >= it->length()) { - instrumentBlock = true; - break; + if (instFilename.str().compare( + instFilename.str().length() - it->length(), it->length(), + *it) == 0) { - } + instrumentFunction = true; + break; } @@ -189,43 +190,35 @@ bool AFLwhitelist::runOnModule(Module &M) { } - /* Either we couldn't figure out our location or the location is - * not whitelisted, so we skip instrumentation. - * We do this by renaming the function. */ - if (!instrumentBlock) { - - if (F.getName().compare("main") == 0 || - F.getName().compare("start") == 0 || - F.getName().compare("_start") == 0 || - F.getName().compare("init") == 0 || - F.getName().compare("_init") == 0) { - - // We do not honor be_quiet for this one - WARNF("Cannot ignore functions main/init/start"); - - } else { - - // StringRef newName = StringRef("ign.") + F.getName(); - if (debug) - SAYF(cMGN "[D] " cRST "renamed %s to ign.%s\n", - F.getName().str().c_str(), F.getName().str().c_str()); - Function *_F(&F); - _F->setName("ign." + F.getName()); - - } + } - } else if (debug) + /* Either we couldn't figure out our location or the location is + * not whitelisted, so we skip instrumentation. + * We do this by renaming the function. 
*/ + if (instrumentFunction == true) { + if (debug) SAYF(cMGN "[D] " cRST "function %s is in whitelist\n", F.getName().str().c_str()); } else { - PFATAL("Whitelist is empty"); + if (debug) + SAYF(cMGN "[D] " cRST "function %s is NOT in whitelist\n", + F.getName().str().c_str()); + + auto & Ctx = F.getContext(); + AttributeList Attrs = F.getAttributes(); + AttrBuilder NewAttrs; + NewAttrs.addAttribute("skipinstrument"); + F.setAttributes( + Attrs.addAttributes(Ctx, AttributeList::FunctionIndex, NewAttrs)); } - break; + } else { + + PFATAL("Whitelist is empty"); } diff --git a/llvm_mode/afl-llvm-pass.so.cc b/llvm_mode/afl-llvm-pass.so.cc index 5bf705f8..82dece75 100644 --- a/llvm_mode/afl-llvm-pass.so.cc +++ b/llvm_mode/afl-llvm-pass.so.cc @@ -211,15 +211,17 @@ bool AFLCoverage::runOnModule(Module &M) { else #else if (ngram_size_str) -#ifndef LLVM_VERSION_PATCH - FATAL("Sorry, NGRAM branch coverage is not supported with llvm version %d.%d.%d!", - LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, - 0); -#else - FATAL("Sorry, NGRAM branch coverage is not supported with llvm version %d.%d.%d!", - LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, - LLVM_VERSION_PATCH); -#endif + #ifndef LLVM_VERSION_PATCH + FATAL( + "Sorry, NGRAM branch coverage is not supported with llvm version " + "%d.%d.%d!", + LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, 0); + #else + FATAL( + "Sorry, NGRAM branch coverage is not supported with llvm version " + "%d.%d.%d!", + LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, LLVM_VERSION_PATCH); + #endif #endif PrevLocSize = 1; diff --git a/llvm_mode/afl-llvm-rt.o.c b/llvm_mode/afl-llvm-rt.o.c index c0d1569d..3a0584e4 100644 --- a/llvm_mode/afl-llvm-rt.o.c +++ b/llvm_mode/afl-llvm-rt.o.c @@ -63,13 +63,21 @@ is used for instrumentation output before __afl_map_shm() has a chance to run. It will end up as .comm, so it shouldn't be too wasteful. 
*/ +#if MAP_SIZE <= 65536 + #define MAP_INITIAL_SIZE 256000 +#else + #define MAP_INITIAL_SIZE MAP_SIZE +#endif + #ifdef AFL_REAL_LD -u8 __afl_area_initial[256000]; +u8 __afl_area_initial[MAP_INITIAL_SIZE]; #else u8 __afl_area_initial[MAP_SIZE]; #endif u8 *__afl_area_ptr = __afl_area_initial; u8 *__afl_dictionary; +u8 *__afl_fuzz_ptr; +u32 __afl_fuzz_len; u32 __afl_final_loc; u32 __afl_map_size = MAP_SIZE; @@ -86,6 +94,8 @@ __thread u32 __afl_prev_ctx; __thread u32 __afl_cmp_counter; #endif +int __afl_sharedmem_fuzzing __attribute__((weak)); + struct cmp_map *__afl_cmp_map; /* Running in persistent mode? */ @@ -103,6 +113,59 @@ void send_forkserver_error(int error) { } +/* SHM fuzzing setup. */ + +static void __afl_map_shm_fuzz() { + + char *id_str = getenv(SHM_FUZZ_ENV_VAR); + + if (id_str) { + +#ifdef USEMMAP + const char * shm_file_path = id_str; + int shm_fd = -1; + unsigned char *shm_base = NULL; + + /* create the shared memory segment as if it was a file */ + shm_fd = shm_open(shm_file_path, O_RDWR, 0600); + if (shm_fd == -1) { + + fprintf(stderr, "shm_open() failed for fuzz\n"); + send_forkserver_error(FS_ERROR_SHM_OPEN); + exit(1); + + } + + __afl_fuzz_ptr = mmap(0, MAX_FILE, PROT_READ, MAP_SHARED, shm_fd, 0); + +#else + u32 shm_id = atoi(id_str); + + __afl_fuzz_ptr = shmat(shm_id, NULL, 0); + +#endif + + /* Whooooops. */ + + if (__afl_fuzz_ptr == (void *)-1) { + + fprintf(stderr, "Error: could not access fuzzing shared memory\n"); + exit(1); + + } + + if (getenv("AFL_DEBUG")) + fprintf(stderr, "DEBUG: successfully got fuzzing shared memory\n"); + + } else { + + fprintf(stderr, "Error: variable for fuzzing shared memory is not set\n"); + exit(1); + + } + +} + /* SHM setup. */ static void __afl_map_shm(void) { @@ -304,17 +367,25 @@ static void __afl_start_snapshots(void) { assume we're not running in forkserver mode and just execute program. 
*/ status |= (FS_OPT_ENABLED | FS_OPT_SNAPSHOT); + if (__afl_sharedmem_fuzzing != 0) status |= FS_OPT_SHDMEM_FUZZ; if (__afl_map_size <= FS_OPT_MAX_MAPSIZE) status |= (FS_OPT_SET_MAPSIZE(__afl_map_size) | FS_OPT_MAPSIZE); - if (__afl_dictionary_len > 0 && __afl_dictionary) status |= FS_OPT_AUTODICT; + if (__afl_dictionary_len && __afl_dictionary) status |= FS_OPT_AUTODICT; memcpy(tmp, &status, 4); if (write(FORKSRV_FD + 1, tmp, 4) != 4) return; - if (__afl_dictionary_len > 0 && __afl_dictionary) { + if (__afl_sharedmem_fuzzing || (__afl_dictionary_len && __afl_dictionary)) { if (read(FORKSRV_FD, &was_killed, 4) != 4) _exit(1); + if ((was_killed & (0xffffffff & (FS_OPT_ENABLED | FS_OPT_SHDMEM_FUZZ))) == + (FS_OPT_ENABLED | FS_OPT_SHDMEM_FUZZ)) { + + __afl_map_shm_fuzz(); + + } + if ((was_killed & (FS_OPT_ENABLED | FS_OPT_AUTODICT)) == (FS_OPT_ENABLED | FS_OPT_AUTODICT)) { @@ -351,7 +422,7 @@ static void __afl_start_snapshots(void) { // uh this forkserver master does not understand extended option passing // or does not want the dictionary - already_read_first = 1; + if (!__afl_fuzz_ptr) already_read_first = 1; } @@ -372,6 +443,9 @@ static void __afl_start_snapshots(void) { } + __afl_fuzz_len = (was_killed >> 8); + was_killed = (was_killed & 0xff); + /* If we stopped the child in persistent mode, but there was a race condition and afl-fuzz already issued SIGKILL, write off the old process. 
*/ @@ -445,7 +519,7 @@ static void __afl_start_snapshots(void) { static void __afl_start_forkserver(void) { #ifdef __linux__ - if (!is_persistent && !__afl_cmp_map && !getenv("AFL_NO_SNAPSHOT") && + if (/*!is_persistent &&*/ !__afl_cmp_map && !getenv("AFL_NO_SNAPSHOT") && afl_snapshot_init() >= 0) { __afl_start_snapshots(); @@ -467,7 +541,8 @@ static void __afl_start_forkserver(void) { if (__afl_map_size <= FS_OPT_MAX_MAPSIZE) status |= (FS_OPT_SET_MAPSIZE(__afl_map_size) | FS_OPT_MAPSIZE); - if (__afl_dictionary_len > 0 && __afl_dictionary) status |= FS_OPT_AUTODICT; + if (__afl_dictionary_len && __afl_dictionary) status |= FS_OPT_AUTODICT; + if (__afl_sharedmem_fuzzing != 0) status |= FS_OPT_SHDMEM_FUZZ; if (status) status |= (FS_OPT_ENABLED); memcpy(tmp, &status, 4); @@ -476,10 +551,17 @@ static void __afl_start_forkserver(void) { if (write(FORKSRV_FD + 1, tmp, 4) != 4) return; - if (__afl_dictionary_len > 0 && __afl_dictionary) { + if (__afl_sharedmem_fuzzing || (__afl_dictionary_len && __afl_dictionary)) { if (read(FORKSRV_FD, &was_killed, 4) != 4) _exit(1); + if ((was_killed & (FS_OPT_ENABLED | FS_OPT_SHDMEM_FUZZ)) == + (FS_OPT_ENABLED | FS_OPT_SHDMEM_FUZZ)) { + + __afl_map_shm_fuzz(); + + } + if ((was_killed & (FS_OPT_ENABLED | FS_OPT_AUTODICT)) == (FS_OPT_ENABLED | FS_OPT_AUTODICT)) { @@ -516,7 +598,7 @@ static void __afl_start_forkserver(void) { // uh this forkserver master does not understand extended option passing // or does not want the dictionary - already_read_first = 1; + if (!__afl_fuzz_ptr) already_read_first = 1; } @@ -538,6 +620,9 @@ static void __afl_start_forkserver(void) { } + __afl_fuzz_len = (was_killed >> 8); + was_killed = (was_killed & 0xff); + /* If we stopped the child in persistent mode, but there was a race condition and afl-fuzz already issued SIGKILL, write off the old process. 
*/ @@ -703,13 +788,13 @@ void __sanitizer_cov_trace_pc_guard(uint32_t *guard) { void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) { - u32 inst_ratio = 100; - u8 *x; + u32 inst_ratio = 100; + char *x; if (start == stop || *start) return; x = getenv("AFL_INST_RATIO"); - if (x) inst_ratio = atoi(x); + if (x) inst_ratio = (u32)atoi(x); if (!inst_ratio || inst_ratio > 100) { @@ -892,7 +977,7 @@ static int area_is_mapped(void *ptr, size_t len) { } -void __cmplog_rtn_hook(void *ptr1, void *ptr2) { +void __cmplog_rtn_hook(u8 *ptr1, u8 *ptr2) { if (!__afl_cmp_map) return; diff --git a/llvm_mode/cmplog-routines-pass.cc b/llvm_mode/cmplog-routines-pass.cc index bb78273a..e05a1843 100644 --- a/llvm_mode/cmplog-routines-pass.cc +++ b/llvm_mode/cmplog-routines-pass.cc @@ -93,16 +93,17 @@ bool CmpLogRoutines::hookRtns(Module &M) { std::vector<CallInst *> calls; LLVMContext & C = M.getContext(); - Type * VoidTy = Type::getVoidTy(C); - PointerType *VoidPtrTy = PointerType::get(VoidTy, 0); + Type *VoidTy = Type::getVoidTy(C); + // PointerType *VoidPtrTy = PointerType::get(VoidTy, 0); + IntegerType *Int8Ty = IntegerType::getInt8Ty(C); + PointerType *i8PtrTy = PointerType::get(Int8Ty, 0); #if LLVM_VERSION_MAJOR < 9 Constant * #else FunctionCallee #endif - c = M.getOrInsertFunction("__cmplog_rtn_hook", VoidTy, VoidPtrTy, - VoidPtrTy + c = M.getOrInsertFunction("__cmplog_rtn_hook", VoidTy, i8PtrTy, i8PtrTy #if LLVM_VERSION_MAJOR < 5 , NULL @@ -163,8 +164,10 @@ bool CmpLogRoutines::hookRtns(Module &M) { IRB.SetInsertPoint(callInst); std::vector<Value *> args; - args.push_back(v1P); - args.push_back(v2P); + Value * v1Pcasted = IRB.CreatePointerCast(v1P, i8PtrTy); + Value * v2Pcasted = IRB.CreatePointerCast(v2P, i8PtrTy); + args.push_back(v1Pcasted); + args.push_back(v2Pcasted); IRB.CreateCall(cmplogHookFn, args, "tmp"); diff --git a/llvm_mode/compare-transform-pass.so.cc b/llvm_mode/compare-transform-pass.so.cc index 1ebc54d7..4e99aafb 100644 --- 
a/llvm_mode/compare-transform-pass.so.cc +++ b/llvm_mode/compare-transform-pass.so.cc @@ -304,17 +304,27 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, if (!(HasStr1 || HasStr2)) continue; if (isMemcmp || isStrncmp || isStrncasecmp) { - /* check if third operand is a constant integer * strlen("constStr") and sizeof() are treated as constant */ Value * op2 = callInst->getArgOperand(2); ConstantInt *ilen = dyn_cast<ConstantInt>(op2); - if (!ilen) continue; - /* final precaution: if size of compare is larger than constant - * string skip it*/ - uint64_t literalLength = HasStr1 ? Str1.size() : Str2.size(); - if (literalLength + 1 < ilen->getZExtValue()) continue; - + if (ilen) { + uint64_t len = ilen->getZExtValue(); + // if len is zero this is a pointless call but allow real + // implementation to worry about that + if (!len) continue; + + if (isMemcmp) { + // if size of compare is larger than constant string this is + // likely a bug but allow real implementation to worry about + // that + uint64_t literalLength = HasStr1 ? Str1.size() : Str2.size(); + if (literalLength + 1 < ilen->getZExtValue()) continue; + } + } else if (isMemcmp) + // this *may* supply a len greater than the constant string at + // runtime so similarly we don't want to have to handle that + continue; } calls.push_back(callInst); @@ -341,7 +351,7 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, Value * VarStr; bool HasStr1 = getConstantStringInfo(Str1P, Str1); bool HasStr2 = getConstantStringInfo(Str2P, Str2); - uint64_t constLen, sizedLen; + uint64_t constStrLen, constSizedLen, unrollLen; bool isMemcmp = !callInst->getCalledFunction()->getName().compare(StringRef("memcmp")); bool isSizedcmp = isMemcmp || @@ -349,23 +359,13 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, StringRef("strncmp")) || !callInst->getCalledFunction()->getName().compare( StringRef("strncasecmp")); + Value *sizedValue = isSizedcmp ? 
callInst->getArgOperand(2) : NULL; + bool isConstSized = sizedValue && isa<ConstantInt>(sizedValue); bool isCaseInsensitive = !callInst->getCalledFunction()->getName().compare( StringRef("strcasecmp")) || !callInst->getCalledFunction()->getName().compare( StringRef("strncasecmp")); - if (isSizedcmp) { - - Value * op2 = callInst->getArgOperand(2); - ConstantInt *ilen = dyn_cast<ConstantInt>(op2); - sizedLen = ilen->getZExtValue(); - - } else { - - sizedLen = 0; - - } - if (!(HasStr1 || HasStr2)) { // do we have a saved local or global variable initialization? @@ -389,93 +389,133 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, } + if (isConstSized) { + + constSizedLen = dyn_cast<ConstantInt>(sizedValue)->getZExtValue(); + + } + if (HasStr1) { TmpConstStr = Str1.str(); VarStr = Str2P; - constLen = isMemcmp ? sizedLen : TmpConstStr.length(); } else { TmpConstStr = Str2.str(); VarStr = Str1P; - constLen = isMemcmp ? sizedLen : TmpConstStr.length(); } - /* properly handle zero terminated C strings by adding the terminating 0 to - * the StringRef (in comparison to std::string a StringRef has built-in - * runtime bounds checking, which makes debugging easier) */ + // add null termination character implicit in c strings TmpConstStr.append("\0", 1); - if (!sizedLen) constLen++; + + // in the unusual case the const str has embedded null + // characters, the string comparison functions should terminate + // at the first null + if (!isMemcmp) + TmpConstStr.assign(TmpConstStr, 0, TmpConstStr.find('\0') + 1); + + constStrLen = TmpConstStr.length(); + // prefer use of StringRef (in comparison to std::string a StringRef has + // built-in runtime bounds checking, which makes debugging easier) ConstStr = StringRef(TmpConstStr); - // fprintf(stderr, "issized: %d, const > sized ? 
%u > %u\n", isSizedcmp, - // constLen, sizedLen); - if (isSizedcmp && constLen > sizedLen && sizedLen) constLen = sizedLen; - if (constLen > TmpConstStr.length()) constLen = TmpConstStr.length(); - if (!constLen) constLen = TmpConstStr.length(); - if (!constLen) continue; + + if (isConstSized) + unrollLen = constSizedLen < constStrLen ? constSizedLen : constStrLen; + else + unrollLen = constStrLen; if (!be_quiet) - errs() << callInst->getCalledFunction()->getName() << ": len " << constLen + errs() << callInst->getCalledFunction()->getName() << ": unroll len " << unrollLen + << ((isSizedcmp && !isConstSized) ? ", variable n" : "") << ": " << ConstStr << "\n"; /* split before the call instruction */ BasicBlock *bb = callInst->getParent(); BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(callInst)); - BasicBlock *next_bb = + + BasicBlock *next_lenchk_bb = NULL; + if (isSizedcmp && !isConstSized) { + next_lenchk_bb = BasicBlock::Create(C, "len_check", end_bb->getParent(), end_bb); + BranchInst::Create(end_bb, next_lenchk_bb); + } + BasicBlock *next_cmp_bb = BasicBlock::Create(C, "cmp_added", end_bb->getParent(), end_bb); - BranchInst::Create(end_bb, next_bb); - PHINode *PN = PHINode::Create(Int32Ty, constLen + 1, "cmp_phi"); + BranchInst::Create(end_bb, next_cmp_bb); + PHINode *PN = PHINode::Create(Int32Ty, (next_lenchk_bb ? 2 : 1) * unrollLen + 1, "cmp_phi"); + #if LLVM_VERSION_MAJOR < 8 TerminatorInst *term = bb->getTerminator(); #else Instruction *term = bb->getTerminator(); #endif - BranchInst::Create(next_bb, bb); + BranchInst::Create(next_lenchk_bb ? 
next_lenchk_bb : next_cmp_bb, bb); term->eraseFromParent(); - for (uint64_t i = 0; i < constLen; i++) { + for (uint64_t i = 0; i < unrollLen; i++) { + + BasicBlock *cur_cmp_bb = next_cmp_bb, *cur_lenchk_bb = next_lenchk_bb; + unsigned char c; + + if (cur_lenchk_bb) { - BasicBlock *cur_bb = next_bb; + IRBuilder<> cur_lenchk_IRB(&*(cur_lenchk_bb->getFirstInsertionPt())); + Value *icmp = cur_lenchk_IRB.CreateICmpEQ( + sizedValue, ConstantInt::get(Int64Ty, i)); + cur_lenchk_IRB.CreateCondBr(icmp, end_bb, cur_cmp_bb); + cur_lenchk_bb->getTerminator()->eraseFromParent(); - char c = isCaseInsensitive ? tolower(ConstStr[i]) : ConstStr[i]; + PN->addIncoming(ConstantInt::get(Int32Ty, 0), cur_lenchk_bb); - BasicBlock::iterator IP = next_bb->getFirstInsertionPt(); - IRBuilder<> IRB(&*IP); + } + + if (isCaseInsensitive) + c = (unsigned char)(tolower((int)ConstStr[i]) & 0xff); + else + c = (unsigned char)ConstStr[i]; + + IRBuilder<> cur_cmp_IRB(&*(cur_cmp_bb->getFirstInsertionPt())); Value *v = ConstantInt::get(Int64Ty, i); - Value *ele = IRB.CreateInBoundsGEP(VarStr, v, "empty"); - Value *load = IRB.CreateLoad(ele); + Value *ele = cur_cmp_IRB.CreateInBoundsGEP(VarStr, v, "empty"); + Value *load = cur_cmp_IRB.CreateLoad(ele); + if (isCaseInsensitive) { // load >= 'A' && load <= 'Z' ? 
load | 0x020 : load + load = cur_cmp_IRB.CreateZExt(load, Int32Ty); std::vector<Value *> args; args.push_back(load); - load = IRB.CreateCall(tolowerFn, args, "tmp"); - load = IRB.CreateTrunc(load, Int8Ty); + load = cur_cmp_IRB.CreateCall(tolowerFn, args, "tmp"); + load = cur_cmp_IRB.CreateTrunc(load, Int8Ty); } Value *isub; if (HasStr1) - isub = IRB.CreateSub(ConstantInt::get(Int8Ty, c), load); + isub = cur_cmp_IRB.CreateSub(ConstantInt::get(Int8Ty, c), load); else - isub = IRB.CreateSub(load, ConstantInt::get(Int8Ty, c)); + isub = cur_cmp_IRB.CreateSub(load, ConstantInt::get(Int8Ty, c)); + + Value *sext = cur_cmp_IRB.CreateSExt(isub, Int32Ty); + PN->addIncoming(sext, cur_cmp_bb); - Value *sext = IRB.CreateSExt(isub, Int32Ty); - PN->addIncoming(sext, cur_bb); + if (i < unrollLen - 1) { - if (i < constLen - 1) { + if (cur_lenchk_bb) { + next_lenchk_bb = BasicBlock::Create(C, "len_check", end_bb->getParent(), end_bb); + BranchInst::Create(end_bb, next_lenchk_bb); + } - next_bb = + next_cmp_bb = BasicBlock::Create(C, "cmp_added", end_bb->getParent(), end_bb); - BranchInst::Create(end_bb, next_bb); + BranchInst::Create(end_bb, next_cmp_bb); - Value *icmp = IRB.CreateICmpEQ(isub, ConstantInt::get(Int8Ty, 0)); - IRB.CreateCondBr(icmp, next_bb, end_bb); - cur_bb->getTerminator()->eraseFromParent(); + Value *icmp = cur_cmp_IRB.CreateICmpEQ(isub, ConstantInt::get(Int8Ty, 0)); + cur_cmp_IRB.CreateCondBr(icmp, next_lenchk_bb ? next_lenchk_bb : next_cmp_bb, end_bb); + cur_cmp_bb->getTerminator()->eraseFromParent(); } else { |