From 500a378fdf8664aea42f557f60c9842bb15f06a0 Mon Sep 17 00:00:00 2001 From: Andrea Fioraldi Date: Sat, 31 Aug 2019 11:23:48 +0200 Subject: modernize some readmes --- libdislocator/README.dislocator | 60 ----------------------------------- libdislocator/README.md | 60 +++++++++++++++++++++++++++++++++++ libtokencap/README.md | 63 +++++++++++++++++++++++++++++++++++++ libtokencap/README.tokencap | 61 ----------------------------------- qemu_mode/libcompcov/README.compcov | 37 ---------------------- qemu_mode/libcompcov/README.md | 37 ++++++++++++++++++++++ 6 files changed, 160 insertions(+), 158 deletions(-) delete mode 100644 libdislocator/README.dislocator create mode 100644 libdislocator/README.md create mode 100644 libtokencap/README.md delete mode 100644 libtokencap/README.tokencap delete mode 100644 qemu_mode/libcompcov/README.compcov create mode 100644 qemu_mode/libcompcov/README.md diff --git a/libdislocator/README.dislocator b/libdislocator/README.dislocator deleted file mode 100644 index 837e7466..00000000 --- a/libdislocator/README.dislocator +++ /dev/null @@ -1,60 +0,0 @@ -=================================== -libdislocator, an abusive allocator -=================================== - - (See ../docs/README for the general instruction manual.) - -This is a companion library that can be used as a drop-in replacement for the -libc allocator in the fuzzed binaries. 
It improves the odds of bumping into -heap-related security bugs in several ways: - - - It allocates all buffers so that they are immediately adjacent to a - subsequent PROT_NONE page, causing most off-by-one reads and writes to - immediately segfault, - - - It adds a canary immediately below the allocated buffer, to catch writes - to negative offsets (won't catch reads, though), - - - It sets the memory returned by malloc() to garbage values, improving the - odds of crashing when the target accesses uninitialized data, - - - It sets freed memory to PROT_NONE and does not actually reuse it, causing - most use-after-free bugs to segfault right away, - - - It forces all realloc() calls to return a new address - and sets - PROT_NONE on the original block. This catches use-after-realloc bugs, - - - It checks for calloc() overflows and can cause soft or hard failures - of alloc requests past a configurable memory limit (AFL_LD_LIMIT_MB, - AFL_LD_HARD_FAIL). - -Basically, it is inspired by some of the non-default options available for the -OpenBSD allocator - see malloc.conf(5) on that platform for reference. It is -also somewhat similar to several other debugging libraries, such as gmalloc -and DUMA - but is simple, plug-and-play, and designed specifically for fuzzing -jobs. - -Note that it does nothing for stack-based memory handling errors. The --fstack-protector-all setting for GCC / clang, enabled when using AFL_HARDEN, -can catch some subset of that. - -The allocator is slow and memory-intensive (even the tiniest allocation uses up -4 kB of physical memory and 8 kB of virtual mem), making it completely unsuitable -for "production" uses; but it can be faster and more hassle-free than ASAN / MSAN -when fuzzing small, self-contained binaries. - -To use this library, run AFL like so: - -AFL_PRELOAD=/path/to/libdislocator.so ./afl-fuzz [...other params...] - -You *have* to specify path, even if it's just ./libdislocator.so or -$PWD/libdislocator.so. 
- -Similarly to afl-tmin, the library is not "proprietary" and can be used with -other fuzzers or testing tools without the need for any code tweaks. It does not -require AFL-instrumented binaries to work. - -Note that the AFL_PRELOAD approach (which AFL internally maps to LD_PRELOAD or -DYLD_INSERT_LIBRARIES, depending on the OS) works only if the target binary is -dynamically linked. Otherwise, attempting to use the library will have no -effect. diff --git a/libdislocator/README.md b/libdislocator/README.md new file mode 100644 index 00000000..5d5a1464 --- /dev/null +++ b/libdislocator/README.md @@ -0,0 +1,60 @@ +# libdislocator, an abusive allocator + + (See ../docs/README for the general instruction manual.) + +This is a companion library that can be used as a drop-in replacement for the +libc allocator in the fuzzed binaries. It improves the odds of bumping into +heap-related security bugs in several ways: + + - It allocates all buffers so that they are immediately adjacent to a + subsequent PROT_NONE page, causing most off-by-one reads and writes to + immediately segfault, + + - It adds a canary immediately below the allocated buffer, to catch writes + to negative offsets (won't catch reads, though), + + - It sets the memory returned by malloc() to garbage values, improving the + odds of crashing when the target accesses uninitialized data, + + - It sets freed memory to PROT_NONE and does not actually reuse it, causing + most use-after-free bugs to segfault right away, + + - It forces all realloc() calls to return a new address - and sets + PROT_NONE on the original block. This catches use-after-realloc bugs, + + - It checks for calloc() overflows and can cause soft or hard failures + of alloc requests past a configurable memory limit (AFL_LD_LIMIT_MB, + AFL_LD_HARD_FAIL). + +Basically, it is inspired by some of the non-default options available for the +OpenBSD allocator - see malloc.conf(5) on that platform for reference. 
It is +also somewhat similar to several other debugging libraries, such as gmalloc +and DUMA - but is simple, plug-and-play, and designed specifically for fuzzing +jobs. + +Note that it does nothing for stack-based memory handling errors. The +-fstack-protector-all setting for GCC / clang, enabled when using AFL_HARDEN, +can catch some subset of that. + +The allocator is slow and memory-intensive (even the tiniest allocation uses up +4 kB of physical memory and 8 kB of virtual mem), making it completely unsuitable +for "production" uses; but it can be faster and more hassle-free than ASAN / MSAN +when fuzzing small, self-contained binaries. + +To use this library, run AFL like so: + +``` +AFL_PRELOAD=/path/to/libdislocator.so ./afl-fuzz [...other params...] +``` + +You *have* to specify path, even if it's just ./libdislocator.so or +$PWD/libdislocator.so. + +Similarly to afl-tmin, the library is not "proprietary" and can be used with +other fuzzers or testing tools without the need for any code tweaks. It does not +require AFL-instrumented binaries to work. + +Note that the AFL_PRELOAD approach (which AFL internally maps to LD_PRELOAD or +DYLD_INSERT_LIBRARIES, depending on the OS) works only if the target binary is +dynamically linked. Otherwise, attempting to use the library will have no +effect. diff --git a/libtokencap/README.md b/libtokencap/README.md new file mode 100644 index 00000000..baf69da1 --- /dev/null +++ b/libtokencap/README.md @@ -0,0 +1,63 @@ +# strcmp() / memcmp() token capture library + + (See ../docs/README for the general instruction manual.) + +This Linux-only companion library allows you to instrument `strcmp()`, `memcmp()`, +and related functions to automatically extract syntax tokens passed to any of +these libcalls. The resulting list of tokens may be then given as a starting +dictionary to afl-fuzz (the -x option) to improve coverage on subsequent +fuzzing runs. 
+ +This may help improve coverage in some targets, and do precisely nothing in +others. In some cases, it may even make things worse: if libtokencap picks up +syntax tokens that are not used to process the input data, but that are a part +of - say - parsing a config file... well, you're going to end up wasting a lot +of CPU time on trying them out in the input stream. In other words, use this +feature with care. Manually screening the resulting dictionary is almost +always a necessity. + +As for the actual operation: the library stores tokens, without any deduping, +by appending them to a file specified via AFL_TOKEN_FILE. If the variable is not +set, the tool uses stderr (which is probably not what you want). + +Similarly to afl-tmin, the library is not "proprietary" and can be used with +other fuzzers or testing tools without the need for any code tweaks. It does not +require AFL-instrumented binaries to work. + +To use the library, you *need* to make sure that your fuzzing target is compiled +with -fno-builtin and is linked dynamically. If you wish to automate the first +part without mucking with CFLAGS in Makefiles, you can set AFL_NO_BUILTIN=1 +when using afl-gcc. This setting specifically adds the following flags: + +``` + -fno-builtin-strcmp -fno-builtin-strncmp -fno-builtin-strcasecmp + -fno-builtin-strncasecmp -fno-builtin-memcmp -fno-builtin-strstr + -fno-builtin-strcasestr +``` + +The next step is simply loading this library via LD_PRELOAD. The optimal usage +pattern is to allow afl-fuzz to fuzz normally for a while and build up a corpus, +and then fire off the target binary, with libtokencap.so loaded, on every file +found by AFL in that earlier run. This demonstrates the basic principle: + +``` + export AFL_TOKEN_FILE=$PWD/temp_output.txt + + for i in <out_dir>/queue/id*; do + LD_PRELOAD=/path/to/libtokencap.so \ + /path/to/target/program [...params, including $i...] 
+ done + + sort -u temp_output.txt >afl_dictionary.txt +``` + +If you don't get any results, the target library is probably not using strcmp() +and memcmp() to parse input; or you haven't compiled it with -fno-builtin; or +the whole thing isn't dynamically linked, and LD_PRELOAD is having no effect. + +PS. The library is Linux-only because there is probably no particularly portable +and non-invasive way to distinguish between read-only and read-write memory +mappings. The `__tokencap_load_mappings()` function is the only thing that would +need to be changed for other OSes. Porting to platforms with /proc/<pid>/maps +(e.g., FreeBSD) should be trivial. + diff --git a/libtokencap/README.tokencap b/libtokencap/README.tokencap deleted file mode 100644 index 650739f2..00000000 --- a/libtokencap/README.tokencap +++ /dev/null @@ -1,61 +0,0 @@ -========================================= -strcmp() / memcmp() token capture library -========================================= - - (See ../docs/README for the general instruction manual.) - -This Linux-only companion library allows you to instrument strcmp(), memcmp(), -and related functions to automatically extract syntax tokens passed to any of -these libcalls. The resulting list of tokens may be then given as a starting -dictionary to afl-fuzz (the -x option) to improve coverage on subsequent -fuzzing runs. - -This may help improving coverage in some targets, and do precisely nothing in -others. In some cases, it may even make things worse: if libtokencap picks up -syntax tokens that are not used to process the input data, but that are a part -of - say - parsing a config file... well, you're going to end up wasting a lot -of CPU time on trying them out in the input stream. In other words, use this -feature with care. Manually screening the resulting dictionary is almost -always a necessity. - -As for the actual operation: the library stores tokens, without any deduping, -by appending them to a file specified via AFL_TOKEN_FILE. 
If the variable is not -set, the tool uses stderr (which is probably not what you want). - -Similarly to afl-tmin, the library is not "proprietary" and can be used with -other fuzzers or testing tools without the need for any code tweaks. It does not -require AFL-instrumented binaries to work. - -To use the library, you *need* to make sure that your fuzzing target is compiled -with -fno-builtin and is linked dynamically. If you wish to automate the first -part without mucking with CFLAGS in Makefiles, you can set AFL_NO_BUILTIN=1 -when using afl-gcc. This setting specifically adds the following flags: - - -fno-builtin-strcmp -fno-builtin-strncmp -fno-builtin-strcasecmp - -fno-builtin-strcasencmp -fno-builtin-memcmp -fno-builtin-strstr - -fno-builtin-strcasestr - -The next step is simply loading this library via LD_PRELOAD. The optimal usage -pattern is to allow afl-fuzz to fuzz normally for a while and build up a corpus, -and then fire off the target binary, with libtokencap.so loaded, on every file -found by AFL in that earlier run. This demonstrates the basic principle: - - export AFL_TOKEN_FILE=$PWD/temp_output.txt - - for i in /queue/id*; do - LD_PRELOAD=/path/to/libtokencap.so \ - /path/to/target/program [...params, including $i...] - done - - sort -u temp_output.txt >afl_dictionary.txt - -If you don't get any results, the target library is probably not using strcmp() -and memcmp() to parse input; or you haven't compiled it with -fno-builtin; or -the whole thing isn't dynamically linked, and LD_PRELOAD is having no effect. - -PS. The library is Linux-only because there is probably no particularly portable -and non-invasive way to distinguish between read-only and read-write memory -mappings. The __tokencap_load_mappings() function is the only thing that would -need to be changed for other OSes. Porting to platforms with /proc//maps -(e.g., FreeBSD) should be trivial. 
- diff --git a/qemu_mode/libcompcov/README.compcov b/qemu_mode/libcompcov/README.compcov deleted file mode 100644 index 9be13d88..00000000 --- a/qemu_mode/libcompcov/README.compcov +++ /dev/null @@ -1,37 +0,0 @@ -================================================================ -strcmp() / memcmp() CompareCoverage library for AFLplusplus-QEMU -================================================================ - - Written by Andrea Fioraldi - -This Linux-only companion library allows you to instrument strcmp(), memcmp(), -and related functions to log the CompareCoverage of these libcalls. - -Use this with caution. While this can speedup a lot the bypass of hard -branch conditions it can also waste a lot of time and take up unnecessary space -in the shared memory when logging the coverage related to functions that -doesn't process input-related data. - -To use the library, you *need* to make sure that your fuzzing target is linked -dynamically and make use of strcmp(), memcmp(), and related functions. -For optimized binaries this is an issue, those functions are often inlined -and this module is not capable to log the coverage in this case. - -If you have the source code of the fuzzing target you should nto use this -library and QEMU but build it with afl-clang-fast and the laf-intel options. - -To use this library make sure to preload it with AFL_PRELOAD. - - export AFL_PRELOAD=/path/to/libcompcov.so - export AFL_COMPCOV_LEVEL=1 - - afl-fuzz -Q -i input -o output -- - -The AFL_COMPCOV_LEVEL tells to QEMU and libcompcov how to log comaprisons. -Level 1 logs just comparison with immediates / read-only memory and level 2 -logs all the comparisons. - -The library make use of https://github.com/ouadev/proc_maps_parser and so it is -Linux specific. However this is not a strict dependency, other UNIX operating -systems can be supported simply replacing the code related to the -/proc/self/maps parsing. 
diff --git a/qemu_mode/libcompcov/README.md b/qemu_mode/libcompcov/README.md new file mode 100644 index 00000000..5aaa3dd8 --- /dev/null +++ b/qemu_mode/libcompcov/README.md @@ -0,0 +1,37 @@ +# strcmp() / memcmp() CompareCoverage library for afl++ QEMU + + Written by Andrea Fioraldi + +This Linux-only companion library allows you to instrument `strcmp()`, `memcmp()`, +and related functions to log the CompareCoverage of these libcalls. + +Use this with caution. While this can greatly speed up the bypass of hard +branch conditions it can also waste a lot of time and take up unnecessary space +in the shared memory when logging the coverage related to functions that +don't process input-related data. + +To use the library, you *need* to make sure that your fuzzing target is linked +dynamically and makes use of strcmp(), memcmp(), and related functions. +For optimized binaries this is an issue: those functions are often inlined +and this module is not capable of logging the coverage in this case. + +If you have the source code of the fuzzing target you should not use this +library and QEMU but build it with afl-clang-fast and the laf-intel options. + +To use this library make sure to preload it with AFL_PRELOAD. + +``` + export AFL_PRELOAD=/path/to/libcompcov.so + export AFL_COMPCOV_LEVEL=1 + + afl-fuzz -Q -i input -o output -- +``` + +The AFL_COMPCOV_LEVEL tells QEMU and libcompcov how to log comparisons. +Level 1 logs just comparisons with immediates / read-only memory and level 2 +logs all the comparisons. + +The library makes use of https://github.com/ouadev/proc_maps_parser and so it is +Linux specific. However this is not a strict dependency, other UNIX operating +systems can be supported simply by replacing the code related to the +/proc/self/maps parsing. 
-- cgit 1.4.1 From 659037eef53efc539a077331e52cd2657114d437 Mon Sep 17 00:00:00 2001 From: Andrea Fioraldi Date: Sat, 31 Aug 2019 11:31:51 +0200 Subject: modernize llvm_mode readmes --- llvm_mode/README.instrim | 26 ----- llvm_mode/README.instrim.md | 24 +++++ llvm_mode/README.laf-intel | 20 ---- llvm_mode/README.laf-intel.md | 25 +++++ llvm_mode/README.llvm | 217 ------------------------------------------ llvm_mode/README.md | 216 +++++++++++++++++++++++++++++++++++++++++ llvm_mode/README.neverzero | 22 ----- llvm_mode/README.neverzero.md | 24 +++++ llvm_mode/README.whitelist | 75 --------------- llvm_mode/README.whitelist.md | 75 +++++++++++++++ 10 files changed, 364 insertions(+), 360 deletions(-) delete mode 100644 llvm_mode/README.instrim create mode 100644 llvm_mode/README.instrim.md delete mode 100644 llvm_mode/README.laf-intel create mode 100644 llvm_mode/README.laf-intel.md delete mode 100644 llvm_mode/README.llvm create mode 100644 llvm_mode/README.md delete mode 100644 llvm_mode/README.neverzero create mode 100644 llvm_mode/README.neverzero.md delete mode 100644 llvm_mode/README.whitelist create mode 100644 llvm_mode/README.whitelist.md diff --git a/llvm_mode/README.instrim b/llvm_mode/README.instrim deleted file mode 100644 index 956a9856..00000000 --- a/llvm_mode/README.instrim +++ /dev/null @@ -1,26 +0,0 @@ -# InsTrim -InsTrim: Lightweight Instrumentation for Coverage-guided Fuzzing - - -## Introduction - -InsTrim uses CFG and markers to instrument just what is necessary in the -binary in llvm_mode. It is about 20-25% faster but as a cost has a lower -path discovery. - - -## Usage - -Set the environment variable AFL_LLVM_INSTRIM=1 - -There is also an advanced mode which instruments loops in a way so that -afl-fuzz can see which loop path has been selected but not being able to -see how often the loop has been rerun. -This again is a tradeoff for speed for less path information. 
-To enable this mode set AFL_LLVM_INSTRIM_LOOPHEAD=1 - - -## Background - -The paper: [InsTrim: Lightweight Instrumentation for Coverage-guided Fuzzing] -(https://www.ndss-symposium.org/wp-content/uploads/2018/07/bar2018_14_Hsu_paper.pdf) diff --git a/llvm_mode/README.instrim.md b/llvm_mode/README.instrim.md new file mode 100644 index 00000000..e5e3614d --- /dev/null +++ b/llvm_mode/README.instrim.md @@ -0,0 +1,24 @@ +# InsTrim + +InsTrim: Lightweight Instrumentation for Coverage-guided Fuzzing + +## Introduction + +InsTrim uses CFG and markers to instrument just what is necessary in the +binary in llvm_mode. It is about 20-25% faster but as a cost has a lower +path discovery. + +## Usage + +Set the environment variable `AFL_LLVM_INSTRIM=1`. + +There is also an advanced mode which instruments loops in a way so that +afl-fuzz can see which loop path has been selected but not being able to +see how often the loop has been rerun. +This again is a tradeoff for speed for less path information. +To enable this mode set `AFL_LLVM_INSTRIM_LOOPHEAD=1`. + +## Background + +The paper: [InsTrim: Lightweight Instrumentation for Coverage-guided Fuzzing] +(https://www.ndss-symposium.org/wp-content/uploads/2018/07/bar2018_14_Hsu_paper.pdf) diff --git a/llvm_mode/README.laf-intel b/llvm_mode/README.laf-intel deleted file mode 100644 index 340216c3..00000000 --- a/llvm_mode/README.laf-intel +++ /dev/null @@ -1,20 +0,0 @@ -Usage -===== - -By default the passes will not run when you compile programs using -afl-clang-fast. Hence, you can use AFL as usual. -To enable the passes you must set environment variables before you -compile the target project. - -The following options exist: - -export AFL_LLVM_LAF_SPLIT_SWITCHES=1 Enables the split-switches pass. - -export AFL_LLVM_LAF_TRANSFORM_COMPARES=1 Enables the transform-compares pass - (strcmp, memcmp, strncmp, strcasecmp, strncasecmp). - -export AFL_LLVM_LAF_SPLIT_COMPARES=1 Enables the split-compares pass. 
- By default it will split all compares with a bit width <= 64 bits. - You can change this behaviour by setting - export AFL_LLVM_LAF_SPLIT_COMPARES_BITW=. - diff --git a/llvm_mode/README.laf-intel.md b/llvm_mode/README.laf-intel.md new file mode 100644 index 00000000..d51c7e2f --- /dev/null +++ b/llvm_mode/README.laf-intel.md @@ -0,0 +1,25 @@ +# laf-intel instrumentation + +## Usage + +By default the passes will not run when you compile programs using +afl-clang-fast. Hence, you can use AFL as usual. +To enable the passes you must set environment variables before you +compile the target project. + +The following options exist: + +`export AFL_LLVM_LAF_SPLIT_SWITCHES=1` + +Enables the split-switches pass. + +`export AFL_LLVM_LAF_TRANSFORM_COMPARES=1` + +Enables the transform-compares pass (strcmp, memcmp, strncmp, strcasecmp, strncasecmp). + +`export AFL_LLVM_LAF_SPLIT_COMPARES=1` + +Enables the split-compares pass. +By default it will split all compares with a bit width <= 64 bits. +You can change this behaviour by setting `export AFL_LLVM_LAF_SPLIT_COMPARES_BITW=<bit_width>`. + diff --git a/llvm_mode/README.llvm b/llvm_mode/README.llvm deleted file mode 100644 index 9bb091ac..00000000 --- a/llvm_mode/README.llvm +++ /dev/null @@ -1,217 +0,0 @@ -============================================ -Fast LLVM-based instrumentation for afl-fuzz -============================================ - - (See ../docs/README for the general instruction manual.) - (See ../gcc_plugin/README.gcc for the GCC-based instrumentation.) - -1) Introduction ---------------- - -! llvm_mode works with llvm versions 3.8.0 up to 9 ! - -The code in this directory allows you to instrument programs for AFL using -true compiler-level instrumentation, instead of the more crude -assembly-level rewriting approach taken by afl-gcc and afl-clang. This has -several interesting properties: - - - The compiler can make many optimizations that are hard to pull off when - manually inserting assembly. 
As a result, some slow, CPU-bound programs will - run up to around 2x faster. - - The gains are less pronounced for fast binaries, where the speed is limited - chiefly by the cost of creating new processes. In such cases, the gain will - probably stay within 10%. - - - The instrumentation is CPU-independent. At least in principle, you should - be able to rely on it to fuzz programs on non-x86 architectures (after - building afl-fuzz with AFL_NO_X86=1). - - - The instrumentation can cope a bit better with multi-threaded targets. - - - Because the feature relies on the internals of LLVM, it is clang-specific - and will *not* work with GCC (see ../gcc_plugin/ for an alternative). - -Once this implementation is shown to be sufficiently robust and portable, it -will probably replace afl-clang. For now, it can be built separately and -co-exists with the original code. - -The idea and much of the implementation comes from Laszlo Szekeres. - -2) How to use this ------------------- - -In order to leverage this mechanism, you need to have clang installed on your -system. You should also make sure that the llvm-config tool is in your path -(or pointed to via LLVM_CONFIG in the environment). - -Unfortunately, some systems that do have clang come without llvm-config or the -LLVM development headers; one example of this is FreeBSD. FreeBSD users will -also run into problems with clang being built statically and not being able to -load modules (you'll see "Service unavailable" when loading afl-llvm-pass.so). - -To solve all your problems, you can grab pre-built binaries for your OS from: - - http://llvm.org/releases/download.html - -...and then put the bin/ directory from the tarball at the beginning of your -$PATH when compiling the feature and building packages later on. You don't need -to be root for that. - -To build the instrumentation itself, type 'make'. This will generate binaries -called afl-clang-fast and afl-clang-fast++ in the parent directory. 
Once this -is done, you can instrument third-party code in a way similar to the standard -operating mode of AFL, e.g.: - - CC=/path/to/afl/afl-clang-fast ./configure [...options...] - make - -Be sure to also include CXX set to afl-clang-fast++ for C++ code. - -The tool honors roughly the same environmental variables as afl-gcc (see -../docs/env_variables.txt). This includes AFL_USE_ASAN, -AFL_HARDEN, and AFL_DONT_OPTIMIZE. However AFL_INST_RATIO is not honored -as it does not serve a good purpose with the more effective instrim CFG -analysis. - -Note: if you want the LLVM helper to be installed on your system for all -users, you need to build it before issuing 'make install' in the parent -directory. - -3) Options - -Several options are present to make llvm_mode faster or help it rearrange -the code to make afl-fuzz path discovery easier. - -If you need just to instrument specific parts of the code, you can whitelist -which C/C++ files to actually intrument. See README.whitelist - -For splitting memcmp, strncmp, etc. please see README.laf-intel - -Then there is an optimized instrumentation strategy that uses CFGs and -markers to just instrument what is needed. This increases speed by 20-25% -however has a lower path discovery. -If you want to use this, set AFL_LLVM_INSTRIM=1 -See README.instrim - -Finally if your llvm version is 8 or lower, you can activate a mode that -prevents that a counter overflow result in a 0 value. This is good for -path discovery, but the llvm implementation for intel for this functionality -is not optimal and was only fixed in llvm 9. -You can set this with AFL_LLVM_NOT_ZERO=1 -See README.neverzero - - -4) Gotchas, feedback, bugs --------------------------- - -This is an early-stage mechanism, so field reports are welcome. You can send bug -reports to . 
- -5) Bonus feature #1: deferred initialization --------------------------------------------- - -AFL tries to optimize performance by executing the targeted binary just once, -stopping it just before main(), and then cloning this "master" process to get -a steady supply of targets to fuzz. - -Although this approach eliminates much of the OS-, linker- and libc-level -costs of executing the program, it does not always help with binaries that -perform other time-consuming initialization steps - say, parsing a large config -file before getting to the fuzzed data. - -In such cases, it's beneficial to initialize the forkserver a bit later, once -most of the initialization work is already done, but before the binary attempts -to read the fuzzed input and parse it; in some cases, this can offer a 10x+ -performance gain. You can implement delayed initialization in LLVM mode in a -fairly simple way. - -First, find a suitable location in the code where the delayed cloning can -take place. This needs to be done with *extreme* care to avoid breaking the -binary. In particular, the program will probably malfunction if you select -a location after: - - - The creation of any vital threads or child processes - since the forkserver - can't clone them easily. - - - The initialization of timers via setitimer() or equivalent calls. - - - The creation of temporary files, network sockets, offset-sensitive file - descriptors, and similar shared-state resources - but only provided that - their state meaningfully influences the behavior of the program later on. - - - Any access to the fuzzed input, including reading the metadata about its - size. - -With the location selected, add this code in the appropriate spot: - -#ifdef __AFL_HAVE_MANUAL_CONTROL - __AFL_INIT(); -#endif - -You don't need the #ifdef guards, but including them ensures that the program -will keep working normally when compiled with a tool other than afl-clang-fast. 
- -Finally, recompile the program with afl-clang-fast (afl-gcc or afl-clang will -*not* generate a deferred-initialization binary) - and you should be all set! - -6) Bonus feature #2: persistent mode ------------------------------------- - -Some libraries provide APIs that are stateless, or whose state can be reset in -between processing different input files. When such a reset is performed, a -single long-lived process can be reused to try out multiple test cases, -eliminating the need for repeated fork() calls and the associated OS overhead. - -The basic structure of the program that does this would be: - - while (__AFL_LOOP(1000)) { - - /* Read input data. */ - /* Call library code to be fuzzed. */ - /* Reset state. */ - - } - - /* Exit normally */ - -The numerical value specified within the loop controls the maximum number -of iterations before AFL will restart the process from scratch. This minimizes -the impact of memory leaks and similar glitches; 1000 is a good starting point, -and going much higher increases the likelihood of hiccups without giving you -any real performance benefits. - -A more detailed template is shown in ../experimental/persistent_demo/. -Similarly to the previous mode, the feature works only with afl-clang-fast; -#ifdef guards can be used to suppress it when using other compilers. - -Note that as with the previous mode, the feature is easy to misuse; if you -do not fully reset the critical state, you may end up with false positives or -waste a whole lot of CPU power doing nothing useful at all. Be particularly -wary of memory leaks and of the state of file descriptors. - -PS. Because there are task switches still involved, the mode isn't as fast as -"pure" in-process fuzzing offered, say, by LLVM's LibFuzzer; but it is a lot -faster than the normal fork() model, and compared to in-process fuzzing, -should be a lot more robust. 
- -8) Bonus feature #3: new 'trace-pc-guard' mode ----------------------------------------------- - -Recent versions of LLVM are shipping with a built-in execution tracing feature -that provides AFL with the necessary tracing data without the need to -post-process the assembly or install any compiler plugins. See: - - http://clang.llvm.org/docs/SanitizerCoverage.html#tracing-pcs-with-guards - -If you have a sufficiently recent compiler and want to give it a try, build -afl-clang-fast this way: - - AFL_TRACE_PC=1 make clean all - -Note that this mode is currently about 20% slower than "vanilla" afl-clang-fast, -and about 5-10% slower than afl-clang. This is likely because the -instrumentation is not inlined, and instead involves a function call. On systems -that support it, compiling your target with -flto should help. - - diff --git a/llvm_mode/README.md b/llvm_mode/README.md new file mode 100644 index 00000000..c7ef4b45 --- /dev/null +++ b/llvm_mode/README.md @@ -0,0 +1,216 @@ +# Fast LLVM-based instrumentation for afl-fuzz + + (See ../docs/README for the general instruction manual.) + (See ../gcc_plugin/README.gcc for the GCC-based instrumentation.) + +## 1) Introduction + +! llvm_mode works with llvm versions 3.8.0 up to 9 ! + +The code in this directory allows you to instrument programs for AFL using +true compiler-level instrumentation, instead of the more crude +assembly-level rewriting approach taken by afl-gcc and afl-clang. This has +several interesting properties: + + - The compiler can make many optimizations that are hard to pull off when + manually inserting assembly. As a result, some slow, CPU-bound programs will + run up to around 2x faster. + + The gains are less pronounced for fast binaries, where the speed is limited + chiefly by the cost of creating new processes. In such cases, the gain will + probably stay within 10%. + + - The instrumentation is CPU-independent. 
At least in principle, you should + be able to rely on it to fuzz programs on non-x86 architectures (after + building afl-fuzz with AFL_NO_X86=1). + + - The instrumentation can cope a bit better with multi-threaded targets. + + - Because the feature relies on the internals of LLVM, it is clang-specific + and will *not* work with GCC (see ../gcc_plugin/ for an alternative). + +Once this implementation is shown to be sufficiently robust and portable, it +will probably replace afl-clang. For now, it can be built separately and +co-exists with the original code. + +The idea and much of the implementation comes from Laszlo Szekeres. + +## 2) How to use this + +In order to leverage this mechanism, you need to have clang installed on your +system. You should also make sure that the llvm-config tool is in your path +(or pointed to via LLVM_CONFIG in the environment). + +Unfortunately, some systems that do have clang come without llvm-config or the +LLVM development headers; one example of this is FreeBSD. FreeBSD users will +also run into problems with clang being built statically and not being able to +load modules (you'll see "Service unavailable" when loading afl-llvm-pass.so). + +To solve all your problems, you can grab pre-built binaries for your OS from: + + http://llvm.org/releases/download.html + +...and then put the bin/ directory from the tarball at the beginning of your +$PATH when compiling the feature and building packages later on. You don't need +to be root for that. + +To build the instrumentation itself, type 'make'. This will generate binaries +called afl-clang-fast and afl-clang-fast++ in the parent directory. Once this +is done, you can instrument third-party code in a way similar to the standard +operating mode of AFL, e.g.: + +``` + CC=/path/to/afl/afl-clang-fast ./configure [...options...] + make +``` + +Be sure to also include CXX set to afl-clang-fast++ for C++ code. 
+ +The tool honors roughly the same environment variables as afl-gcc (see +../docs/env_variables.txt). This includes AFL_USE_ASAN, +AFL_HARDEN, and AFL_DONT_OPTIMIZE. However AFL_INST_RATIO is not honored +as it does not serve a good purpose with the more effective instrim CFG +analysis. + +Note: if you want the LLVM helper to be installed on your system for all +users, you need to build it before issuing 'make install' in the parent +directory. + +## 3) Options + +Several options are present to make llvm_mode faster or help it rearrange +the code to make afl-fuzz path discovery easier. + +If you need just to instrument specific parts of the code, you can whitelist +which C/C++ files to actually instrument. See README.whitelist.md + +For splitting memcmp, strncmp, etc. please see README.laf-intel + +Then there is an optimized instrumentation strategy that uses CFGs and +markers to just instrument what is needed. This increases speed by 20-25% +but results in lower path discovery. +If you want to use this, set AFL_LLVM_INSTRIM=1 +See README.instrim + +Finally if your llvm version is 8 or lower, you can activate a mode that +prevents a counter overflow from resulting in a 0 value. This is good for +path discovery, but the llvm implementation for intel for this functionality +is not optimal and was only fixed in llvm 9. +You can set this with AFL_LLVM_NOT_ZERO=1 +See README.neverzero.md + +## 4) Gotchas, feedback, bugs + +This is an early-stage mechanism, so field reports are welcome. You can send bug +reports to . + +## 5) Bonus feature #1: deferred initialization + +AFL tries to optimize performance by executing the targeted binary just once, +stopping it just before main(), and then cloning this "master" process to get +a steady supply of targets to fuzz. 
+ +Although this approach eliminates much of the OS-, linker- and libc-level +costs of executing the program, it does not always help with binaries that +perform other time-consuming initialization steps - say, parsing a large config +file before getting to the fuzzed data. + +In such cases, it's beneficial to initialize the forkserver a bit later, once +most of the initialization work is already done, but before the binary attempts +to read the fuzzed input and parse it; in some cases, this can offer a 10x+ +performance gain. You can implement delayed initialization in LLVM mode in a +fairly simple way. + +First, find a suitable location in the code where the delayed cloning can +take place. This needs to be done with *extreme* care to avoid breaking the +binary. In particular, the program will probably malfunction if you select +a location after: + + - The creation of any vital threads or child processes - since the forkserver + can't clone them easily. + + - The initialization of timers via setitimer() or equivalent calls. + + - The creation of temporary files, network sockets, offset-sensitive file + descriptors, and similar shared-state resources - but only provided that + their state meaningfully influences the behavior of the program later on. + + - Any access to the fuzzed input, including reading the metadata about its + size. + +With the location selected, add this code in the appropriate spot: + +```c +#ifdef __AFL_HAVE_MANUAL_CONTROL + __AFL_INIT(); +#endif +``` + +You don't need the #ifdef guards, but including them ensures that the program +will keep working normally when compiled with a tool other than afl-clang-fast. + +Finally, recompile the program with afl-clang-fast (afl-gcc or afl-clang will +*not* generate a deferred-initialization binary) - and you should be all set! + +## 6) Bonus feature #2: persistent mode + +Some libraries provide APIs that are stateless, or whose state can be reset in +between processing different input files. 
When such a reset is performed, a +single long-lived process can be reused to try out multiple test cases, +eliminating the need for repeated fork() calls and the associated OS overhead. + +The basic structure of the program that does this would be: + +```c + while (__AFL_LOOP(1000)) { + + /* Read input data. */ + /* Call library code to be fuzzed. */ + /* Reset state. */ + + } + + /* Exit normally */ +``` + +The numerical value specified within the loop controls the maximum number +of iterations before AFL will restart the process from scratch. This minimizes +the impact of memory leaks and similar glitches; 1000 is a good starting point, +and going much higher increases the likelihood of hiccups without giving you +any real performance benefits. + +A more detailed template is shown in ../experimental/persistent_demo/. +Similarly to the previous mode, the feature works only with afl-clang-fast; #ifdef +guards can be used to suppress it when using other compilers. + +Note that as with the previous mode, the feature is easy to misuse; if you +do not fully reset the critical state, you may end up with false positives or +waste a whole lot of CPU power doing nothing useful at all. Be particularly +wary of memory leaks and of the state of file descriptors. + +PS. Because there are task switches still involved, the mode isn't as fast as +"pure" in-process fuzzing offered, say, by LLVM's libFuzzer; but it is a lot +faster than the normal fork() model, and compared to in-process fuzzing, +should be a lot more robust. + +## 7) Bonus feature #3: new 'trace-pc-guard' mode + +Recent versions of LLVM are shipping with a built-in execution tracing feature +that provides AFL with the necessary tracing data without the need to +post-process the assembly or install any compiler plugins. 
See: + + http://clang.llvm.org/docs/SanitizerCoverage.html#tracing-pcs-with-guards + +If you have a sufficiently recent compiler and want to give it a try, build +afl-clang-fast this way: + +``` + AFL_TRACE_PC=1 make clean all +``` + +Note that this mode is currently about 20% slower than "vanilla" afl-clang-fast, +and about 5-10% slower than afl-clang. This is likely because the +instrumentation is not inlined, and instead involves a function call. On systems +that support it, compiling your target with -flto should help. + + diff --git a/llvm_mode/README.neverzero b/llvm_mode/README.neverzero deleted file mode 100644 index ef873acb..00000000 --- a/llvm_mode/README.neverzero +++ /dev/null @@ -1,22 +0,0 @@ -Usage -===== - -In larger, complex or reiterative programs the map that collects the edge pairs -can easily fill up and wrap. -This is not that much of an issue - unless by chance it wraps just to a 0 -when the program execution ends. -In this case afl-fuzz is not able to see that the pair has been accessed and -will ignore it. - -NeverZero prevents this behaviour. If a counter wraps, it jumps over the 0 -directly to a 1. This improves path discovery (by a very little amount) -at a very little cost (one instruction per edge). - -This is implemented in afl-gcc, however for llvm_mode this is optional if -the llvm version is below 9 - as there is a perfomance bug that is only fixed -in version 9 and onwards. - -If you want to enable this for llvm < 9 then set - -export AFL_LLVM_NOT_ZERO=1 - diff --git a/llvm_mode/README.neverzero.md b/llvm_mode/README.neverzero.md new file mode 100644 index 00000000..5fcf7b47 --- /dev/null +++ b/llvm_mode/README.neverzero.md @@ -0,0 +1,24 @@ +# NeverZero counters for LLVM instrumentation + +## Usage + +In larger, complex or reiterative programs the map that collects the edge pairs +can easily fill up and wrap. +This is not that much of an issue - unless by chance it wraps just to a 0 +when the program execution ends. 
+In this case afl-fuzz is not able to see that the pair has been accessed and +will ignore it. + +NeverZero prevents this behaviour. If a counter wraps, it jumps over the 0 +directly to a 1. This improves path discovery (by a very small amount) +at very little cost (one instruction per edge). + +This is implemented in afl-gcc, however for llvm_mode this is optional if +the llvm version is below 9 - as there is a performance bug that is only fixed +in version 9 and onwards. + +If you want to enable this for llvm < 9 then set + +``` +export AFL_LLVM_NOT_ZERO=1 +``` diff --git a/llvm_mode/README.whitelist b/llvm_mode/README.whitelist deleted file mode 100644 index ae044749..00000000 --- a/llvm_mode/README.whitelist +++ /dev/null @@ -1,75 +0,0 @@ -======================================== -Using afl++ with partial instrumentation -======================================== - - This file describes how you can selectively instrument only the source files - that are interesting to you using the LLVM instrumentation provided by - afl++ - - Originally developed by Christian Holler (:decoder) . - - -1) Description and purpose --------------------------- - -When building and testing complex programs where only a part of the program is -the fuzzing target, it often helps to only instrument the necessary parts of -the program, leaving the rest uninstrumented. This helps to focus the fuzzer -on the important parts of the program, avoiding undesired noise and -disturbance by uninteresting code being exercised. - -For this purpose, I have added a "partial instrumentation" support to the LLVM -mode of AFLFuzz that allows you to specify on a source file level which files -should be compiled with or without instrumentation. - - -2) Building the LLVM module ---------------------------- - -The new code is part of the existing afl++ LLVM module in the llvm_mode/ -subdirectory. 
There is nothing specifically to do :) - - -3) How to use the partial instrumentation mode ----------------------------------------------- - -In order to build with partial instrumentation, you need to build with -afl-clang-fast and afl-clang-fast++ respectively. The only required change is -that you need to set the environment variable AFL_LLVM_WHITELIST when calling -the compiler. - -The environment variable must point to a file containing all the filenames -that should be instrumented. For matching, the filename that is being compiled -must end in the filename contained in this whitelist (to avoid breaking the -matching when absolute paths are used during compilation). - -For example if your source tree looks like this: - -project/ -project/feature_a/a1.cpp -project/feature_a/a2.cpp -project/feature_b/b1.cpp -project/feature_b/b2.cpp - -And you only want to test feature_a, then create a whitelist file containing: - -feature_a/a1.cpp -feature_a/a2.cpp - -However if the whitelist file contains this, it works as well: - -a1.cpp -a2.cpp - -but it might lead to files being unwantedly instrumented if the same filename -exists somewhere else in the project. - -The created whitelist file is then set to AFL_INST_WHITELIST when you compile -your program. For each file that didn't match the whitelist, the compiler will -issue a warning at the end stating that no blocks were instrumented. If you -didn't intend to instrument that file, then you can safely ignore that warning. 
- -For old LLVM versions this feature might require to be compiled with debug -information (-g), however at least from llvm version 6.0 onwards this is not -required anymore (and might hurt performance and crash detection, so better not -use -g) diff --git a/llvm_mode/README.whitelist.md b/llvm_mode/README.whitelist.md new file mode 100644 index 00000000..5aededba --- /dev/null +++ b/llvm_mode/README.whitelist.md @@ -0,0 +1,75 @@ +# Using afl++ with partial instrumentation + + This file describes how you can selectively instrument only the source files + that are interesting to you using the LLVM instrumentation provided by + afl++ + + Originally developed by Christian Holler (:decoder) . + +## 1) Description and purpose + +When building and testing complex programs where only a part of the program is +the fuzzing target, it often helps to only instrument the necessary parts of +the program, leaving the rest uninstrumented. This helps to focus the fuzzer +on the important parts of the program, avoiding undesired noise and +disturbance by uninteresting code being exercised. + +For this purpose, I have added a "partial instrumentation" support to the LLVM +mode of AFLFuzz that allows you to specify on a source file level which files +should be compiled with or without instrumentation. + + +## 2) Building the LLVM module + +The new code is part of the existing afl++ LLVM module in the llvm_mode/ +subdirectory. There is nothing specifically to do :) + + +## 3) How to use the partial instrumentation mode + +In order to build with partial instrumentation, you need to build with +afl-clang-fast and afl-clang-fast++ respectively. The only required change is +that you need to set the environment variable AFL_LLVM_WHITELIST when calling +the compiler. + +The environment variable must point to a file containing all the filenames +that should be instrumented. 
For matching, the filename that is being compiled +must end in the filename contained in this whitelist (to avoid breaking the +matching when absolute paths are used during compilation). + +For example if your source tree looks like this: + +``` +project/ +project/feature_a/a1.cpp +project/feature_a/a2.cpp +project/feature_b/b1.cpp +project/feature_b/b2.cpp +``` + +And you only want to test feature_a, then create a whitelist file containing: + +``` +feature_a/a1.cpp +feature_a/a2.cpp +``` + +However if the whitelist file contains this, it works as well: + +``` +a1.cpp +a2.cpp +``` + +but it might lead to files being unintentionally instrumented if the same filename +exists somewhere else in the project. + +The created whitelist file is then set to AFL_LLVM_WHITELIST when you compile +your program. For each file that didn't match the whitelist, the compiler will +issue a warning at the end stating that no blocks were instrumented. If you +didn't intend to instrument that file, then you can safely ignore that warning. + +For old LLVM versions this feature might require compiling with debug +information (-g), however at least from llvm version 6.0 onwards this is not +required anymore (and might hurt performance and crash detection, so it is better +not to use -g). -- cgit 1.4.1 From c124576a4dc00e31ad5cad118098f46eaa29cd17 Mon Sep 17 00:00:00 2001 From: hexcoder- Date: Sun, 1 Sep 2019 17:47:14 +0200 Subject: change text color in FATAL, ABORT and PFATAL macros for the actual message to avoid white text on white background (as is standard in plain X11 xterm). Now the text will be printed in default text color (which should be always readable) --- debug.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/debug.h b/debug.h index a943a573..c0044280 100644 --- a/debug.h +++ b/debug.h @@ -198,7 +198,7 @@ #define FATAL(x...) 
do { \ SAYF(bSTOP RESET_G1 CURSOR_SHOW cRST cLRD "\n[-] PROGRAM ABORT : " \ - cBRI x); \ + cRST x); \ SAYF(cLRD "\n Location : " cRST "%s(), %s:%u\n\n", \ __FUNCTION__, __FILE__, __LINE__); \ exit(1); \ @@ -208,7 +208,7 @@ #define ABORT(x...) do { \ SAYF(bSTOP RESET_G1 CURSOR_SHOW cRST cLRD "\n[-] PROGRAM ABORT : " \ - cBRI x); \ + cRST x); \ SAYF(cLRD "\n Stop location : " cRST "%s(), %s:%u\n\n", \ __FUNCTION__, __FILE__, __LINE__); \ abort(); \ @@ -219,7 +219,7 @@ #define PFATAL(x...) do { \ fflush(stdout); \ SAYF(bSTOP RESET_G1 CURSOR_SHOW cRST cLRD "\n[-] SYSTEM ERROR : " \ - cBRI x); \ + cRST x); \ SAYF(cLRD "\n Stop location : " cRST "%s(), %s:%u\n", \ __FUNCTION__, __FILE__, __LINE__); \ SAYF(cLRD " OS message : " cRST "%s\n", strerror(errno)); \ -- cgit 1.4.1