From a9b9a76bbbe8dbd8a9a9048ec7a5d97149e223c4 Mon Sep 17 00:00:00 2001 From: dloffre-snl <50888856+dloffre-snl@users.noreply.github.com> Date: Thu, 2 Sep 2021 17:19:18 -0600 Subject: Update symcc mutator to new afl_custom_queue_new_entry signature (#1087) Co-authored-by: van Hauser --- custom_mutators/symcc/symcc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/symcc/symcc.c b/custom_mutators/symcc/symcc.c index 19218449..86f23343 100644 --- a/custom_mutators/symcc/symcc.c +++ b/custom_mutators/symcc/symcc.c @@ -129,7 +129,7 @@ uint8_t afl_custom_queue_new_entry(my_mutator_t * data, int pid = fork(); - if (pid == -1) return; + if (pid == -1) return 0; if (pid) { @@ -147,7 +147,7 @@ uint8_t afl_custom_queue_new_entry(my_mutator_t * data, if (r <= 0) { close(pipefd[1]); - return; + return 0; } -- cgit 1.4.1 From b9f88ab166bd798d25d3acdbc6b5c305d7875482 Mon Sep 17 00:00:00 2001 From: Daniel Ebert Date: Tue, 5 Oct 2021 17:40:23 -0700 Subject: fix stack-use-after-return in libfuzzer custom mutator --- custom_mutators/libfuzzer/libfuzzer.inc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/libfuzzer/libfuzzer.inc b/custom_mutators/libfuzzer/libfuzzer.inc index 01f21dbe..8c4bdbf6 100644 --- a/custom_mutators/libfuzzer/libfuzzer.inc +++ b/custom_mutators/libfuzzer/libfuzzer.inc @@ -2,7 +2,7 @@ extern "C" ATTRIBUTE_INTERFACE void LLVMFuzzerMyInit(int (*Callback)(const uint8_t *Data, size_t Size), unsigned int Seed) { - Random Rand(Seed); + auto *Rand = new Random(Seed); FuzzingOptions Options; Options.Verbosity = 3; Options.MaxLen = 1024000; @@ -30,7 +30,7 @@ LLVMFuzzerMyInit(int (*Callback)(const uint8_t *Data, size_t Size), unsigned int struct EntropicOptions Entropic; Entropic.Enabled = Options.Entropic; EF = new ExternalFunctions(); - auto *MD = new MutationDispatcher(Rand, Options); + auto *MD = new MutationDispatcher(*Rand, Options); auto *Corpus = new InputCorpus(Options.OutputCorpus, Entropic); auto *F = new Fuzzer(Callback, *Corpus, *MD, Options); } -- cgit 1.4.1 From f0e6a7a4f8a387cd295a132ef0723f3257bed658 Mon Sep 17 00:00:00 2001 From: Daniel Ebert Date: Wed, 6 Oct 2021 14:19:22 -0700 Subject: fix memory leak in libfuzzer custom mutator --- custom_mutators/libfuzzer/FuzzerLoop.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'custom_mutators') diff --git a/custom_mutators/libfuzzer/FuzzerLoop.cpp b/custom_mutators/libfuzzer/FuzzerLoop.cpp index 08fda520..6716dbf5 100644 --- a/custom_mutators/libfuzzer/FuzzerLoop.cpp +++ b/custom_mutators/libfuzzer/FuzzerLoop.cpp @@ -1086,6 +1086,7 @@ ATTRIBUTE_INTERFACE size_t LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize) { assert(fuzzer::F); + fuzzer::F->GetMD().StartMutationSequence(); size_t r = fuzzer::F->GetMD().DefaultMutate(Data, Size, MaxSize); #ifdef INTROSPECTION introspection_ptr = fuzzer::F->GetMD().WriteMutationSequence(); -- cgit 1.4.1 From 3deca3b09b46130c9e23320c0b98f60543f9b5ba Mon Sep 17 00:00:00 2001 From: vanhauser-thc Date: Fri, 15 Oct 2021 11:25:02 +0200 Subject: fix lto cmplog stability issue --- custom_mutators/grammar_mutator/grammar_mutator | 2 +- docs/Changelog.md | 1 + qemu_mode/qemuafl | 2 +- src/afl-fuzz-run.c | 30 +++++++++++++++++++++++-- unicorn_mode/unicornafl | 2 +- 5 files changed, 32 insertions(+), 5 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/grammar_mutator/grammar_mutator b/custom_mutators/grammar_mutator/grammar_mutator index 
eedf07dd..b79d51a8 160000 --- a/custom_mutators/grammar_mutator/grammar_mutator +++ b/custom_mutators/grammar_mutator/grammar_mutator @@ -1 +1 @@ -Subproject commit eedf07ddb0fb1f437f5e76b77cfd4064cf6a5d63 +Subproject commit b79d51a8daccbd7a693f9b6765c81ead14f28e26 diff --git a/docs/Changelog.md b/docs/Changelog.md index ea58a386..df4d343a 100644 --- a/docs/Changelog.md +++ b/docs/Changelog.md @@ -16,6 +16,7 @@ sending a mail to . - fix a regression introduced in 3.10 that resulted in less coverage being detected. thanks to Collin May for reporting! - fix -n dumb mode (nobody should use this) + - fix stability issue with LTO and cmplog - afl-showmap, afl-tmin and afl-analyze now honor persistent mode for more speed. thanks to dloffre-snl for reporting! - Prevent accidently killing non-afl/fuzz services when aborting diff --git a/qemu_mode/qemuafl b/qemu_mode/qemuafl index a6758d1c..71ed0d20 160000 --- a/qemu_mode/qemuafl +++ b/qemu_mode/qemuafl @@ -1 +1 @@ -Subproject commit a6758d1cc3e4dde88fca3f0b3a903581b7c8b2e5 +Subproject commit 71ed0d206fd3d877420dceb4993a1011a4637ae6 diff --git a/src/afl-fuzz-run.c b/src/afl-fuzz-run.c index 4173f4e1..da6ba7d9 100644 --- a/src/afl-fuzz-run.c +++ b/src/afl-fuzz-run.c @@ -291,8 +291,6 @@ static void write_with_gap(afl_state_t *afl, u8 *mem, u32 len, u32 skip_at, u8 calibrate_case(afl_state_t *afl, struct queue_entry *q, u8 *use_mem, u32 handicap, u8 from_queue) { - if (unlikely(afl->shm.cmplog_mode)) { q->exec_cksum = 0; } - u8 fault = 0, new_bits = 0, var_detected = 0, hnb = 0, first_run = (q->exec_cksum == 0); u64 start_us, stop_us, diff_us; @@ -300,6 +298,8 @@ u8 calibrate_case(afl_state_t *afl, struct queue_entry *q, u8 *use_mem, u32 use_tmout = afl->fsrv.exec_tmout; u8 *old_sn = afl->stage_name; + if (unlikely(afl->shm.cmplog_mode)) { q->exec_cksum = 0; } + /* Be a bit more generous about timeouts when resuming sessions, or when trying to calibrate already-added finds. This helps avoid trouble due to intermittent latency. */ @@ -343,6 +343,32 @@ u8 calibrate_case(afl_state_t *afl, struct queue_entry *q, u8 *use_mem, } + /* we need a dummy run if this is LTO + cmplog */ + if (unlikely(afl->shm.cmplog_mode)) { + + write_to_testcase(afl, use_mem, q->len); + + fault = fuzz_run_target(afl, &afl->fsrv, use_tmout); + + /* afl->stop_soon is set by the handler for Ctrl+C. When it's pressed, + we want to bail out quickly. 
*/ + + if (afl->stop_soon || fault != afl->crash_mode) { goto abort_calibration; } + + if (!afl->non_instrumented_mode && !afl->stage_cur && + !count_bytes(afl, afl->fsrv.trace_bits)) { + + fault = FSRV_RUN_NOINST; + goto abort_calibration; + + } + +#ifdef INTROSPECTION + if (unlikely(!q->bitsmap_size)) q->bitsmap_size = afl->bitsmap_size; +#endif + + } + if (q->exec_cksum) { memcpy(afl->first_trace, afl->fsrv.trace_bits, afl->fsrv.map_size); diff --git a/unicorn_mode/unicornafl b/unicorn_mode/unicornafl index d4915053..f1c85364 160000 --- a/unicorn_mode/unicornafl +++ b/unicorn_mode/unicornafl @@ -1 +1 @@ -Subproject commit d4915053d477dd827b3fe4b494173d3fbf9f456e +Subproject commit f1c853648a74b0157d233a2ef9f1693cfee78c11 -- cgit 1.4.1 From d9e39be4e8dd0242877412d1a17589acc825292e Mon Sep 17 00:00:00 2001 From: llzmb <46303940+llzmb@users.noreply.github.com> Date: Thu, 2 Dec 2021 16:49:16 +0100 Subject: Remove the word "simply" --- custom_mutators/gramatron/README.md | 6 +++--- docs/env_variables.md | 10 +++++----- docs/fuzzing_binary-only_targets.md | 3 +-- docs/fuzzing_in_depth.md | 10 +++++----- frida_mode/DEBUGGING.md | 6 +++--- frida_mode/MapDensity.md | 14 +++++++------- frida_mode/README.md | 10 +++++----- frida_mode/Scripting.md | 2 +- instrumentation/README.llvm.md | 4 ++-- qemu_mode/libcompcov/README.md | 6 +++--- utils/libtokencap/README.md | 8 ++++---- 11 files changed, 39 insertions(+), 40 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/gramatron/README.md b/custom_mutators/gramatron/README.md index 91f93355..c8a76e3e 100644 --- a/custom_mutators/gramatron/README.md +++ b/custom_mutators/gramatron/README.md @@ -5,11 +5,11 @@ grammar-aware fuzzing. Technical details about our framework are available in the [ISSTA'21 paper](https://nebelwelt.net/files/21ISSTA.pdf). The artifact to reproduce the experiments presented in the paper are present in `artifact/`. Instructions to run a sample campaign and incorporate new -grammars is presented below: +grammars is presented below: # Compiling -Simply execute `./build_gramatron_mutator.sh` +Execute `./build_gramatron_mutator.sh` # Running @@ -25,7 +25,7 @@ afl-fuzz -i in -o out -- ./target # Adding and testing a new grammar -- Specify in a JSON format for CFG. Examples are correspond `source.json` files +- Specify in a JSON format for CFG. Examples are correspond `source.json` files - Run the automaton generation script (in `src/gramfuzz-mutator/preprocess`) which will place the generated automaton in the same folder. ``` diff --git a/docs/env_variables.md b/docs/env_variables.md index c1c70ec5..6f6110ae 100644 --- a/docs/env_variables.md +++ b/docs/env_variables.md @@ -59,9 +59,9 @@ fairly broad use of environment variables instead: otherwise. - By default, the wrapper appends `-O3` to optimize builds. Very rarely, this - will cause problems in programs built with -Werror, simply because `-O3` - enables more thorough code analysis and can spew out additional warnings. To - disable optimizations, set `AFL_DONT_OPTIMIZE`. However, if `-O...` and/or + will cause problems in programs built with -Werror, because `-O3` enables + more thorough code analysis and can spew out additional warnings. To disable + optimizations, set `AFL_DONT_OPTIMIZE`. However, if `-O...` and/or `-fno-unroll-loops` are set, these are not overridden. - Setting `AFL_HARDEN` automatically adds code hardening options when invoking @@ -651,8 +651,8 @@ call back into FRIDA to find the next block. Default is 32. 
* `AFL_FRIDA_STATS_FILE` - Write statistics information about the code being instrumented to the given file name. The statistics are written only for the child process when new block is instrumented (when the -`AFL_FRIDA_STATS_INTERVAL` has expired). Note that simply because a new path is -found does not mean a new block needs to be compiled. It could simply be that +`AFL_FRIDA_STATS_INTERVAL` has expired). Note that just because a new path is +found does not mean a new block needs to be compiled. It could be that the existing blocks instrumented have been executed in a different order. * `AFL_FRIDA_STATS_INTERVAL` - The maximum frequency to output statistics information. Stats will be written whenever they are updated if the given diff --git a/docs/fuzzing_binary-only_targets.md b/docs/fuzzing_binary-only_targets.md index 290c9bec..2d57d0dc 100644 --- a/docs/fuzzing_binary-only_targets.md +++ b/docs/fuzzing_binary-only_targets.md @@ -129,8 +129,7 @@ Unicorn is a fork of QEMU. The instrumentation is, therefore, very similar. In contrast to QEMU, Unicorn does not offer a full system or even userland emulation. Runtime environment and/or loaders have to be written from scratch, if needed. On top, block chaining has been removed. This means the speed boost -introduced in the patched QEMU Mode of AFL++ cannot simply be ported over to -Unicorn. +introduced in the patched QEMU Mode of AFL++ cannot be ported over to Unicorn. For non-Linux binaries, you can use AFL++'s unicorn_mode which can emulate anything you want - for the price of speed and user written scripts. diff --git a/docs/fuzzing_in_depth.md b/docs/fuzzing_in_depth.md index 251bbc1d..7aabe090 100644 --- a/docs/fuzzing_in_depth.md +++ b/docs/fuzzing_in_depth.md @@ -427,8 +427,8 @@ It can be valuable to run afl-fuzz in a screen or tmux shell so you can log off, or afl-fuzz is not aborted if you are running it in a remote ssh session where the connection fails in between. Only do that though once you have verified that your fuzzing setup works! -Simply run it like `screen -dmS afl-main -- afl-fuzz -M main-$HOSTNAME -i ...` -and it will start away in a screen session. To enter this session simply type +Run it like `screen -dmS afl-main -- afl-fuzz -M main-$HOSTNAME -i ...` +and it will start away in a screen session. To enter this session, type `screen -r afl-main`. You see - it makes sense to name the screen session same as the afl-fuzz -M/-S naming :-) For more information on screen or tmux please check their documentation. @@ -457,7 +457,7 @@ handling in the target. Play around with various -m values until you find one that safely works for all your input seeds (if you have good ones and then double or quadruple that. -By default afl-fuzz never stops fuzzing. To terminate AFL++ simply press +By default afl-fuzz never stops fuzzing. To terminate AFL++, press Control-C or send a signal SIGINT. You can limit the number of executions or approximate runtime in seconds with options also. @@ -554,7 +554,7 @@ recommended! ### d) Using multiple machines for fuzzing Maybe you have more than one machine you want to fuzz the same target on. -Simply start the `afl-fuzz` (and perhaps libfuzzer, honggfuzz, ...) +Start the `afl-fuzz` (and perhaps libfuzzer, honggfuzz, ...) orchestra as you like, just ensure that your have one and only one `-M` instance per server, and that its name is unique, hence the recommendation for `-M main-$HOSTNAME`. @@ -609,7 +609,7 @@ e.g., `afl-plot out/default /srv/www/htdocs/plot`. 
### f) Stopping fuzzing, restarting fuzzing, adding new seeds -To stop an afl-fuzz run, simply press Control-C. +To stop an afl-fuzz run, press Control-C. To restart an afl-fuzz run, just reuse the same command line but replace the `-i directory` with `-i -` or set `AFL_AUTORESUME=1`. diff --git a/frida_mode/DEBUGGING.md b/frida_mode/DEBUGGING.md index 9cdc5eb6..b703ae43 100644 --- a/frida_mode/DEBUGGING.md +++ b/frida_mode/DEBUGGING.md @@ -160,9 +160,9 @@ Lastly, if your defect only occurs when using `afl-fuzz` (e.g., when using shared memory mapping being created for it to record its data), it is possible to enable the creation of a core dump for post-mortem analysis. -Firstly, check if your `/proc/sys/kernel/core_pattern` configuration is simply -set to a filename (AFL++ encourages you to set it to the value `core` in any -case since it doesn't want any handler applications getting in the way). +Firstly, check if your `/proc/sys/kernel/core_pattern` configuration is set to a +filename (AFL++ encourages you to set it to the value `core` in any case since +it doesn't want any handler applications getting in the way). Next, set `ulimit -c unlimited` to remove any size limitations for core files. diff --git a/frida_mode/MapDensity.md b/frida_mode/MapDensity.md index b6a96ca0..50f2720f 100644 --- a/frida_mode/MapDensity.md +++ b/frida_mode/MapDensity.md @@ -77,13 +77,13 @@ evenly distributed. We start with a large address and need to discard a large number of the bits to generate a block ID which is within range. But how do we choose the unique bits of the address versus those which are the same for every block? The high bits of -the address may simply be all `0s` or all `1s` to make the address canonical, -the middle portion of the address may be the same for all blocks (since if they -are all within the same binary, then they will all be adjacent in memory), and -on some systems, even the low bits may have poor entropy as some use fixed -length aligned instructions. Then we need to consider that a portion of each -binary may contain the `.data` or `.bss` sections and so may not contain any -blocks of code at all. +the address may be all `0s` or all `1s` to make the address canonical, the +middle portion of the address may be the same for all blocks (since if they are +all within the same binary, then they will all be adjacent in memory), and on +some systems, even the low bits may have poor entropy as some use fixed length +aligned instructions. Then we need to consider that a portion of each binary may +contain the `.data` or `.bss` sections and so may not contain any blocks of code +at all. ### Edge IDs diff --git a/frida_mode/README.md b/frida_mode/README.md index c19280e1..c2b98473 100644 --- a/frida_mode/README.md +++ b/frida_mode/README.md @@ -229,9 +229,9 @@ instances run CMPLOG mode and instrumentation of the binary is less frequent * `AFL_FRIDA_STATS_FILE` - Write statistics information about the code being instrumented to the given file name. The statistics are written only for the child process when new block is instrumented (when the - `AFL_FRIDA_STATS_INTERVAL` has expired). Note that simply because a new path - is found does not mean a new block needs to be compiled. It could simply be - that the existing blocks instrumented have been executed in a different order. + `AFL_FRIDA_STATS_INTERVAL` has expired). Note that just because a new path is + found does not mean a new block needs to be compiled. 
It could be that the + existing blocks instrumented have been executed in a different order. ``` stats @@ -359,8 +359,8 @@ An example of how to fuzz a dynamic library on OSX is included, see [test/osx-lib](test/osx-lib). This requires the use of a simple test harness executable which will load the library and call a target function within it. The dependent library can either be loaded in using `dlopen` and `dlsym` in a -function marked `__attribute__((constructor()))` or the test harness can simply -be linked against it. It is important that the target library is loaded before +function marked `__attribute__((constructor()))` or the test harness can be +linked against it. It is important that the target library is loaded before execution of `main`, since this is the point where FRIDA mode is initialized. Otherwise, it will not be possible to configure coverage for the test library using `AFL_FRIDA_INST_RANGES` or similar. diff --git a/frida_mode/Scripting.md b/frida_mode/Scripting.md index fcf8a490..fd4282db 100644 --- a/frida_mode/Scripting.md +++ b/frida_mode/Scripting.md @@ -511,7 +511,7 @@ int main(int argc, char **argv) { ``` There are a couple of obstacles with our target application. Unlike when fuzzing -source code, though, we can't simply edit it and recompile it. The following +source code, though, we can't just edit it and recompile it. The following script shows how we can use the normal functionality of FRIDA to modify any troublesome behavior. diff --git a/instrumentation/README.llvm.md b/instrumentation/README.llvm.md index 88ea0127..35f38261 100644 --- a/instrumentation/README.llvm.md +++ b/instrumentation/README.llvm.md @@ -126,8 +126,8 @@ Then there are different ways of instrumenting the target: 1. An better instrumentation strategy uses LTO and link time instrumentation. Note that not all targets can compile in this mode, however if it works it is - the best option you can use. Simply use afl-clang-lto/afl-clang-lto++ to use - this option. See [README.lto.md](README.lto.md). + the best option you can use. To go with this option, use + afl-clang-lto/afl-clang-lto++. See [README.lto.md](README.lto.md). 2. Alternatively you can choose a completely different coverage method: diff --git a/qemu_mode/libcompcov/README.md b/qemu_mode/libcompcov/README.md index fca20a69..6a72f5ff 100644 --- a/qemu_mode/libcompcov/README.md +++ b/qemu_mode/libcompcov/README.md @@ -23,7 +23,7 @@ To use this library make sure to preload it with AFL_PRELOAD. ``` export AFL_PRELOAD=/path/to/libcompcov.so export AFL_COMPCOV_LEVEL=1 - + afl-fuzz -Q -i input -o output -- ``` @@ -33,5 +33,5 @@ logs all the comparisons. The library make use of https://github.com/ouadev/proc_maps_parser and so it is Linux specific. However this is not a strict dependency, other UNIX operating -systems can be supported simply replacing the code related to the -/proc/self/maps parsing. +systems can be supported by replacing the code related to the +/proc/self/maps parsing. \ No newline at end of file diff --git a/utils/libtokencap/README.md b/utils/libtokencap/README.md index a39ed3a5..91ebebd1 100644 --- a/utils/libtokencap/README.md +++ b/utils/libtokencap/README.md @@ -40,10 +40,10 @@ when using afl-gcc. This setting specifically adds the following flags: -fno-builtin-strcasestr ``` -The next step is simply loading this library via LD_PRELOAD. 
The optimal usage -pattern is to allow afl-fuzz to fuzz normally for a while and build up a corpus, -and then fire off the target binary, with libtokencap.so loaded, on every file -found by AFL in that earlier run. This demonstrates the basic principle: +The next step is to load this library via LD_PRELOAD. The optimal usage pattern +is to allow afl-fuzz to fuzz normally for a while and build up a corpus, and +then fire off the target binary, with libtokencap.so loaded, on every file found +by AFL in that earlier run. This demonstrates the basic principle: ``` export AFL_TOKEN_FILE=$PWD/temp_output.txt -- cgit 1.4.1 From 377adb776ee2adb3cb17a174279e384ae9dd590c Mon Sep 17 00:00:00 2001 From: llzmb <46303940+llzmb@users.noreply.github.com> Date: Thu, 2 Dec 2021 21:16:16 +0100 Subject: Change "eg" to "e.g." and fix punctuation --- custom_mutators/gramatron/README.md | 8 +++++--- docs/fuzzing_in_depth.md | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/gramatron/README.md b/custom_mutators/gramatron/README.md index c8a76e3e..0167f620 100644 --- a/custom_mutators/gramatron/README.md +++ b/custom_mutators/gramatron/README.md @@ -28,18 +28,20 @@ afl-fuzz -i in -o out -- ./target - Specify in a JSON format for CFG. Examples are correspond `source.json` files - Run the automaton generation script (in `src/gramfuzz-mutator/preprocess`) which will place the generated automaton in the same folder. + ``` ./preprocess/prep_automaton.sh [stack_limit] -Eg. ./preprocess/prep_automaton.sh ~/grammars/ruby/source.json PROGRAM +E.g., ./preprocess/prep_automaton.sh ~/grammars/ruby/source.json PROGRAM ``` - If the grammar has no self-embedding rules then you do not need to pass the stack limit parameter. However, if it does have self-embedding rules then you need to pass the stack limit parameter. We recommend starting with `5` and then increasing it if you need more complexity - To sanity-check that the automaton is generating inputs as expected you can use the `test` binary housed in `src/gramfuzz-mutator` + ``` ./test SanityCheck -Eg. ./test SanityCheck ~/grammars/ruby/source_automata.json -``` +E.g., ./test SanityCheck ~/grammars/ruby/source_automata.json +``` \ No newline at end of file diff --git a/docs/fuzzing_in_depth.md b/docs/fuzzing_in_depth.md index 4e1e001e..7c464d81 100644 --- a/docs/fuzzing_in_depth.md +++ b/docs/fuzzing_in_depth.md @@ -175,7 +175,7 @@ The following sanitizers have built-in support in AFL++: * ASAN = Address SANitizer, finds memory corruption vulnerabilities like use-after-free, NULL pointer dereference, buffer overruns, etc. Enabled with `export AFL_USE_ASAN=1` before compiling. -* MSAN = Memory SANitizer, finds read access to uninitialized memory, eg. a +* MSAN = Memory SANitizer, finds read access to uninitialized memory, e.g., a local variable that is defined and read before it is even set. Enabled with `export AFL_USE_MSAN=1` before compiling. 
* UBSAN = Undefined Behavior SANitizer, finds instances where - by the C and C++ -- cgit 1.4.1 From aa6586a761c47da1f202221c9a449b692cacbcae Mon Sep 17 00:00:00 2001 From: llzmb <46303940+llzmb@users.noreply.github.com> Date: Thu, 2 Dec 2021 21:23:22 +0100 Subject: Fix typos - 1st run --- custom_mutators/gramatron/README.md | 6 +++--- docs/FAQ.md | 2 +- docs/fuzzing_in_depth.md | 2 +- instrumentation/README.llvm.md | 2 +- qemu_mode/libcompcov/README.md | 2 +- utils/optimin/README.md | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/gramatron/README.md b/custom_mutators/gramatron/README.md index 0167f620..2ed014cd 100644 --- a/custom_mutators/gramatron/README.md +++ b/custom_mutators/gramatron/README.md @@ -1,6 +1,6 @@ # GramaTron -Gramatron is a coverage-guided fuzzer that uses grammar automatons to perform +GramaTron is a coverage-guided fuzzer that uses grammar automatons to perform grammar-aware fuzzing. Technical details about our framework are available in the [ISSTA'21 paper](https://nebelwelt.net/files/21ISSTA.pdf). The artifact to reproduce the experiments presented in the paper are present @@ -9,11 +9,11 @@ grammars is presented below: # Compiling -Execute `./build_gramatron_mutator.sh` +Execute `./build_gramatron_mutator.sh`. # Running -You have to set the grammar file to use with `GRAMMATRON_AUTOMATION`: +You have to set the grammar file to use with `GRAMATRON_AUTOMATION`: ``` export AFL_DISABLE_TRIM=1 diff --git a/docs/FAQ.md b/docs/FAQ.md index 27250415..671957ef 100644 --- a/docs/FAQ.md +++ b/docs/FAQ.md @@ -125,7 +125,7 @@ If you find an interesting or important question missing, submit it via If, however, randomness happens, e.g., a thread reading other external data, reaction to timing, etc., then in some of the re-executions with the same data - the edge coverage result will be different accross runs. Those edges that + the edge coverage result will be different across runs. Those edges that change are then flagged "unstable". The more "unstable" edges, the more difficult for AFL++ to identify valid new paths. diff --git a/docs/fuzzing_in_depth.md b/docs/fuzzing_in_depth.md index 7c464d81..011ba783 100644 --- a/docs/fuzzing_in_depth.md +++ b/docs/fuzzing_in_depth.md @@ -131,7 +131,7 @@ The following options are available when you instrument with LTO mode have to compile the target twice, once specifically with/for this mode by setting `AFL_LLVM_CMPLOG=1`, and pass this binary to afl-fuzz via the `-c` parameter. Note that you can compile also just a cmplog binary and use that - for both, however, there will be a performance penality. You can read more + for both, however, there will be a performance penalty. You can read more about this in [instrumentation/README.cmplog.md](../instrumentation/README.cmplog.md). diff --git a/instrumentation/README.llvm.md b/instrumentation/README.llvm.md index ac8f2f2a..c93cd312 100644 --- a/instrumentation/README.llvm.md +++ b/instrumentation/README.llvm.md @@ -125,7 +125,7 @@ For splitting memcmp, strncmp, etc., see Then there are different ways of instrumenting the target: -1. An better instrumentation strategy uses LTO and link time instrumentation. +1. A better instrumentation strategy uses LTO and link time instrumentation. Note that not all targets can compile in this mode, however, if it works it is the best option you can use. To go with this option, use afl-clang-lto/afl-clang-lto++. See [README.lto.md](README.lto.md). 
diff --git a/qemu_mode/libcompcov/README.md b/qemu_mode/libcompcov/README.md index 50f0d802..bb010d8f 100644 --- a/qemu_mode/libcompcov/README.md +++ b/qemu_mode/libcompcov/README.md @@ -31,7 +31,7 @@ The AFL_COMPCOV_LEVEL tells to QEMU and libcompcov how to log comaprisons. Level 1 logs just comparison with immediates / read-only memory and level 2 logs all the comparisons. -The library make use of https://github.com/ouadev/proc_maps_parser and so it is +The library makes use of https://github.com/ouadev/proc_maps_parser and so it is Linux specific. However, this is not a strict dependency, other UNIX operating systems can be supported by replacing the code related to the /proc/self/maps parsing. \ No newline at end of file diff --git a/utils/optimin/README.md b/utils/optimin/README.md index 507cb305..340022b8 100644 --- a/utils/optimin/README.md +++ b/utils/optimin/README.md @@ -77,7 +77,7 @@ For more details, see the paper [Seed Selection for Successful Fuzzing](https://dl.acm.org/doi/10.1145/3460319.3464795). If you use OptiMin in your research, please cite this paper. -Bibtex: +BibTeX: ```bibtex @inproceedings{Herrera:2021:FuzzSeedSelection, -- cgit 1.4.1 From ed808fe92fde997bcf00390ba53d3236e9f2733d Mon Sep 17 00:00:00 2001 From: Kiprey Date: Sat, 4 Dec 2021 23:49:35 +0800 Subject: Fix memory leak in libprotobuf-mutator-example --- .../libprotobuf-mutator-example/lpm_aflpp_custom_mutator_input.cc | 8 +++++--- .../libprotobuf-mutator-example/lpm_aflpp_custom_mutator_input.h | 2 ++ 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/libprotobuf-mutator-example/lpm_aflpp_custom_mutator_input.cc b/custom_mutators/libprotobuf-mutator-example/lpm_aflpp_custom_mutator_input.cc index e0273849..ecbfdd1c 100644 --- a/custom_mutators/libprotobuf-mutator-example/lpm_aflpp_custom_mutator_input.cc +++ b/custom_mutators/libprotobuf-mutator-example/lpm_aflpp_custom_mutator_input.cc @@ -99,10 +99,12 @@ extern "C" size_t afl_custom_fuzz(MyMutator *mutator, // return value from afl_c std::string s = ProtoToData(*p); // Copy to a new buffer ( mutated_out ) size_t mutated_size = s.size() <= max_size ? 
s.size() : max_size; // check if raw data's size is larger than max_size - uint8_t *mutated_out = new uint8_t[mutated_size+1]; - memcpy(mutated_out, s.c_str(), mutated_size); // copy the mutated data + + delete mutator->mutated_out; + mutator->mutated_out = new uint8_t[mutated_size+1]; + memcpy(mutator->mutated_out, s.c_str(), mutated_size); // copy the mutated data // Assign the mutated data and return mutated_size - *out_buf = mutated_out; + *out_buf = mutator->mutated_out; return mutated_size; } diff --git a/custom_mutators/libprotobuf-mutator-example/lpm_aflpp_custom_mutator_input.h b/custom_mutators/libprotobuf-mutator-example/lpm_aflpp_custom_mutator_input.h index ebd3ca65..0f5484ca 100644 --- a/custom_mutators/libprotobuf-mutator-example/lpm_aflpp_custom_mutator_input.h +++ b/custom_mutators/libprotobuf-mutator-example/lpm_aflpp_custom_mutator_input.h @@ -2,4 +2,6 @@ #include "test.pb.h" class MyMutator : public protobuf_mutator::Mutator { +public: + uint8_t *mutated_out = nullptr; }; -- cgit 1.4.1 From 3506077fd6f250f3c080b58ea3bae117c3b122da Mon Sep 17 00:00:00 2001 From: llzmb <46303940+llzmb@users.noreply.github.com> Date: Sat, 4 Dec 2021 22:28:05 +0100 Subject: Add missing blank lines and remove double blank lines --- custom_mutators/README.md | 1 + custom_mutators/gramatron/README.md | 1 + custom_mutators/libfuzzer/README.md | 4 +++- docs/custom_mutators.md | 8 +++++++- docs/fuzzing_binary-only_targets.md | 3 ++- docs/fuzzing_in_depth.md | 1 - frida_mode/README.md | 1 + unicorn_mode/README.md | 1 - unicorn_mode/samples/c/COMPILE.md | 1 + unicorn_mode/samples/speedtest/README.md | 13 ++++++++++--- utils/afl_untracer/README.md | 8 +++++++- utils/autodict_ql/readme.md | 17 ++++++++++------- utils/qbdi_mode/README.md | 8 ++++---- utils/qemu_persistent_hook/README.md | 2 +- 14 files changed, 48 insertions(+), 21 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/README.md b/custom_mutators/README.md index fa877f34..0289e150 100644 --- a/custom_mutators/README.md +++ b/custom_mutators/README.md @@ -15,6 +15,7 @@ In `./rust`, you will find rust bindings, including a simple example in `./rust/ If you use git to clone AFL++, then the following will incorporate our excellent grammar custom mutator: + ```sh git submodule update --init ``` diff --git a/custom_mutators/gramatron/README.md b/custom_mutators/gramatron/README.md index 2ed014cd..5e10f97b 100644 --- a/custom_mutators/gramatron/README.md +++ b/custom_mutators/gramatron/README.md @@ -34,6 +34,7 @@ afl-fuzz -i in -o out -- ./target E.g., ./preprocess/prep_automaton.sh ~/grammars/ruby/source.json PROGRAM ``` + - If the grammar has no self-embedding rules then you do not need to pass the stack limit parameter. However, if it does have self-embedding rules then you need to pass the stack limit parameter. We recommend starting with `5` and diff --git a/custom_mutators/libfuzzer/README.md b/custom_mutators/libfuzzer/README.md index fb3025f2..cb4773b7 100644 --- a/custom_mutators/libfuzzer/README.md +++ b/custom_mutators/libfuzzer/README.md @@ -11,9 +11,11 @@ Note that this is currently a simple implementation and it is missing two featur * Dictionary support To update the source, all that is needed is that FuzzerDriver.cpp has to receive + ``` #include "libfuzzer.inc" ``` + before the closing namespace bracket. It is also libfuzzer.inc where the configuration of the libfuzzer mutations @@ -21,4 +23,4 @@ are done. 
> Original repository: https://github.com/llvm/llvm-project > Path: compiler-rt/lib/fuzzer/*.{h|cpp} -> Source commit: df3e903655e2499968fc7af64fb5fa52b2ee79bb +> Source commit: df3e903655e2499968fc7af64fb5fa52b2ee79bb \ No newline at end of file diff --git a/docs/custom_mutators.md b/docs/custom_mutators.md index 0e94ab10..2f632e1f 100644 --- a/docs/custom_mutators.md +++ b/docs/custom_mutators.md @@ -23,6 +23,7 @@ The custom mutator is passed to `afl-fuzz` via the `AFL_CUSTOM_MUTATOR_LIBRARY` or `AFL_PYTHON_MODULE` environment variable, and must export a fuzz function. Now AFL++ also supports multiple custom mutators which can be specified in the same `AFL_CUSTOM_MUTATOR_LIBRARY` environment variable like this. + ```bash export AFL_CUSTOM_MUTATOR_LIBRARY="full/path/to/mutator_first.so;full/path/to/mutator_second.so" ``` @@ -38,6 +39,7 @@ performed with the custom mutator. ## 2) APIs C/C++: + ```c void *afl_custom_init(afl_state_t *afl, unsigned int seed); unsigned int afl_custom_fuzz_count(void *data, const unsigned char *buf, size_t buf_size); @@ -56,6 +58,7 @@ void afl_custom_deinit(void *data); ``` Python: + ```python def init(seed): pass @@ -233,7 +236,6 @@ Optionally, the following environment variables are supported: combined with a custom trimming routine (see below) because trimming can cause the same test breakage like havoc and splice. - - `AFL_PYTHON_ONLY` Deprecated and removed, use `AFL_CUSTOM_MUTATOR_ONLY` instead. @@ -268,9 +270,11 @@ In case your setup is different, set the necessary variables like this: ### Custom Mutator Preparation For C/C++ mutators, the source code must be compiled as a shared object: + ```bash gcc -shared -Wall -O3 example.c -o example.so ``` + Note that if you specify multiple custom mutators, the corresponding functions will be called in the order in which they are specified. e.g. first `post_process` function of `example_first.so` will be called and then that of @@ -279,12 +283,14 @@ will be called in the order in which they are specified. e.g. first ### Run C/C++ + ```bash export AFL_CUSTOM_MUTATOR_LIBRARY="/full/path/to/example_first.so;/full/path/to/example_second.so" afl-fuzz /path/to/program ``` Python + ```bash export PYTHONPATH=`dirname /full/path/to/example.py` export AFL_PYTHON_MODULE=example diff --git a/docs/fuzzing_binary-only_targets.md b/docs/fuzzing_binary-only_targets.md index 15155111..eaed3a91 100644 --- a/docs/fuzzing_binary-only_targets.md +++ b/docs/fuzzing_binary-only_targets.md @@ -21,6 +21,7 @@ If your target is a library, then use FRIDA mode. If your target is non-linux, then use unicorn_mode. ## Fuzzing binary-only targets with AFL++ + ### QEMU mode QEMU mode is the "native" solution to the program. It is available in the @@ -175,6 +176,7 @@ An alternative solution are binary rewriters. They are faster then the solutions native to AFL++ but don't always work. ### ZAFL + ZAFL is a static rewriting platform supporting x86-64 C/C++, stripped/unstripped, and PIE/non-PIE binaries. Beyond conventional instrumentation, ZAFL's API enables transformation passes (e.g., laf-Intel, @@ -277,7 +279,6 @@ There are many binary-only fuzzing frameworks. Some are great for CTFs but don't work with large binaries, others are very slow but have good path discovery, some are very hard to set-up... 
- * Jackalope: [https://github.com/googleprojectzero/Jackalope](https://github.com/googleprojectzero/Jackalope) * Manticore: diff --git a/docs/fuzzing_in_depth.md b/docs/fuzzing_in_depth.md index 2e030e7b..4a1ddf45 100644 --- a/docs/fuzzing_in_depth.md +++ b/docs/fuzzing_in_depth.md @@ -808,7 +808,6 @@ then color-codes the input based on which sections appear to be critical, and which are not; while not bulletproof, it can often offer quick insights into complex file formats. - ## 5. CI fuzzing Some notes on CI fuzzing - this fuzzing is different to normal fuzzing campaigns diff --git a/frida_mode/README.md b/frida_mode/README.md index e88bda8d..08f6b891 100644 --- a/frida_mode/README.md +++ b/frida_mode/README.md @@ -141,6 +141,7 @@ instances run CMPLOG mode and instrumentation of the binary is less frequent (only on CMP, SUB and CALL instructions) performance is not quite so critical. ## Advanced configuration options + * `AFL_FRIDA_DRIVER_NO_HOOK` - See `AFL_QEMU_DRIVER_NO_HOOK`. When using the QEMU driver to provide a `main` loop for a user provided `LLVMFuzzerTestOneInput`, this option configures the driver to read input from diff --git a/unicorn_mode/README.md b/unicorn_mode/README.md index 4c95e8f3..ee4a7b22 100644 --- a/unicorn_mode/README.md +++ b/unicorn_mode/README.md @@ -83,7 +83,6 @@ uses slightly older concepts, but describes the general ideas, at: [https://medium.com/@njvoss299/afl-unicorn-fuzzing-arbitrary-binary-code-563ca28936bf](https://medium.com/@njvoss299/afl-unicorn-fuzzing-arbitrary-binary-code-563ca28936bf) - The ['helper_scripts'](./helper_scripts) directory also contains several helper scripts that allow you to dump context from a running process, load it, and hook heap allocations. For details on how to use this check out the follow-up blog post to the one linked above. diff --git a/unicorn_mode/samples/c/COMPILE.md b/unicorn_mode/samples/c/COMPILE.md index 4e3cf568..e5265071 100644 --- a/unicorn_mode/samples/c/COMPILE.md +++ b/unicorn_mode/samples/c/COMPILE.md @@ -6,6 +6,7 @@ This shows a simple harness for unicornafl in C The target can be built using the `make` command. Just make sure you have built unicorn support first: + ```bash cd /path/to/afl/unicorn_mode ./build_unicorn_support.sh diff --git a/unicorn_mode/samples/speedtest/README.md b/unicorn_mode/samples/speedtest/README.md index 496d75cd..bd5ba8d0 100644 --- a/unicorn_mode/samples/speedtest/README.md +++ b/unicorn_mode/samples/speedtest/README.md @@ -35,7 +35,6 @@ cd python TODO: add results here. - ## Compiling speedtest_target.c You shouldn't need to compile simple_target.c since a X86_64 binary version is @@ -46,20 +45,28 @@ The pre-built binary (simple_target_x86_64.bin) was built using -g -O0 in gcc. Then load the binary and execute the main function directly. -## Addresses for the harness: +## Addresses for the harness + To find the address (in hex) of main, run: + ```bash objdump -M intel -D target | grep '
:' | cut -d" " -f1 ``` + To find all call sites to magicfn, run: + ```bash objdump -M intel -D target | grep '$' | cut -d":" -f1 ``` + For malloc callsites: + ```bash objdump -M intel -D target | grep '$' | cut -d":" -f1 ``` + And free callsites: + ```bash objdump -M intel -D target | grep '$' | cut -d":" -f1 -``` +``` \ No newline at end of file diff --git a/utils/afl_untracer/README.md b/utils/afl_untracer/README.md index 3fff5f83..cd87998b 100644 --- a/utils/afl_untracer/README.md +++ b/utils/afl_untracer/README.md @@ -28,28 +28,34 @@ To generate the `patches.txt` file for your target library use the The patches.txt file has to be pointed to by `AFL_UNTRACER_FILE`. To easily run the scripts without needing to run the GUI with Ghidra: + ``` /opt/ghidra/support/analyzeHeadless /tmp/ tmp$$ -import libtestinstr.so -postscript ./ghidra_get_patchpoints.java rm -rf /tmp/tmp$$ ``` + The file is created at `~/Desktop/patches.txt` ### Fuzzing Example (after modifying afl-untracer.c to your needs, compiling and creating patches.txt): + ``` LD_LIBRARY_PATH=/path/to/target/library AFL_UNTRACER_FILE=./patches.txt afl-fuzz -i in -o out -- ./afl-untracer ``` + (or even remote via afl-network-proxy). ### Testing and debugging For testing/debugging you can try: + ``` make DEBUG=1 AFL_UNTRACER_FILE=./patches.txt AFL_DEBUG=1 gdb ./afl-untracer ``` + and then you can easily set breakpoints to "breakpoint" and "fuzz". # Background @@ -57,4 +63,4 @@ and then you can easily set breakpoints to "breakpoint" and "fuzz". This idea is based on [UnTracer](https://github.com/FoRTE-Research/UnTracer-AFL) and modified by [Trapfuzz](https://github.com/googleprojectzero/p0tools/tree/master/TrapFuzz). This implementation is slower because the traps are not patched out with each -run, but on the other hand gives much better coverage information. +run, but on the other hand gives much better coverage information. \ No newline at end of file diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index 491ec85b..789cd152 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -6,12 +6,12 @@ Tokens are useful when you perform fuzzing on different parsers. The AFL++ `-x` switch enables the usage of dictionaries through your fuzzing campaign. If you are not familiar with Dictionaries in fuzzing, take a look [here](https://github.com/AFLplusplus/AFLplusplus/tree/stable/dictionaries) . - ## Why CodeQL ? -We basically developed this plugin on top of the CodeQL engine because it gives the user scripting features, it's easier and it's independent of the LLVM system. This means that a user can write his CodeQL scripts or modify the current scripts to improve or change the token generation algorithms based on different program analysis concepts. +We basically developed this plugin on top of the CodeQL engine because it gives the user scripting features, it's easier and it's independent of the LLVM system. This means that a user can write his CodeQL scripts or modify the current scripts to improve or change the token generation algorithms based on different program analysis concepts. ## CodeQL scripts + Currently, we pushed some scripts as defaults for Token generation. In addition, we provide every CodeQL script as an standalone script because it's easier to modify or test. 
Currently we provided the following CodeQL scripts : @@ -28,23 +28,26 @@ Currently we provided the following CodeQL scripts : You can write other CodeQL scripts to extract possible effective tokens if you think they can be useful. - ## Usage -Before you proceed to installation make sure that you have the following packages by installing them : +Before you proceed to installation make sure that you have the following packages by installing them: + ```shell sudo apt install build-essential libtool-bin python3-dev python3 automake git vim wget -y ``` + The usage of Autodict-QL is pretty easy. But let's describe it as: 1. First of all, you need to have CodeQL installed on the system. We make this possible with `build-codeql.sh` bash script. This script will install CodeQL completety and will set the required environment variables for your system. -Do the following : +Do the following: + ```shell # chmod +x codeql-build.sh # ./codeql-build.sh # source ~/.bashrc # codeql ``` + Then you should get: ```shell @@ -86,8 +89,8 @@ Commands: - This will create the final `tokens` dir for you and you are done, then pass the tokens path to AFL++'s `-x` flag. 5. Done! - ## More on dictionaries and tokens + Core developer of the AFL++ project Marc Heuse also developed a similar tool named `dict2file` which is a LLVM pass which can automatically extract useful tokens, in addition with LTO instrumentation mode, this dict2file is automatically generates token extraction. `Autodict-QL` plugin gives you scripting capability and you can do whatever you want to extract from the Codebase and it's up to you. In addition it's independent from LLVM system. On the other hand, you can also use Google dictionaries which have been made public in May 2020, but the problem of using Google dictionaries is that they are limited to specific file formats and specifications. For example, for testing binutils and ELF file format or AVI in FFMPEG, there are no pre-built dictionaries, so it is highly recommended to use `Autodict-QL` or `Dict2File` features to automatically generate dictionaries based on the target. @@ -97,4 +100,4 @@ In overall, fuzzing with dictionaries and well-generated tokens will give better There are 2 important points to remember : - If you combine `Autodict-QL` with AFL++ cmplog, you will get much better code coverage and hence better chances to discover new bugs. -- Do not forget to set `AFL_MAX_DET_EXTRAS` at least to the number of generated dictionaries. If you forget to set this environment variable, then AFL++ uses just 200 tokens and use the rest of them only probabilistically. So this will guarantee that your tokens will be used by AFL++. +- Do not forget to set `AFL_MAX_DET_EXTRAS` at least to the number of generated dictionaries. If you forget to set this environment variable, then AFL++ uses just 200 tokens and use the rest of them only probabilistically. So this will guarantee that your tokens will be used by AFL++. \ No newline at end of file diff --git a/utils/qbdi_mode/README.md b/utils/qbdi_mode/README.md index 02dd3c74..08558017 100755 --- a/utils/qbdi_mode/README.md +++ b/utils/qbdi_mode/README.md @@ -9,7 +9,6 @@ Try FRIDA mode or fpicker [https://github.com/ttdennis/fpicker/](https://github. The code in ./qbdi_mode allows you to build a standalone feature that using the QBDI framework to fuzz android native library. 
- ## 2) Build First download the Android NDK @@ -53,6 +52,7 @@ https://qbdi.quarkslab.com/ ``` For Android x86_64 + ``` https://github.com/QBDI/QBDI/releases/download/v0.7.0/QBDI-0.7.0-android-X86_64.tar.gz ``` @@ -86,7 +86,6 @@ Then run the build.sh this could build the afl-fuzz and also the qbdi template for android x86_64 - ### Example The demo-so.c is an vulnerable library, it has a function for test @@ -160,6 +159,7 @@ QBDI_NOINLINE int fuzz_func() { ``` Just compile it + ``` ./build.sh x86_64 ``` @@ -176,6 +176,7 @@ adb push ../../android-standalone-toolchain-x86_64/sysroot/usr/lib/x86_64-linux- ``` In android adb shell, run the loader to test if it runs + ``` cd /data/local/tmp export LD_LIBRARY_PATH=/data/local/tmp @@ -202,5 +203,4 @@ Now run `afl-fuzz` to fuzz the demo library ./afl-fuzz -i in -o out -- ./loader /data/local/tmp/libdemo.so @@ ``` -![screen1](assets/screen1.png) - +![screen1](assets/screen1.png) \ No newline at end of file diff --git a/utils/qemu_persistent_hook/README.md b/utils/qemu_persistent_hook/README.md index 3f908c22..3bbaef6b 100644 --- a/utils/qemu_persistent_hook/README.md +++ b/utils/qemu_persistent_hook/README.md @@ -16,4 +16,4 @@ mkdir in echo 0000 > in/in ../../afl-fuzz -Q -i in -o out -- ./test -``` +``` \ No newline at end of file -- cgit 1.4.1 From bb506de0b809f97a4221ee1b6e040dcb5f9ca56a Mon Sep 17 00:00:00 2001 From: llzmb <46303940+llzmb@users.noreply.github.com> Date: Sun, 5 Dec 2021 19:04:45 +0100 Subject: Fix various missed issues - 1st run --- custom_mutators/gramatron/README.md | 43 ++++----- dictionaries/README.md | 20 ++-- docs/afl-fuzz_approach.md | 11 ++- docs/custom_mutators.md | 2 +- docs/env_variables.md | 2 +- docs/features.md | 4 +- utils/autodict_ql/readme.md | 180 ++++++++++++++++++++++-------------- utils/libdislocator/README.md | 29 +++--- 8 files changed, 168 insertions(+), 123 deletions(-) (limited to 'custom_mutators') diff --git a/custom_mutators/gramatron/README.md b/custom_mutators/gramatron/README.md index 5e10f97b..8aa0cc44 100644 --- a/custom_mutators/gramatron/README.md +++ b/custom_mutators/gramatron/README.md @@ -1,17 +1,17 @@ # GramaTron GramaTron is a coverage-guided fuzzer that uses grammar automatons to perform -grammar-aware fuzzing. Technical details about our framework are available -in the [ISSTA'21 paper](https://nebelwelt.net/files/21ISSTA.pdf). -The artifact to reproduce the experiments presented in the paper are present -in `artifact/`. Instructions to run a sample campaign and incorporate new -grammars is presented below: +grammar-aware fuzzing. Technical details about our framework are available in +the [ISSTA'21 paper](https://nebelwelt.net/files/21ISSTA.pdf). The artifact to +reproduce the experiments presented in the paper are present in `artifact/`. +Instructions to run a sample campaign and incorporate new grammars is presented +below: -# Compiling +## Compiling Execute `./build_gramatron_mutator.sh`. -# Running +## Running You have to set the grammar file to use with `GRAMATRON_AUTOMATION`: @@ -23,26 +23,27 @@ export GRAMATRON_AUTOMATION=grammars/ruby/source_automata.json afl-fuzz -i in -o out -- ./target ``` -# Adding and testing a new grammar +## Adding and testing a new grammar -- Specify in a JSON format for CFG. Examples are correspond `source.json` files +- Specify in a JSON format for CFG. Examples are correspond `source.json` files. - Run the automaton generation script (in `src/gramfuzz-mutator/preprocess`) which will place the generated automaton in the same folder. 
-``` -./preprocess/prep_automaton.sh [stack_limit] + ``` + ./preprocess/prep_automaton.sh [stack_limit] -E.g., ./preprocess/prep_automaton.sh ~/grammars/ruby/source.json PROGRAM -``` + E.g., ./preprocess/prep_automaton.sh ~/grammars/ruby/source.json PROGRAM + ``` -- If the grammar has no self-embedding rules then you do not need to pass the - stack limit parameter. However, if it does have self-embedding rules then you +- If the grammar has no self-embedding rules, then you do not need to pass the + stack limit parameter. However, if it does have self-embedding rules, then you need to pass the stack limit parameter. We recommend starting with `5` and - then increasing it if you need more complexity -- To sanity-check that the automaton is generating inputs as expected you can use the `test` binary housed in `src/gramfuzz-mutator` + then increasing it if you need more complexity. +- To sanity-check that the automaton is generating inputs as expected, you can + use the `test` binary housed in `src/gramfuzz-mutator`. -``` -./test SanityCheck + ``` + ./test SanityCheck -E.g., ./test SanityCheck ~/grammars/ruby/source_automata.json -``` \ No newline at end of file + E.g., ./test SanityCheck ~/grammars/ruby/source_automata.json + ``` \ No newline at end of file diff --git a/dictionaries/README.md b/dictionaries/README.md index f3b8a9e5..0b3b4d90 100644 --- a/dictionaries/README.md +++ b/dictionaries/README.md @@ -2,17 +2,17 @@ (See [../README.md](../README.md) for the general instruction manual.) -This subdirectory contains a set of dictionaries that can be used in -conjunction with the -x option to allow the fuzzer to effortlessly explore the -grammar of some of the more verbose data formats or languages. +This subdirectory contains a set of dictionaries that can be used in conjunction +with the -x option to allow the fuzzer to effortlessly explore the grammar of +some of the more verbose data formats or languages. -These sets were done by Michal Zalewski, various contributors, and imported -from oss-fuzz, go-fuzz and libfuzzer. +These sets were done by Michal Zalewski, various contributors, and imported from +oss-fuzz, go-fuzz and libfuzzer. Custom dictionaries can be added at will. They should consist of a reasonably-sized set of rudimentary syntax units that the fuzzer will then try -to clobber together in various ways. Snippets between 2 and 16 bytes are -usually the sweet spot. +to clobber together in various ways. Snippets between 2 and 16 bytes are usually +the sweet spot. Custom dictionaries can be created in two ways: @@ -34,9 +34,9 @@ In the file mode, every name field can be optionally followed by @, e.g.: `keyword_foo@1 = "foo"` Such entries will be loaded only if the requested dictionary level is equal or -higher than this number. The default level is zero; a higher value can be set -by appending @ to the dictionary file name, like so: +higher than this number. The default level is zero; a higher value can be set by +appending @ to the dictionary file name, like so: `-x path/to/dictionary.dct@2` -Good examples of dictionaries can be found in xml.dict and png.dict. +Good examples of dictionaries can be found in xml.dict and png.dict. 
\ No newline at end of file diff --git a/docs/afl-fuzz_approach.md b/docs/afl-fuzz_approach.md index 01888935..2da61cc4 100644 --- a/docs/afl-fuzz_approach.md +++ b/docs/afl-fuzz_approach.md @@ -468,7 +468,8 @@ cd ../../ sudo make install ``` -To learn more about remote monitoring and metrics visualization with StatsD, see [rpc_statsd.md](rpc_statsd.md). +To learn more about remote monitoring and metrics visualization with StatsD, see +[rpc_statsd.md](rpc_statsd.md). ### Addendum: status and plot files @@ -524,9 +525,9 @@ into each of them or deploy scripts to read the fuzzer statistics. Using `AFL_STATSD` (and the other related environment variables `AFL_STATSD_HOST`, `AFL_STATSD_PORT`, `AFL_STATSD_TAGS_FLAVOR`) you can automatically send metrics to your favorite StatsD server. Depending on your StatsD server, you will be -able to monitor, trigger alerts, or perform actions based on these metrics (e.g: -alert on slow exec/s for a new build, threshold of crashes, time since last -crash > X, etc.). +able to monitor, trigger alerts, or perform actions based on these metrics +(e.g.: alert on slow exec/s for a new build, threshold of crashes, time since +last crash > X, etc.). The selected metrics are a subset of all the metrics found in the status and in the plot file. The list is the following: `cycle_done`, `cycles_wo_finds`, @@ -537,6 +538,6 @@ the plot file. The list is the following: `cycle_done`, `cycles_wo_finds`, definitions can be found in the addendum above. When using multiple fuzzer instances with StatsD, it is *strongly* recommended -to setup the flavor (AFL_STATSD_TAGS_FLAVOR) to match your StatsD server. This +to setup the flavor (`AFL_STATSD_TAGS_FLAVOR`) to match your StatsD server. This will allow you to see individual fuzzer performance, detect bad ones, see the progress of each strategy... \ No newline at end of file diff --git a/docs/custom_mutators.md b/docs/custom_mutators.md index 2f632e1f..7b4e0516 100644 --- a/docs/custom_mutators.md +++ b/docs/custom_mutators.md @@ -276,7 +276,7 @@ gcc -shared -Wall -O3 example.c -o example.so ``` Note that if you specify multiple custom mutators, the corresponding functions -will be called in the order in which they are specified. e.g. first +will be called in the order in which they are specified. E.g., the first `post_process` function of `example_first.so` will be called and then that of `example_second.so`. diff --git a/docs/env_variables.md b/docs/env_variables.md index 0952b960..c45f4ab9 100644 --- a/docs/env_variables.md +++ b/docs/env_variables.md @@ -585,7 +585,7 @@ The FRIDA wrapper used to instrument binary-only code supports many of the same options as `afl-qemu-trace`, but also has a number of additional advanced options. These are listed in brief below (see [frida_mode/README.md](../frida_mode/README.md) for more details). These -settings are provided for compatibiltiy with QEMU mode, the preferred way to +settings are provided for compatibility with QEMU mode, the preferred way to configure FRIDA mode is through its [scripting](../frida_mode/Scripting.md) support. 
diff --git a/docs/features.md b/docs/features.md index 06b1bcbe..431d9eb1 100644 --- a/docs/features.md +++ b/docs/features.md @@ -1,7 +1,7 @@ # Important features of AFL++ AFL++ supports llvm from 3.8 up to version 12, very fast binary fuzzing with -QEMU 5.1 with laf-intel and redqueen, frida mode, unicorn mode, gcc plugin, full +QEMU 5.1 with laf-intel and redqueen, FRIDA mode, unicorn mode, gcc plugin, full *BSD, Mac OS, Solaris and Android support and much, much, much more. | Feature/Instrumentation | afl-gcc | llvm | gcc_plugin | FRIDA mode(9) | QEMU mode(10) |unicorn_mode(10) |coresight_mode(11)| @@ -30,7 +30,7 @@ QEMU 5.1 with laf-intel and redqueen, frida mode, unicorn mode, gcc plugin, full versions that write to a file to use with afl-fuzz' `-x` 8. the snapshot LKM is currently unmaintained due to too many kernel changes coming too fast :-( -9. frida mode is supported on Linux and MacOS for Intel and ARM +9. FRIDA mode is supported on Linux and MacOS for Intel and ARM 10. QEMU/Unicorn is only supported on Linux 11. Coresight mode is only available on AARCH64 Linux with a CPU with Coresight extension diff --git a/utils/autodict_ql/readme.md b/utils/autodict_ql/readme.md index 789cd152..f61026b7 100644 --- a/utils/autodict_ql/readme.md +++ b/utils/autodict_ql/readme.md @@ -2,21 +2,35 @@ ## What is this? -`Autodict-QL` is a plugin system that enables fast generation of Tokens/Dictionaries in a handy way that can be manipulated by the user (unlike The LLVM Passes that are hard to modify). This means that autodict-ql is a scriptable feature which basically uses CodeQL (a powerful semantic code analysis engine) to fetch information from a code base. +`Autodict-QL` is a plugin system that enables fast generation of +Tokens/Dictionaries in a handy way that can be manipulated by the user (unlike +The LLVM Passes that are hard to modify). This means that autodict-ql is a +scriptable feature which basically uses CodeQL (a powerful semantic code +analysis engine) to fetch information from a code base. -Tokens are useful when you perform fuzzing on different parsers. The AFL++ `-x` switch enables the usage of dictionaries through your fuzzing campaign. If you are not familiar with Dictionaries in fuzzing, take a look [here](https://github.com/AFLplusplus/AFLplusplus/tree/stable/dictionaries) . +Tokens are useful when you perform fuzzing on different parsers. The AFL++ `-x` +switch enables the usage of dictionaries through your fuzzing campaign. If you +are not familiar with Dictionaries in fuzzing, take a look +[here](https://github.com/AFLplusplus/AFLplusplus/tree/stable/dictionaries). -## Why CodeQL ? +## Why CodeQL? -We basically developed this plugin on top of the CodeQL engine because it gives the user scripting features, it's easier and it's independent of the LLVM system. This means that a user can write his CodeQL scripts or modify the current scripts to improve or change the token generation algorithms based on different program analysis concepts. +We basically developed this plugin on top of the CodeQL engine because it gives +the user scripting features, it's easier and it's independent of the LLVM +system. This means that a user can write his CodeQL scripts or modify the +current scripts to improve or change the token generation algorithms based on +different program analysis concepts. ## CodeQL scripts -Currently, we pushed some scripts as defaults for Token generation. In addition, we provide every CodeQL script as an standalone script because it's easier to modify or test. 
+Currently, we pushed some scripts as defaults for Token generation. In addition, +we provide every CodeQL script as a standalone script because it's easier to +modify or test. -Currently we provided the following CodeQL scripts : +Currently, we provide the following CodeQL scripts: -`strcmp-str.ql` is used to extract strings that are related to the `strcmp` function. +`strcmp-str.ql` is used to extract strings that are related to the `strcmp` +function. `strncmp-str.ql` is used to extract the strings from the `strncmp` function. @@ -24,13 +38,18 @@ Currently we provided the following CodeQL scripts : `litool.ql` extracts Magic numbers as Hexadecimal format. -`strtool.ql` extracts strings with uses of a regex and dataflow concept to capture the string comparison functions. If `strcmp` is rewritten in a project as Mystrcmp or something like strmycmp, then this script can catch the arguments and these are valuable tokens. +`strtool.ql` extracts strings using a regex and dataflow concepts to +capture the string comparison functions. If `strcmp` is rewritten in a project +as Mystrcmp or something like strmycmp, then this script can catch the arguments +and these are valuable tokens. -You can write other CodeQL scripts to extract possible effective tokens if you think they can be useful. +You can write other CodeQL scripts to extract possible effective tokens if you +think they can be useful. ## Usage -Before you proceed to installation make sure that you have the following packages by installing them: +Before you proceed to the installation, make sure that you have the following +packages installed: ```shell sudo apt install build-essential libtool-bin python3-dev python3 automake git vim wget -y ``` @@ -38,66 +57,91 @@ sudo apt install build-essential libtool-bin python3-dev python3 automake git vi The usage of Autodict-QL is pretty easy. But let's describe it as: -1. First of all, you need to have CodeQL installed on the system. We make this possible with `build-codeql.sh` bash script. This script will install CodeQL completety and will set the required environment variables for your system. -Do the following: - -```shell -# chmod +x codeql-build.sh -# ./codeql-build.sh -# source ~/.bashrc -# codeql -``` - -Then you should get: - -```shell -Usage: codeql ... -Create and query CodeQL databases, or work with the QL language. - -GitHub makes this program freely available for the analysis of open-source software and certain other uses, but it is -not itself free software. Type codeql --license to see the license terms. - - --license Show the license terms for the CodeQL toolchain. -Common options: - -h, --help Show this help text. - -v, --verbose Incrementally increase the number of progress messages printed. - -q, --quiet Incrementally decrease the number of progress messages printed. -Some advanced options have been hidden; try --help -v for a fuller view. -Commands: - query Compile and execute QL code. - bqrs Get information from .bqrs files. - database Create, analyze and process CodeQL databases. - dataset [Plumbing] Work with raw QL datasets. - test Execute QL unit tests. - resolve [Deep plumbing] Helper commands to resolve disk locations etc. - execute [Deep plumbing] Low-level commands that need special JVM options. - version Show the version of the CodeQL toolchain. - generate Generate formatted QL documentation. - github Commands useful for interacting with the GitHub API through CodeQL. -``` - -2. 
Compile your project with CodeQL: For using the Autodict-QL plugin, you need to compile the source of the target you want to fuzz with CodeQL. This is not something hard. - - First you need to create a CodeQL database of the project codebase, suppose we want to compile `libxml` with codeql. Go to libxml and issue the following commands: - - `./configure --disable-shared` - - `codeql create database libxml-db --language=cpp --command=make` - - Now you have the CodeQL database of the project :-) -3. The final step is to update the CodeQL database you created in step 2 (Suppose we are in `aflplusplus/utils/autodict_ql/` directory): - - `codeql database upgrade /home/user/libxml/libxml-db` +1. First of all, you need to have CodeQL installed on the system. We make this + possible with the `build-codeql.sh` bash script. This script will install CodeQL + completely and will set the required environment variables for your system. + Do the following: + + ```shell + # chmod +x codeql-build.sh + # ./codeql-build.sh + # source ~/.bashrc + # codeql + ``` + + Then you should get: + + ```shell + Usage: codeql ... + Create and query CodeQL databases, or work with the QL language. + + GitHub makes this program freely available for the analysis of open-source software and certain other uses, but it is + not itself free software. Type codeql --license to see the license terms. + + --license Show the license terms for the CodeQL toolchain. + Common options: + -h, --help Show this help text. + -v, --verbose Incrementally increase the number of progress messages printed. + -q, --quiet Incrementally decrease the number of progress messages printed. + Some advanced options have been hidden; try --help -v for a fuller view. + Commands: + query Compile and execute QL code. + bqrs Get information from .bqrs files. + database Create, analyze and process CodeQL databases. + dataset [Plumbing] Work with raw QL datasets. + test Execute QL unit tests. + resolve [Deep plumbing] Helper commands to resolve disk locations etc. + execute [Deep plumbing] Low-level commands that need special JVM options. + version Show the version of the CodeQL toolchain. + generate Generate formatted QL documentation. + github Commands useful for interacting with the GitHub API through CodeQL. + ``` + +2. Compile your project with CodeQL: For using the Autodict-QL plugin, you need + to compile the source of the target you want to fuzz with CodeQL. This is not + difficult. + - First, you need to create a CodeQL database of the project codebase; suppose + we want to compile `libxml` with CodeQL. Go to libxml and issue the + following commands: + - `./configure --disable-shared` + - `codeql create database libxml-db --language=cpp --command=make` + - Now you have the CodeQL database of the project :-) +3. The final step is to update the CodeQL database you created in step 2 + (Suppose we are in `aflplusplus/utils/autodict_ql/` directory): + - `codeql database upgrade /home/user/libxml/libxml-db` 4. Everything is set! Now you should issue the following to get the tokens: - - `python3 autodict-ql.py [CURRECT_DIR] [CODEQL_DATABASE_PATH] [TOKEN_PATH]` - - example : `python3 /home/user/AFLplusplus/utils/autodict_ql/autodict-ql.py $PWD /home/user/libxml/libxml-db tokens` - - This will create the final `tokens` dir for you and you are done, then pass the tokens path to AFL++'s `-x` flag. 
+ - `python3 autodict-ql.py [CURRENT_DIR] [CODEQL_DATABASE_PATH] [TOKEN_PATH]` + - example: `python3 /home/user/AFLplusplus/utils/autodict_ql/autodict-ql.py + $PWD /home/user/libxml/libxml-db tokens` + - This will create the final `tokens` dir for you and you are done; then + pass the tokens path to AFL++'s `-x` flag. 5. Done! ## More on dictionaries and tokens -Core developer of the AFL++ project Marc Heuse also developed a similar tool named `dict2file` which is a LLVM pass which can automatically extract useful tokens, in addition with LTO instrumentation mode, this dict2file is automatically generates token extraction. `Autodict-QL` plugin gives you scripting capability and you can do whatever you want to extract from the Codebase and it's up to you. In addition it's independent from LLVM system. -On the other hand, you can also use Google dictionaries which have been made public in May 2020, but the problem of using Google dictionaries is that they are limited to specific file formats and specifications. For example, for testing binutils and ELF file format or AVI in FFMPEG, there are no pre-built dictionaries, so it is highly recommended to use `Autodict-QL` or `Dict2File` features to automatically generate dictionaries based on the target. - -I've personally prefered to use `Autodict-QL` or `dict2file` rather than Google dictionaries or any other manually generated dictionaries as `Autodict-QL` and `dict2file` are working based on the target. -In overall, fuzzing with dictionaries and well-generated tokens will give better results. - -There are 2 important points to remember : - -- If you combine `Autodict-QL` with AFL++ cmplog, you will get much better code coverage and hence better chances to discover new bugs. -- Do not forget to set `AFL_MAX_DET_EXTRAS` at least to the number of generated dictionaries. If you forget to set this environment variable, then AFL++ uses just 200 tokens and use the rest of them only probabilistically. So this will guarantee that your tokens will be used by AFL++. \ No newline at end of file +Core developer of the AFL++ project Marc Heuse also developed a similar tool +named `dict2file`, which is an LLVM pass that can automatically extract useful +tokens; in addition, with LTO instrumentation mode, this token extraction +happens automatically. The `Autodict-QL` plugin gives you scripting capability, +so what you extract from the codebase is up to you, and it is independent of the +LLVM system. On the other hand, you can also use Google dictionaries which have +been made public in May 2020, but the problem of using Google dictionaries is +that they are limited to specific file formats and specifications. For example, +for testing binutils and ELF file format or AVI in FFMPEG, there are no +pre-built dictionaries, so it is highly recommended to use `Autodict-QL` or +`Dict2File` features to automatically generate dictionaries based on the target. + +I've personally preferred to use `Autodict-QL` or `dict2file` rather than Google +dictionaries or any other manually generated dictionaries as `Autodict-QL` and +`dict2file` work based on the target. Overall, fuzzing with dictionaries and +well-generated tokens will give better results. + +There are 2 important points to remember: + +- If you combine `Autodict-QL` with AFL++ cmplog, you will get much better code + coverage and hence better chances to discover new bugs. 
+- Do not forget to set `AFL_MAX_DET_EXTRAS` at least to the number of generated + dictionaries. If you forget to set this environment variable, then AFL++ uses + just 200 tokens and uses the rest of them only probabilistically. Setting it + guarantees that your tokens will be used by AFL++. \ No newline at end of file diff --git a/utils/libdislocator/README.md b/utils/libdislocator/README.md index 64a5f14c..7150c205 100644 --- a/utils/libdislocator/README.md +++ b/utils/libdislocator/README.md @@ -10,8 +10,8 @@ heap-related security bugs in several ways: subsequent PROT_NONE page, causing most off-by-one reads and writes to immediately segfault, - - It adds a canary immediately below the allocated buffer, to catch writes - to negative offsets (won't catch reads, though), + - It adds a canary immediately below the allocated buffer, to catch writes to + negative offsets (won't catch reads, though), - It sets the memory returned by malloc() to garbage values, improving the odds of crashing when the target accesses uninitialized data, @@ -19,35 +19,34 @@ heap-related security bugs in several ways: - It sets freed memory to PROT_NONE and does not actually reuse it, causing most use-after-free bugs to segfault right away, - - It forces all realloc() calls to return a new address - and sets - PROT_NONE on the original block. This catches use-after-realloc bugs, + - It forces all realloc() calls to return a new address - and sets PROT_NONE + on the original block. This catches use-after-realloc bugs, - - It checks for calloc() overflows and can cause soft or hard failures - of alloc requests past a configurable memory limit (AFL_LD_LIMIT_MB, + - It checks for calloc() overflows and can cause soft or hard failures of + alloc requests past a configurable memory limit (AFL_LD_LIMIT_MB, AFL_LD_HARD_FAIL). - Optionally, in platforms supporting it, huge pages can be used by passing USEHUGEPAGE=1 to make. - - Size alignment to `max_align_t` can be enforced with AFL_ALIGNED_ALLOC=1. - In this case, a tail canary is inserted in the padding bytes at the end - of the allocated zone. This reduce the ability of libdislocator to detect + - Size alignment to `max_align_t` can be enforced with AFL_ALIGNED_ALLOC=1. In + this case, a tail canary is inserted in the padding bytes at the end of the + allocated zone. This reduces the ability of libdislocator to detect off-by-one bugs but also it makes libdislocator compliant with the C standard. Basically, it is inspired by some of the non-default options available for the OpenBSD allocator - see malloc.conf(5) on that platform for reference. It is -also somewhat similar to several other debugging libraries, such as gmalloc -and DUMA - but is simple, plug-and-play, and designed specifically for fuzzing -jobs. +also somewhat similar to several other debugging libraries, such as gmalloc and +DUMA - but is simple, plug-and-play, and designed specifically for fuzzing jobs. Note that it does nothing for stack-based memory handling errors. The -fstack-protector-all setting for GCC / clang, enabled when using AFL_HARDEN, can catch some subset of that. The allocator is slow and memory-intensive (even the tiniest allocation uses up -4 kB of physical memory and 8 kB of virtual mem), making it completely unsuitable -for "production" uses; but it can be faster and more hassle-free than ASAN / MSAN -when fuzzing small, self-contained binaries. 
+4 kB of physical memory and 8 kB of virtual mem), making it completely +unsuitable for "production" uses; but it can be faster and more hassle-free than +ASAN / MSAN when fuzzing small, self-contained binaries. To use this library, run AFL++ like so: -- cgit 1.4.1
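The hunk ends just before the README's usage line, so here is a hedged sketch of the typical libdislocator invocation; the library path is a placeholder and depends on where your build actually placed libdislocator.so.

```shell
# Sketch: preload libdislocator into the fuzzed target via AFL_PRELOAD.
# The .so path below is a placeholder, not a path taken from this patch.
AFL_PRELOAD=/path/to/libdislocator.so \
  afl-fuzz -i input -o output -- ./target @@
```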