diff options
109 files changed, 2821 insertions, 1570 deletions
diff --git a/GNUmakefile b/GNUmakefile index bd206af0..7a1ba88a 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -306,6 +306,7 @@ endif .PHONY: all all: test_x86 test_shm test_python ready $(PROGS) afl-as llvm gcc_plugin test_build all_done + -$(MAKE) -C utils/aflpp_driver .PHONY: llvm llvm: @@ -574,7 +575,7 @@ clean: $(MAKE) -C qemu_mode/libqasan clean -$(MAKE) -C frida_mode clean ifeq "$(IN_REPO)" "1" - test -e qemu_mode/qemuafl/Makefile && $(MAKE) -C qemu_mode/qemuafl clean || true + -test -e qemu_mode/qemuafl/Makefile && $(MAKE) -C qemu_mode/qemuafl clean || true test -e unicorn_mode/unicornafl/Makefile && $(MAKE) -C unicorn_mode/unicornafl clean || true else rm -rf qemu_mode/qemuafl @@ -597,7 +598,6 @@ distrib: all -$(MAKE) -f GNUmakefile.gcc_plugin $(MAKE) -C utils/libdislocator $(MAKE) -C utils/libtokencap - -$(MAKE) -C utils/aflpp_driver $(MAKE) -C utils/afl_network_proxy $(MAKE) -C utils/socket_fuzzing $(MAKE) -C utils/argv_fuzzing @@ -622,7 +622,6 @@ source-only: all -$(MAKE) -f GNUmakefile.gcc_plugin $(MAKE) -C utils/libdislocator $(MAKE) -C utils/libtokencap - -$(MAKE) -C utils/aflpp_driver %.8: % @echo .TH $* 8 $(BUILD_DATE) "afl++" > $@ diff --git a/GNUmakefile.llvm b/GNUmakefile.llvm index 95140cb0..83eb91a9 100644 --- a/GNUmakefile.llvm +++ b/GNUmakefile.llvm @@ -306,7 +306,7 @@ ifeq "$(TEST_MMAP)" "1" endif PROGS_ALWAYS = ./afl-cc ./afl-compiler-rt.o ./afl-compiler-rt-32.o ./afl-compiler-rt-64.o -PROGS = $(PROGS_ALWAYS) ./afl-llvm-pass.so ./SanitizerCoveragePCGUARD.so ./split-compares-pass.so ./split-switches-pass.so ./cmplog-routines-pass.so ./cmplog-instructions-pass.so ./afl-llvm-dict2file.so ./compare-transform-pass.so ./afl-ld-lto ./afl-llvm-lto-instrumentlist.so ./afl-llvm-lto-instrumentation.so ./SanitizerCoverageLTO.so +PROGS = $(PROGS_ALWAYS) ./afl-llvm-pass.so ./SanitizerCoveragePCGUARD.so ./split-compares-pass.so ./split-switches-pass.so ./cmplog-routines-pass.so ./cmplog-instructions-pass.so ./cmplog-switches-pass.so 
./afl-llvm-dict2file.so ./compare-transform-pass.so ./afl-ld-lto ./afl-llvm-lto-instrumentlist.so ./afl-llvm-lto-instrumentation.so ./SanitizerCoverageLTO.so # If prerequisites are not given, warn, do not build anything, and exit with code 0 ifeq "$(LLVMVER)" "" @@ -433,6 +433,9 @@ endif ./cmplog-instructions-pass.so: instrumentation/cmplog-instructions-pass.cc instrumentation/afl-llvm-common.o | test_deps $(CXX) $(CLANG_CPPFL) -shared $< -o $@ $(CLANG_LFL) instrumentation/afl-llvm-common.o +./cmplog-switches-pass.so: instrumentation/cmplog-switches-pass.cc instrumentation/afl-llvm-common.o | test_deps + $(CXX) $(CLANG_CPPFL) -shared $< -o $@ $(CLANG_LFL) instrumentation/afl-llvm-common.o + afl-llvm-dict2file.so: instrumentation/afl-llvm-dict2file.so.cc instrumentation/afl-llvm-common.o | test_deps $(CXX) $(CLANG_CPPFL) -shared $< -o $@ $(CLANG_LFL) instrumentation/afl-llvm-common.o diff --git a/README.md b/README.md index bc5b333c..94a38ab1 100644 --- a/README.md +++ b/README.md @@ -2,9 +2,9 @@ <img align="right" src="https://raw.githubusercontent.com/andreafioraldi/AFLplusplus-website/master/static/logo_256x256.png" alt="AFL++ Logo"> - Release Version: [3.13c](https://github.com/AFLplusplus/AFLplusplus/releases) + Release Version: [3.14c](https://github.com/AFLplusplus/AFLplusplus/releases) - Github Version: 3.14a + Github Version: 3.15a Repository: [https://github.com/AFLplusplus/AFLplusplus](https://github.com/AFLplusplus/AFLplusplus) @@ -31,6 +31,11 @@ With afl++ 3.13-3.20 we introduce frida_mode (-O) to have an alternative for binary-only fuzzing. It is slower than Qemu mode but works on MacOS, Android, iOS etc. 
+With afl++ 3.15 we introduced the following changes from previous behaviours: + * Also -M main mode does not do deterministic fuzzing by default anymore + * afl-cmin and afl-showmap -Ci now descend into subdirectories like + afl-fuzz -i does (but note that afl-cmin.bash does not) + With afl++ 3.14 we introduced the following changes from previous behaviours: * afl-fuzz: deterministic fuzzing is not a default for -M main anymore * afl-cmin/afl-showmap -i now descends into subdirectories (afl-cmin.bash @@ -54,8 +59,8 @@ behaviours and defaults: shared libraries, etc. Additionally QEMU 5.1 supports more CPU targets so this is really worth it. * When instrumenting targets, afl-cc will not supersede optimizations anymore - if any were given. This allows to fuzz targets as same as they are built - for debug or release. + if any were given. This allows to fuzz targets built regularly like those + for debug or release versions. * afl-fuzz: * if neither -M or -S is specified, `-S default` is assumed, so more fuzzers can easily be added later @@ -387,13 +392,62 @@ afl++ performs "never zero" counting in its bitmap. You can read more about this here: * [instrumentation/README.neverzero.md](instrumentation/README.neverzero.md) -#### c) Modify the target +#### c) Sanitizers + +It is possible to use sanitizers when instrumenting targets for fuzzing, +which allows you to find bugs that would not necessarily result in a crash. + +Note that sanitizers have a huge impact on CPU (= fewer executions per second) +and RAM usage. Also you should only run one afl-fuzz instance per sanitizer type. +This is enough because a use-after-free bug will be picked up, e.g. by +ASAN (address sanitizer) anyway when syncing to other fuzzing instances, +so not all fuzzing instances need to be instrumented with ASAN. 
+ +The following sanitizers have built-in support in afl++: + * ASAN = Address SANitizer, finds memory corruption vulnerabilities like + use-after-free, NULL pointer dereference, buffer overruns, etc. + Enabled with `export AFL_USE_ASAN=1` before compiling. + * MSAN = Memory SANitizer, finds read access to uninitialized memory, eg. + a local variable that is defined and read before it is even set. + Enabled with `export AFL_USE_MSAN=1` before compiling. + * UBSAN = Undefined Behaviour SANitizer, finds instances where - by the + C and C++ standards - undefined behaviour happens, e.g. adding two + signed integers together where the result is larger than a signed integer + can hold. + Enabled with `export AFL_USE_UBSAN=1` before compiling. + * CFISAN = Control Flow Integrity SANitizer, finds instances where the + control flow is found to be illegal. Originally this was rather to + prevent return oriented programming exploit chains from functioning, + in fuzzing this is mostly reduced to detecting type confusion + vulnerabilities - which is however one of the most important and dangerous + C++ memory corruption classes! + Enabled with `export AFL_USE_CFISAN=1` before compiling. + * LSAN = Leak SANitizer, finds memory leaks in a program. This is not really + a security issue, but for developers this can be very valuable. + Note that unlike the other sanitizers above this needs + `__AFL_LEAK_CHECK();` added to all areas of the target source code where you + find a leak check necessary! + Enabled with `export AFL_USE_LSAN=1` before compiling. + +It is possible to further modify the behaviour of the sanitizers at run-time +by setting `ASAN_OPTIONS=...`, `LSAN_OPTIONS` etc. - the available parameters +can be looked up in the sanitizer documentation of llvm/clang. +afl-fuzz however requires some specific parameters important for fuzzing to be +set. If you want to set your own, it might bail and report what it is missing. 
+ +Note that some sanitizers cannot be used together, e.g. ASAN and MSAN, and +others often cannot work together because of target weirdness, e.g. ASAN and +CFISAN. You might need to experiment which sanitizers you can combine in a +target (which means more instances can be run without a sanitized target, +which is more effective). + +#### d) Modify the target If the target has features that make fuzzing more difficult, e.g. -checksums, HMAC, etc. then modify the source code so that this is -removed. -This can even be done for operational source code by eliminating -these checks within this specific defines: +checksums, HMAC, etc. then modify the source code so that checks for these +values are removed. +This can even be done safely for source code used in operational products +by eliminating these checks within these AFL specific blocks: ``` #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION @@ -405,7 +459,7 @@ these checks within this specific defines: All afl++ compilers will set this preprocessor definition automatically. -#### d) Instrument the target +#### e) Instrument the target In this step the target source code is compiled so that it can be fuzzed. @@ -420,8 +474,8 @@ Then build the target. (Usually with `make`) 1. sometimes configure and build systems are fickle and do not like stderr output (and think this means a test failure) - which is something - afl++ likes to do to show statistics. It is recommended to disable them via - `export AFL_QUIET=1`. + afl++ likes to do to show statistics. It is recommended to disable afl++ + instrumentation reporting via `export AFL_QUIET=1`. 2. sometimes configure and build systems error on warnings - these should be disabled (e.g. `--disable-werror` for some configure scripts). @@ -462,20 +516,38 @@ non-standard way to set this, otherwise set up the build normally and edit the generated build environment afterwards manually to point it to the right compiler (and/or ranlib and ar). 
-#### d) Better instrumentation +#### f) Better instrumentation If you just fuzz a target program as-is you are wasting a great opportunity for much more fuzzing speed. -This requires the usage of afl-clang-lto or afl-clang-fast. +This variant requires the usage of afl-clang-lto, afl-clang-fast or afl-gcc-fast. -This is the so-called `persistent mode`, which is much, much faster but +It is the so-called `persistent mode`, which is much, much faster but requires that you code a source file that is specifically calling the target functions that you want to fuzz, plus a few specific afl++ functions around it. See [instrumentation/README.persistent_mode.md](instrumentation/README.persistent_mode.md) for details. Basically if you do not fuzz a target in persistent mode then you are just -doing it for a hobby and not professionally :-) +doing it for a hobby and not professionally :-). + +#### g) libfuzzer fuzzer harnesses with LLVMFuzzerTestOneInput() + +libfuzzer `LLVMFuzzerTestOneInput()` harnesses are the defacto standard +for fuzzing, and they can be used with afl++ (and honggfuzz) as well! +Compiling them is as simple as: +``` +afl-clang-fast++ -fsanitize=fuzzer -o harness harness.cpp targetlib.a +``` +You can even use advanced libfuzzer features like `FuzzedDataProvider`, +`LLVMFuzzerMutate()` etc. and they will work! + +The generated binary is fuzzed with afl-fuzz like any other fuzz target. + +Bonus: the target is already optimized for fuzzing due to persistent mode and +shared-memory testcases and hence gives you the fastest speed possible. + +For more information see [utils/aflpp_driver/README.md](utils/aflpp_driver/README.md) ### 2. Preparing the fuzzing campaign @@ -507,6 +579,8 @@ Note that the INPUTFILE argument that the target program would read from has to If the target reads from stdin instead, just omit the `@@` as this is the default. +This step is highly recommended! 
+ #### c) Minimizing all corpus files The shorter the input files that still traverse the same path @@ -522,7 +596,8 @@ for i in *; do done ``` -This step can also be parallelized, e.g. with `parallel` +This step can also be parallelized, e.g. with `parallel`. +Note that this step is rather optional though. #### Done! @@ -558,6 +633,16 @@ step [2a. Collect inputs](#a-collect-inputs): `afl-fuzz -i input -o output -- bin/target -d @@` Note that the directory specified with -o will be created if it does not exist. +It can be valuable to run afl-fuzz in a screen or tmux shell so you can log off, +or afl-fuzz is not aborted if you are running it in a remote ssh session where +the connection fails in between. +Only do that though once you have verified that your fuzzing setup works! +Simply run it like `screen -dmS afl-main -- afl-fuzz -M main-$HOSTNAME -i ...` +and it will start away in a screen session. To enter this session simply type +`screen -r afl-main`. You see - it makes sense to name the screen session +same as the afl-fuzz -M/-S naming :-) +For more information on screen or tmux please check their documentation. + If you need to stop and re-start the fuzzing, use the same command line options (or even change them by selecting a different power schedule or another mutation mode!) and switch the input directory with a dash (`-`): @@ -611,7 +696,7 @@ of the testcases. Depending on the average testcase size (and those found during fuzzing) and their number, a value between 50-500MB is recommended. You can set the cache size (in MB) by setting the environment variable `AFL_TESTCACHE_SIZE`. -There should be one main fuzzer (`-M main` option) and as many secondary +There should be one main fuzzer (`-M main-$HOSTNAME` option) and as many secondary fuzzers (eg `-S variant1`) as you have cores that you use. Every -M/-S entry needs a unique name (that can be whatever), however the same -o output directory location has to be used for all instances. 
@@ -619,7 +704,7 @@ Every -M/-S entry needs a unique name (that can be whatever), however the same For every secondary fuzzer there should be a variation, e.g.: * one should fuzz the target that was compiled differently: with sanitizers activated (`export AFL_USE_ASAN=1 ; export AFL_USE_UBSAN=1 ; - export AFL_USE_CFISAN=1 ; export AFL_USE_LSAN=1`) + export AFL_USE_CFISAN=1`) * one or two should fuzz the target with CMPLOG/redqueen (see above), at least one cmplog instance should follow transformations (`-l AT`) * one to three fuzzers should fuzz a target compiled with laf-intel/COMPCOV @@ -645,8 +730,9 @@ You can also use different fuzzers. If you are using afl spinoffs or afl conforming fuzzers, then just use the same -o directory and give it a unique `-S` name. Examples are: - * [Eclipser](https://github.com/SoftSec-KAIST/Eclipser/) + * [Fuzzolic](https://github.com/season-lab/fuzzolic) * [symcc](https://github.com/eurecom-s/symcc/) + * [Eclipser](https://github.com/SoftSec-KAIST/Eclipser/) * [AFLsmart](https://github.com/aflsmart/aflsmart) * [FairFuzz](https://github.com/carolemieux/afl-rb) * [Neuzz](https://github.com/Dongdongshe/neuzz) @@ -660,7 +746,46 @@ directory of a different fuzzer is, e.g. `-F /src/target/honggfuzz`. Using honggfuzz (with `-n 1` or `-n 2`) and libfuzzer in parallel is highly recommended! -#### c) The status of the fuzz campaign +#### c) Using multiple machines for fuzzing + +Maybe you have more than one machine you want to fuzz the same target on. +Simply start the `afl-fuzz` (and perhaps libfuzzer, honggfuzz, ...) +orchestra as you like, just ensure that you have one and only one `-M` +instance per server, and that its name is unique, hence the recommendation +for `-M main-$HOSTNAME`. + +Now there are three strategies on how you can sync between the servers: + * never: sounds weird, but this makes every server an island and has the + chance that each follows different paths into the target. 
You can make + this even more interesting by even giving different seeds to each server. + * regularly (~4h): this ensures that all fuzzing campaigns on the servers + "see" the same thing. It is like fuzzing on a huge server. + * in intervals of 1/10th of the overall expected runtime of the fuzzing you + sync. This tries a bit to combine both: have some individuality of the + paths each campaign on a server explores, on the other hand if one + gets stuck where another found progress this is handed over making it + unstuck. + +The syncing process itself is very simple. +As the `-M main-$HOSTNAME` instance syncs to all `-S` secondaries as well +as to other fuzzers, you have to copy only this directory to the other +machines. + +Let's say all servers have the `-o out` directory in /target/foo/out, and +you created a file `servers.txt` which contains the hostnames of all +participating servers, plus you have an ssh key deployed to all of them, +then run: +```bash +for FROM in `cat servers.txt`; do + for TO in `cat servers.txt`; do + rsync -rlpogtz --rsh=ssh $FROM:/target/foo/out/main-$FROM $TO:/target/foo/out/ + done +done +``` +You can run this manually, per cron job - as you need it. +There is a more complex and configurable script in `utils/distributed_fuzzing`. + +#### d) The status of the fuzz campaign afl++ comes with the `afl-whatsup` script to show the status of the fuzzing campaign. @@ -669,11 +794,14 @@ Just supply the directory that afl-fuzz is given with the -o option and you will see a detailed status of every fuzzer in that campaign plus a summary. -To have only the summary use the `-s` switch e.g.: `afl-whatsup -s output/` +To have only the summary use the `-s` switch e.g.: `afl-whatsup -s out/` + +If you have multiple servers then use the command after a sync, or you have +to execute this script per server. 
-#### d) Checking the coverage of the fuzzing +#### e) Checking the coverage of the fuzzing -The `paths found` value is a bad indicator how good the coverage is. +The `paths found` value is a bad indicator for checking how good the coverage is. A better indicator - if you use default llvm instrumentation with at least version 9 - is to use `afl-showmap` with the collect coverage option `-C` on @@ -701,12 +829,13 @@ then terminate it. The main node will pick it up and make it available to the other secondary nodes over time. Set `export AFL_NO_AFFINITY=1` or `export AFL_TRY_AFFINITY=1` if you have no free core. -Note that you in nearly all cases can never reach full coverage. A lot of -functionality is usually behind options that were not activated or fuzz e.g. -if you fuzz a library to convert image formats and your target is the png to -tiff API then you will not touch any of the other library APIs and features. +Note that in nearly all cases you can never reach full coverage. A lot of +functionality is usually dependent on exclusive options that would need individual +fuzzing campaigns each with one of these options set. E.g. if you fuzz a library to +convert image formats and your target is the png to tiff API then you will not +touch any of the other library APIs and features. -#### e) How long to fuzz a target? +#### f) How long to fuzz a target? This is a difficult question. Basically if no new path is found for a long time (e.g. for a day or a week) @@ -718,7 +847,7 @@ Keep the queue/ directory (for future fuzzings of the same or similar targets) and use them to seed other good fuzzers like libfuzzer with the -entropic switch or honggfuzz. -#### f) Improve the speed! +#### g) Improve the speed! 
* Use [persistent mode](instrumentation/README.persistent_mode.md) (x2-x20 speed increase) * If you do not use shmem persistent mode, use `AFL_TMPDIR` to point the input file on a tempfs location, see [docs/env_variables.md](docs/env_variables.md) diff --git a/afl-cmin b/afl-cmin index e71873d3..e6f8c175 100755 --- a/afl-cmin +++ b/afl-cmin @@ -122,6 +122,7 @@ function usage() { "AFL_FORKSRV_INIT_TMOUT: time the fuzzer waits for the forkserver to come up\n" \ "AFL_KEEP_TRACES: leave the temporary <out_dir>/.traces directory\n" \ "AFL_KILL_SIGNAL: Signal delivered to child processes on timeout (default: SIGKILL)\n" \ +"AFL_NO_FORKSRV: run target via execve instead of using the forkserver\n" \ "AFL_PATH: path for the afl-showmap binary if not found anywhere in PATH\n" \ "AFL_PRINT_FILENAMES: If set, the filename currently processed will be " \ "printed to stdout\n" \ diff --git a/afl-cmin.bash b/afl-cmin.bash index f4bd269d..c77dfbc1 100755 --- a/afl-cmin.bash +++ b/afl-cmin.bash @@ -135,6 +135,7 @@ For additional tips, please consult README.md. 
Environment variables used: AFL_KEEP_TRACES: leave the temporary <out_dir>\.traces directory +AFL_NO_FORKSRV: run target via execve instead of using the forkserver AFL_PATH: last resort location to find the afl-showmap binary AFL_SKIP_BIN_CHECK: skip check for target binary _EOF_ diff --git a/afl-plot b/afl-plot index 60a351ab..662c0907 100755 --- a/afl-plot +++ b/afl-plot @@ -127,7 +127,7 @@ set key outside set autoscale xfixmin set autoscale xfixmax -#set xlabel "all times in UTC" font "small" +set xlabel "relative time in seconds" font "small" plot '$inputdir/plot_data' using 1:4 with filledcurve x1 title 'total paths' linecolor rgb '#000000' fillstyle transparent solid 0.2 noborder, \\ '' using 1:3 with filledcurve x1 title 'current path' linecolor rgb '#f0f0f0' fillstyle transparent solid 0.5 noborder, \\ diff --git a/custom_mutators/honggfuzz/honggfuzz.h b/custom_mutators/honggfuzz/honggfuzz.h index c80cdd87..51c7b567 100644 --- a/custom_mutators/honggfuzz/honggfuzz.h +++ b/custom_mutators/honggfuzz/honggfuzz.h @@ -246,9 +246,9 @@ typedef struct { } timing; struct { struct { - uint8_t val[256]; + uint8_t val[512]; size_t len; - } dictionary[1024]; + } dictionary[8192]; size_t dictionaryCnt; const char* dictionaryFile; size_t mutationsMax; @@ -263,6 +263,7 @@ typedef struct { struct { bool useVerifier; bool exitUponCrash; + uint8_t exitCodeUponCrash; const char* reportFile; size_t dynFileIterExpire; bool only_printable; diff --git a/docs/Changelog.md b/docs/Changelog.md index 475240c2..fcfd2ce8 100644 --- a/docs/Changelog.md +++ b/docs/Changelog.md @@ -8,35 +8,42 @@ Want to stay in the loop on major new features? Join our mailing list by sending a mail to <afl-users+subscribe@googlegroups.com>. 
-### Version ++3.14a (release) +### Version ++3.14c (release) - afl-fuzz: - fix -F when a '/' was part of the parameter - fixed a crash for cmplog for very slow inputs + - fix for AFLfast schedule counting - removed implied -D determinstic from -M main - if the target becomes unavailable check out out/default/error.txt for an indicator why - AFL_CAL_FAST was a dead env, now does the same as AFL_FAST_CAL - reverse read the queue on resumes (more effective) + - fix custom mutator trimming - afl-cc: - Update to COMPCOV/laf-intel that speeds up the instrumentation process a lot - thanks to Michael Rodler/f0rki for the PR! + - Fix for failures for some sized string instrumentations - Fix to instrument global namespace functions in c++ - Fix for llvm 13 - support partial linking + - do honor AFL_LLVM_{ALLOW/DENY}LIST for LTO autodictionary and DICT2FILE - We do support llvm versions from 3.8 to 5.0 again - frida_mode: - several fixes for cmplog - remove need for AFL_FRIDA_PERSISTENT_RETADDR_OFFSET + - less coverage collision - feature parity of aarch64 with intel now (persistent, cmplog, in-memory testcases, asan) - - qemu_mode: - - performance fix when cmplog was used - afl-cmin and afl-showmap -i do now descend into subdirectories (like afl-fuzz does) - note that afl-cmin.bash does not! 
- afl_analyze: - fix timeout handling - add forkserver support for better performance - ensure afl-compiler-rt is built for gcc_module + - always build aflpp_driver for libfuzzer harnesses + - added `AFL_NO_FORKSRV` env variable support to + afl-cmin, afl-tmin, and afl-showmap, by @jhertz + - removed outdated documents, improved existing documentation ### Version ++3.13c (release) - Note: plot_data switched to relative time from unix time in 3.10 diff --git a/docs/FAQ.md b/docs/FAQ.md index ab0abe6c..0f447044 100644 --- a/docs/FAQ.md +++ b/docs/FAQ.md @@ -188,13 +188,7 @@ Four steps are required to do this and it also requires quite some knowledge of coding and/or disassembly and is effectively possible only with afl-clang-fast PCGUARD and afl-clang-lto LTO instrumentation. - 1. First step: Identify which edge ID numbers are unstable - - run the target with `export AFL_DEBUG=1` for a few minutes then terminate. - The out/fuzzer_stats file will then show the edge IDs that were identified - as unstable. - - 2. Second step: Find the responsible function(s). + 1. First step: Instrument to be able to find the responsible function(s). a) For LTO instrumented binaries this can be documented during compile time, just set `export AFL_LLVM_DOCUMENT_IDS=/path/to/a/file`. @@ -217,6 +211,14 @@ afl-clang-fast PCGUARD and afl-clang-lto LTO instrumentation. recompile with the two mentioned above. This is just for identifying the functions that have unstable edges. + 2. Second step: Identify which edge ID numbers are unstable + + run the target with `export AFL_DEBUG=1` for a few minutes then terminate. + The out/fuzzer_stats file will then show the edge IDs that were identified + as unstable in the `var_bytes` entry. You can match these numbers + directly to the data you created in the first step. + Now you know which functions are responsible for the instability + 3. 
Third step: create a text file with the filenames/functions Identify which source code files contain the functions that you need to diff --git a/docs/QuickStartGuide.md b/docs/QuickStartGuide.md index d1966170..2d056ecf 100644 --- a/docs/QuickStartGuide.md +++ b/docs/QuickStartGuide.md @@ -18,14 +18,12 @@ how to hit the ground running: custom SIGSEGV or SIGABRT handlers and background processes. For tips on detecting non-crashing flaws, see section 11 in [README.md](README.md) . -3) Compile the program / library to be fuzzed using afl-gcc. A common way to +3) Compile the program / library to be fuzzed using afl-cc. A common way to do this would be: - CC=/path/to/afl-gcc CXX=/path/to/afl-g++ ./configure --disable-shared + CC=/path/to/afl-cc CXX=/path/to/afl-c++ ./configure --disable-shared make clean all - If program build fails, ping <afl-users@googlegroups.com>. - 4) Get a small but valid input file that makes sense to the program. When fuzzing verbose syntax (SQL, HTTP, etc), create a dictionary as described in dictionaries/README.md, too. @@ -41,9 +39,6 @@ how to hit the ground running: 6) Investigate anything shown in red in the fuzzer UI by promptly consulting [status_screen.md](status_screen.md). -7) compile and use llvm_mode (afl-clang-fast/afl-clang-fast++) as it is way - faster and has a few cool features - 8) There is a basic docker build with 'docker build -t aflplusplus .' That's it. Sit back, relax, and - time permitting - try to skim through the diff --git a/docs/README.MOpt.md b/docs/README.MOpt.md deleted file mode 100644 index 3de6d670..00000000 --- a/docs/README.MOpt.md +++ /dev/null @@ -1,54 +0,0 @@ -# MOpt(imized) AFL by <puppet@zju.edu.cn> - -### 1. Description -MOpt-AFL is a AFL-based fuzzer that utilizes a customized Particle Swarm -Optimization (PSO) algorithm to find the optimal selection probability -distribution of operators with respect to fuzzing effectiveness. -More details can be found in the technical report. - -### 2. 
Cite Information -Chenyang Lyu, Shouling Ji, Chao Zhang, Yuwei Li, Wei-Han Lee, Yu Song and -Raheem Beyah, MOPT: Optimized Mutation Scheduling for Fuzzers, -USENIX Security 2019. - -### 3. Seed Sets -We open source all the seed sets used in the paper -"MOPT: Optimized Mutation Scheduling for Fuzzers". - -### 4. Experiment Results -The experiment results can be found in -https://drive.google.com/drive/folders/184GOzkZGls1H2NuLuUfSp9gfqp1E2-lL?usp=sharing. -We only open source the crash files since the space is limited. - -### 5. Technical Report -MOpt_TechReport.pdf is the technical report of the paper -"MOPT: Optimized Mutation Scheduling for Fuzzers", which contains more deatails. - -### 6. Parameter Introduction -Most important, you must add the parameter `-L` (e.g., `-L 0`) to launch the -MOpt scheme. - -Option '-L' controls the time to move on to the pacemaker fuzzing mode. -'-L t': when MOpt-AFL finishes the mutation of one input, if it has not -discovered any new unique crash or path for more than t minutes, MOpt-AFL will -enter the pacemaker fuzzing mode. - -Setting 0 will enter the pacemaker fuzzing mode at first, which is -recommended in a short time-scale evaluation. - -Setting -1 will enable both pacemaker mode and normal aflmutation fuzzing in -parallel. - -Other important parameters can be found in afl-fuzz.c, for instance, - -'swarm_num': the number of the PSO swarms used in the fuzzing process. -'period_pilot': how many times MOpt-AFL will execute the target program - in the pilot fuzzing module, then it will enter the core fuzzing module. -'period_core': how many times MOpt-AFL will execute the target program in the - core fuzzing module, then it will enter the PSO updating module. -'limit_time_bound': control how many interesting test cases need to be found - before MOpt-AFL quits the pacemaker fuzzing mode and reuses the deterministic stage. - 0 < 'limit_time_bound' < 1, MOpt-AFL-tmp. - 'limit_time_bound' >= 1, MOpt-AFL-ever. 
- -Have fun with MOpt in AFL! diff --git a/docs/custom_mutators.md b/docs/custom_mutators.md index 129d6676..2c0ca3c5 100644 --- a/docs/custom_mutators.md +++ b/docs/custom_mutators.md @@ -204,9 +204,7 @@ trimmed input. Here's a quick API description: arguments because we already have the initial buffer from `init_trim` and we can memorize the current state in the data variables. This can also save reparsing steps for each iteration. It should return the trimmed input - buffer, where the returned data must not exceed the initial input data in - length. Returning anything that is larger than the original data (passed to - `init_trim`) will result in a fatal abort of AFL++. + buffer. - `post_trim` (optional) diff --git a/docs/historical_notes.md b/docs/historical_notes.md deleted file mode 100644 index b5d3d157..00000000 --- a/docs/historical_notes.md +++ /dev/null @@ -1,143 +0,0 @@ -# Historical notes - - This doc talks about the rationale of some of the high-level design decisions - for American Fuzzy Lop. It's adopted from a discussion with Rob Graham. - See README.md for the general instruction manual, and technical_details.md for - additional implementation-level insights. - -## 1) Influences - -In short, `afl-fuzz` is inspired chiefly by the work done by Tavis Ormandy back -in 2007. Tavis did some very persuasive experiments using `gcov` block coverage -to select optimal test cases out of a large corpus of data, and then using -them as a starting point for traditional fuzzing workflows. - -(By "persuasive", I mean: netting a significant number of interesting -vulnerabilities.) - -In parallel to this, both Tavis and I were interested in evolutionary fuzzing. -Tavis had his experiments, and I was working on a tool called bunny-the-fuzzer, -released somewhere in 2007. 
- -Bunny used a generational algorithm not much different from `afl-fuzz`, but -also tried to reason about the relationship between various input bits and -the internal state of the program, with hopes of deriving some additional value -from that. The reasoning / correlation part was probably in part inspired by -other projects done around the same time by Will Drewry and Chris Evans. - -The state correlation approach sounded very sexy on paper, but ultimately, made -the fuzzer complicated, brittle, and cumbersome to use; every other target -program would require a tweak or two. Because Bunny didn't fare a whole lot -better than less sophisticated brute-force tools, I eventually decided to write -it off. You can still find its original documentation at: - - https://code.google.com/p/bunny-the-fuzzer/wiki/BunnyDoc - -There has been a fair amount of independent work, too. Most notably, a few -weeks earlier that year, Jared DeMott had a Defcon presentation about a -coverage-driven fuzzer that relied on coverage as a fitness function. - -Jared's approach was by no means identical to what afl-fuzz does, but it was in -the same ballpark. His fuzzer tried to explicitly solve for the maximum coverage -with a single input file; in comparison, afl simply selects for cases that do -something new (which yields better results - see [technical_details.md](technical_details.md)). - -A few years later, Gabriel Campana released fuzzgrind, a tool that relied purely -on Valgrind and a constraint solver to maximize coverage without any brute-force -bits; and Microsoft Research folks talked extensively about their still -non-public, solver-based SAGE framework. - -In the past six years or so, I've also seen a fair number of academic papers -that dealt with smart fuzzing (focusing chiefly on symbolic execution) and a -couple papers that discussed proof-of-concept applications of genetic -algorithms with the same goals in mind. 
I'm unconvinced how practical most of -these experiments were; I suspect that many of them suffer from the -bunny-the-fuzzer's curse of being cool on paper and in carefully designed -experiments, but failing the ultimate test of being able to find new, -worthwhile security bugs in otherwise well-fuzzed, real-world software. - -In some ways, the baseline that the "cool" solutions have to compete against is -a lot more impressive than it may seem, making it difficult for competitors to -stand out. For a singular example, check out the work by Gynvael and Mateusz -Jurczyk, applying "dumb" fuzzing to ffmpeg, a prominent and security-critical -component of modern browsers and media players: - - http://googleonlinesecurity.blogspot.com/2014/01/ffmpeg-and-thousand-fixes.html - -Effortlessly getting comparable results with state-of-the-art symbolic execution -in equally complex software still seems fairly unlikely, and hasn't been -demonstrated in practice so far. - -But I digress; ultimately, attribution is hard, and glorying the fundamental -concepts behind AFL is probably a waste of time. The devil is very much in the -often-overlooked details, which brings us to... - -## 2. Design goals for afl-fuzz - -In short, I believe that the current implementation of afl-fuzz takes care of -several itches that seemed impossible to scratch with other tools: - -1) Speed. It's genuinely hard to compete with brute force when your "smart" - approach is resource-intensive. If your instrumentation makes it 10x more - likely to find a bug, but runs 100x slower, your users are getting a bad - deal. - - To avoid starting with a handicap, `afl-fuzz` is meant to let you fuzz most of - the intended targets at roughly their native speed - so even if it doesn't - add value, you do not lose much. 
- - On top of this, the tool leverages instrumentation to actually reduce the - amount of work in a couple of ways: for example, by carefully trimming the - corpus or skipping non-functional but non-trimmable regions in the input - files. - -2) Rock-solid reliability. It's hard to compete with brute force if your - approach is brittle and fails unexpectedly. Automated testing is attractive - because it's simple to use and scalable; anything that goes against these - principles is an unwelcome trade-off and means that your tool will be used - less often and with less consistent results. - - Most of the approaches based on symbolic execution, taint tracking, or - complex syntax-aware instrumentation are currently fairly unreliable with - real-world targets. Perhaps more importantly, their failure modes can render - them strictly worse than "dumb" tools, and such degradation can be difficult - for less experienced users to notice and correct. - - In contrast, `afl-fuzz` is designed to be rock solid, chiefly by keeping it - simple. In fact, at its core, it's designed to be just a very good - traditional fuzzer with a wide range of interesting, well-researched - strategies to go by. The fancy parts just help it focus the effort in - places where it matters the most. - -3) Simplicity. The author of a testing framework is probably the only person - who truly understands the impact of all the settings offered by the tool - - and who can dial them in just right. Yet, even the most rudimentary fuzzer - frameworks often come with countless knobs and fuzzing ratios that need to - be guessed by the operator ahead of the time. This can do more harm than - good. - - AFL is designed to avoid this as much as possible. The three knobs you - can play with are the output file, the memory limit, and the ability to - override the default, auto-calibrated timeout. The rest is just supposed to - work. 
When it doesn't, user-friendly error messages outline the probable - causes and workarounds, and get you back on track right away. - -4) Chainability. Most general-purpose fuzzers can't be easily employed - against resource-hungry or interaction-heavy tools, necessitating the - creation of custom in-process fuzzers or the investment of massive CPU - power (most of which is wasted on tasks not directly related to the code - we actually want to test). - - AFL tries to scratch this itch by allowing users to use more lightweight - targets (e.g., standalone image parsing libraries) to create small - corpora of interesting test cases that can be fed into a manual testing - process or a UI harness later on. - -As mentioned in [technical_details.md](technical_details.md), AFL does all this not by systematically -applying a single overarching CS concept, but by experimenting with a variety -of small, complementary methods that were shown to reliably yields results -better than chance. The use of instrumentation is a part of that toolkit, but is -far from being the most important one. - -Ultimately, what matters is that `afl-fuzz` is designed to find cool bugs - and -has a pretty robust track record of doing just that. diff --git a/docs/notes_for_asan.md b/docs/notes_for_asan.md deleted file mode 100644 index f55aeaf2..00000000 --- a/docs/notes_for_asan.md +++ /dev/null @@ -1,157 +0,0 @@ -# Notes for using ASAN with afl-fuzz - - This file discusses some of the caveats for fuzzing under ASAN, and suggests - a handful of alternatives. See README.md for the general instruction manual. - -## 1) Short version - -ASAN on 64-bit systems requests a lot of memory in a way that can't be easily -distinguished from a misbehaving program bent on crashing your system. 
- -Because of this, fuzzing with ASAN is recommended only in four scenarios: - - - On 32-bit systems, where we can always enforce a reasonable memory limit - (-m 800 or so is a good starting point), - - - On 64-bit systems only if you can do one of the following: - - - Compile the binary in 32-bit mode (gcc -m32), - - - Precisely gauge memory needs using http://jwilk.net/software/recidivm . - - - Limit the memory available to process using cgroups on Linux (see - utils/asan_cgroups). - -To compile with ASAN, set AFL_USE_ASAN=1 before calling 'make clean all'. The -afl-gcc / afl-clang wrappers will pick that up and add the appropriate flags. -Note that ASAN is incompatible with -static, so be mindful of that. - -(You can also use AFL_USE_MSAN=1 to enable MSAN instead.) - -When compiling with AFL_USE_LSAN, the leak sanitizer will normally run -when the program exits. In order to utilize this check at different times, -such as at the end of a loop, you may use the macro __AFL_LEAK_CHECK();. -This macro will report a crash in afl-fuzz if any memory is left leaking -at this stage. You can also use LSAN_OPTIONS and a supressions file -for more fine-tuned checking, however make sure you keep exitcode=23. - -NOTE: if you run several secondary instances, only one should run the target -compiled with ASAN (and UBSAN, CFISAN), the others should run the target with -no sanitizers compiled in. - -There is also the option of generating a corpus using a non-ASAN binary, and -then feeding it to an ASAN-instrumented one to check for bugs. This is faster, -and can give you somewhat comparable results. You can also try using -libdislocator (see [utils/libdislocator/README.dislocator.md](../utils/libdislocator/README.dislocator.md) in the parent directory) as a -lightweight and hassle-free (but less thorough) alternative. - -## 2) Long version - -ASAN allocates a huge region of virtual address space for bookkeeping purposes. 
-Most of this is never actually accessed, so the OS never has to allocate any -real pages of memory for the process, and the VM grabbed by ASAN is essentially -"free" - but the mapping counts against the standard OS-enforced limit -(RLIMIT_AS, aka ulimit -v). - -On our end, afl-fuzz tries to protect you from processes that go off-rails -and start consuming all the available memory in a vain attempt to parse a -malformed input file. This happens surprisingly often, so enforcing such a limit -is important for almost any fuzzer: the alternative is for the kernel OOM -handler to step in and start killing random processes to free up resources. -Needless to say, that's not a very nice prospect to live with. - -Unfortunately, un*x systems offer no portable way to limit the amount of -pages actually given to a process in a way that distinguishes between that -and the harmless "land grab" done by ASAN. In principle, there are three standard -ways to limit the size of the heap: - - - The RLIMIT_AS mechanism (ulimit -v) caps the size of the virtual space - - but as noted, this pays no attention to the number of pages actually - in use by the process, and doesn't help us here. - - - The RLIMIT_DATA mechanism (ulimit -d) seems like a good fit, but it applies - only to the traditional sbrk() / brk() methods of requesting heap space; - modern allocators, including the one in glibc, routinely rely on mmap() - instead, and circumvent this limit completely. - - - Finally, the RLIMIT_RSS limit (ulimit -m) sounds like what we need, but - doesn't work on Linux - mostly because nobody felt like implementing it. - -There are also cgroups, but they are Linux-specific, not universally available -even on Linux systems, and they require root permissions to set up; I'm a bit -hesitant to make afl-fuzz require root permissions just for that. That said, -if you are on Linux and want to use cgroups, check out the contributed script -that ships in utils/asan_cgroups/. 
- -In settings where cgroups aren't available, we have no nice, portable way to -avoid counting the ASAN allocation toward the limit. On 32-bit systems, or for -binaries compiled in 32-bit mode (-m32), this is not a big deal: ASAN needs -around 600-800 MB or so, depending on the compiler - so all you need to do is -to specify -m that is a bit higher than that. - -On 64-bit systems, the situation is more murky, because the ASAN allocation -is completely outlandish - around 17.5 TB in older versions, and closer to -20 TB with newest ones. The actual amount of memory on your system is -(probably!) just a tiny fraction of that - so unless you dial the limit -with surgical precision, you will get no protection from OOM bugs. - -On my system, the amount of memory grabbed by ASAN with a slightly older -version of gcc is around 17,825,850 MB; for newest clang, it's 20,971,600. -But there is no guarantee that these numbers are stable, and if you get them -wrong by "just" a couple gigs or so, you will be at risk. - -To get the precise number, you can use the recidivm tool developed by Jakub -Wilk (http://jwilk.net/software/recidivm). In absence of this, ASAN is *not* -recommended when fuzzing 64-bit binaries, unless you are confident that they -are robust and enforce reasonable memory limits (in which case, you can -specify '-m none' when calling afl-fuzz). - -Using recidivm or running with no limits aside, there are two other decent -alternatives: build a corpus of test cases using a non-ASAN binary, and then -examine them with ASAN, Valgrind, or other heavy-duty tools in a more -controlled setting; or compile the target program with -m32 (32-bit mode) -if your system supports that. 
- -## 3) Interactions with the QEMU mode - -ASAN, MSAN, and other sanitizers appear to be incompatible with QEMU user -emulation, so please do not try to use them with the -Q option; QEMU doesn't -seem to appreciate the shadow VM trick used by these tools, and will likely -just allocate all your physical memory, then crash. - -You can, however, use QASan to run binaries that are not instrumented with ASan -under QEMU with the AFL++ instrumentation. - -https://github.com/andreafioraldi/qasan - -## 4) ASAN and OOM crashes - -By default, ASAN treats memory allocation failures as fatal errors, immediately -causing the program to crash. Since this is a departure from normal POSIX -semantics (and creates the appearance of security issues in otherwise -properly-behaving programs), we try to disable this by specifying -allocator_may_return_null=1 in ASAN_OPTIONS. - -Unfortunately, it's been reported that this setting still causes ASAN to -trigger phantom crashes in situations where the standard allocator would -simply return NULL. If this is interfering with your fuzzing jobs, you may -want to cc: yourself on this bug: - - https://bugs.llvm.org/show_bug.cgi?id=22026 - -## 5) What about UBSAN? - -New versions of UndefinedBehaviorSanitizer offers the --fsanitize=undefined-trap-on-error compiler flag that tells UBSan to insert an -istruction that will cause SIGILL (ud2 on x86) when an undefined behaviour -is detected. This is the option that you want to use when combining AFL++ -and UBSan. - -AFL_USE_UBSAN=1 env var will add this compiler flag to afl-clang-fast, -afl-gcc-fast and afl-gcc for you. - -Old versions of UBSAN don't offer a consistent way -to abort() on fault conditions or to terminate with a distinctive exit code -but there are some versions of the library can be binary-patched to address this -issue. You can also preload a shared library that substitute all the UBSan -routines used to report errors with abort(). 
diff --git a/docs/perf_tips.md b/docs/perf_tips.md index c5968206..9c31e56b 100644 --- a/docs/perf_tips.md +++ b/docs/perf_tips.md @@ -48,13 +48,9 @@ be then manually fed to a more resource-hungry program later on. Also note that reading the fuzzing input via stdin is faster than reading from a file. -## 3. Use LLVM instrumentation +## 3. Use LLVM persistent instrumentation -When fuzzing slow targets, you can gain 20-100% performance improvement by -using the LLVM-based instrumentation mode described in [the instrumentation README](../instrumentation/README.llvm.md). -Note that this mode requires the use of clang and will not work with GCC. - -The LLVM mode also offers a "persistent", in-process fuzzing mode that can +The LLVM mode offers a "persistent", in-process fuzzing mode that can work well for certain types of self-contained libraries, and for fast targets, can offer performance gains up to 5-10x; and a "deferred fork server" mode that can offer huge benefits for programs with high startup overhead. Both @@ -138,8 +134,7 @@ misses, or similar factors, but they are less likely to be a concern.) ## 7. Keep memory use and timeouts in check -If you have increased the `-m` or `-t` limits more than truly necessary, consider -dialing them back down. +Consider setting low values for `-m` and `-t`. For programs that are nominally very fast, but get sluggish for some inputs, you can also try setting `-t` values that are more punishing than what `afl-fuzz` @@ -164,6 +159,20 @@ There are several OS-level factors that may affect fuzzing speed: - Network filesystems, either used for fuzzer input / output, or accessed by the fuzzed binary to read configuration files (pay special attention to the home directory - many programs search it for dot-files). + - Disable all the spectre, meltdown etc. 
security countermeasures in the + kernel if your machine is properly separated: + +``` +ibpb=off ibrs=off kpti=off l1tf=off mds=off mitigations=off +no_stf_barrier noibpb noibrs nopcid nopti nospec_store_bypass_disable +nospectre_v1 nospectre_v2 pcid=off pti=off spec_store_bypass_disable=off +spectre_v2=off stf_barrier=off +``` + In most Linux distributions you can put this into a `/etc/default/grub` + variable. + +The following list of changes are made when executing `afl-system-config`: + - On-demand CPU scaling. The Linux `ondemand` governor performs its analysis on a particular schedule and is known to underestimate the needs of short-lived processes spawned by `afl-fuzz` (or any other fuzzer). On Linux, @@ -196,26 +205,4 @@ There are several OS-level factors that may affect fuzzing speed: Setting a different scheduling policy for the fuzzer process - say `SCHED_RR` - can usually speed things up, too, but needs to be done with care. - - Use the `afl-system-config` script to set all proc/sys settings above in one go. - - Disable all the spectre, meltdown etc. security countermeasures in the - kernel if your machine is properly separated: - -``` -ibpb=off ibrs=off kpti=off l1tf=off mds=off mitigations=off -no_stf_barrier noibpb noibrs nopcid nopti nospec_store_bypass_disable -nospectre_v1 nospectre_v2 pcid=off pti=off spec_store_bypass_disable=off -spectre_v2=off stf_barrier=off -``` - In most Linux distributions you can put this into a `/etc/default/grub` - variable. - -## 9. If all other options fail, use `-d` - -For programs that are genuinely slow, in cases where you really can't escape -using huge input files, or when you simply want to get quick and dirty results -early on, you can always resort to the `-d` mode. -The mode causes `afl-fuzz` to skip all the deterministic fuzzing steps, which -makes output a lot less neat and can ultimately make the testing a bit less -in-depth, but it will give you an experience more familiar from other fuzzing -tools. 
diff --git a/docs/power_schedules.md b/docs/power_schedules.md deleted file mode 100644 index 493f9609..00000000 --- a/docs/power_schedules.md +++ /dev/null @@ -1,32 +0,0 @@ -# afl++'s power schedules based on AFLfast - -<a href="https://mboehme.github.io/paper/CCS16.pdf"><img src="https://mboehme.github.io/paper/CCS16.png" align="right" width="250"></a> -Power schedules implemented by Marcel Böhme \<marcel.boehme@acm.org\>. -AFLFast is an extension of AFL which is written and maintained by -Michal Zalewski \<lcamtuf@google.com\>. - -AFLfast has helped in the success of Team Codejitsu at the finals of the DARPA Cyber Grand Challenge where their bot Galactica took **2nd place** in terms of #POVs proven (see red bar at https://www.cybergrandchallenge.com/event#results). AFLFast exposed several previously unreported CVEs that could not be exposed by AFL in 24 hours and otherwise exposed vulnerabilities significantly faster than AFL while generating orders of magnitude more unique crashes. - -Essentially, we observed that most generated inputs exercise the same few "high-frequency" paths and developed strategies to gravitate towards low-frequency paths, to stress significantly more program behavior in the same amount of time. We devised several **search strategies** that decide in which order the seeds should be fuzzed and **power schedules** that smartly regulate the number of inputs generated from a seed (i.e., the time spent fuzzing a seed). We call the number of inputs generated from a seed, the seed's **energy**. - -We find that AFL's exploitation-based constant schedule assigns **too much energy to seeds exercising high-frequency paths** (e.g., paths that reject invalid inputs) and not enough energy to seeds exercising low-frequency paths (e.g., paths that stress interesting behaviors). 
Technically, we modified the computation of a seed's performance score (`calculate_score`), which seed is marked as favourite (`update_bitmap_score`), and which seed is chosen next from the circular queue (`main`). We implemented the following schedules (in the order of their effectiveness, best first): - -| AFL flag | Power Schedule | -| ------------- | -------------------------- | -| `-p explore` |  | -| `-p fast` (default)| =\\min\\left(\\frac{\\alpha(i)}{\\beta}\\cdot\\frac{2^{s(i)}}{f(i)},M\\right)) | -| `-p coe` |  | -| `-p quad` |  | -| `-p lin` |  | -| `-p exploit` (AFL) |  | -| `-p mmopt` | Experimental: `explore` with no weighting to runtime and increased weighting on the last 5 queue entries | -| `-p rare` | Experimental: `rare` puts focus on queue entries that hit rare edges | -| `-p seek` | Experimental: `seek` is EXPLORE but ignoring the runtime of the queue input and less focus on the size | -where *α(i)* is the performance score that AFL uses to compute for the seed input *i*, *β(i)>1* is a constant, *s(i)* is the number of times that seed *i* has been chosen from the queue, *f(i)* is the number of generated inputs that exercise the same path as seed *i*, and *μ* is the average number of generated inputs exercising a path. - -More details can be found in the paper that was accepted at the [23rd ACM Conference on Computer and Communications Security (CCS'16)](https://www.sigsac.org/ccs/CCS2016/accepted-papers/). - -PS: In parallel mode (several instances with shared queue), we suggest to run the main node using the exploit schedule (-p exploit) and the secondary nodes with a combination of cut-off-exponential (-p coe), exponential (-p fast; default), and explore (-p explore) schedules. In single mode, the default settings will do. **EDIT:** In parallel mode, AFLFast seems to perform poorly because the path probability estimates are incorrect for the imported seeds. 
Pull requests to fix this issue by syncing the estimates across instances are appreciated :) - -Copyright 2013, 2014, 2015, 2016 Google Inc. All rights reserved. -Released under terms and conditions of Apache License, Version 2.0. diff --git a/docs/technical_details.md b/docs/technical_details.md index a0453c91..6a4660a2 100644 --- a/docs/technical_details.md +++ b/docs/technical_details.md @@ -1,5 +1,9 @@ # Technical "whitepaper" for afl-fuzz + +NOTE: this document is rather outdated! + + This document provides a quick overview of the guts of American Fuzzy Lop. See README.md for the general instruction manual; and for a discussion of motivations and design goals behind AFL, see historical_notes.md. diff --git a/frida_mode/GNUmakefile b/frida_mode/GNUmakefile index f5a96501..fad183e1 100644 --- a/frida_mode/GNUmakefile +++ b/frida_mode/GNUmakefile @@ -19,13 +19,14 @@ CFLAGS+=-fPIC \ -g \ -O3 \ -funroll-loops \ + -ffunction-sections \ -RT_CFLAGS:=-Wno-unused-parameter \ +AFL_CFLAGS:=-Wno-unused-parameter \ -Wno-sign-compare \ -Wno-unused-function \ -Wno-unused-result \ -Wno-int-to-pointer-cast \ - -Wno-pointer-sign \ + -Wno-pointer-sign LDFLAGS+=-shared \ -lpthread \ @@ -52,6 +53,10 @@ ifeq "$(ARCH)" "aarch64" ARCH:=arm64 endif +ifeq "$(ARCH)" "armv7l" + ARCH:=armhf +endif + ifeq "$(ARCH)" "i686" ARCH:=x86 endif @@ -59,12 +64,15 @@ endif ifeq "$(shell uname)" "Darwin" OS:=macos - RT_CFLAGS:=$(RT_CFLAGS) -Wno-deprecated-declarations + AFL_CFLAGS:=$(AFL_CFLAGS) -Wno-deprecated-declarations else ifdef DEBUG - RT_CFLAGS:=$(RT_CFLAGS) -Wno-prio-ctor-dtor + AFL_CFLAGS:=$(AFL_CFLAGS) -Wno-prio-ctor-dtor endif -LDFLAGS+=-z noexecstack +LDFLAGS+= -z noexecstack \ + -Wl,--gc-sections \ + -Wl,--exclude-libs,ALL +LDSCRIPT:=-Wl,--version-script=$(PWD)frida.map endif ifeq "$(shell uname)" "Linux" @@ -75,7 +83,16 @@ ifndef OS $(error "Operating system unsupported") endif +ifeq "$(ARCH)" "arm64" +# 15.0.0 Not released for aarch64 yet GUM_DEVKIT_VERSION=14.2.18 +else +ifeq 
"$(ARCH)" "armhf" +GUM_DEVKIT_VERSION=14.2.18 +else +GUM_DEVKIT_VERSION=15.0.0 +endif +endif GUM_DEVKIT_FILENAME=frida-gumjs-devkit-$(GUM_DEVKIT_VERSION)-$(OS)-$(ARCH).tar.xz GUM_DEVKIT_URL="https://github.com/frida/frida/releases/download/$(GUM_DEVKIT_VERSION)/$(GUM_DEVKIT_FILENAME)" @@ -94,15 +111,24 @@ FRIDA_GUM_DEVKIT_COMPRESSED_TARBALL:=$(FRIDA_DIR)build/$(GUM_DEVKIT_FILENAME) AFL_COMPILER_RT_SRC:=$(ROOT)instrumentation/afl-compiler-rt.o.c AFL_COMPILER_RT_OBJ:=$(OBJ_DIR)afl-compiler-rt.o +AFL_PERFORMANCE_SRC:=$(ROOT)src/afl-performance.c +AFL_PERFORMANCE_OBJ:=$(OBJ_DIR)afl-performance.o + HOOK_DIR:=$(PWD)hook/ -AFLPP_DRIVER_HOOK_SRC=$(HOOK_DIR)hook.c -AFLPP_DRIVER_HOOK_OBJ=$(BUILD_DIR)hook.so +AFLPP_FRIDA_DRIVER_HOOK_SRC=$(HOOK_DIR)frida_hook.c +AFLPP_FRIDA_DRIVER_HOOK_OBJ=$(BUILD_DIR)frida_hook.so + +AFLPP_QEMU_DRIVER_HOOK_SRC:=$(HOOK_DIR)qemu_hook.c +AFLPP_QEMU_DRIVER_HOOK_OBJ:=$(BUILD_DIR)qemu_hook.so + +BIN2C:=$(BUILD_DIR)bin2c +BIN2C_SRC:=$(PWD)util/bin2c.c .PHONY: all 32 clean format hook $(FRIDA_GUM) ############################## ALL ############################################# -all: $(FRIDA_TRACE) $(AFLPP_DRIVER_HOOK_OBJ) +all: $(FRIDA_TRACE) $(AFLPP_FRIDA_DRIVER_HOOK_OBJ) $(AFLPP_QEMU_DRIVER_HOOK_OBJ) 32: CFLAGS="-m32" LDFLAGS="-m32" ARCH="x86" make all @@ -143,17 +169,26 @@ $(GUM_DEVKIT_TARBALL): | $(FRIDA_BUILD_DIR) wget -O $@ $(GUM_DEVKIT_URL) endif -$(GUM_DEVIT_LIBRARY): | $(GUM_DEVKIT_TARBALL) - tar Jxvf $(GUM_DEVKIT_TARBALL) -C $(FRIDA_BUILD_DIR) +$(GUM_DEVIT_LIBRARY): $(GUM_DEVKIT_TARBALL) + tar Jxvfm $(GUM_DEVKIT_TARBALL) -C $(FRIDA_BUILD_DIR) -$(GUM_DEVIT_HEADER): | $(GUM_DEVKIT_TARBALL) - tar Jxvf $(GUM_DEVKIT_TARBALL) -C $(FRIDA_BUILD_DIR) +$(GUM_DEVIT_HEADER): $(GUM_DEVKIT_TARBALL) + tar Jxvfm $(GUM_DEVKIT_TARBALL) -C $(FRIDA_BUILD_DIR) ############################## AFL ############################################# $(AFL_COMPILER_RT_OBJ): $(AFL_COMPILER_RT_SRC) $(CC) \ $(CFLAGS) \ - $(RT_CFLAGS) \ + $(AFL_CFLAGS) \ + -I $(ROOT) \ + -I 
$(ROOT)include \ + -o $@ \ + -c $< + +$(AFL_PERFORMANCE_OBJ): $(AFL_PERFORMANCE_SRC) + $(CC) \ + $(CFLAGS) \ + $(AFL_CFLAGS) \ -I $(ROOT) \ -I $(ROOT)include \ -o $@ \ @@ -161,10 +196,13 @@ $(AFL_COMPILER_RT_OBJ): $(AFL_COMPILER_RT_SRC) ############################### JS ############################################# -$(JS_SRC): $(JS) | $(BUILD_DIR) - cd $(JS_DIR) && xxd -i $(JS_NAME) $@ +$(BIN2C): $(BIN2C_SRC) + $(CC) -D_GNU_SOURCE -o $@ $< -$(JS_OBJ): $(JS_SRC) +$(JS_SRC): $(JS) $(BIN2C)| $(BUILD_DIR) + cd $(JS_DIR) && $(BIN2C) api_js $(JS) $@ + +$(JS_OBJ): $(JS_SRC) GNUmakefile $(CC) \ $(CFLAGS) \ -I $(ROOT)include \ @@ -190,23 +228,28 @@ $(foreach src,$(SOURCES),$(eval $(call BUILD_SOURCE,$(src),$(OBJ_DIR)$(notdir $( ######################## AFL-FRIDA-TRACE ####################################### -$(FRIDA_TRACE): $(GUM_DEVIT_LIBRARY) $(GUM_DEVIT_HEADER) $(OBJS) $(JS_OBJ) $(AFL_COMPILER_RT_OBJ) GNUmakefile | $(BUILD_DIR) +$(FRIDA_TRACE): $(GUM_DEVIT_LIBRARY) $(GUM_DEVIT_HEADER) $(OBJS) $(JS_OBJ) $(AFL_COMPILER_RT_OBJ) $(AFL_PERFORMANCE_OBJ) GNUmakefile | $(BUILD_DIR) $(CXX) \ $(OBJS) \ $(JS_OBJ) \ $(GUM_DEVIT_LIBRARY) \ $(AFL_COMPILER_RT_OBJ) \ + $(AFL_PERFORMANCE_OBJ) \ $(LDFLAGS) \ + $(LDSCRIPT) \ -o $@ \ cp -v $(FRIDA_TRACE) $(ROOT) ############################# HOOK ############################################# -$(AFLPP_DRIVER_HOOK_OBJ): $(AFLPP_DRIVER_HOOK_SRC) | $(BUILD_DIR) +$(AFLPP_FRIDA_DRIVER_HOOK_OBJ): $(AFLPP_FRIDA_DRIVER_HOOK_SRC) $(GUM_DEVIT_HEADER) | $(BUILD_DIR) $(CC) $(CFLAGS) $(LDFLAGS) -I $(FRIDA_BUILD_DIR) $< -o $@ -hook: $(AFLPP_DRIVER_HOOK_OBJ) +$(AFLPP_QEMU_DRIVER_HOOK_OBJ): $(AFLPP_QEMU_DRIVER_HOOK_SRC) | $(BUILD_DIR) + $(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ + +hook: $(AFLPP_FRIDA_DRIVER_HOOK_OBJ) $(AFLPP_QEMU_DRIVER_HOOK_OBJ) ############################# CLEAN ############################################ clean: @@ -214,7 +257,7 @@ clean: ############################# FORMAT ########################################### format: - cd $(ROOT) && 
echo $(SOURCES) $(AFLPP_DRIVER_HOOK_SRC) | xargs -L1 ./.custom-format.py -i + cd $(ROOT) && echo $(SOURCES) $(AFLPP_FRIDA_DRIVER_HOOK_SRC) $(BIN2C_SRC) | xargs -L1 ./.custom-format.py -i cd $(ROOT) && echo $(INCLUDES) | xargs -L1 ./.custom-format.py -i ############################# RUN ############################################# diff --git a/frida_mode/MapDensity.md b/frida_mode/MapDensity.md new file mode 100644 index 00000000..f4ae3ace --- /dev/null +++ b/frida_mode/MapDensity.md @@ -0,0 +1,147 @@ +# Map Density + +# How Coverage Works +The coverage in AFL++ works by assigning each basic block of code a unique ID +and during execution when transitioning between blocks (e.g. by calls or jumps) +assigning each of these edges an ID based upon the source and destination block +ID. + +For each individual execution of the target, a single dimensional byte array +indexed by the edge ID is used to count how many times each edge is traversed. + +A single dimensional cumulative byte array is also constructed where each byte +again represents an individual edge ID, but this time, the value of the byte +represents a range of how many times that edge has been traversed. + +```1, 2, 3, 4-7, 8-15, 16-31, 32-127, 128+``` + +The theory is that a new path isn't particularly interesting if an edge has been +traversed `23` instead of `24` times for example, but is interesting if an edge +has been traversed for the very first time, or the number of times fits within a different bucket. + +After each run, the count of times each edge is hit is compared to the values in +the cumulative map and if it is different, then the input is kept as a new seed +and the cumulative map is updated. + +This mechanism is described in greater detail in the seminal +[paper](https://lcamtuf.coredump.cx/afl/technical_details.txt) on AFL by +[lcamtuf](https://github.com/lcamtuf). 
+ +# Collisions +In black-box fuzzing, we must assume that control may flow from any block to any +other block, since we don't know any better. Thus for a target with `n` basic +blocks of code, there are `n * n` potential edges. As we can see, even with a +small number of blocks, a very large map will be required so that we have space +to fit them all. Even if our target only had `1024` blocks, this would require a +map containing `1048576` entries (or 1Mb in size). + +Whilst this may not seem like a lot of memory, it causes problems for two reasons. Firstly, the processing step after each execution must now process much more +data, and secondly a map this size is unlikely to fit within the L2 cache of the processor. Since this is a very hot code path, we are likely to pay a very heavy +performance cost. + +Therefore, we must accept that not all edges can have a unique ID and that +therefore there will be collisions. This means that if the fuzzer finds a new +path by uncovering an edge which was not previously found, but that the same +edge ID is used by another edge, then it may go completely unnoticed. This is +obviously undesirable, but equally if our map is too large, then we will not be +able to process as many potential inputs in the same time and hence not uncover +edges for that reason. Thus a careful trade-off of map size must be made. + +# Block & Edge Numbering +Since the original AFL, blocks and edges have always been numbered in the same +way as we can see from the following C snippet from the whitepaper. + +```c + cur_location = (block_address >> 4) ^ (block_address << 8); + shared_mem[cur_location ^ prev_location]++; + prev_location = cur_location >> 1; + +``` + +Each block ID is generated by performing a shift and XOR on its address. Then +the edge ID is calculated as `E = B ^ (B' >> 1)`. Here, we can make two +observations. In fact, the edge ID is also masked to ensure it is less than the +size of the map being used. 
+ +## Block IDs +Firstly, the block ID doesn't have very good entropy. If we consider the address +of the block, then whilst each block has a unique ID, it isn't necessarily very +evenly distributed. + +We start with a large address, and need to discard a large number of the bits to +generate a block ID which is within range. But how do we choose the unique bits +of the address versus those which are the same for every block? The high bits of +the address may simply be all `0s` or all `1s` to make the address canonical, +the middle portion of the address may be the same for all blocks (since if they +are all within the same binary, then they will all be adjacent in memory), and +on some systems, even the low bits may have poor entropy as some use fixed +length aligned instructions. Then we need to consider that a portion of each +binary may contain the `.data` or `.bss` sections and so may not contain any +blocks of code at all. + +## Edge IDs +Secondly, we can observe that when we generate an edge ID from the source and +destination block IDs, we perform a right shift on the source block ID. Whilst +there are good reasons as set out in the whitepaper why such a transform is +applied, in so doing, we dispose of `1` bit of precious entropy in our source +block ID. + +All together, this means that some edge IDs may be more popular than others. +This means that some portions of the map may be very densely populated with large +numbers of edges, whilst others may be very sparsely populated, or not populated +at all. + +# Improvements +One of the main reasons why this algorithm was selected is performance. All of the +operations are very quick to perform and given we may be carrying this out for +every block of code we execute, performance is critical. + +However, the design of the binary instrumentation modes of AFL++ has moved on. +Both QEMU and FRIDA modes use a two stage process when executing a target +application. 
Each block is first compiled or instrumented, and then it is +executed. The compiled blocks can be re-used each time the target executes them. + +Since a block's ID is based on its address, and this is known at compile time, we +only need to generate this ID once per block and so this ID generation no longer +needs to be as performant. We can therefore use a hash algorithm to generate +this ID and therefore ensure that the block IDs are more evenly distributed. + +Edge IDs, however, can only be determined at run-time, since we don't know which +blocks a given input will traverse until we run it. However, given our block IDs +are now evenly distributed, generating an evenly distributed edge ID becomes +simple. Here, the only change we make is to use a rotate operation rather than +a shift operation so we don't lose a bit of entropy from the source ID. + +So our new algorithm becomes: +```c + cur_location = hash(block_address); + shared_mem[cur_location ^ prev_location]++; + prev_location = rotate(cur_location, 1); +``` + +Lastly, in the original design, `prev_location` was always set to `0` at the +beginning of a run; we instead set the value of `prev_location` to `hash(0)`. + +# Parallel Fuzzing +Another sub-optimal aspect of the original design is that no matter how many +instances of the fuzzer you ran in parallel, each instance numbered each block +and so each edge with the same ID. Each instance would therefore find the same +subset of edges collide with each other. In the event of a collision, all +instances will hit the same roadblock. + +However, if we instead use a different seed for our hashing function for each +instance, then each will ascribe each block a different ID and hence each edge +will be given a different edge ID. This means that whilst one instance of the +fuzzer may find a given pair of edges collide, it is very unlikely that another +instance will find the same pair also collide. 
+ +Due to the collaborative nature of parallel fuzzing, this means that whilst one +instance may struggle to find a particular new path because the new edge +collides, another instance will likely not encounter the same collision and thus +be able to differentiate this new path and share it with the other instances. + +If only a single new edge is found, and the new path is shared with an instance +for which that edge collides, that instance may disregard it as irrelevant. In +practice, however, the discovery of a single new edge likely leads to several +more edges beneath it also being found and therefore the likelihood of all of +these being collisions is very slim. diff --git a/frida_mode/README.md b/frida_mode/README.md index 6bed52b7..3009e171 100644 --- a/frida_mode/README.md +++ b/frida_mode/README.md @@ -153,14 +153,20 @@ Generated block 0x7ffff75e98e2 *** ``` +* `AFL_FRIDA_INST_JIT` - Enable the instrumentation of Just-In-Time compiled +code. Code is considered to be JIT if the executable segment is not backed by a +file. * `AFL_FRIDA_INST_NO_OPTIMIZE` - Don't use optimized inline assembly coverage instrumentation (the default where available). Required to use `AFL_FRIDA_INST_TRACE`. * `AFL_FRIDA_INST_NO_PREFETCH` - Disable prefetching. By default the child will report instrumented blocks back to the parent so that it can also instrument them and they be inherited by the next child on fork. +* `AFL_FRIDA_INST_SEED` - Sets the initial seed for the hash function used to +generate block (and hence edge) IDs. Setting this to a constant value may be +useful for debugging purposes, e.g. investigating unstable edges. * `AFL_FRIDA_INST_TRACE` - Log to stdout the address of executed blocks, -requires `AFL_FRIDA_INST_NO_OPTIMIZE`. +implies `AFL_FRIDA_INST_NO_OPTIMIZE`. * `AFL_FRIDA_INST_TRACE_UNIQUE` - As per `AFL_FRIDA_INST_TRACE`, but each edge is logged only once, requires `AFL_FRIDA_INST_NO_OPTIMIZE`. 
* `AFL_FRIDA_OUTPUT_STDOUT` - Redirect the standard output of the target @@ -290,6 +296,10 @@ FASAN then adds instrumentation for any instrucutions which use memory operands then calls into the `__asan_loadN` and `__asan_storeN` functions provided by the DSO to validate memory accesses against the shadow memory. +# Collisions +FRIDA mode has also introduced some improvements to reduce collisions in the map. +See [here](MapDensity.md) for details. + ## TODO The next features to be added are Aarch32 support as well as looking at diff --git a/frida_mode/Scripting.md b/frida_mode/Scripting.md index 4c6fe6b2..5467db99 100644 --- a/frida_mode/Scripting.md +++ b/frida_mode/Scripting.md @@ -605,6 +605,19 @@ difficult to diagnose. The code above only prints the instructions when running in the parent process (the one provided by `Process.id` when the JS script is executed). +# OSX +Note that the JavaScript debug symbol API for OSX makes use of the +`CoreSymbolication` APIs and as such the `CoreFoundation` module must be loaded +into the target to make use of it. This can be done by setting: + +``` +AFL_PRELOAD=/System/Library/Frameworks/CoreFoundation.framework/CoreFoundation +``` + +It should be noted that the `CoreSymbolication` API may take a while to initialize +and build its caches. For this reason, it may be necessary to also increase the +value of the `-t` flag passed to `afl-fuzz`. 
+ # API ```js class Afl { diff --git a/frida_mode/frida.map b/frida_mode/frida.map new file mode 100644 index 00000000..7223d50e --- /dev/null +++ b/frida_mode/frida.map @@ -0,0 +1,35 @@ +{ + global: + __afl_fuzz_len; + __afl_fuzz_ptr; + __afl_sharedmem_fuzzing; + afl_frida_start; + js_api_add_exclude_range; + js_api_add_include_range; + js_api_done; + js_api_error; + js_api_set_debug_maps; + js_api_set_entrypoint; + js_api_set_instrument_debug_file; + js_api_set_instrument_jit; + js_api_set_instrument_libraries; + js_api_set_instrument_no_optimize; + js_api_set_instrument_seed; + js_api_set_instrument_trace; + js_api_set_instrument_trace_unique; + js_api_set_persistent_address; + js_api_set_persistent_count; + js_api_set_persistent_debug; + js_api_set_persistent_hook; + js_api_set_persistent_return; + js_api_set_prefetch_disable; + js_api_set_stalker_callback; + js_api_set_stats_file; + js_api_set_stats_interval; + js_api_set_stats_transitions; + js_api_set_stderr; + js_api_set_stdout; + + local: + *; +}; diff --git a/frida_mode/hook/frida_hook.c b/frida_mode/hook/frida_hook.c new file mode 100644 index 00000000..3bfdb207 --- /dev/null +++ b/frida_mode/hook/frida_hook.c @@ -0,0 +1,64 @@ +/* + * + * Modify this file to set the right registers with the fuzz input and length. + * It is a good idea to check input_buf_len to be not larger than the + * destination buffer! + * + */ + +#include <stdint.h> +#include <string.h> + +#include "frida-gumjs.h" + +#if defined(__x86_64__) + +__attribute__((visibility("default"))) void afl_persistent_hook( + GumCpuContext *regs, uint8_t *input_buf, uint32_t input_buf_len) { + + // do a length check matching the target! + + memcpy((void *)regs->rdi, input_buf, input_buf_len); + regs->rsi = input_buf_len; + +} + +#elif defined(__i386__) + +__attribute__((visibility("default"))) void afl_persistent_hook( + GumCpuContext *regs, uint8_t *input_buf, uint32_t input_buf_len) { + + // do a length check matching the target! 
+ + void **esp = (void **)regs->esp; + void * arg1 = esp[0]; + void **arg2 = &esp[1]; + memcpy(arg1, input_buf, input_buf_len); + *arg2 = (void *)input_buf_len; + +} + +#elif defined(__aarch64__) + +__attribute__((visibility("default"))) void afl_persistent_hook( + GumCpuContext *regs, uint8_t *input_buf, uint32_t input_buf_len) { + + // do a length check matching the target! + + memcpy((void *)regs->x[0], input_buf, input_buf_len); + regs->x[1] = input_buf_len; + +} + +#else + #pragma error "Unsupported architecture" +#endif + +__attribute__((visibility("default"))) int afl_persistent_hook_init(void) { + + // 1 for shared memory input (faster), 0 for normal input (you have to use + // read(), input_buf will be NULL) + return 1; + +} + diff --git a/frida_mode/hook/hook.c b/frida_mode/hook/hook.c deleted file mode 100644 index 7d08101f..00000000 --- a/frida_mode/hook/hook.c +++ /dev/null @@ -1,50 +0,0 @@ -#include <stdint.h> -#include <string.h> - -#include "frida-gumjs.h" - -#if defined(__x86_64__) - -void afl_persistent_hook(GumCpuContext *regs, uint8_t *input_buf, - uint32_t input_buf_len) { - - memcpy((void *)regs->rdi, input_buf, input_buf_len); - regs->rsi = input_buf_len; - -} - -#elif defined(__i386__) - -void afl_persistent_hook(GumCpuContext *regs, uint8_t *input_buf, - uint32_t input_buf_len) { - - void **esp = (void **)regs->esp; - void * arg1 = esp[0]; - void **arg2 = &esp[1]; - memcpy(arg1, input_buf, input_buf_len); - *arg2 = (void *)input_buf_len; - -} - -#elif defined(__aarch64__) - -void afl_persistent_hook(GumCpuContext *regs, uint8_t *input_buf, - uint32_t input_buf_len) { - - memcpy((void *)regs->x[0], input_buf, input_buf_len); - regs->x[1] = input_buf_len; - -} - -#else - #pragma error "Unsupported architecture" -#endif - -int afl_persistent_hook_init(void) { - - // 1 for shared memory input (faster), 0 for normal input (you have to use - // read(), input_buf will be NULL) - return 1; - -} - diff --git a/frida_mode/hook/qemu_hook.c 
b/frida_mode/hook/qemu_hook.c new file mode 100644 index 00000000..56e787e3 --- /dev/null +++ b/frida_mode/hook/qemu_hook.c @@ -0,0 +1,195 @@ +#include <stdint.h> +#include <string.h> + +#if defined(__x86_64__) + +struct x86_64_regs { + + uint64_t rax, rbx, rcx, rdx, rdi, rsi, rbp, r8, r9, r10, r11, r12, r13, r14, + r15; + + union { + + uint64_t rip; + uint64_t pc; + + }; + + union { + + uint64_t rsp; + uint64_t sp; + + }; + + union { + + uint64_t rflags; + uint64_t flags; + + }; + + uint8_t zmm_regs[32][64]; + +}; + +void afl_persistent_hook(struct x86_64_regs *regs, uint64_t guest_base, + uint8_t *input_buf, uint32_t input_buf_len) { + + (void)guest_base; /* unused */ + memcpy((void *)regs->rdi, input_buf, input_buf_len); + regs->rsi = input_buf_len; + +} + +#elif defined(__i386__) + +struct x86_regs { + + uint32_t eax, ebx, ecx, edx, edi, esi, ebp; + + union { + + uint32_t eip; + uint32_t pc; + + }; + + union { + + uint32_t esp; + uint32_t sp; + + }; + + union { + + uint32_t eflags; + uint32_t flags; + + }; + + uint8_t xmm_regs[8][16]; + +}; + +void afl_persistent_hook(struct x86_regs *regs, uint64_t guest_base, + uint8_t *input_buf, uint32_t input_buf_len) { + + (void)guest_base; /* unused */ + void **esp = (void **)regs->esp; + void * arg1 = esp[1]; + void **arg2 = &esp[2]; + memcpy(arg1, input_buf, input_buf_len); + *arg2 = (void *)input_buf_len; + +} +#elif defined(__aarch64__) + +struct arm64_regs { + + uint64_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10; + + union { + + uint64_t x11; + uint32_t fp_32; + + }; + + union { + + uint64_t x12; + uint32_t ip_32; + + }; + + union { + + uint64_t x13; + uint32_t sp_32; + + }; + + union { + + uint64_t x14; + uint32_t lr_32; + + }; + + union { + + uint64_t x15; + uint32_t pc_32; + + }; + + union { + + uint64_t x16; + uint64_t ip0; + + }; + + union { + + uint64_t x17; + uint64_t ip1; + + }; + + uint64_t x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28; + + union { + + uint64_t x29; + uint64_t fp; + + }; + + union 
{ + + uint64_t x30; + uint64_t lr; + + }; + + union { + + uint64_t x31; + uint64_t sp; + + }; + + // the zero register is not saved here ofc + + uint64_t pc; + + uint32_t cpsr; + + uint8_t vfp_zregs[32][16 * 16]; + uint8_t vfp_pregs[17][32]; + uint32_t vfp_xregs[16]; + +}; + +void afl_persistent_hook(struct arm64_regs *regs, uint64_t guest_base, + uint8_t *input_buf, uint32_t input_buf_len) { + + (void)guest_base; /* unused */ + memcpy((void *)regs->x0, input_buf, input_buf_len); + regs->x1 = input_buf_len; +} + +#else + #pragma error "Unsupported architecture" +#endif + +int afl_persistent_hook_init(void) { + + // 1 for shared memory input (faster), 0 for normal input (you have to use + // read(), input_buf will be NULL) + return 1; + +} diff --git a/frida_mode/include/entry.h b/frida_mode/include/entry.h index 801c2bbe..cbc5c8c7 100644 --- a/frida_mode/include/entry.h +++ b/frida_mode/include/entry.h @@ -3,7 +3,8 @@ #include "frida-gumjs.h" -extern guint64 entry_point; +extern guint64 entry_point; +extern gboolean entry_reached; void entry_config(void); diff --git a/frida_mode/include/instrument.h b/frida_mode/include/instrument.h index 9c8d3a5d..29f14da9 100644 --- a/frida_mode/include/instrument.h +++ b/frida_mode/include/instrument.h @@ -5,11 +5,15 @@ #include "config.h" -extern char * instrument_debug_filename; -extern gboolean instrument_tracing; -extern gboolean instrument_optimize; -extern gboolean instrument_unique; -extern __thread uint64_t instrument_previous_pc; +extern char * instrument_debug_filename; +extern gboolean instrument_tracing; +extern gboolean instrument_optimize; +extern gboolean instrument_unique; +extern __thread guint64 instrument_previous_pc; +extern guint64 instrument_hash_zero; + +extern gboolean instrument_use_fixed_seed; +extern guint64 instrument_fixed_seed; extern uint8_t *__afl_area_ptr; extern uint32_t __afl_map_size; @@ -33,5 +37,10 @@ void instrument_debug_instruction(uint64_t address, uint16_t size); void 
instrument_debug_end(GumStalkerOutput *output); void instrument_flush(GumStalkerOutput *output); gpointer instrument_cur(GumStalkerOutput *output); + +void instrument_on_fork(); + +guint64 instrument_get_offset_hash(GumAddress current_rip); + #endif diff --git a/frida_mode/include/persistent.h b/frida_mode/include/persistent.h index 8f00196c..c79f0143 100644 --- a/frida_mode/include/persistent.h +++ b/frida_mode/include/persistent.h @@ -30,7 +30,10 @@ void persistent_init(void); gboolean persistent_is_supported(void); void persistent_prologue(GumStalkerOutput *output); +void persistent_prologue_arch(GumStalkerOutput *output); + void persistent_epilogue(GumStalkerOutput *output); +void persistent_epilogue_arch(GumStalkerOutput *output); #endif diff --git a/frida_mode/include/ranges.h b/frida_mode/include/ranges.h index a667fb76..2eb9b355 100644 --- a/frida_mode/include/ranges.h +++ b/frida_mode/include/ranges.h @@ -5,6 +5,7 @@ extern gboolean ranges_debug_maps; extern gboolean ranges_inst_libs; +extern gboolean ranges_inst_jit; void ranges_config(void); void ranges_init(void); diff --git a/frida_mode/include/stalker.h b/frida_mode/include/stalker.h index 2136fe52..b5e05d5a 100644 --- a/frida_mode/include/stalker.h +++ b/frida_mode/include/stalker.h @@ -7,6 +7,7 @@ void stalker_config(void); void stalker_init(void); GumStalker *stalker_get(void); void stalker_start(void); +void stalker_trust(void); #endif diff --git a/frida_mode/include/stats.h b/frida_mode/include/stats.h index 1cfd6b8f..cd2350ea 100644 --- a/frida_mode/include/stats.h +++ b/frida_mode/include/stats.h @@ -28,6 +28,7 @@ gboolean stats_is_supported_arch(void); size_t stats_data_size_arch(void); void stats_collect_arch(const cs_insn *instr); void stats_write_arch(void); +void stats_on_fork(void); #endif diff --git a/frida_mode/many-linux/Dockerfile b/frida_mode/many-linux/Dockerfile new file mode 100644 index 00000000..2cd56bc8 --- /dev/null +++ b/frida_mode/many-linux/Dockerfile @@ -0,0 +1,24 @@ +FROM 
fridadotre/manylinux-x86_64 + +COPY realpath /bin/realpath +RUN chmod +x /bin/realpath + +RUN yum -y install xz +RUN yum -y install vim-common + +WORKDIR / +RUN git clone https://github.com/AFLplusplus/AFLplusplus.git + +WORKDIR /AFLplusplus +RUN mkdir -p /AFLplusplus/frida_mode/build/frida/ +RUN curl -L -o /AFLplusplus/frida_mode/build/frida/frida-gumjs-devkit-15.0.0-linux-x86_64.tar.xz "https://github.com/frida/frida/releases/download/15.0.0/frida-gumjs-devkit-15.0.0-linux-x86_64.tar.xz" + +WORKDIR /AFLplusplus +RUN git checkout dev +WORKDIR /AFLplusplus/frida_mode +ENV CFLAGS="\ + -DADDR_NO_RANDOMIZE=0x0040000 \ + -Wno-implicit-function-declaration \ + " +ENV CXX=$CC +RUN make diff --git a/frida_mode/many-linux/GNUmakefile b/frida_mode/many-linux/GNUmakefile new file mode 100644 index 00000000..2860f20c --- /dev/null +++ b/frida_mode/many-linux/GNUmakefile @@ -0,0 +1,21 @@ +PWD:=$(shell pwd)/ +BUILD_DIR:=$(PWD)build/ + +.PHONY: all clean shell + +all: | $(BUILD_DIR) + docker build --tag many-afl-frida . + docker run --rm \ + -v $(PWD)build/:/export \ + many-afl-frida \ + cp /AFLplusplus/afl-frida-trace.so /export + +$(BUILD_DIR): + mkdir -p $@ + +clean: + rm -rf $(BUILD_DIR) + docker images --filter 'dangling=true' -q --no-trunc | xargs -L1 docker rmi --force + +shell: + docker run -ti --rm many-afl-frida /bin/bash diff --git a/frida_mode/many-linux/Makefile b/frida_mode/many-linux/Makefile new file mode 100644 index 00000000..f3c3cd55 --- /dev/null +++ b/frida_mode/many-linux/Makefile @@ -0,0 +1,9 @@ +all: + @echo trying to use GNU make... + @gmake all || echo please install GNUmake + +clean: + @gmake clean + +shell: + @gmake shell diff --git a/frida_mode/many-linux/README.md b/frida_mode/many-linux/README.md new file mode 100644 index 00000000..2c7b6823 --- /dev/null +++ b/frida_mode/many-linux/README.md @@ -0,0 +1,8 @@ +# many-linux + +This folder contains a Docker image to allow the building of +`afl-frida-trace.so` using the `many-linux` docker image. 
This docker image is +based on CentOS Linux 5. By building `afl-frida-trace.so` for such an old +version of Linux, given the strong backward compatibility of Linux, this should +work on the majority of Linux environments. This may be useful for targetting +Linux distributions other than your development environment. \ No newline at end of file diff --git a/frida_mode/many-linux/realpath b/frida_mode/many-linux/realpath new file mode 100644 index 00000000..1fdc49a7 --- /dev/null +++ b/frida_mode/many-linux/realpath @@ -0,0 +1,2 @@ +#!/bin/sh +readlink -f -- "$@" diff --git a/frida_mode/src/ctx/ctx_arm32.c b/frida_mode/src/ctx/ctx_arm32.c index a354c117..9fc70fb4 100644 --- a/frida_mode/src/ctx/ctx_arm32.c +++ b/frida_mode/src/ctx/ctx_arm32.c @@ -6,7 +6,7 @@ #if defined(__arm__) -gsize ctx_read_reg(GumIA32CpuContext *ctx, x86_reg reg) { +gsize ctx_read_reg(GumArmCpuContext *ctx, arm_reg reg) { FATAL("ctx_read_reg unimplemented for this architecture"); diff --git a/frida_mode/src/entry.c b/frida_mode/src/entry.c index e95b923b..a0ffd028 100644 --- a/frida_mode/src/entry.c +++ b/frida_mode/src/entry.c @@ -4,12 +4,16 @@ #include "entry.h" #include "instrument.h" +#include "persistent.h" +#include "ranges.h" #include "stalker.h" +#include "stats.h" #include "util.h" extern void __afl_manual_init(); -guint64 entry_point = 0; +guint64 entry_point = 0; +gboolean entry_reached = FALSE; static void entry_launch(void) { @@ -17,7 +21,8 @@ static void entry_launch(void) { __afl_manual_init(); /* Child here */ - instrument_previous_pc = 0; + instrument_on_fork(); + stats_on_fork(); } @@ -50,6 +55,16 @@ static void entry_callout(GumCpuContext *cpu_context, gpointer user_data) { void entry_prologue(GumStalkerIterator *iterator, GumStalkerOutput *output) { UNUSED_PARAMETER(output); + OKF("AFL_ENTRYPOINT reached"); + + if (persistent_start == 0) { + + entry_reached = TRUE; + ranges_exclude(); + stalker_trust(); + + } + gum_stalker_iterator_put_callout(iterator, entry_callout, NULL, 
NULL); } diff --git a/frida_mode/src/instrument/instrument.c b/frida_mode/src/instrument/instrument.c index 2a217d96..67aafa5a 100644 --- a/frida_mode/src/instrument/instrument.c +++ b/frida_mode/src/instrument/instrument.c @@ -1,11 +1,13 @@ #include <unistd.h> #include <sys/shm.h> #include <sys/mman.h> +#include <sys/syscall.h> #include "frida-gumjs.h" #include "config.h" #include "debug.h" +#include "hash.h" #include "asan.h" #include "entry.h" @@ -22,10 +24,15 @@ gboolean instrument_tracing = false; gboolean instrument_optimize = false; gboolean instrument_unique = false; +guint64 instrument_hash_zero = 0; +guint64 instrument_hash_seed = 0; + +gboolean instrument_use_fixed_seed = FALSE; +guint64 instrument_fixed_seed = 0; static GumStalkerTransformer *transformer = NULL; -__thread uint64_t instrument_previous_pc = 0; +__thread guint64 instrument_previous_pc = 0; static GumAddress previous_rip = 0; static u8 * edges_notified = NULL; @@ -49,21 +56,18 @@ static void trace_debug(char *format, ...) 
{ } -__attribute__((hot)) static void on_basic_block(GumCpuContext *context, - gpointer user_data) { +guint64 instrument_get_offset_hash(GumAddress current_rip) { - UNUSED_PARAMETER(context); + guint64 area_offset = hash64((unsigned char *)¤t_rip, + sizeof(GumAddress), instrument_hash_seed); + return area_offset &= MAP_SIZE - 1; - GumAddress current_rip = GUM_ADDRESS(user_data); - GumAddress current_pc; - GumAddress edge; - uint8_t * cursor; - uint64_t value; +} - current_pc = (current_rip >> 4) ^ (current_rip << 8); - current_pc &= MAP_SIZE - 1; +__attribute__((hot)) static void instrument_increment_map(GumAddress edge) { - edge = current_pc ^ instrument_previous_pc; + uint8_t *cursor; + uint64_t value; cursor = &__afl_area_ptr[edge]; value = *cursor; @@ -79,7 +83,21 @@ __attribute__((hot)) static void on_basic_block(GumCpuContext *context, } *cursor = value; - instrument_previous_pc = current_pc >> 1; + +} + +__attribute__((hot)) static void on_basic_block(GumCpuContext *context, + gpointer user_data) { + + UNUSED_PARAMETER(context); + + GumAddress current_rip = GUM_ADDRESS(user_data); + guint64 current_pc = instrument_get_offset_hash(current_rip); + guint64 edge; + + edge = current_pc ^ instrument_previous_pc; + + instrument_increment_map(edge); if (unlikely(instrument_tracing)) { @@ -98,6 +116,9 @@ __attribute__((hot)) static void on_basic_block(GumCpuContext *context, } + instrument_previous_pc = + ((current_pc & (MAP_SIZE - 1) >> 1)) | ((current_pc & 0x1) << 15); + } static void instrument_basic_block(GumStalkerIterator *iterator, @@ -149,7 +170,13 @@ static void instrument_basic_block(GumStalkerIterator *iterator, if (unlikely(begin)) { - prefetch_write(GSIZE_TO_POINTER(instr->address)); + instrument_debug_start(instr->address, output); + + if (likely(entry_reached)) { + + prefetch_write(GSIZE_TO_POINTER(instr->address)); + + } if (likely(!excluded)) { @@ -197,6 +224,8 @@ void instrument_config(void) { instrument_optimize = 
(getenv("AFL_FRIDA_INST_NO_OPTIMIZE") == NULL); instrument_tracing = (getenv("AFL_FRIDA_INST_TRACE") != NULL); instrument_unique = (getenv("AFL_FRIDA_INST_TRACE_UNIQUE") != NULL); + instrument_use_fixed_seed = (getenv("AFL_FRIDA_INST_SEED") != NULL); + instrument_fixed_seed = util_read_num("AFL_FRIDA_INST_SEED"); instrument_debug_config(); asan_config(); @@ -211,16 +240,20 @@ void instrument_init(void) { OKF("Instrumentation - optimize [%c]", instrument_optimize ? 'X' : ' '); OKF("Instrumentation - tracing [%c]", instrument_tracing ? 'X' : ' '); OKF("Instrumentation - unique [%c]", instrument_unique ? 'X' : ' '); + OKF("Instrumentation - fixed seed [%c] [0x%016" G_GINT64_MODIFIER "x]", + instrument_use_fixed_seed ? 'X' : ' ', instrument_fixed_seed); if (instrument_tracing && instrument_optimize) { - FATAL("AFL_FRIDA_INST_TRACE requires AFL_FRIDA_INST_NO_OPTIMIZE"); + WARNF("AFL_FRIDA_INST_TRACE implies AFL_FRIDA_INST_NO_OPTIMIZE"); + instrument_optimize = FALSE; } if (instrument_unique && instrument_optimize) { - FATAL("AFL_FRIDA_INST_TRACE_UNIQUE requires AFL_FRIDA_INST_NO_OPTIMIZE"); + WARNF("AFL_FRIDA_INST_TRACE_UNIQUE implies AFL_FRIDA_INST_NO_OPTIMIZE"); + instrument_optimize = FALSE; } @@ -244,7 +277,8 @@ void instrument_init(void) { g_assert(edges_notified != MAP_FAILED); /* - * Configure the shared memory region to be removed once the process dies. + * Configure the shared memory region to be removed once the process + * dies. */ if (shmctl(shm_id, IPC_RMID, NULL) < 0) { @@ -257,6 +291,31 @@ void instrument_init(void) { } + if (instrument_use_fixed_seed) { + + /* + * This configuration option may be useful for diagnostics or + * debugging. + */ + instrument_hash_seed = instrument_fixed_seed; + + } else { + + /* + * By using a different seed value for the hash, we can make different + * instances have edge collisions in different places when carrying out + * parallel fuzzing. 
The seed itself, doesn't have to be random, it + * just needs to be different for each instance. + */ + instrument_hash_seed = g_get_monotonic_time() ^ + (((guint64)getpid()) << 32) ^ syscall(SYS_gettid); + + } + + OKF("Instrumentation - seed [0x%016" G_GINT64_MODIFIER "x]", + instrument_hash_seed); + instrument_hash_zero = instrument_get_offset_hash(0); + instrument_debug_init(); asan_init(); cmplog_init(); @@ -270,3 +329,9 @@ GumStalkerTransformer *instrument_get_transformer(void) { } +void instrument_on_fork() { + + instrument_previous_pc = instrument_hash_zero; + +} + diff --git a/frida_mode/src/instrument/instrument_arm64.c b/frida_mode/src/instrument/instrument_arm64.c index 17f97c97..cf37e048 100644 --- a/frida_mode/src/instrument/instrument_arm64.c +++ b/frida_mode/src/instrument/instrument_arm64.c @@ -12,15 +12,15 @@ static GumAddress current_log_impl = GUM_ADDRESS(0); static const guint8 afl_log_code[] = { // __afl_area_ptr[current_pc ^ previous_pc]++; - // previous_pc = current_pc >> 1; + // previous_pc = current_pc ROR 1; 0xE1, 0x0B, 0xBF, 0xA9, // stp x1, x2, [sp, -0x10]! 0xE3, 0x13, 0xBF, 0xA9, // stp x3, x4, [sp, -0x10]! 
// x0 = current_pc - 0xe1, 0x01, 0x00, 0x58, // ldr x1, #0x3c, =&__afl_area_ptr + 0x21, 0x02, 0x00, 0x58, // ldr x1, #0x44, =&__afl_area_ptr 0x21, 0x00, 0x40, 0xf9, // ldr x1, [x1] (=__afl_area_ptr) - 0xe2, 0x01, 0x00, 0x58, // ldr x2, #0x3c, =&previous_pc + 0x22, 0x02, 0x00, 0x58, // ldr x2, #0x44, =&previous_pc 0x42, 0x00, 0x40, 0xf9, // ldr x2, [x2] (=previous_pc) // __afl_area_ptr[current_pc ^ previous_pc]++; @@ -30,8 +30,11 @@ static const guint8 afl_log_code[] = { 0x63, 0x00, 0x1f, 0x9a, // adc x3, x3, xzr 0x23, 0x68, 0x22, 0xf8, // str x3, [x1, x2] - // previous_pc = current_pc >> 1; - 0xe0, 0x07, 0x40, 0x8b, // add x0, xzr, x0, LSR #1 + // previous_pc = current_pc ROR 1; + 0xe4, 0x07, 0x40, 0x8b, // add x4, xzr, x0, LSR #1 + 0xe0, 0xff, 0x00, 0x8b, // add x0, xzr, x0, LSL #63 + 0x80, 0xc0, 0x40, 0x8b, // add x0, x4, x0, LSR #48 + 0xe2, 0x00, 0x00, 0x58, // ldr x2, #0x1c, =&previous_pc 0x40, 0x00, 0x00, 0xf9, // str x0, [x2] @@ -54,8 +57,7 @@ void instrument_coverage_optimize(const cs_insn * instr, GumStalkerOutput *output) { guint64 current_pc = instr->address; - guint64 area_offset = (current_pc >> 4) ^ (current_pc << 8); - area_offset &= MAP_SIZE - 1; + guint64 area_offset = instrument_get_offset_hash(GUM_ADDRESS(instr->address)); GumArm64Writer *cw = output->writer.arm64; if (current_log_impl == 0 || diff --git a/frida_mode/src/instrument/instrument_x64.c b/frida_mode/src/instrument/instrument_x64.c index a38b5b14..fec8afbb 100644 --- a/frida_mode/src/instrument/instrument_x64.c +++ b/frida_mode/src/instrument/instrument_x64.c @@ -10,23 +10,21 @@ static GumAddress current_log_impl = GUM_ADDRESS(0); static const guint8 afl_log_code[] = { - // 0xcc, - 0x9c, /* pushfq */ 0x51, /* push rcx */ 0x52, /* push rdx */ - 0x48, 0x8b, 0x0d, 0x28, + 0x48, 0x8b, 0x0d, 0x26, 0x00, 0x00, 0x00, /* mov rcx, sym.&previous_pc */ 0x48, 0x8b, 0x11, /* mov rdx, qword [rcx] */ 0x48, 0x31, 0xfa, /* xor rdx, rdi */ - 0x48, 0x03, 0x15, 0x13, + 0x48, 0x03, 0x15, 0x11, 0x00, 0x00, 
0x00, /* add rdx, sym._afl_area_ptr_ptr */ 0x80, 0x02, 0x01, /* add byte ptr [rdx], 1 */ 0x80, 0x12, 0x00, /* adc byte ptr [rdx], 0 */ - 0x48, 0xd1, 0xef, /* shr rdi, 1 */ + 0x66, 0xd1, 0xcf, /* ror di, 1 */ 0x48, 0x89, 0x39, /* mov qword [rcx], rdi */ 0x5a, /* pop rdx */ @@ -34,7 +32,8 @@ static const guint8 afl_log_code[] = { 0x9d, /* popfq */ 0xc3, /* ret */ - 0x90, 0x90, 0x90 /* nop pad */ + + 0x90 /* Read-only data goes here: */ /* uint8_t* __afl_area_ptr */ @@ -48,12 +47,11 @@ gboolean instrument_is_coverage_optimize_supported(void) { } -void instrument_coverage_optimize(const cs_insn * instr, - GumStalkerOutput *output) { +static guint8 align_pad[] = {0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90}; - guint64 current_pc = instr->address; - guint64 area_offset = (current_pc >> 4) ^ (current_pc << 8); - area_offset &= MAP_SIZE - 1; +static void instrument_coverate_write_function(GumStalkerOutput *output) { + + guint64 misalign = 0; GumX86Writer *cw = output->writer.x86; if (current_log_impl == 0 || @@ -65,6 +63,13 @@ void instrument_coverage_optimize(const cs_insn * instr, gum_x86_writer_put_jmp_near_label(cw, after_log_impl); + misalign = (cw->pc & 0x7); + if (misalign != 0) { + + gum_x86_writer_put_bytes(cw, align_pad, 8 - misalign); + + } + current_log_impl = cw->pc; gum_x86_writer_put_bytes(cw, afl_log_code, sizeof(afl_log_code)); @@ -78,6 +83,15 @@ void instrument_coverage_optimize(const cs_insn * instr, } +} + +void instrument_coverage_optimize(const cs_insn * instr, + GumStalkerOutput *output) { + + GumX86Writer *cw = output->writer.x86; + guint64 area_offset = instrument_get_offset_hash(GUM_ADDRESS(instr->address)); + instrument_coverate_write_function(output); + gum_x86_writer_put_lea_reg_reg_offset(cw, GUM_REG_RSP, GUM_REG_RSP, -GUM_RED_ZONE_SIZE); gum_x86_writer_put_push_reg(cw, GUM_REG_RDI); diff --git a/frida_mode/src/instrument/instrument_x86.c b/frida_mode/src/instrument/instrument_x86.c index 3c3dc272..7bf48f96 100644 --- 
a/frida_mode/src/instrument/instrument_x86.c +++ b/frida_mode/src/instrument/instrument_x86.c @@ -30,7 +30,8 @@ static void instrument_coverage_function(GumX86Writer *cw) { uint8_t adc_byte_ptr_edx_0[] = {0x80, 0x12, 0x00}; gum_x86_writer_put_bytes(cw, adc_byte_ptr_edx_0, sizeof(adc_byte_ptr_edx_0)); - gum_x86_writer_put_shr_reg_u8(cw, GUM_REG_EDI, 1); + uint8_t ror_di_1[] = {0x66, 0xd1, 0xcf}; + gum_x86_writer_put_bytes(cw, ror_di_1, sizeof(ror_di_1)); gum_x86_writer_put_mov_reg_ptr_reg(cw, GUM_REG_ECX, GUM_REG_EDI); gum_x86_writer_put_pop_reg(cw, GUM_REG_EDX); @@ -46,15 +47,8 @@ gboolean instrument_is_coverage_optimize_supported(void) { } -void instrument_coverage_optimize(const cs_insn * instr, - GumStalkerOutput *output) { - - UNUSED_PARAMETER(instr); - UNUSED_PARAMETER(output); +static void instrument_coverate_write_function(GumStalkerOutput *output) { - guint64 current_pc = instr->address; - guint64 area_offset = (current_pc >> 4) ^ (current_pc << 8); - area_offset &= MAP_SIZE - 1; GumX86Writer *cw = output->writer.x86; if (current_log_impl == 0 || @@ -73,7 +67,15 @@ void instrument_coverage_optimize(const cs_insn * instr, } - // gum_x86_writer_put_breakpoint(cw); +} + +void instrument_coverage_optimize(const cs_insn * instr, + GumStalkerOutput *output) { + + GumX86Writer *cw = output->writer.x86; + guint64 area_offset = instrument_get_offset_hash(GUM_ADDRESS(instr->address)); + instrument_coverate_write_function(output); + gum_x86_writer_put_push_reg(cw, GUM_REG_EDI); gum_x86_writer_put_mov_reg_address(cw, GUM_REG_EDI, area_offset); gum_x86_writer_put_call_address(cw, current_log_impl); diff --git a/frida_mode/src/js/api.js b/frida_mode/src/js/api.js index 4cb04704..b8f2d39a 100644 --- a/frida_mode/src/js/api.js +++ b/frida_mode/src/js/api.js @@ -100,6 +100,12 @@ class Afl { Afl.jsApiSetInstrumentTrace(); } /** + * See `AFL_FRIDA_INST_JIT`. + */ + static setInstrumentJit() { + Afl.jsApiSetInstrumentJit(); + } + /** * See `AFL_INST_LIBS`. 
*/ static setInstrumentLibraries() { @@ -111,6 +117,12 @@ class Afl { static setInstrumentNoOptimize() { Afl.jsApiSetInstrumentNoOptimize(); } + /* + * See `AFL_FRIDA_INST_SEED` + */ + static setInstrumentSeed(seed) { + Afl.jsApiSetInstrumentSeed(seed); + } /** * See `AFL_FRIDA_INST_TRACE_UNIQUE`. */ @@ -222,8 +234,10 @@ Afl.jsApiError = Afl.jsApiGetFunction("js_api_error", "void", ["pointer"]); Afl.jsApiSetDebugMaps = Afl.jsApiGetFunction("js_api_set_debug_maps", "void", []); Afl.jsApiSetEntryPoint = Afl.jsApiGetFunction("js_api_set_entrypoint", "void", ["pointer"]); Afl.jsApiSetInstrumentDebugFile = Afl.jsApiGetFunction("js_api_set_instrument_debug_file", "void", ["pointer"]); +Afl.jsApiSetInstrumentJit = Afl.jsApiGetFunction("js_api_set_instrument_jit", "void", []); Afl.jsApiSetInstrumentLibraries = Afl.jsApiGetFunction("js_api_set_instrument_libraries", "void", []); Afl.jsApiSetInstrumentNoOptimize = Afl.jsApiGetFunction("js_api_set_instrument_no_optimize", "void", []); +Afl.jsApiSetInstrumentSeed = Afl.jsApiGetFunction("js_api_set_instrument_seed", "void", ["uint64"]); Afl.jsApiSetInstrumentTrace = Afl.jsApiGetFunction("js_api_set_instrument_trace", "void", []); Afl.jsApiSetInstrumentTraceUnique = Afl.jsApiGetFunction("js_api_set_instrument_trace_unique", "void", []); Afl.jsApiSetPersistentAddress = Afl.jsApiGetFunction("js_api_set_persistent_address", "void", ["pointer"]); diff --git a/frida_mode/src/js/js.c b/frida_mode/src/js/js.c index ed378d2c..e3cd4933 100644 --- a/frida_mode/src/js/js.c +++ b/frida_mode/src/js/js.c @@ -9,12 +9,15 @@ static char * js_script = NULL; gboolean js_done = FALSE; js_api_stalker_callback_t js_user_callback = NULL; -static gchar * filename = "afl.js"; -static gchar * contents; -static GumScriptBackend *backend; -static GCancellable * cancellable = NULL; -static GError * error = NULL; -static GumScript * script; +static gchar * filename = "afl.js"; +static gchar * contents; +static GumScriptBackend * backend; +static GCancellable 
* cancellable = NULL; +static GError * error = NULL; +static GumScript * script; +static GumScriptScheduler *scheduler; +static GMainContext * context; +static GMainLoop * main_loop; static void js_msg(GumScript *script, const gchar *message, GBytes *data, gpointer user_data) { @@ -80,31 +83,48 @@ static void js_print_script(gchar *source) { } -void js_start(void) { +static void load_cb(GObject *source_object, GAsyncResult *result, + gpointer user_data) { - GMainContext *context; + UNUSED_PARAMETER(source_object); + UNUSED_PARAMETER(user_data); + gum_script_load_finish(script, result); + if (error != NULL) { FATAL("Failed to load script - %s", error->message); } - gchar *source = js_get_script(); - if (source == NULL) { return; } - js_print_script(source); +} - backend = gum_script_backend_obtain_qjs(); +static void create_cb(GObject *source_object, GAsyncResult *result, + gpointer user_data) { - script = gum_script_backend_create_sync(backend, "example", source, - cancellable, &error); + UNUSED_PARAMETER(source_object); + UNUSED_PARAMETER(user_data); + script = gum_script_backend_create_finish(backend, result, &error); + if (error != NULL) { FATAL("Failed to create script: %s", error->message); } - if (error != NULL) { + gum_script_set_message_handler(script, js_msg, NULL, NULL); - g_printerr("%s\n", error->message); - FATAL("Error processing script"); + gum_script_load(script, cancellable, load_cb, NULL); - } +} - gum_script_set_message_handler(script, js_msg, NULL, NULL); +void js_start(void) { + + gchar *source = js_get_script(); + if (source == NULL) { return; } + js_print_script(source); + + scheduler = gum_script_backend_get_scheduler(); + gum_script_scheduler_disable_background_thread(scheduler); + + backend = gum_script_backend_obtain_qjs(); + + context = gum_script_scheduler_get_js_context(scheduler); + main_loop = g_main_loop_new(context, true); + g_main_context_push_thread_default(context); - gum_script_load_sync(script, cancellable); + 
gum_script_backend_create(backend, "example", source, cancellable, create_cb, + &error); - context = g_main_context_get_thread_default(); while (g_main_context_pending(context)) g_main_context_iteration(context, FALSE); diff --git a/frida_mode/src/js/js_api.c b/frida_mode/src/js/js_api.c index 91dccab2..930a6dc0 100644 --- a/frida_mode/src/js/js_api.c +++ b/frida_mode/src/js/js_api.c @@ -9,142 +9,191 @@ #include "ranges.h" #include "stats.h" #include "util.h" - -void js_api_done() { +__attribute__((visibility("default"))) void js_api_done() { js_done = TRUE; } -void js_api_error(char *msg) { +__attribute__((visibility("default"))) void js_api_error(char *msg) { FATAL("%s", msg); } -void js_api_set_entrypoint(void *address) { +__attribute__((visibility("default"))) void js_api_set_entrypoint( + void *address) { + + if (address == NULL) { + + js_api_error("js_api_set_entrypoint called with NULL"); + + } entry_point = GPOINTER_TO_SIZE(address); } -void js_api_set_persistent_address(void *address) { +__attribute__((visibility("default"))) void js_api_set_persistent_address( + void *address) { + + if (address == NULL) { + + js_api_error("js_api_set_persistent_address called with NULL"); + + } persistent_start = GPOINTER_TO_SIZE(address); } -void js_api_set_persistent_return(void *address) { +__attribute__((visibility("default"))) void js_api_set_persistent_return( + void *address) { + + if (address == NULL) { + + js_api_error("js_api_set_persistent_return called with NULL"); + + } persistent_ret = GPOINTER_TO_SIZE(address); } -void js_api_set_persistent_count(uint64_t count) { +__attribute__((visibility("default"))) void js_api_set_persistent_count( + uint64_t count) { persistent_count = count; } -void js_api_set_persistent_debug() { +__attribute__((visibility("default"))) void js_api_set_persistent_debug() { persistent_debug = TRUE; } -void js_api_set_debug_maps() { +__attribute__((visibility("default"))) void js_api_set_debug_maps() { ranges_debug_maps = TRUE; } -void 
js_api_add_include_range(void *address, gsize size) { +__attribute__((visibility("default"))) void js_api_add_include_range( + void *address, gsize size) { GumMemoryRange range = {.base_address = GUM_ADDRESS(address), .size = size}; ranges_add_include(&range); } -void js_api_add_exclude_range(void *address, gsize size) { +__attribute__((visibility("default"))) void js_api_add_exclude_range( + void *address, gsize size) { GumMemoryRange range = {.base_address = GUM_ADDRESS(address), .size = size}; ranges_add_exclude(&range); } -void js_api_set_instrument_libraries() { +__attribute__((visibility("default"))) void js_api_set_instrument_jit() { + + ranges_inst_jit = TRUE; + +} + +__attribute__((visibility("default"))) void js_api_set_instrument_libraries() { ranges_inst_libs = TRUE; } -void js_api_set_instrument_debug_file(char *path) { +__attribute__((visibility("default"))) void js_api_set_instrument_debug_file( + char *path) { instrument_debug_filename = g_strdup(path); } -void js_api_set_prefetch_disable(void) { +__attribute__((visibility("default"))) void js_api_set_prefetch_disable(void) { prefetch_enable = FALSE; } -void js_api_set_instrument_no_optimize(void) { +__attribute__((visibility("default"))) void js_api_set_instrument_no_optimize( + void) { instrument_optimize = FALSE; } -void js_api_set_instrument_trace(void) { +__attribute__((visibility("default"))) void js_api_set_instrument_seed( + guint64 seed) { + + instrument_use_fixed_seed = TRUE; + instrument_fixed_seed = seed; + +} + +__attribute__((visibility("default"))) void js_api_set_instrument_trace(void) { instrument_tracing = TRUE; } -void js_api_set_instrument_trace_unique(void) { +__attribute__((visibility("default"))) void js_api_set_instrument_trace_unique( + void) { instrument_unique = TRUE; } -void js_api_set_stdout(char *file) { +__attribute__((visibility("default"))) void js_api_set_stdout(char *file) { output_stdout = g_strdup(file); } -void js_api_set_stderr(char *file) { 
+__attribute__((visibility("default"))) void js_api_set_stderr(char *file) { output_stderr = g_strdup(file); } -void js_api_set_stats_file(char *file) { +__attribute__((visibility("default"))) void js_api_set_stats_file(char *file) { stats_filename = g_strdup(file); } -void js_api_set_stats_interval(uint64_t interval) { +__attribute__((visibility("default"))) void js_api_set_stats_interval( + uint64_t interval) { stats_interval = interval; } -void js_api_set_stats_transitions() { +__attribute__((visibility("default"))) void js_api_set_stats_transitions() { stats_transitions = TRUE; } -void js_api_set_persistent_hook(void *address) { +__attribute__((visibility("default"))) void js_api_set_persistent_hook( + void *address) { + + if (address == NULL) { + + js_api_error("js_api_set_persistent_hook called with NULL"); + + } persistent_hook = address; } -void js_api_set_stalker_callback(const js_api_stalker_callback_t callback) { +__attribute__((visibility("default"))) void js_api_set_stalker_callback( + const js_api_stalker_callback_t callback) { js_user_callback = callback; diff --git a/frida_mode/src/main.c b/frida_mode/src/main.c index 85b0bbf3..91687046 100644 --- a/frida_mode/src/main.c +++ b/frida_mode/src/main.c @@ -163,7 +163,7 @@ static void afl_print_env(void) { } -void afl_frida_start(void) { +__attribute__((visibility("default"))) void afl_frida_start(void) { afl_print_cmdline(); afl_print_env(); diff --git a/frida_mode/src/persistent/persistent.c b/frida_mode/src/persistent/persistent.c index bcc59ea7..639a694e 100644 --- a/frida_mode/src/persistent/persistent.c +++ b/frida_mode/src/persistent/persistent.c @@ -5,7 +5,10 @@ #include "config.h" #include "debug.h" +#include "entry.h" #include "persistent.h" +#include "ranges.h" +#include "stalker.h" #include "util.h" int __afl_sharedmem_fuzzing = 0; @@ -83,3 +86,20 @@ void persistent_init(void) { } +void persistent_prologue(GumStalkerOutput *output) { + + OKF("AFL_FRIDA_PERSISTENT_ADDR reached"); + 
entry_reached = TRUE; + ranges_exclude(); + stalker_trust(); + persistent_prologue_arch(output); + +} + +void persistent_epilogue(GumStalkerOutput *output) { + + OKF("AFL_FRIDA_PERSISTENT_RET reached"); + persistent_epilogue_arch(output); + +} + diff --git a/frida_mode/src/persistent/persistent_arm32.c b/frida_mode/src/persistent/persistent_arm32.c index f12f1af8..769f1505 100644 --- a/frida_mode/src/persistent/persistent_arm32.c +++ b/frida_mode/src/persistent/persistent_arm32.c @@ -61,14 +61,14 @@ gboolean persistent_is_supported(void) { } -void persistent_prologue(GumStalkerOutput *output) { +void persistent_prologue_arch(GumStalkerOutput *output) { UNUSED_PARAMETER(output); FATAL("Persistent mode not supported on this architecture"); } -void persistent_epilogue(GumStalkerOutput *output) { +void persistent_epilogue_arch(GumStalkerOutput *output) { UNUSED_PARAMETER(output); FATAL("Persistent mode not supported on this architecture"); diff --git a/frida_mode/src/persistent/persistent_arm64.c b/frida_mode/src/persistent/persistent_arm64.c index 003f058a..3cd61cd5 100644 --- a/frida_mode/src/persistent/persistent_arm64.c +++ b/frida_mode/src/persistent/persistent_arm64.c @@ -237,7 +237,7 @@ static void instrument_exit(GumArm64Writer *cw) { static int instrument_afl_persistent_loop_func(void) { int ret = __afl_persistent_loop(persistent_count); - instrument_previous_pc = 0; + instrument_previous_pc = instrument_hash_zero; return ret; } @@ -299,7 +299,7 @@ static void instrument_persitent_save_lr(GumArm64Writer *cw) { } -void persistent_prologue(GumStalkerOutput *output) { +void persistent_prologue_arch(GumStalkerOutput *output) { /* * SAVE REGS @@ -366,7 +366,7 @@ void persistent_prologue(GumStalkerOutput *output) { } -void persistent_epilogue(GumStalkerOutput *output) { +void persistent_epilogue_arch(GumStalkerOutput *output) { GumArm64Writer *cw = output->writer.arm64; diff --git a/frida_mode/src/persistent/persistent_x64.c 
b/frida_mode/src/persistent/persistent_x64.c index b2186db1..c0bd9a09 100644 --- a/frida_mode/src/persistent/persistent_x64.c +++ b/frida_mode/src/persistent/persistent_x64.c @@ -174,7 +174,7 @@ static void instrument_exit(GumX86Writer *cw) { static int instrument_afl_persistent_loop_func(void) { int ret = __afl_persistent_loop(persistent_count); - instrument_previous_pc = 0; + instrument_previous_pc = instrument_hash_zero; return ret; } @@ -244,7 +244,7 @@ static void instrument_persitent_save_ret(GumX86Writer *cw) { } -void persistent_prologue(GumStalkerOutput *output) { +void persistent_prologue_arch(GumStalkerOutput *output) { /* * SAVE REGS @@ -313,7 +313,7 @@ void persistent_prologue(GumStalkerOutput *output) { } -void persistent_epilogue(GumStalkerOutput *output) { +void persistent_epilogue_arch(GumStalkerOutput *output) { GumX86Writer *cw = output->writer.x86; diff --git a/frida_mode/src/persistent/persistent_x86.c b/frida_mode/src/persistent/persistent_x86.c index f50bccb0..b911676a 100644 --- a/frida_mode/src/persistent/persistent_x86.c +++ b/frida_mode/src/persistent/persistent_x86.c @@ -130,7 +130,7 @@ static void instrument_exit(GumX86Writer *cw) { static int instrument_afl_persistent_loop_func(void) { int ret = __afl_persistent_loop(persistent_count); - instrument_previous_pc = 0; + instrument_previous_pc = instrument_hash_zero; return ret; } @@ -184,7 +184,7 @@ static void instrument_persitent_save_ret(GumX86Writer *cw) { } -void persistent_prologue(GumStalkerOutput *output) { +void persistent_prologue_arch(GumStalkerOutput *output) { /* * SAVE REGS @@ -251,7 +251,7 @@ void persistent_prologue(GumStalkerOutput *output) { } -void persistent_epilogue(GumStalkerOutput *output) { +void persistent_epilogue_arch(GumStalkerOutput *output) { GumX86Writer *cw = output->writer.x86; diff --git a/frida_mode/src/ranges.c b/frida_mode/src/ranges.c index 534f202b..6fdd65a7 100644 --- a/frida_mode/src/ranges.c +++ b/frida_mode/src/ranges.c @@ -19,9 +19,11 @@ typedef 
struct { gboolean ranges_debug_maps = FALSE; gboolean ranges_inst_libs = FALSE; +gboolean ranges_inst_jit = FALSE; static GArray *module_ranges = NULL; static GArray *libs_ranges = NULL; +static GArray *jit_ranges = NULL; static GArray *include_ranges = NULL; static GArray *exclude_ranges = NULL; static GArray *ranges = NULL; @@ -145,11 +147,13 @@ static void convert_name_token(gchar *token, GumMemoryRange *range) { static void convert_token(gchar *token, GumMemoryRange *range) { - if (g_strrstr(token, "-")) { + if (g_str_has_prefix(token, "0x")) { convert_address_token(token, range); - } else { + } + + else { convert_name_token(token, range); @@ -172,19 +176,27 @@ static gboolean print_ranges_callback(const GumRangeDetails *details, gpointer user_data) { UNUSED_PARAMETER(user_data); + if (details->file == NULL) { - OKF("MAP - 0x%016" G_GINT64_MODIFIER "x - 0x%016" G_GINT64_MODIFIER "X", + OKF("MAP - 0x%016" G_GINT64_MODIFIER "x - 0x%016" G_GINT64_MODIFIER + "X %c%c%c", details->range->base_address, - details->range->base_address + details->range->size); + details->range->base_address + details->range->size, + details->protection & GUM_PAGE_READ ? 'R' : '-', + details->protection & GUM_PAGE_WRITE ? 'W' : '-', + details->protection & GUM_PAGE_EXECUTE ? 'X' : '-'); } else { OKF("MAP - 0x%016" G_GINT64_MODIFIER "x - 0x%016" G_GINT64_MODIFIER - "X %s(0x%016" G_GINT64_MODIFIER "x)", + "X %c%c%c %s(0x%016" G_GINT64_MODIFIER "x)", details->range->base_address, details->range->base_address + details->range->size, - details->file->path, details->file->offset); + details->protection & GUM_PAGE_READ ? 'R' : '-', + details->protection & GUM_PAGE_WRITE ? 'W' : '-', + details->protection & GUM_PAGE_EXECUTE ? 
'X' : '-', details->file->path, + details->file->offset); } @@ -329,6 +341,39 @@ static GArray *collect_libs_ranges(void) { } +static gboolean collect_jit_ranges_callback(const GumRangeDetails *details, + gpointer user_data) { + + GArray *ranges = (GArray *)user_data; + + /* If the executable code isn't backed by a file, it's probably JIT */ + if (details->file == NULL) { + + GumMemoryRange range = *details->range; + g_array_append_val(ranges, range); + + } + + return TRUE; + +} + +static GArray *collect_jit_ranges(void) { + + GArray *result; + result = g_array_new(false, false, sizeof(GumMemoryRange)); + if (!ranges_inst_jit) { + + gum_process_enumerate_ranges(GUM_PAGE_EXECUTE, collect_jit_ranges_callback, + result); + + } + + print_ranges("JIT", result); + return result; + +} + static gboolean intersect_range(GumMemoryRange *rr, GumMemoryRange *ra, GumMemoryRange *rb) { @@ -508,6 +553,14 @@ void ranges_config(void) { if (getenv("AFL_FRIDA_DEBUG_MAPS") != NULL) { ranges_debug_maps = TRUE; } if (getenv("AFL_INST_LIBS") != NULL) { ranges_inst_libs = TRUE; } + if (getenv("AFL_FRIDA_INST_JIT") != NULL) { ranges_inst_jit = TRUE; } + + if (ranges_debug_maps) { + + gum_process_enumerate_ranges(GUM_PAGE_NO_ACCESS, print_ranges_callback, + NULL); + + } include_ranges = collect_ranges("AFL_FRIDA_INST_RANGES"); exclude_ranges = collect_ranges("AFL_FRIDA_EXCLUDE_RANGES"); @@ -521,13 +574,13 @@ void ranges_init(void) { GArray * step2; GArray * step3; GArray * step4; + GArray * step5; - if (ranges_debug_maps) { - - gum_process_enumerate_ranges(GUM_PAGE_NO_ACCESS, print_ranges_callback, - NULL); + OKF("Ranges - Instrument jit [%c]", ranges_inst_jit ? 'X' : ' '); + OKF("Ranges - Instrument libraries [%c]", ranges_inst_libs ? 'X' : ' '); - } + print_ranges("AFL_FRIDA_INST_RANGES", include_ranges); + print_ranges("AFL_FRIDA_EXCLUDE_RANGES", exclude_ranges); OKF("Ranges - Instrument libraries [%c]", ranges_inst_libs ? 
'X' : ' '); @@ -536,6 +589,7 @@ void ranges_init(void) { module_ranges = collect_module_ranges(); libs_ranges = collect_libs_ranges(); + jit_ranges = collect_jit_ranges(); /* If include ranges is empty, then assume everything is included */ if (include_ranges->len == 0) { @@ -558,17 +612,20 @@ void ranges_init(void) { step3 = subtract_ranges(step2, exclude_ranges); print_ranges("step3", step3); + step4 = subtract_ranges(step3, jit_ranges); + print_ranges("step4", step4); + /* - * After step3, we have the total ranges to be instrumented, we now subtract + * After step4, we have the total ranges to be instrumented, we now subtract * that from the original ranges of the modules to configure stalker. */ + step5 = subtract_ranges(module_ranges, step4); + print_ranges("step5", step5); - step4 = subtract_ranges(module_ranges, step3); - print_ranges("step4", step4); - - ranges = merge_ranges(step4); + ranges = merge_ranges(step5); print_ranges("final", ranges); + g_array_free(step5, TRUE); g_array_free(step4, TRUE); g_array_free(step3, TRUE); g_array_free(step2, TRUE); diff --git a/frida_mode/src/stalker.c b/frida_mode/src/stalker.c index 98483cde..5df0386f 100644 --- a/frida_mode/src/stalker.c +++ b/frida_mode/src/stalker.c @@ -38,7 +38,7 @@ void stalker_init(void) { stalker = gum_stalker_new(); if (stalker == NULL) { FATAL("Failed to initialize stalker"); } - gum_stalker_set_trust_threshold(stalker, 0); + gum_stalker_set_trust_threshold(stalker, -1); /* *NEVER* stalk the stalker, only bad things will ever come of this! 
*/ gum_process_enumerate_ranges(GUM_PAGE_EXECUTE, stalker_exclude_self, NULL); @@ -59,3 +59,9 @@ void stalker_start(void) { } +void stalker_trust(void) { + + gum_stalker_set_trust_threshold(stalker, 0); + +} + diff --git a/frida_mode/src/stats/stats.c b/frida_mode/src/stats/stats.c index 0dd8be70..91a58741 100644 --- a/frida_mode/src/stats/stats.c +++ b/frida_mode/src/stats/stats.c @@ -178,10 +178,12 @@ void stats_write(void) { } -static void stats_maybe_write(void) { +void stats_on_fork(void) { guint64 current_time; + if (stats_filename == NULL) { return; } + if (stats_interval == 0) { return; } current_time = g_get_monotonic_time(); @@ -208,7 +210,5 @@ void stats_collect(const cs_insn *instr, gboolean begin) { stats_collect_arch(instr); - stats_maybe_write(); - } diff --git a/frida_mode/src/stats/stats_x64.c b/frida_mode/src/stats/stats_x64.c index 7c3a90d7..11464a2a 100644 --- a/frida_mode/src/stats/stats_x64.c +++ b/frida_mode/src/stats/stats_x64.c @@ -31,6 +31,9 @@ typedef struct { guint64 num_rip_relative; + guint64 num_rip_relative_type[X86_INS_ENDING]; + char name_rip_relative_type[X86_INS_ENDING][CS_MNEMONIC_SIZE]; + } stats_data_arch_t; gboolean stats_is_supported_arch(void) { @@ -136,6 +139,18 @@ void stats_write_arch(void) { stats_data_arch->num_rip_relative, (stats_data_arch->num_rip_relative * 100 / num_instructions)); + for (size_t i = 0; i < X86_INS_ENDING; i++) { + + if (stats_data_arch->num_rip_relative_type[i] != 0) { + + stats_print(" %10d %s\n", + stats_data_arch->num_rip_relative_type[i], + stats_data_arch->name_rip_relative_type[i]); + + } + + } + stats_print("\n"); stats_print("\n"); @@ -256,6 +271,9 @@ static void stats_collect_rip_relative_arch(const cs_insn *instr) { if (rm != 5) { return; } stats_data_arch->num_rip_relative++; + stats_data_arch->num_rip_relative_type[instr->id]++; + memcpy(stats_data_arch->name_rip_relative_type[instr->id], instr->mnemonic, + CS_MNEMONIC_SIZE); } diff --git a/frida_mode/test/deferred/GNUmakefile 
b/frida_mode/test/deferred/GNUmakefile index ae580e3f..f7520051 100644 --- a/frida_mode/test/deferred/GNUmakefile +++ b/frida_mode/test/deferred/GNUmakefile @@ -10,7 +10,7 @@ TESTINSTSRC:=$(PWD)testinstr.c QEMU_OUT:=$(BUILD_DIR)qemu-out FRIDA_OUT:=$(BUILD_DIR)frida-out -GET_SYMBOL_ADDR:=$(ROOT)frida_mode/test/png/persistent/get_symbol_addr.py +GET_SYMBOL_ADDR:=$(ROOT)frida_mode/util/get_symbol_addr.sh ifndef ARCH @@ -24,17 +24,21 @@ ifeq "$(ARCH)" "i686" endif endif +ifeq "$(shell uname)" "Darwin" +TEST_BIN_LDFLAGS:=-Wl,-no_pie +endif + ARCH=$(shell uname -m) ifeq "$(ARCH)" "aarch64" - AFL_ENTRYPOINT=$(shell $(GET_SYMBOL_ADDR) -f $(TESTINSTBIN) -s run -b 0x0000aaaaaaaaa000) + AFL_ENTRYPOINT=$(shell $(GET_SYMBOL_ADDR) $(TESTINSTBIN) run 0x0000aaaaaaaaa000) endif ifeq "$(ARCH)" "x86_64" - AFL_ENTRYPOINT=$(shell $(GET_SYMBOL_ADDR) -f $(TESTINSTBIN) -s run -b 0x0000555555554000) + AFL_ENTRYPOINT=$(shell $(GET_SYMBOL_ADDR) $(TESTINSTBIN) run 0x0000555555554000) endif ifeq "$(ARCH)" "x86" - AFL_ENTRYPOINT=$(shell $(GET_SYMBOL_ADDR) -f $(TESTINSTBIN) -s run -b 0x56555000) + AFL_ENTRYPOINT=$(shell $(GET_SYMBOL_ADDR) $(TESTINSTBIN) run 0x56555000) endif .PHONY: all clean frida @@ -55,7 +59,7 @@ $(TESTINSTR_DATA_FILE): | $(TESTINSTR_DATA_DIR) echo -n "000" > $@ $(TESTINSTBIN): $(TESTINSTSRC) | $(BUILD_DIR) - $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< + $(CC) $(CFLAGS) $(LDFLAGS) $(TEST_BIN_LDFLAGS) -o $@ $< clean: rm -rf $(BUILD_DIR) diff --git a/frida_mode/test/deferred/testinstr.c b/frida_mode/test/deferred/testinstr.c index 8b3688d7..c7a05ac5 100644 --- a/frida_mode/test/deferred/testinstr.c +++ b/frida_mode/test/deferred/testinstr.c @@ -51,7 +51,6 @@ int run(char *file) { fd = open(file, O_RDONLY); if (fd < 0) { - perror("open"); break; diff --git a/frida_mode/test/entry_point/GNUmakefile b/frida_mode/test/entry_point/GNUmakefile index c99bcecb..5453c1ad 100644 --- a/frida_mode/test/entry_point/GNUmakefile +++ b/frida_mode/test/entry_point/GNUmakefile @@ -10,7 +10,7 @@ 
TESTINSTSRC:=$(PWD)testinstr.c QEMU_OUT:=$(BUILD_DIR)qemu-out FRIDA_OUT:=$(BUILD_DIR)frida-out -GET_SYMBOL_ADDR:=$(ROOT)frida_mode/test/png/persistent/get_symbol_addr.py +GET_SYMBOL_ADDR:=$(ROOT)frida_mode/util/get_symbol_addr.sh ifndef ARCH @@ -24,17 +24,21 @@ ifeq "$(ARCH)" "i686" endif endif +ifeq "$(shell uname)" "Darwin" +TEST_BIN_LDFLAGS:=-Wl,-no_pie +endif + ARCH=$(shell uname -m) ifeq "$(ARCH)" "aarch64" - AFL_ENTRYPOINT=$(shell $(GET_SYMBOL_ADDR) -f $(TESTINSTBIN) -s run -b 0x0000aaaaaaaaa000) + AFL_ENTRYPOINT=$(shell $(GET_SYMBOL_ADDR) $(TESTINSTBIN) run 0x0000aaaaaaaaa000) endif ifeq "$(ARCH)" "x86_64" - AFL_ENTRYPOINT=$(shell $(GET_SYMBOL_ADDR) -f $(TESTINSTBIN) -s run -b 0x0000555555554000) + AFL_ENTRYPOINT=$(shell $(GET_SYMBOL_ADDR) $(TESTINSTBIN) run 0x0000555555554000) endif ifeq "$(ARCH)" "x86" - AFL_ENTRYPOINT=$(shell $(GET_SYMBOL_ADDR) -f $(TESTINSTBIN) -s run -b 0x56555000) + AFL_ENTRYPOINT=$(shell $(GET_SYMBOL_ADDR) $(TESTINSTBIN) run 0x56555000) endif .PHONY: all clean qemu frida @@ -55,7 +59,7 @@ $(TESTINSTR_DATA_FILE): | $(TESTINSTR_DATA_DIR) echo -n "000" > $@ $(TESTINSTBIN): $(TESTINSTSRC) | $(BUILD_DIR) - $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< + $(CC) $(CFLAGS) $(LDFLAGS) $(TEST_BIN_LDFLAGS) -o $@ $< clean: rm -rf $(BUILD_DIR) diff --git a/frida_mode/test/jpeg/GNUmakefile b/frida_mode/test/jpeg/GNUmakefile index e3a8f321..1c124743 100644 --- a/frida_mode/test/jpeg/GNUmakefile +++ b/frida_mode/test/jpeg/GNUmakefile @@ -2,7 +2,7 @@ PWD:=$(shell pwd)/ ROOT:=$(shell realpath $(PWD)../../..)/ BUILD_DIR:=$(PWD)build/ -AFLPP_DRIVER_HOOK_OBJ=$(ROOT)frida_mode/build/hook.so +AFLPP_FRIDA_DRIVER_HOOK_OBJ=$(ROOT)frida_mode/build/frida_hook.so LIBJPEG_BUILD_DIR:=$(BUILD_DIR)libjpeg/ HARNESS_BUILD_DIR:=$(BUILD_DIR)harness/ @@ -26,7 +26,7 @@ LDFLAGS += -lpthread TEST_BIN:=$(BUILD_DIR)test ifeq "$(shell uname)" "Darwin" -TEST_BIN_LDFLAGS:=-undefined dynamic_lookup +TEST_BIN_LDFLAGS:=-undefined dynamic_lookup -Wl,-no_pie endif TEST_DATA_DIR:=$(BUILD_DIR)in/ 
@@ -46,16 +46,18 @@ ifeq "$(ARCH)" "i686" endif endif +GET_SYMBOL_ADDR:=$(ROOT)frida_mode/util/get_symbol_addr.sh + ifeq "$(ARCH)" "aarch64" - AFL_FRIDA_PERSISTENT_ADDR=$(shell $(PWD)get_symbol_addr.py -f $(TEST_BIN) -s LLVMFuzzerTestOneInput -b 0x0000aaaaaaaaa000) + AFL_FRIDA_PERSISTENT_ADDR=$(shell $(GET_SYMBOL_ADDR) $(TEST_BIN) LLVMFuzzerTestOneInput 0x0000aaaaaaaaa000) endif ifeq "$(ARCH)" "x86_64" - AFL_FRIDA_PERSISTENT_ADDR=$(shell $(PWD)get_symbol_addr.py -f $(TEST_BIN) -s LLVMFuzzerTestOneInput -b 0x0000555555554000) + AFL_FRIDA_PERSISTENT_ADDR=$(shell $(GET_SYMBOL_ADDR) $(TEST_BIN) LLVMFuzzerTestOneInput 0x0000555555554000) endif ifeq "$(ARCH)" "x86" - AFL_FRIDA_PERSISTENT_ADDR=$(shell $(PWD)get_symbol_addr.py -f $(TEST_BIN) -s LLVMFuzzerTestOneInput -b 0x56555000) + AFL_FRIDA_PERSISTENT_ADDR=$(shell $(GET_SYMBOL_ADDR) $(TEST_BIN) LLVMFuzzerTestOneInput 0x56555000) endif .PHONY: all clean frida hook @@ -77,7 +79,7 @@ $(HARNESS_FILE): | $(HARNESS_BUILD_DIR) wget -O $@ $(HARNESS_URL) $(HARNESS_OBJ): $(HARNESS_FILE) - $(CC) $(CXXFLAGS) $(LDFLAGS) -o $@ -c $< + $(CC) $(CXXFLAGS) $(LDFLAGS) $(TEST_BIN_LDFLAGS) -o $@ -c $< ######### JPEGTEST ######## @@ -130,7 +132,7 @@ $(TEST_DATA_FILE): | $(TEST_DATA_DIR) clean: rm -rf $(BUILD_DIR) -frida: $(TEST_BIN) $(AFLPP_DRIVER_HOOK_OBJ) $(TEST_DATA_FILE) +frida: $(TEST_BIN) $(AFLPP_FRIDA_DRIVER_HOOK_OBJ) $(TEST_DATA_FILE) AFL_DEBUG_CHILD=1 \ AFL_DISABLE_TRIM=1 \ AFL_FRIDA_PERSISTENT_CNT=1000000 \ @@ -142,7 +144,7 @@ frida: $(TEST_BIN) $(AFLPP_DRIVER_HOOK_OBJ) $(TEST_DATA_FILE) AFL_SKIP_CPUFREQ=1 \ AFL_SKIP_CRASHES=1 \ AFL_TESTCACHE_SIZE=2 \ - AFL_FRIDA_PERSISTENT_HOOK=$(AFLPP_DRIVER_HOOK_OBJ) \ + AFL_FRIDA_PERSISTENT_HOOK=$(AFLPP_FRIDA_DRIVER_HOOK_OBJ) \ AFL_FRIDA_PERSISTENT_ADDR=$(AFL_FRIDA_PERSISTENT_ADDR) \ AFL_ENTRYPOINT=$(AFL_FRIDA_PERSISTENT_ADDR) \ $(ROOT)afl-fuzz \ diff --git a/frida_mode/test/js/GNUmakefile b/frida_mode/test/js/GNUmakefile index af40c1c4..ee8d4ebc 100644 --- a/frida_mode/test/js/GNUmakefile +++ 
b/frida_mode/test/js/GNUmakefile @@ -13,7 +13,11 @@ TESTINSTSRC2:=$(PWD)test2.c QEMU_OUT:=$(BUILD_DIR)qemu-out FRIDA_OUT:=$(BUILD_DIR)frida-out -.PHONY: all 32 clean qemu frida +ifeq "$(shell uname)" "Darwin" +AFL_PRELOAD=/System/Library/Frameworks/CoreFoundation.framework/CoreFoundation +endif + +.PHONY: all 32 clean qemu frida debug all: $(TESTINSTBIN) $(TESTINSTBIN2) make -C $(ROOT)frida_mode/ @@ -40,12 +44,14 @@ clean: rm -rf $(BUILD_DIR) frida_js_entry: $(TESTINSTBIN) $(TEST_DATA_FILE) + AFL_PRELOAD=$(AFL_PRELOAD) \ AFL_FRIDA_JS_SCRIPT=entry.js \ $(ROOT)afl-fuzz \ -D \ -O \ -i $(TEST_DATA_DIR) \ -o $(FRIDA_OUT) \ + -t 10000+ \ -- \ $(TESTINSTBIN) @@ @@ -78,3 +84,15 @@ frida_js_stalker: $(TESTINSTBIN2) $(TEST_DATA_FILE) -o $(FRIDA_OUT) \ -- \ $(TESTINSTBIN2) @@ + +debug: $(TEST_DATA_FILE) + gdb \ + --ex 'set environment LD_PRELOAD=$(ROOT)afl-frida-trace.so' \ + --ex 'set environment AFL_FRIDA_JS_SCRIPT=entry.js' \ + --ex 'set disassembly-flavor intel' \ + --args $(TESTINSTBIN) $(TEST_DATA_FILE) + +strace: $(TEST_DATA_FILE) + LD_PRELOAD=$(ROOT)afl-frida-trace.so \ + AFL_FRIDA_JS_SCRIPT=entry.js \ + strace $(TESTINSTBIN) $(TEST_DATA_FILE) diff --git a/frida_mode/test/js/entry.js b/frida_mode/test/js/entry.js index f10ef2d1..2bdd7d13 100644 --- a/frida_mode/test/js/entry.js +++ b/frida_mode/test/js/entry.js @@ -9,12 +9,18 @@ new ModuleMap().values().forEach(m => { Afl.print(`${m.base}-${m.base.add(m.size)} ${m.name}`); }); -const entry_point = DebugSymbol.fromName('run'); -Afl.print(`entry_point: ${entry_point.address}`); +const name = Process.enumerateModules()[0].name; +Afl.print(`Name: ${name}`); -Afl.setEntryPoint(entry_point.address); +if (name === 'test') { -// Afl.error('HARD NOPE'); + Afl.print('Searching...\n'); + const entry_point = DebugSymbol.fromName('run'); + Afl.print(`entry_point: ${entry_point}`); + + Afl.setEntryPoint(entry_point.address); + +} Afl.done(); Afl.print("done"); diff --git a/frida_mode/test/libpcap/GNUmakefile 
b/frida_mode/test/libpcap/GNUmakefile index 8a10be07..f1ad06e4 100644 --- a/frida_mode/test/libpcap/GNUmakefile +++ b/frida_mode/test/libpcap/GNUmakefile @@ -2,7 +2,8 @@ PWD:=$(shell pwd)/ ROOT:=$(shell realpath $(PWD)../../..)/ BUILD_DIR:=$(PWD)build/ -AFLPP_DRIVER_HOOK_OBJ=$(ROOT)frida_mode/build/hook.so +AFLPP_FRIDA_DRIVER_HOOK_OBJ=$(ROOT)frida_mode/build/frida_hook.so +AFLPP_QEMU_DRIVER_HOOK_OBJ=$(ROOT)frida_mode/build/qemu_hook.so LIBPCAP_BUILD_DIR:=$(BUILD_DIR)libpcap/ HARNESS_BUILD_DIR:=$(BUILD_DIR)harness/ @@ -34,7 +35,7 @@ LDFLAGS += -lpthread TEST_BIN:=$(BUILD_DIR)test ifeq "$(shell uname)" "Darwin" -TEST_BIN_LDFLAGS:=-undefined dynamic_lookup +TEST_BIN_LDFLAGS:=-undefined dynamic_lookup -Wl,-no_pie endif AFLPP_DRIVER_DUMMY_INPUT:=$(TCPDUMP_TESTS_DIR)in @@ -54,18 +55,20 @@ ifeq "$(ARCH)" "i686" endif endif -AFL_QEMU_PERSISTENT_ADDR=$(shell $(PWD)get_symbol_addr.py -f $(TEST_BIN) -s LLVMFuzzerTestOneInput -b 0x4000000000) +GET_SYMBOL_ADDR:=$(ROOT)frida_mode/util/get_symbol_addr.sh + +AFL_QEMU_PERSISTENT_ADDR=$(shell $(GET_SYMBOL_ADDR) $(TEST_BIN) LLVMFuzzerTestOneInput 0x4000000000) ifeq "$(ARCH)" "aarch64" - AFL_FRIDA_PERSISTENT_ADDR=$(shell $(PWD)get_symbol_addr.py -f $(TEST_BIN) -s LLVMFuzzerTestOneInput -b 0x0000aaaaaaaaa000) + AFL_FRIDA_PERSISTENT_ADDR=$(shell $(GET_SYMBOL_ADDR) $(TEST_BIN) LLVMFuzzerTestOneInput 0x0000aaaaaaaaa000) endif ifeq "$(ARCH)" "x86_64" - AFL_FRIDA_PERSISTENT_ADDR=$(shell $(PWD)get_symbol_addr.py -f $(TEST_BIN) -s LLVMFuzzerTestOneInput -b 0x0000555555554000) + AFL_FRIDA_PERSISTENT_ADDR=$(shell $(GET_SYMBOL_ADDR) $(TEST_BIN) LLVMFuzzerTestOneInput 0x0000555555554000) endif ifeq "$(ARCH)" "x86" - AFL_FRIDA_PERSISTENT_ADDR=$(shell $(PWD)get_symbol_addr.py -f $(TEST_BIN) -s LLVMFuzzerTestOneInput -b 0x56555000) + AFL_FRIDA_PERSISTENT_ADDR=$(shell $(GET_SYMBOL_ADDR) $(TEST_BIN) LLVMFuzzerTestOneInput 0x56555000) endif .PHONY: all clean qemu frida hook @@ -146,8 +149,8 @@ $(AFLPP_DRIVER_DUMMY_INPUT): | $(TCPDUMP_TESTS_DIR) clean: 
rm -rf $(BUILD_DIR) -qemu: $(TEST_BIN) $(AFLPP_DRIVER_HOOK_OBJ) $(AFLPP_DRIVER_DUMMY_INPUT) | $(TCPDUMP_TESTS_DIR) - AFL_QEMU_PERSISTENT_HOOK=$(AFLPP_DRIVER_HOOK_OBJ) \ +qemu: $(TEST_BIN) $(AFLPP_QEMU_DRIVER_HOOK_OBJ) $(AFLPP_DRIVER_DUMMY_INPUT) | $(TCPDUMP_TESTS_DIR) + AFL_QEMU_PERSISTENT_HOOK=$(AFLPP_QEMU_DRIVER_HOOK_OBJ) \ AFL_ENTRYPOINT=$(AFL_QEMU_PERSISTENT_ADDR) \ AFL_QEMU_PERSISTENT_ADDR=$(AFL_QEMU_PERSISTENT_ADDR) \ AFL_QEMU_PERSISTENT_GPR=1 \ @@ -160,8 +163,8 @@ qemu: $(TEST_BIN) $(AFLPP_DRIVER_HOOK_OBJ) $(AFLPP_DRIVER_DUMMY_INPUT) | $(TCPDU -- \ $(TEST_BIN) $(AFLPP_DRIVER_DUMMY_INPUT) -frida: $(TEST_BIN) $(AFLPP_DRIVER_HOOK_OBJ) $(AFLPP_DRIVER_DUMMY_INPUT) | $(TCPDUMP_TESTS_DIR) - AFL_FRIDA_PERSISTENT_HOOK=$(AFLPP_DRIVER_HOOK_OBJ) \ +frida: $(TEST_BIN) $(AFLPP_FRIDA_DRIVER_HOOK_OBJ) $(AFLPP_DRIVER_DUMMY_INPUT) | $(TCPDUMP_TESTS_DIR) + AFL_FRIDA_PERSISTENT_HOOK=$(AFLPP_FRIDA_DRIVER_HOOK_OBJ) \ AFL_FRIDA_PERSISTENT_ADDR=$(AFL_FRIDA_PERSISTENT_ADDR) \ AFL_ENTRYPOINT=$(AFL_FRIDA_PERSISTENT_ADDR) \ $(ROOT)afl-fuzz \ diff --git a/frida_mode/test/libpcap/get_symbol_addr.py b/frida_mode/test/libpcap/get_symbol_addr.py deleted file mode 100755 index 1c46e010..00000000 --- a/frida_mode/test/libpcap/get_symbol_addr.py +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/python3 -import argparse -from elftools.elf.elffile import ELFFile - -def process_file(file, symbol, base): - with open(file, 'rb') as f: - elf = ELFFile(f) - symtab = elf.get_section_by_name('.symtab') - mains = symtab.get_symbol_by_name(symbol) - if len(mains) != 1: - print ("Failed to find main") - return 1 - - main_addr = mains[0]['st_value'] - main = base + main_addr - print ("0x%016x" % main) - return 0 - -def hex_value(x): - return int(x, 16) - -def main(): - parser = argparse.ArgumentParser(description='Process some integers.') - parser.add_argument('-f', '--file', dest='file', type=str, - help='elf file name', required=True) - parser.add_argument('-s', '--symbol', dest='symbol', type=str, - 
help='symbol name', required=True) - parser.add_argument('-b', '--base', dest='base', type=hex_value, - help='elf base address', required=True) - - args = parser.parse_args() - return process_file (args.file, args.symbol, args.base) - -if __name__ == "__main__": - ret = main() - exit(ret) diff --git a/frida_mode/test/persistent_ret/GNUmakefile b/frida_mode/test/persistent_ret/GNUmakefile index f11269e3..adcacf5a 100644 --- a/frida_mode/test/persistent_ret/GNUmakefile +++ b/frida_mode/test/persistent_ret/GNUmakefile @@ -22,20 +22,30 @@ ifeq "$(ARCH)" "i686" endif endif +GET_SYMBOL_ADDR:=$(ROOT)frida_mode/util/get_symbol_addr.sh + +ifeq "$(shell uname)" "Darwin" +TEST_BIN_LDFLAGS:=-Wl,-no_pie +endif + ARCH=$(shell uname -m) ifeq "$(ARCH)" "aarch64" - AFL_FRIDA_PERSISTENT_ADDR=$(shell $(PWD)get_symbol_addr.py -f $(TESTINSTBIN) -s main -b 0x0000aaaaaaaaa000) - AFL_FRIDA_PERSISTENT_RET=$(shell $(PWD)get_symbol_addr.py -f $(TESTINSTBIN) -s slow -b 0x0000aaaaaaaaa000) + AFL_FRIDA_PERSISTENT_ADDR=$(shell $(GET_SYMBOL_ADDR) $(TESTINSTBIN) main 0x0000aaaaaaaaa000) + AFL_FRIDA_PERSISTENT_RET=$(shell $(GET_SYMBOL_ADDR) $(TESTINSTBIN) slow 0x0000aaaaaaaaa000) endif ifeq "$(ARCH)" "x86_64" - AFL_FRIDA_PERSISTENT_ADDR=$(shell $(PWD)get_symbol_addr.py -f $(TESTINSTBIN) -s main -b 0x0000555555554000) - AFL_FRIDA_PERSISTENT_RET=$(shell $(PWD)get_symbol_addr.py -f $(TESTINSTBIN) -s slow -b 0x0000555555554000) + AFL_FRIDA_PERSISTENT_ADDR=$(shell $(GET_SYMBOL_ADDR) $(TESTINSTBIN) main 0x0000555555554000) + AFL_FRIDA_PERSISTENT_RET=$(shell $(GET_SYMBOL_ADDR) $(TESTINSTBIN) slow 0x0000555555554000) endif ifeq "$(ARCH)" "x86" - AFL_FRIDA_PERSISTENT_ADDR=$(shell $(PWD)get_symbol_addr.py -f $(TESTINSTBIN) -s main -b 0x56555000) - AFL_FRIDA_PERSISTENT_RET=$(shell $(PWD)get_symbol_addr.py -f $(TESTINSTBIN) -s slow -b 0x56555000) + AFL_FRIDA_PERSISTENT_ADDR=$(shell $(GET_SYMBOL_ADDR) $(TESTINSTBIN) main 0x56555000) + AFL_FRIDA_PERSISTENT_RET=$(shell $(GET_SYMBOL_ADDR) $(TESTINSTBIN) slow 
0x56555000) +endif + +ifeq "$(shell uname)" "Darwin" +AFL_PRELOAD=/System/Library/Frameworks/CoreFoundation.framework/CoreFoundation endif .PHONY: all 32 clean qemu frida @@ -56,7 +66,7 @@ $(TESTINSTR_DATA_FILE): | $(TESTINSTR_DATA_DIR) echo -n "000" > $@ $(TESTINSTBIN): $(TESTINSTSRC) | $(BUILD_DIR) - $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< + $(CC) $(CFLAGS) $(LDFLAGS) $(TEST_BIN_LDFLAGS) -o $@ $< clean: rm -rf $(BUILD_DIR) @@ -83,6 +93,7 @@ frida_ret: $(TESTINSTBIN) $(TESTINSTR_DATA_FILE) $(TESTINSTBIN) @@ frida_js: $(TESTINSTBIN) $(TESTINSTR_DATA_FILE) + AFL_PRELOAD=$(AFL_PRELOAD) \ AFL_FRIDA_JS_SCRIPT=test.js \ $(ROOT)afl-fuzz \ -D \ diff --git a/frida_mode/test/persistent_ret/get_symbol_addr.py b/frida_mode/test/persistent_ret/get_symbol_addr.py deleted file mode 100755 index 1c46e010..00000000 --- a/frida_mode/test/persistent_ret/get_symbol_addr.py +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/python3 -import argparse -from elftools.elf.elffile import ELFFile - -def process_file(file, symbol, base): - with open(file, 'rb') as f: - elf = ELFFile(f) - symtab = elf.get_section_by_name('.symtab') - mains = symtab.get_symbol_by_name(symbol) - if len(mains) != 1: - print ("Failed to find main") - return 1 - - main_addr = mains[0]['st_value'] - main = base + main_addr - print ("0x%016x" % main) - return 0 - -def hex_value(x): - return int(x, 16) - -def main(): - parser = argparse.ArgumentParser(description='Process some integers.') - parser.add_argument('-f', '--file', dest='file', type=str, - help='elf file name', required=True) - parser.add_argument('-s', '--symbol', dest='symbol', type=str, - help='symbol name', required=True) - parser.add_argument('-b', '--base', dest='base', type=hex_value, - help='elf base address', required=True) - - args = parser.parse_args() - return process_file (args.file, args.symbol, args.base) - -if __name__ == "__main__": - ret = main() - exit(ret) diff --git a/frida_mode/test/png/GNUmakefile b/frida_mode/test/png/GNUmakefile index 
e05bade2..a1a7f1a5 100644 --- a/frida_mode/test/png/GNUmakefile +++ b/frida_mode/test/png/GNUmakefile @@ -22,7 +22,7 @@ PNGTEST_URL:="https://raw.githubusercontent.com/google/fuzzbench/master/benchmar TEST_BIN:=$(BUILD_DIR)test ifeq "$(shell uname)" "Darwin" -TEST_BIN_LDFLAGS:=-undefined dynamic_lookup +TEST_BIN_LDFLAGS:=-undefined dynamic_lookup -Wl,-no_pie endif TEST_DATA_DIR:=$(LIBPNG_DIR)contrib/pngsuite/ @@ -112,3 +112,9 @@ frida: $(TEST_BIN) -o $(FRIDA_OUT) \ -- \ $(TEST_BIN) @@ + +debug: + gdb \ + --ex 'set environment LD_PRELOAD=$(ROOT)afl-frida-trace.so' \ + --ex 'set disassembly-flavor intel' \ + --args $(TEST_BIN) $(TEST_DATA_DIR)basn0g01.png diff --git a/frida_mode/test/png/Makefile b/frida_mode/test/png/Makefile index 4bef1ccb..f843af19 100644 --- a/frida_mode/test/png/Makefile +++ b/frida_mode/test/png/Makefile @@ -14,3 +14,6 @@ qemu: frida: @gmake frida + +debug: + @gmake debug diff --git a/frida_mode/test/png/persistent/GNUmakefile b/frida_mode/test/png/persistent/GNUmakefile index 5af64822..c1ad86e5 100644 --- a/frida_mode/test/png/persistent/GNUmakefile +++ b/frida_mode/test/png/persistent/GNUmakefile @@ -21,18 +21,20 @@ ifeq "$(ARCH)" "i686" endif endif -AFL_QEMU_PERSISTENT_ADDR=$(shell $(PWD)get_symbol_addr.py -f $(TEST_BIN) -s main -b 0x4000000000) +GET_SYMBOL_ADDR:=$(ROOT)frida_mode/util/get_symbol_addr.sh + +AFL_QEMU_PERSISTENT_ADDR=$(shell $(GET_SYMBOL_ADDR) $(TEST_BIN) main 0x4000000000) ifeq "$(ARCH)" "arm64" - AFL_FRIDA_PERSISTENT_ADDR=$(shell $(PWD)get_symbol_addr.py -f $(TEST_BIN) -s main -b 0x0000aaaaaaaaa000) + AFL_FRIDA_PERSISTENT_ADDR=$(shell $(GET_SYMBOL_ADDR) $(TEST_BIN) main 0x0000aaaaaaaaa000) endif ifeq "$(ARCH)" "x86_64" - AFL_FRIDA_PERSISTENT_ADDR=$(shell $(PWD)get_symbol_addr.py -f $(TEST_BIN) -s main -b 0x0000555555554000) + AFL_FRIDA_PERSISTENT_ADDR=$(shell $(GET_SYMBOL_ADDR) $(TEST_BIN) main 0x0000555555554000) endif ifeq "$(ARCH)" "x86" - AFL_FRIDA_PERSISTENT_ADDR=$(shell $(PWD)get_symbol_addr.py -f $(TEST_BIN) -s main 
-b 0x56555000) + AFL_FRIDA_PERSISTENT_ADDR=$(shell $(GET_SYMBOL_ADDR) $(TEST_BIN) main 0x56555000) endif .PHONY: all 32 clean qemu qemu_entry frida frida_entry diff --git a/frida_mode/test/png/persistent/get_symbol_addr.py b/frida_mode/test/png/persistent/get_symbol_addr.py deleted file mode 100755 index 1c46e010..00000000 --- a/frida_mode/test/png/persistent/get_symbol_addr.py +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/python3 -import argparse -from elftools.elf.elffile import ELFFile - -def process_file(file, symbol, base): - with open(file, 'rb') as f: - elf = ELFFile(f) - symtab = elf.get_section_by_name('.symtab') - mains = symtab.get_symbol_by_name(symbol) - if len(mains) != 1: - print ("Failed to find main") - return 1 - - main_addr = mains[0]['st_value'] - main = base + main_addr - print ("0x%016x" % main) - return 0 - -def hex_value(x): - return int(x, 16) - -def main(): - parser = argparse.ArgumentParser(description='Process some integers.') - parser.add_argument('-f', '--file', dest='file', type=str, - help='elf file name', required=True) - parser.add_argument('-s', '--symbol', dest='symbol', type=str, - help='symbol name', required=True) - parser.add_argument('-b', '--base', dest='base', type=hex_value, - help='elf base address', required=True) - - args = parser.parse_args() - return process_file (args.file, args.symbol, args.base) - -if __name__ == "__main__": - ret = main() - exit(ret) diff --git a/frida_mode/test/png/persistent/hook/GNUmakefile b/frida_mode/test/png/persistent/hook/GNUmakefile index 0ff9fe86..ddf63a96 100644 --- a/frida_mode/test/png/persistent/hook/GNUmakefile +++ b/frida_mode/test/png/persistent/hook/GNUmakefile @@ -2,7 +2,8 @@ PWD:=$(shell pwd)/ ROOT:=$(shell realpath $(PWD)../../../../..)/ BUILD_DIR:=$(PWD)build/ -AFLPP_DRIVER_HOOK_OBJ=$(ROOT)frida_mode/build/hook.so +AFLPP_FRIDA_DRIVER_HOOK_OBJ=$(ROOT)frida_mode/build/frida_hook.so +AFLPP_QEMU_DRIVER_HOOK_OBJ=$(ROOT)frida_mode/build/qemu_hook.so CFLAGS+=-O3 \ -funroll-loops \ @@ 
-31,18 +32,24 @@ ifeq "$(ARCH)" "i686" endif endif -AFL_QEMU_PERSISTENT_ADDR=$(shell $(PWD)../get_symbol_addr.py -f $(TEST_BIN) -s LLVMFuzzerTestOneInput -b 0x4000000000) +GET_SYMBOL_ADDR:=$(ROOT)frida_mode/util/get_symbol_addr.sh + +AFL_QEMU_PERSISTENT_ADDR=$(shell $(GET_SYMBOL_ADDR) $(TEST_BIN) LLVMFuzzerTestOneInput 0x4000000000) ifeq "$(ARCH)" "arm64" - AFL_FRIDA_PERSISTENT_ADDR=$(shell $(PWD)../get_symbol_addr.py -f $(TEST_BIN) -s LLVMFuzzerTestOneInput -b 0x0000aaaaaaaaa000) + AFL_FRIDA_PERSISTENT_ADDR=$(shell $(GET_SYMBOL_ADDR) $(TEST_BIN) LLVMFuzzerTestOneInput 0x0000aaaaaaaaa000) endif ifeq "$(ARCH)" "x86_64" - AFL_FRIDA_PERSISTENT_ADDR=$(shell $(PWD)../get_symbol_addr.py -f $(TEST_BIN) -s LLVMFuzzerTestOneInput -b 0x0000555555554000) + AFL_FRIDA_PERSISTENT_ADDR=$(shell $(GET_SYMBOL_ADDR) $(TEST_BIN) LLVMFuzzerTestOneInput 0x0000555555554000) endif ifeq "$(ARCH)" "x86" - AFL_FRIDA_PERSISTENT_ADDR=$(shell $(PWD)../get_symbol_addr.py -f $(TEST_BIN) -s LLVMFuzzerTestOneInput -b 0x56555000) + AFL_FRIDA_PERSISTENT_ADDR=$(shell $(GET_SYMBOL_ADDR) $(TEST_BIN) LLVMFuzzerTestOneInput 0x56555000) +endif + +ifeq "$(shell uname)" "Darwin" +AFL_PRELOAD=/System/Library/Frameworks/CoreFoundation.framework/CoreFoundation endif .PHONY: all 32 clean format qemu qemu_entry frida frida_entry debug @@ -67,8 +74,8 @@ $(TEST_DATA_DIR): | $(BUILD_DIR) $(AFLPP_DRIVER_DUMMY_INPUT): | $(BUILD_DIR) truncate -s 1M $@ -qemu: $(AFLPP_DRIVER_DUMMY_INPUT) $(AFLPP_DRIVER_HOOK_OBJ) | $(BUILD_DIR) - AFL_QEMU_PERSISTENT_HOOK=$(AFLPP_DRIVER_HOOK_OBJ) \ +qemu: $(AFLPP_DRIVER_DUMMY_INPUT) $(AFLPP_QEMU_DRIVER_HOOK_OBJ) | $(BUILD_DIR) + AFL_QEMU_PERSISTENT_HOOK=$(AFLPP_QEMU_DRIVER_HOOK_OBJ) \ AFL_QEMU_PERSISTENT_ADDR=$(AFL_QEMU_PERSISTENT_ADDR) \ AFL_QEMU_PERSISTENT_GPR=1 \ $(ROOT)/afl-fuzz \ @@ -80,8 +87,8 @@ qemu: $(AFLPP_DRIVER_DUMMY_INPUT) $(AFLPP_DRIVER_HOOK_OBJ) | $(BUILD_DIR) -- \ $(TEST_BIN) $(AFLPP_DRIVER_DUMMY_INPUT) -qemu_entry: $(AFLPP_DRIVER_DUMMY_INPUT) $(AFLPP_DRIVER_HOOK_OBJ) | 
$(BUILD_DIR) - AFL_QEMU_PERSISTENT_HOOK=$(AFLPP_DRIVER_HOOK_OBJ) \ +qemu_entry: $(AFLPP_DRIVER_DUMMY_INPUT) $(AFLPP_QEMU_DRIVER_HOOK_OBJ) | $(BUILD_DIR) + AFL_QEMU_PERSISTENT_HOOK=$(AFLPP_QEMU_DRIVER_HOOK_OBJ) \ AFL_QEMU_PERSISTENT_ADDR=$(AFL_QEMU_PERSISTENT_ADDR) \ AFL_ENTRYPOINT=$(AFL_QEMU_PERSISTENT_ADDR) \ AFL_QEMU_PERSISTENT_GPR=1 \ @@ -94,8 +101,8 @@ qemu_entry: $(AFLPP_DRIVER_DUMMY_INPUT) $(AFLPP_DRIVER_HOOK_OBJ) | $(BUILD_DIR) -- \ $(TEST_BIN) $(AFLPP_DRIVER_DUMMY_INPUT) -frida: $(AFLPP_DRIVER_DUMMY_INPUT) $(AFLPP_DRIVER_HOOK_OBJ) | $(BUILD_DIR) - AFL_FRIDA_PERSISTENT_HOOK=$(AFLPP_DRIVER_HOOK_OBJ) \ +frida: $(AFLPP_DRIVER_DUMMY_INPUT) $(AFLPP_FRIDA_DRIVER_HOOK_OBJ) | $(BUILD_DIR) + AFL_FRIDA_PERSISTENT_HOOK=$(AFLPP_FRIDA_DRIVER_HOOK_OBJ) \ AFL_FRIDA_PERSISTENT_ADDR=$(AFL_FRIDA_PERSISTENT_ADDR) \ $(ROOT)afl-fuzz \ -D \ @@ -107,8 +114,8 @@ frida: $(AFLPP_DRIVER_DUMMY_INPUT) $(AFLPP_DRIVER_HOOK_OBJ) | $(BUILD_DIR) $(TEST_BIN) $(AFLPP_DRIVER_DUMMY_INPUT) -frida_entry: $(AFLPP_DRIVER_DUMMY_INPUT) $(AFLPP_DRIVER_HOOK_OBJ) | $(BUILD_DIR) - AFL_FRIDA_PERSISTENT_HOOK=$(AFLPP_DRIVER_HOOK_OBJ) \ +frida_entry: $(AFLPP_DRIVER_DUMMY_INPUT) $(AFLPP_FRIDA_DRIVER_HOOK_OBJ) | $(BUILD_DIR) + AFL_FRIDA_PERSISTENT_HOOK=$(AFLPP_FRIDA_DRIVER_HOOK_OBJ) \ AFL_FRIDA_PERSISTENT_ADDR=$(AFL_FRIDA_PERSISTENT_ADDR) \ AFL_ENTRYPOINT=$(AFL_FRIDA_PERSISTENT_ADDR) \ $(ROOT)afl-fuzz \ @@ -120,7 +127,8 @@ frida_entry: $(AFLPP_DRIVER_DUMMY_INPUT) $(AFLPP_DRIVER_HOOK_OBJ) | $(BUILD_DIR) -- \ $(TEST_BIN) $(AFLPP_DRIVER_DUMMY_INPUT) -frida_js_load: $(AFLPP_DRIVER_DUMMY_INPUT) $(AFLPP_DRIVER_HOOK_OBJ) | $(BUILD_DIR) +frida_js_load: $(AFLPP_DRIVER_DUMMY_INPUT) $(AFLPP_FRIDA_DRIVER_HOOK_OBJ) | $(BUILD_DIR) + AFL_PRELOAD=$(AFL_PRELOAD) \ AFL_FRIDA_JS_SCRIPT=load.js \ $(ROOT)afl-fuzz \ -D \ @@ -128,10 +136,12 @@ frida_js_load: $(AFLPP_DRIVER_DUMMY_INPUT) $(AFLPP_DRIVER_HOOK_OBJ) | $(BUILD_DI -O \ -i $(TEST_DATA_DIR) \ -o $(FRIDA_OUT) \ + -t 10000+ \ -- \ $(TEST_BIN) $(AFLPP_DRIVER_DUMMY_INPUT) 
-frida_js_cmodule: $(AFLPP_DRIVER_DUMMY_INPUT) $(AFLPP_DRIVER_HOOK_OBJ) | $(BUILD_DIR) +frida_js_cmodule: $(AFLPP_DRIVER_DUMMY_INPUT) $(AFLPP_FRIDA_DRIVER_HOOK_OBJ) | $(BUILD_DIR) + AFL_PRELOAD=$(AFL_PRELOAD) \ AFL_FRIDA_JS_SCRIPT=cmodule.js \ $(ROOT)afl-fuzz \ -D \ @@ -146,7 +156,7 @@ debug: $(AFLPP_DRIVER_DUMMY_INPUT) echo $(AFL_FRIDA_PERSISTENT_ADDR) gdb \ --ex 'set environment LD_PRELOAD=$(ROOT)afl-frida-trace.so' \ - --ex 'set environment AFL_FRIDA_PERSISTENT_HOOK=$(AFLPP_DRIVER_HOOK_OBJ)' \ + --ex 'set environment AFL_FRIDA_PERSISTENT_HOOK=$(AFLPP_FRIDA_DRIVER_HOOK_OBJ)' \ --ex 'set environment AFL_FRIDA_PERSISTENT_ADDR=$(AFL_FRIDA_PERSISTENT_ADDR)' \ --ex 'set disassembly-flavor intel' \ --args $(TEST_BIN) $(AFLPP_DRIVER_DUMMY_INPUT) diff --git a/frida_mode/test/png/persistent/hook/load.js b/frida_mode/test/png/persistent/hook/load.js index ce4374ae..ea4d28c3 100644 --- a/frida_mode/test/png/persistent/hook/load.js +++ b/frida_mode/test/png/persistent/hook/load.js @@ -19,7 +19,7 @@ Afl.setPersistentAddress(persistent_addr); const path = Afl.module.path; const dir = path.substring(0, path.lastIndexOf("/")); -const mod = Module.load(`${dir}/frida_mode/build/hook.so`); +const mod = Module.load(`${dir}/frida_mode/build/frida_hook.so`); const hook = mod.getExportByName('afl_persistent_hook'); Afl.setPersistentHook(hook); diff --git a/frida_mode/test/proj4/GNUmakefile b/frida_mode/test/proj4/GNUmakefile index e324a5d0..8555ebad 100644 --- a/frida_mode/test/proj4/GNUmakefile +++ b/frida_mode/test/proj4/GNUmakefile @@ -2,7 +2,7 @@ PWD:=$(shell pwd)/ ROOT:=$(shell realpath $(PWD)../../..)/ BUILD_DIR:=$(PWD)build/ -AFLPP_DRIVER_HOOK_OBJ=$(ROOT)frida_mode/build/hook.so +AFLPP_FRIDA_DRIVER_HOOK_OBJ=$(ROOT)frida_mode/build/frida_hook.so LIBPROJ4_BUILD_DIR:=$(BUILD_DIR)libproj4/ HARNESS_BUILD_DIR:=$(BUILD_DIR)harness/ @@ -26,7 +26,7 @@ LDFLAGS += -lpthread TEST_BIN:=$(BUILD_DIR)test ifeq "$(shell uname)" "Darwin" -TEST_BIN_LDFLAGS:=-undefined dynamic_lookup 
+TEST_BIN_LDFLAGS:=-undefined dynamic_lookup -Wl,-no_pie endif TEST_DATA_DIR:=$(BUILD_DIR)in/ @@ -46,16 +46,18 @@ ifeq "$(ARCH)" "i686" endif endif +GET_SYMBOL_ADDR:=$(ROOT)frida_mode/util/get_symbol_addr.sh + ifeq "$(ARCH)" "aarch64" - AFL_FRIDA_PERSISTENT_ADDR=$(shell $(PWD)get_symbol_addr.py -f $(TEST_BIN) -s LLVMFuzzerTestOneInput -b 0x0000aaaaaaaaa000) + AFL_FRIDA_PERSISTENT_ADDR=$(shell $(GET_SYMBOL_ADDR) $(TEST_BIN) LLVMFuzzerTestOneInput 0x0000aaaaaaaaa000) endif ifeq "$(ARCH)" "x86_64" - AFL_FRIDA_PERSISTENT_ADDR=$(shell $(PWD)get_symbol_addr.py -f $(TEST_BIN) -s LLVMFuzzerTestOneInput -b 0x0000555555554000) + AFL_FRIDA_PERSISTENT_ADDR=$(shell $(GET_SYMBOL_ADDR) $(TEST_BIN) LLVMFuzzerTestOneInput 0x0000555555554000) endif ifeq "$(ARCH)" "x86" - AFL_FRIDA_PERSISTENT_ADDR=$(shell $(PWD)get_symbol_addr.py -f $(TEST_BIN) -s LLVMFuzzerTestOneInput -b 0x56555000) + AFL_FRIDA_PERSISTENT_ADDR=$(shell $(GET_SYMBOL_ADDR) $(TEST_BIN) LLVMFuzzerTestOneInput 0x56555000) endif .PHONY: all clean frida hook @@ -130,7 +132,7 @@ $(TEST_DATA_FILE): | $(TEST_DATA_DIR) clean: rm -rf $(BUILD_DIR) -frida: $(TEST_BIN) $(AFLPP_DRIVER_HOOK_OBJ) $(TEST_DATA_FILE) +frida: $(TEST_BIN) $(AFLPP_FRIDA_DRIVER_HOOK_OBJ) $(TEST_DATA_FILE) AFL_DEBUG_CHILD=1 \ AFL_DISABLE_TRIM=1 \ AFL_FRIDA_PERSISTENT_CNT=1000000 \ @@ -142,7 +144,7 @@ frida: $(TEST_BIN) $(AFLPP_DRIVER_HOOK_OBJ) $(TEST_DATA_FILE) AFL_SKIP_CPUFREQ=1 \ AFL_SKIP_CRASHES=1 \ AFL_TESTCACHE_SIZE=2 \ - AFL_FRIDA_PERSISTENT_HOOK=$(AFLPP_DRIVER_HOOK_OBJ) \ + AFL_FRIDA_PERSISTENT_HOOK=$(AFLPP_FRIDA_DRIVER_HOOK_OBJ) \ AFL_FRIDA_PERSISTENT_ADDR=$(AFL_FRIDA_PERSISTENT_ADDR) \ AFL_ENTRYPOINT=$(AFL_FRIDA_PERSISTENT_ADDR) \ $(ROOT)afl-fuzz \ diff --git a/frida_mode/test/re2/GNUmakefile b/frida_mode/test/re2/GNUmakefile index e1c5347d..ce95df3b 100644 --- a/frida_mode/test/re2/GNUmakefile +++ b/frida_mode/test/re2/GNUmakefile @@ -2,7 +2,8 @@ PWD:=$(shell pwd)/ ROOT:=$(shell realpath $(PWD)../../..)/ BUILD_DIR:=$(PWD)build/ 
-AFLPP_DRIVER_HOOK_OBJ=$(ROOT)frida_mode/build/hook.so +AFLPP_FRIDA_DRIVER_HOOK_OBJ=$(ROOT)frida_mode/build/frida_hook.so +AFLPP_QEMU_DRIVER_HOOK_OBJ=$(ROOT)frida_mode/build/qemu_hook.so LIBRE2_BUILD_DIR:=$(BUILD_DIR)libre2/ HARNESS_BUILD_DIR:=$(BUILD_DIR)harness/ @@ -46,18 +47,20 @@ ifeq "$(ARCH)" "i686" endif endif -AFL_QEMU_PERSISTENT_ADDR=$(shell $(PWD)get_symbol_addr.py -f $(TEST_BIN) -s LLVMFuzzerTestOneInput -b 0x4000000000) +GET_SYMBOL_ADDR:=$(ROOT)frida_mode/util/get_symbol_addr.sh + +AFL_QEMU_PERSISTENT_ADDR=$(shell $(GET_SYMBOL_ADDR) $(TEST_BIN) LLVMFuzzerTestOneInput 0x4000000000) ifeq "$(ARCH)" "aarch64" - AFL_FRIDA_PERSISTENT_ADDR=$(shell $(PWD)get_symbol_addr.py -f $(TEST_BIN) -s LLVMFuzzerTestOneInput -b 0x0000aaaaaaaaa000) + AFL_FRIDA_PERSISTENT_ADDR=$(shell $(GET_SYMBOL_ADDR) $(TEST_BIN) LLVMFuzzerTestOneInput 0x0000aaaaaaaaa000) endif ifeq "$(ARCH)" "x86_64" - AFL_FRIDA_PERSISTENT_ADDR=$(shell $(PWD)get_symbol_addr.py -f $(TEST_BIN) -s LLVMFuzzerTestOneInput -b 0x0000555555554000) + AFL_FRIDA_PERSISTENT_ADDR=$(shell $(GET_SYMBOL_ADDR) $(TEST_BIN) LLVMFuzzerTestOneInput 0x0000555555554000) endif ifeq "$(ARCH)" "x86" - AFL_FRIDA_PERSISTENT_ADDR=$(shell $(PWD)get_symbol_addr.py -f $(TEST_BIN) -s LLVMFuzzerTestOneInput -b 0x56555000) + AFL_FRIDA_PERSISTENT_ADDR=$(shell $(GET_SYMBOL_ADDR) $(TEST_BIN) LLVMFuzzerTestOneInput 0x56555000) endif .PHONY: all clean qemu frida hook @@ -128,8 +131,8 @@ $(AFLPP_DRIVER_DUMMY_INPUT): | $(TEST_DATA_DIR) clean: rm -rf $(BUILD_DIR) -qemu: $(TEST_BIN) $(AFLPP_DRIVER_HOOK_OBJ) $(AFLPP_DRIVER_DUMMY_INPUT) - AFL_QEMU_PERSISTENT_HOOK=$(AFLPP_DRIVER_HOOK_OBJ) \ +qemu: $(TEST_BIN) $(AFLPP_QEMU_DRIVER_HOOK_OBJ) $(AFLPP_DRIVER_DUMMY_INPUT) + AFL_QEMU_PERSISTENT_HOOK=$(AFLPP_QEMU_DRIVER_HOOK_OBJ) \ AFL_ENTRYPOINT=$(AFL_QEMU_PERSISTENT_ADDR) \ AFL_QEMU_PERSISTENT_ADDR=$(AFL_QEMU_PERSISTENT_ADDR) \ AFL_QEMU_PERSISTENT_GPR=1 \ @@ -142,8 +145,8 @@ qemu: $(TEST_BIN) $(AFLPP_DRIVER_HOOK_OBJ) $(AFLPP_DRIVER_DUMMY_INPUT) -- \ 
$(TEST_BIN) $(AFLPP_DRIVER_DUMMY_INPUT) -frida: $(TEST_BIN) $(AFLPP_DRIVER_HOOK_OBJ) $(AFLPP_DRIVER_DUMMY_INPUT) - AFL_FRIDA_PERSISTENT_HOOK=$(AFLPP_DRIVER_HOOK_OBJ) \ +frida: $(TEST_BIN) $(AFLPP_FRIDA_DRIVER_HOOK_OBJ) $(AFLPP_DRIVER_DUMMY_INPUT) + AFL_FRIDA_PERSISTENT_HOOK=$(AFLPP_FRIDA_DRIVER_HOOK_OBJ) \ AFL_FRIDA_PERSISTENT_ADDR=$(AFL_FRIDA_PERSISTENT_ADDR) \ AFL_ENTRYPOINT=$(AFL_FRIDA_PERSISTENT_ADDR) \ $(ROOT)afl-fuzz \ diff --git a/frida_mode/test/re2/get_symbol_addr.py b/frida_mode/test/re2/get_symbol_addr.py deleted file mode 100755 index 1c46e010..00000000 --- a/frida_mode/test/re2/get_symbol_addr.py +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/python3 -import argparse -from elftools.elf.elffile import ELFFile - -def process_file(file, symbol, base): - with open(file, 'rb') as f: - elf = ELFFile(f) - symtab = elf.get_section_by_name('.symtab') - mains = symtab.get_symbol_by_name(symbol) - if len(mains) != 1: - print ("Failed to find main") - return 1 - - main_addr = mains[0]['st_value'] - main = base + main_addr - print ("0x%016x" % main) - return 0 - -def hex_value(x): - return int(x, 16) - -def main(): - parser = argparse.ArgumentParser(description='Process some integers.') - parser.add_argument('-f', '--file', dest='file', type=str, - help='elf file name', required=True) - parser.add_argument('-s', '--symbol', dest='symbol', type=str, - help='symbol name', required=True) - parser.add_argument('-b', '--base', dest='base', type=hex_value, - help='elf base address', required=True) - - args = parser.parse_args() - return process_file (args.file, args.symbol, args.base) - -if __name__ == "__main__": - ret = main() - exit(ret) diff --git a/frida_mode/test/sqlite/GNUmakefile b/frida_mode/test/sqlite/GNUmakefile new file mode 100644 index 00000000..80e0a939 --- /dev/null +++ b/frida_mode/test/sqlite/GNUmakefile @@ -0,0 +1,166 @@ +PWD:=$(shell pwd)/ +ROOT:=$(shell realpath $(PWD)../../..)/ +BUILD_DIR:=$(PWD)build/ + +SQLITE_BUILD_DIR:=$(BUILD_DIR)sqlite/ 
+SQLITE_BUILD_SRC_DIR:=$(SQLITE_BUILD_DIR)src/ + +AFLPP_DRIVER:=$(ROOT)utils/aflpp_driver/libAFLQemuDriver.a + +AFLPP_DRIVER:=$(ROOT)utils/aflpp_driver/libAFLQemuDriver.a +AFLPP_FRIDA_DRIVER_HOOK_OBJ=$(ROOT)frida_mode/build/frida_hook.so +AFLPP_QEMU_DRIVER_HOOK_OBJ=$(ROOT)frida_mode/build/qemu_hook.so + + +CFLAGS += -fpermissive + +LDFLAGS += -lpthread + +TEST_BIN:=$(SQLITE_BUILD_DIR)ossfuzz +SQLITE_TEST_DIR:=$(BUILD_DIR)in/ +AFLPP_DRIVER_DUMMY_INPUT:=$(SQLITE_TEST_DIR)in + +SQLITE_CFLAGS:= -DSQLITE_MAX_LENGTH=128000000 \ + -DSQLITE_MAX_SQL_LENGTH=128000000 \ + -DSQLITE_MAX_MEMORY=25000000 \ + -DSQLITE_PRINTF_PRECISION_LIMIT=1048576 \ + -DSQLITE_DEBUG=1 \ + -DSQLITE_MAX_PAGE_COUNT=16384 + +QEMU_OUT:=$(BUILD_DIR)qemu-out +FRIDA_OUT:=$(BUILD_DIR)frida-out + +ifndef ARCH + +ARCH=$(shell uname -m) +ifeq "$(ARCH)" "aarch64" + ARCH:=arm64 +endif + +ifeq "$(ARCH)" "i686" + ARCH:=x86 +endif +endif + +GET_SYMBOL_ADDR:=$(ROOT)frida_mode/util/get_symbol_addr.sh + +AFL_QEMU_PERSISTENT_ADDR=$(shell $(GET_SYMBOL_ADDR) $(TEST_BIN) LLVMFuzzerTestOneInput 0x4000000000) + +ifeq "$(ARCH)" "aarch64" + AFL_FRIDA_PERSISTENT_ADDR=$(shell $(GET_SYMBOL_ADDR) $(TEST_BIN) LLVMFuzzerTestOneInput 0x0000aaaaaaaaa000) +endif + +ifeq "$(ARCH)" "x86_64" + AFL_FRIDA_PERSISTENT_ADDR=$(shell $(GET_SYMBOL_ADDR) $(TEST_BIN) LLVMFuzzerTestOneInput 0x0000555555554000) +endif + +ifeq "$(ARCH)" "x86" + AFL_FRIDA_PERSISTENT_ADDR=$(shell $(GET_SYMBOL_ADDR) $(TEST_BIN) LLVMFuzzerTestOneInput 0x56555000) +endif + +.PHONY: all clean qemu frida hook sqlite + +all: $(TEST_BIN) + make -C $(ROOT)frida_mode/ + +32: + CXXFLAGS="-m32" LDFLAGS="-m32" ARCH="x86" make all + +$(BUILD_DIR): + mkdir -p $@ + +########## SQLITE ####### + +$(AFLPP_DRIVER): + make -C $(ROOT) + +$(SQLITE_BUILD_DIR): | $(BUILD_DIR) + mkdir $@ + +$(SQLITE_BUILD_DIR)sqlite3.tar.gz: | $(SQLITE_BUILD_DIR) + curl 'https://sqlite.org/src/tarball/sqlite.tar.gz?r=c78cbf2e86850cc6' -o $@ + +$(SQLITE_BUILD_SRC_DIR): $(SQLITE_BUILD_DIR)sqlite3.tar.gz + 
mkdir -p $@ + tar xzvf $< --strip-components 1 -C $@ + +$(SQLITE_TEST_DIR): | $(SQLITE_BUILD_SRC_DIR) + mkdir -p $@ + find $(SQLITE_BUILD_SRC_DIR) -name "*.test" | xargs -L1 -I%% cp -v %% $@ + +$(SQLITE_BUILD_SRC_DIR)Makefile: | $(SQLITE_BUILD_SRC_DIR) + cd $(SQLITE_BUILD_SRC_DIR) && \ + CFLAGS="$(SQLITE_CFLAGS)" \ + ASAN_OPTIONS=detect_leaks=0 \ + ./configure + +$(SQLITE_BUILD_SRC_DIR).libs/libsqlite3.so: $(SQLITE_BUILD_SRC_DIR)Makefile + CFLAGS="$(SQLITE_CFLAGS)" \ + ASAN_OPTIONS=detect_leaks=0 \ + make -C $(SQLITE_BUILD_SRC_DIR) -j $(shell nproc) + +$(SQLITE_BUILD_SRC_DIR)sqlite3.o: $(SQLITE_BUILD_SRC_DIR).libs/libsqlite3.so + CFLAGS="$(SQLITE_CFLAGS)" \ + ASAN_OPTIONS=detect_leaks=0 \ + make -C $(SQLITE_BUILD_SRC_DIR) -j $(shell nproc) sqlite3.c + +$(SQLITE_BUILD_DIR)ossfuzz.o: $(SQLITE_BUILD_SRC_DIR)sqlite3.o + $(CC) -I $(SQLITE_BUILD_SRC_DIR) -c $(SQLITE_BUILD_SRC_DIR)test/ossfuzz.c -o $@ + +$(TEST_BIN): $(SQLITE_BUILD_DIR)ossfuzz.o + $(CXX) -o $(TEST_BIN) \ + $(SQLITE_BUILD_DIR)ossfuzz.o \ + $(SQLITE_BUILD_SRC_DIR)sqlite3.o \ + $(AFLPP_DRIVER) \ + -l pthread \ + -l dl + +sqlite: $(SQLITE_TEST_DIR) $(TEST_BIN) + +########## DUMMY ####### + +$(AFLPP_DRIVER_DUMMY_INPUT): | $(SQLITE_TEST_DIR) + truncate -s 1M $@ + +###### TEST DATA ####### + +clean: + rm -rf $(BUILD_DIR) + +qemu: $(TEST_BIN) $(AFLPP_QEMU_DRIVER_HOOK_OBJ) $(AFLPP_DRIVER_DUMMY_INPUT) | $(SQLITE_TEST_DIR) + AFL_QEMU_PERSISTENT_CNT=1000000 \ + AFL_QEMU_PERSISTENT_HOOK=$(AFLPP_QEMU_DRIVER_HOOK_OBJ) \ + AFL_ENTRYPOINT=$(AFL_QEMU_PERSISTENT_ADDR) \ + AFL_QEMU_PERSISTENT_ADDR=$(AFL_QEMU_PERSISTENT_ADDR) \ + AFL_QEMU_PERSISTENT_GPR=1 \ + $(ROOT)afl-fuzz \ + -D \ + -V 30 \ + -Q \ + -i $(SQLITE_TEST_DIR) \ + -o $(QEMU_OUT) \ + -- \ + $(TEST_BIN) $(AFLPP_DRIVER_DUMMY_INPUT) + +frida: $(TEST_BIN) $(AFLPP_FRIDA_DRIVER_HOOK_OBJ) $(AFLPP_DRIVER_DUMMY_INPUT) | $(SQLITE_TEST_DIR) + AFL_FRIDA_PERSISTENT_CNT=1000000 \ + AFL_FRIDA_PERSISTENT_HOOK=$(AFLPP_FRIDA_DRIVER_HOOK_OBJ) \ + 
AFL_FRIDA_PERSISTENT_ADDR=$(AFL_FRIDA_PERSISTENT_ADDR) \ + AFL_ENTRYPOINT=$(AFL_FRIDA_PERSISTENT_ADDR) \ + $(ROOT)afl-fuzz \ + -D \ + -V 30 \ + -O \ + -i $(SQLITE_TEST_DIR) \ + -o $(FRIDA_OUT) \ + -- \ + $(TEST_BIN) $(AFLPP_DRIVER_DUMMY_INPUT) + +debug: + gdb \ + --ex 'set environment LD_PRELOAD=$(ROOT)afl-frida-trace.so' \ + --ex 'set environment AFL_QEMU_DRIVER_NO_HOOK=1' \ + --ex 'set disassembly-flavor intel' \ + --ex 'b main' \ + --ex 'r < $(SQLITE_TEST_DIR)0034ecacd5427aafc6b97413da2053b36de5059f' \ + $(TEST_BIN) diff --git a/frida_mode/test/sqlite/Makefile b/frida_mode/test/sqlite/Makefile new file mode 100644 index 00000000..f83e2992 --- /dev/null +++ b/frida_mode/test/sqlite/Makefile @@ -0,0 +1,17 @@ +all: + @echo trying to use GNU make... + @gmake all || echo please install GNUmake + +32: + @echo trying to use GNU make... + @gmake 32 || echo please install GNUmake + +clean: + @gmake clean + +frida: + @gmake frida + +debug: + @gmake debug + diff --git a/frida_mode/test/testinstr/GNUmakefile b/frida_mode/test/testinstr/GNUmakefile index a35073ab..3701ddc8 100644 --- a/frida_mode/test/testinstr/GNUmakefile +++ b/frida_mode/test/testinstr/GNUmakefile @@ -53,6 +53,13 @@ frida: $(TESTINSTBIN) $(TESTINSTR_DATA_FILE) $(TESTINSTBIN) @@ debug: + echo $(AFL_FRIDA_PERSISTENT_ADDR) + gdb \ + --ex 'set environment LD_PRELOAD=$(ROOT)afl-frida-trace.so' \ + --ex 'set disassembly-flavor intel' \ + --args $(TESTINSTBIN) $(TESTINSTR_DATA_FILE) + +debug: gdb \ --ex 'set environment LD_PRELOAD=$(ROOT)afl-frida-trace.so' \ --ex 'set disassembly-flavor intel' \ diff --git a/frida_mode/test/unstable/GNUmakefile b/frida_mode/test/unstable/GNUmakefile index fed417a3..938d7c17 100644 --- a/frida_mode/test/unstable/GNUmakefile +++ b/frida_mode/test/unstable/GNUmakefile @@ -22,18 +22,20 @@ ifeq "$(ARCH)" "i686" endif endif -AFL_QEMU_PERSISTENT_ADDR=$(shell $(PWD)get_symbol_addr.py -f $(UNSTABLE_BIN) -s run_test -b 0x4000000000) 
+GET_SYMBOL_ADDR:=$(ROOT)frida_mode/util/get_symbol_addr.sh + +AFL_QEMU_PERSISTENT_ADDR=$(shell $(GET_SYMBOL_ADDR) $(UNSTABLE_BIN) run_test 0x4000000000) ifeq "$(ARCH)" "aarch64" - AFL_FRIDA_PERSISTENT_ADDR=$(shell $(PWD)get_symbol_addr.py -f $(UNSTABLE_BIN) -s run_test -b 0x0000aaaaaaaaa000) + AFL_FRIDA_PERSISTENT_ADDR=$(shell $(GET_SYMBOL_ADDR) $(UNSTABLE_BIN) run_test 0x0000aaaaaaaaa000) endif ifeq "$(ARCH)" "x86_64" - AFL_FRIDA_PERSISTENT_ADDR=$(shell $(PWD)get_symbol_addr.py -f $(UNSTABLE_BIN) -s run_test -b 0x0000555555554000) + AFL_FRIDA_PERSISTENT_ADDR=$(shell $(GET_SYMBOL_ADDR) $(UNSTABLE_BIN) run_test 0x0000555555554000) endif ifeq "$(ARCH)" "x86" - AFL_FRIDA_PERSISTENT_ADDR=$(shell $(PWD)get_symbol_addr.py -f $(UNSTABLE_BIN) -s run_test -b 0x56555000) + AFL_FRIDA_PERSISTENT_ADDR=$(shell $(GET_SYMBOL_ADDR) $(UNSTABLE_BIN) run_test 0x56555000) endif .PHONY: all 32 clean qemu frida diff --git a/frida_mode/test/unstable/get_symbol_addr.py b/frida_mode/test/unstable/get_symbol_addr.py deleted file mode 100755 index 1c46e010..00000000 --- a/frida_mode/test/unstable/get_symbol_addr.py +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/python3 -import argparse -from elftools.elf.elffile import ELFFile - -def process_file(file, symbol, base): - with open(file, 'rb') as f: - elf = ELFFile(f) - symtab = elf.get_section_by_name('.symtab') - mains = symtab.get_symbol_by_name(symbol) - if len(mains) != 1: - print ("Failed to find main") - return 1 - - main_addr = mains[0]['st_value'] - main = base + main_addr - print ("0x%016x" % main) - return 0 - -def hex_value(x): - return int(x, 16) - -def main(): - parser = argparse.ArgumentParser(description='Process some integers.') - parser.add_argument('-f', '--file', dest='file', type=str, - help='elf file name', required=True) - parser.add_argument('-s', '--symbol', dest='symbol', type=str, - help='symbol name', required=True) - parser.add_argument('-b', '--base', dest='base', type=hex_value, - help='elf base address', 
required=True) - - args = parser.parse_args() - return process_file (args.file, args.symbol, args.base) - -if __name__ == "__main__": - ret = main() - exit(ret) diff --git a/frida_mode/ts/lib/afl.ts b/frida_mode/ts/lib/afl.ts index 6da7fabc..6326c099 100644 --- a/frida_mode/ts/lib/afl.ts +++ b/frida_mode/ts/lib/afl.ts @@ -120,6 +120,13 @@ class Afl { } /** + * See `AFL_FRIDA_INST_JIT`. + */ + public static setInstrumentJit(): void { + Afl.jsApiSetInstrumentJit(); + } + + /** * See `AFL_INST_LIBS`. */ public static setInstrumentLibraries(): void { @@ -133,6 +140,13 @@ class Afl { Afl.jsApiSetInstrumentNoOptimize(); } + /* + * See `AFL_FRIDA_INST_SEED` + */ + public static setInstrumentSeed(seed: NativePointer): void { + Afl.jsApiSetInstrumentSeed(seed); + } + /** * See `AFL_FRIDA_INST_TRACE_UNIQUE`. */ @@ -273,6 +287,11 @@ class Afl { "void", ["pointer"]); + private static readonly jsApiSetInstrumentJit = Afl.jsApiGetFunction( + "js_api_set_instrument_jit", + "void", + []); + private static readonly jsApiSetInstrumentLibraries = Afl.jsApiGetFunction( "js_api_set_instrument_libraries", "void", @@ -283,6 +302,11 @@ class Afl { "void", []); + private static readonly jsApiSetInstrumentSeed = Afl.jsApiGetFunction( + "js_api_set_instrument_seed", + "void", + ["uint64"]); + private static readonly jsApiSetInstrumentTrace = Afl.jsApiGetFunction( "js_api_set_instrument_trace", "void", @@ -371,3 +395,5 @@ class Afl { } } + +export { Afl }; diff --git a/frida_mode/ts/package.json b/frida_mode/ts/package.json index 47b693ed..191eb597 100644 --- a/frida_mode/ts/package.json +++ b/frida_mode/ts/package.json @@ -1,32 +1,32 @@ { - "name": "@worksbutnottested/aflplusplus-frida", - "version": "1.0.0", - "description": "AFLplusplus Frida Mode", - "main": "./dist/frida.js", - "types": "./dist/frida.d.ts", - "files": [ - "/dist/" - ], - "repository": { - "type": "git", - "url": "git@github.com:worksbutnottested/AFLplusplus.git" - }, - "publishConfig": { - "cache": "~/.npm", - 
"registry": "https://npm.pkg.github.com/@worksbutnottested" - }, - "scripts": { - "prepare": "npm run build", - "build": "tsc", - "lint": "tslint -p tslint.json" - }, - "devDependencies": { - "@types/node": "^14.14.2", - "typescript": "^4.0.3", - "typescript-tslint-plugin": "^0.5.5", - "tslint": "^6.1.3" - }, - "dependencies": { - "@types/frida-gum": "^16.2.0" - } + "name": "@worksbutnottested/aflplusplus-frida", + "version": "1.0.1", + "description": "AFLplusplus Frida Mode", + "main": "./dist/afl.js", + "types": "./dist/afl.d.ts", + "files": [ + "/dist/" + ], + "repository": { + "type": "git", + "url": "git@github.com:worksbutnottested/AFLplusplus.git" + }, + "publishConfig": { + "cache": "~/.npm", + "registry": "https://npm.pkg.github.com/@worksbutnottested" + }, + "scripts": { + "prepare": "npm run build", + "build": "tsc", + "lint": "tslint -p tslint.json" + }, + "devDependencies": { + "@types/node": "^14.14.2", + "typescript": "^4.0.3", + "typescript-tslint-plugin": "^0.5.5", + "tslint": "^6.1.3" + }, + "dependencies": { + "@types/frida-gum": "^16.2.0" } +} diff --git a/frida_mode/util/bin2c.c b/frida_mode/util/bin2c.c new file mode 100644 index 00000000..899d0101 --- /dev/null +++ b/frida_mode/util/bin2c.c @@ -0,0 +1,117 @@ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +void fatal(char *msg) { + + perror(msg); + exit(1); + +} + +void bin2c_write(char *name, char *output, unsigned char *buff, size_t size) { + + int fd = open(output, O_CREAT | O_WRONLY | O_TRUNC, 00660); + if (fd < 0) { fatal("open"); } + + /* Write the array definition */ + dprintf(fd, "unsigned char %s[] = {\n", name); + + /* 12 bytes per row, just like xxd means we fit an 80 character width */ + for (size_t i = 0; i < size; i += 12) { + + for (size_t j = 0; j < 12; j++) { + + size_t idx = i + j; + + /* If we get to the end of the input, then break */ + if (idx >= size) { break; } + + /* If we are writing the first column, then we need a leading indent 
*/ + if (j == 0) { dprintf(fd, " "); } + + /* Write the hexadecimal byte value */ + dprintf(fd, "0x%02x", buff[idx]); + + /* If we have just written the last byte, then stop */ + if (idx == size - 1) { break; } + + /* + * If we have written the last byte in a row, then follow with a comma + * and a newline + */ + if (j == 11) { + + dprintf(fd, ",\n"); + + /* + * Otherwise, follow with a command and a space + */ + + } else { + + dprintf(fd, ", "); + + } + + } + + } + + /* Write the closing brace for the array */ + dprintf(fd, "\n};\n"); + + /* Write a parameter describing the length of the array */ + dprintf(fd, "unsigned int %s_len = %lu;\n", name, size); + + if (close(fd) < 0) { fatal("close"); } + +} + +void bin2c(char *name, char *input, char *output) { + + int fd = open(input, O_RDONLY); + if (fd < 0) { fatal("open(input)"); } + + size_t size = lseek(fd, 0, SEEK_END); + if (size < 0) { fatal("lseek(SEEK_END)"); } + + if (lseek(fd, 0, SEEK_SET) < 0) { fatal("lseek(SEEK_SET)"); } + + unsigned char *buff = malloc(size); + if (buff == NULL) { fatal("malloc(size)"); } + + if (read(fd, buff, size) != size) { fatal("read(size)"); } + + bin2c_write(name, output, buff, size); + + free(buff); + if (close(fd) < 0) { fatal("close(fd_in)"); } + +} + +int main(int argc, char **argv) { + + if (argc < 4) { + + dprintf(STDERR_FILENO, "%s <name> <input> <output>\n", argv[0]); + return 1; + + } + + char *name = argv[1]; + char *input = argv[2]; + char *output = argv[3]; + + dprintf(STDOUT_FILENO, "bin2c:\n"); + dprintf(STDOUT_FILENO, "\tname: %s\n", name); + dprintf(STDOUT_FILENO, "\tinput: %s\n", input); + dprintf(STDOUT_FILENO, "\toutput: %s\n", output); + + bin2c(name, input, output); + + return 0; + +} + diff --git a/frida_mode/util/get_symbol_addr.sh b/frida_mode/util/get_symbol_addr.sh new file mode 100755 index 00000000..f5d8df91 --- /dev/null +++ b/frida_mode/util/get_symbol_addr.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# Copyright 2020 Google LLC +# +# Licensed under the Apache 
License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# set -x +target="$1" +symbol="$2" +base="$3" + +test -z "$target" -o -z "$symbol" -o '!' -e "$target" && exit 0 + +test $(uname -s) = "Darwin" && symbol=_"$symbol" + +file "$target" | grep -q executable && { + nm "$target" | grep -i "T $symbol" | awk '{print"0x"$1}' + exit 0 +} + +hex_base=$(echo "$3" | awk '{sub("^0x","");print $0}' | tr a-f A-F ) +nm "$target" | grep -i "T $symbol" | awk '{print$1}' | tr a-f A-F | \ + xargs echo "ibase=16;obase=10;$hex_base + " | bc | tr A-F a-f | awk '{print "0x"$0}' +exit 0 diff --git a/include/envs.h b/include/envs.h index f89e8e62..26cc250f 100644 --- a/include/envs.h +++ b/include/envs.h @@ -56,9 +56,11 @@ static char *afl_environment_variables[] = { "AFL_FRIDA_DEBUG_MAPS", "AFL_FRIDA_EXCLUDE_RANGES", "AFL_FRIDA_INST_DEBUG_FILE", + "AFL_FRIDA_INST_JIT", "AFL_FRIDA_INST_NO_OPTIMIZE", "AFL_FRIDA_INST_NO_PREFETCH", "AFL_FRIDA_INST_RANGES", + "AFL_FRIDA_INST_SEED", "AFL_FRIDA_INST_TRACE", "AFL_FRIDA_INST_TRACE_UNIQUE", "AFL_FRIDA_JS_SCRIPT", diff --git a/instrumentation/SanitizerCoverageLTO.so.cc b/instrumentation/SanitizerCoverageLTO.so.cc index 372af003..91b81910 100644 --- a/instrumentation/SanitizerCoverageLTO.so.cc +++ b/instrumentation/SanitizerCoverageLTO.so.cc @@ -516,6 +516,8 @@ bool ModuleSanitizerCoverage::instrumentModule( for (auto &F : M) { + if (!isInInstrumentList(&F) || !F.size()) { continue; } + for (auto &BB : F) { for (auto &IN : BB) { @@ -759,6 +761,12 @@ bool 
ModuleSanitizerCoverage::instrumentModule( uint64_t literalLength = Str2.size(); uint64_t optLength = ilen->getZExtValue(); + if (optLength > literalLength + 1) { + + optLength = Str2.length() + 1; + + } + if (literalLength + 1 == optLength) { Str2.append("\0", 1); // add null byte @@ -862,6 +870,12 @@ bool ModuleSanitizerCoverage::instrumentModule( uint64_t literalLength = optLen; optLen = ilen->getZExtValue(); + if (optLen > thestring.length() + 1) { + + optLen = thestring.length() + 1; + + } + if (optLen < 2) { continue; } if (literalLength + 1 == optLen) { // add null byte thestring.append("\0", 1); diff --git a/instrumentation/afl-compiler-rt.o.c b/instrumentation/afl-compiler-rt.o.c index 3f518b55..b01ea987 100644 --- a/instrumentation/afl-compiler-rt.o.c +++ b/instrumentation/afl-compiler-rt.o.c @@ -299,8 +299,9 @@ static void __afl_map_shm(void) { if (!getenv("AFL_QUIET")) fprintf(stderr, - "Warning: AFL++ tools will need to set AFL_MAP_SIZE to %u " - "to be able to run this instrumented program!\n", + "Warning: AFL++ tools might need to set AFL_MAP_SIZE to %u " + "to be able to run this instrumented program if this " + "crashes!\n", __afl_final_loc); } diff --git a/instrumentation/afl-llvm-dict2file.so.cc b/instrumentation/afl-llvm-dict2file.so.cc index e2b44b21..9daa75a8 100644 --- a/instrumentation/afl-llvm-dict2file.so.cc +++ b/instrumentation/afl-llvm-dict2file.so.cc @@ -154,6 +154,7 @@ bool AFLdict2filePass::runOnModule(Module &M) { for (auto &F : M) { if (isIgnoreFunction(&F)) continue; + if (!isInInstrumentList(&F) || !F.size()) { continue; } /* Some implementation notes. 
* @@ -428,6 +429,12 @@ bool AFLdict2filePass::runOnModule(Module &M) { uint64_t literalLength = Str2.length(); uint64_t optLength = ilen->getZExtValue(); + if (optLength > literalLength + 1) { + + optLength = Str2.length() + 1; + + } + if (literalLength + 1 == optLength) { Str2.append("\0", 1); // add null byte @@ -534,7 +541,12 @@ bool AFLdict2filePass::runOnModule(Module &M) { uint64_t literalLength = optLen; optLen = ilen->getZExtValue(); - if (optLen > thestring.length()) { optLen = thestring.length(); } + if (optLen > thestring.length() + 1) { + + optLen = thestring.length() + 1; + + } + if (optLen < 2) { continue; } if (literalLength + 1 == optLen) { // add null byte thestring.append("\0", 1); diff --git a/instrumentation/afl-llvm-lto-instrumentation.so.cc b/instrumentation/afl-llvm-lto-instrumentation.so.cc index bb9b9279..263d947d 100644 --- a/instrumentation/afl-llvm-lto-instrumentation.so.cc +++ b/instrumentation/afl-llvm-lto-instrumentation.so.cc @@ -546,6 +546,12 @@ bool AFLLTOPass::runOnModule(Module &M) { uint64_t literalLength = Str2.size(); uint64_t optLength = ilen->getZExtValue(); + if (optLength > literalLength + 1) { + + optLength = Str2.length() + 1; + + } + if (literalLength + 1 == optLength) { Str2.append("\0", 1); // add null byte @@ -649,6 +655,7 @@ bool AFLLTOPass::runOnModule(Module &M) { uint64_t literalLength = optLen; optLen = ilen->getZExtValue(); + if (optLen > literalLength + 1) { optLen = literalLength + 1; } if (optLen < 2) { continue; } if (literalLength + 1 == optLen) { // add null byte thestring.append("\0", 1); diff --git a/instrumentation/afl-llvm-pass.so.cc b/instrumentation/afl-llvm-pass.so.cc index 94b77f7d..ecf28f31 100644 --- a/instrumentation/afl-llvm-pass.so.cc +++ b/instrumentation/afl-llvm-pass.so.cc @@ -438,9 +438,9 @@ bool AFLCoverage::runOnModule(Module &M) { fprintf(stderr, "FUNCTION: %s (%zu)\n", F.getName().str().c_str(), F.size()); - if (!isInInstrumentList(&F)) continue; + if (!isInInstrumentList(&F)) { 
continue; } - if (F.size() < function_minimum_size) continue; + if (F.size() < function_minimum_size) { continue; } std::list<Value *> todo; for (auto &BB : F) { diff --git a/instrumentation/cmplog-instructions-pass.cc b/instrumentation/cmplog-instructions-pass.cc index ad334d3b..0562c5b2 100644 --- a/instrumentation/cmplog-instructions-pass.cc +++ b/instrumentation/cmplog-instructions-pass.cc @@ -104,7 +104,6 @@ Iterator Unique(Iterator first, Iterator last) { bool CmpLogInstructions::hookInstrs(Module &M) { std::vector<Instruction *> icomps; - std::vector<SwitchInst *> switches; LLVMContext & C = M.getContext(); Type * VoidTy = Type::getVoidTy(C); @@ -222,6 +221,18 @@ bool CmpLogInstructions::hookInstrs(Module &M) { FunctionCallee cmplogHookInsN = cN; #endif + GlobalVariable *AFLCmplogPtr = M.getNamedGlobal("__afl_cmp_map"); + + if (!AFLCmplogPtr) { + + AFLCmplogPtr = new GlobalVariable(M, PointerType::get(Int8Ty, 0), false, + GlobalValue::ExternalWeakLinkage, 0, + "__afl_cmp_map"); + + } + + Constant *Null = Constant::getNullValue(PointerType::get(Int8Ty, 0)); + /* iterate over all functions, bbs and instruction and add suitable calls */ for (auto &F : M) { @@ -238,164 +249,6 @@ bool CmpLogInstructions::hookInstrs(Module &M) { } - SwitchInst *switchInst = nullptr; - if ((switchInst = dyn_cast<SwitchInst>(BB.getTerminator()))) { - - if (switchInst->getNumCases() > 1) { switches.push_back(switchInst); } - - } - - } - - } - - } - - // unique the collected switches - switches.erase(Unique(switches.begin(), switches.end()), switches.end()); - - // Instrument switch values for cmplog - if (switches.size()) { - - if (!be_quiet) - errs() << "Hooking " << switches.size() << " switch instructions\n"; - - for (auto &SI : switches) { - - Value * Val = SI->getCondition(); - unsigned int max_size = Val->getType()->getIntegerBitWidth(), cast_size; - unsigned char do_cast = 0; - - if (!SI->getNumCases() || max_size < 16) { - - // if (!be_quiet) errs() << "skip trivial 
switch..\n"; - continue; - - } - - if (max_size % 8) { - - max_size = (((max_size / 8) + 1) * 8); - do_cast = 1; - - } - - IRBuilder<> IRB(SI->getParent()); - IRB.SetInsertPoint(SI); - - if (max_size > 128) { - - if (!be_quiet) { - - fprintf(stderr, - "Cannot handle this switch bit size: %u (truncating)\n", - max_size); - - } - - max_size = 128; - do_cast = 1; - - } - - // do we need to cast? - switch (max_size) { - - case 8: - case 16: - case 32: - case 64: - case 128: - cast_size = max_size; - break; - default: - cast_size = 128; - do_cast = 1; - - } - - Value *CompareTo = Val; - - if (do_cast) { - - CompareTo = - IRB.CreateIntCast(CompareTo, IntegerType::get(C, cast_size), false); - - } - - for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; - ++i) { - -#if LLVM_VERSION_MAJOR < 5 - ConstantInt *cint = i.getCaseValue(); -#else - ConstantInt *cint = i->getCaseValue(); -#endif - - if (cint) { - - std::vector<Value *> args; - args.push_back(CompareTo); - - Value *new_param = cint; - - if (do_cast) { - - new_param = - IRB.CreateIntCast(cint, IntegerType::get(C, cast_size), false); - - } - - if (new_param) { - - args.push_back(new_param); - ConstantInt *attribute = ConstantInt::get(Int8Ty, 1); - args.push_back(attribute); - if (cast_size != max_size) { - - ConstantInt *bitsize = - ConstantInt::get(Int8Ty, (max_size / 8) - 1); - args.push_back(bitsize); - - } - - switch (cast_size) { - - case 8: - IRB.CreateCall(cmplogHookIns1, args); - break; - case 16: - IRB.CreateCall(cmplogHookIns2, args); - break; - case 32: - IRB.CreateCall(cmplogHookIns4, args); - break; - case 64: - IRB.CreateCall(cmplogHookIns8, args); - break; - case 128: -#ifdef WORD_SIZE_64 - if (max_size == 128) { - - IRB.CreateCall(cmplogHookIns16, args); - - } else { - - IRB.CreateCall(cmplogHookInsN, args); - - } - -#endif - break; - default: - break; - - } - - } - - } - } } @@ -409,8 +262,15 @@ bool CmpLogInstructions::hookInstrs(Module &M) { for (auto &selectcmpInst : icomps) { - 
IRBuilder<> IRB(selectcmpInst->getParent()); - IRB.SetInsertPoint(selectcmpInst); + IRBuilder<> IRB2(selectcmpInst->getParent()); + IRB2.SetInsertPoint(selectcmpInst); + LoadInst *CmpPtr = IRB2.CreateLoad(AFLCmplogPtr); + CmpPtr->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + auto is_not_null = IRB2.CreateICmpNE(CmpPtr, Null); + auto ThenTerm = + SplitBlockAndInsertIfThen(is_not_null, selectcmpInst, false); + + IRBuilder<> IRB(ThenTerm); Value *op0 = selectcmpInst->getOperand(0); Value *op1 = selectcmpInst->getOperand(1); @@ -601,7 +461,7 @@ bool CmpLogInstructions::hookInstrs(Module &M) { } - if (switches.size() || icomps.size()) + if (icomps.size()) return true; else return false; diff --git a/instrumentation/cmplog-routines-pass.cc b/instrumentation/cmplog-routines-pass.cc index a5992c9a..1e2610f2 100644 --- a/instrumentation/cmplog-routines-pass.cc +++ b/instrumentation/cmplog-routines-pass.cc @@ -184,6 +184,18 @@ bool CmpLogRoutines::hookRtns(Module &M) { FunctionCallee cmplogGccStdC = c4; #endif + GlobalVariable *AFLCmplogPtr = M.getNamedGlobal("__afl_cmp_map"); + + if (!AFLCmplogPtr) { + + AFLCmplogPtr = new GlobalVariable(M, PointerType::get(Int8Ty, 0), false, + GlobalValue::ExternalWeakLinkage, 0, + "__afl_cmp_map"); + + } + + Constant *Null = Constant::getNullValue(PointerType::get(Int8Ty, 0)); + /* iterate over all functions, bbs and instruction and add suitable calls */ for (auto &F : M) { @@ -289,8 +301,15 @@ bool CmpLogRoutines::hookRtns(Module &M) { Value *v1P = callInst->getArgOperand(0), *v2P = callInst->getArgOperand(1); - IRBuilder<> IRB(callInst->getParent()); - IRB.SetInsertPoint(callInst); + IRBuilder<> IRB2(callInst->getParent()); + IRB2.SetInsertPoint(callInst); + + LoadInst *CmpPtr = IRB2.CreateLoad(AFLCmplogPtr); + CmpPtr->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + auto is_not_null = IRB2.CreateICmpNE(CmpPtr, Null); + auto ThenTerm = SplitBlockAndInsertIfThen(is_not_null, callInst, false); + + 
IRBuilder<> IRB(ThenTerm); std::vector<Value *> args; Value * v1Pcasted = IRB.CreatePointerCast(v1P, i8PtrTy); @@ -308,8 +327,15 @@ bool CmpLogRoutines::hookRtns(Module &M) { Value *v1P = callInst->getArgOperand(0), *v2P = callInst->getArgOperand(1); - IRBuilder<> IRB(callInst->getParent()); - IRB.SetInsertPoint(callInst); + IRBuilder<> IRB2(callInst->getParent()); + IRB2.SetInsertPoint(callInst); + + LoadInst *CmpPtr = IRB2.CreateLoad(AFLCmplogPtr); + CmpPtr->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + auto is_not_null = IRB2.CreateICmpNE(CmpPtr, Null); + auto ThenTerm = SplitBlockAndInsertIfThen(is_not_null, callInst, false); + + IRBuilder<> IRB(ThenTerm); std::vector<Value *> args; Value * v1Pcasted = IRB.CreatePointerCast(v1P, i8PtrTy); @@ -327,8 +353,15 @@ bool CmpLogRoutines::hookRtns(Module &M) { Value *v1P = callInst->getArgOperand(0), *v2P = callInst->getArgOperand(1); - IRBuilder<> IRB(callInst->getParent()); - IRB.SetInsertPoint(callInst); + IRBuilder<> IRB2(callInst->getParent()); + IRB2.SetInsertPoint(callInst); + + LoadInst *CmpPtr = IRB2.CreateLoad(AFLCmplogPtr); + CmpPtr->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + auto is_not_null = IRB2.CreateICmpNE(CmpPtr, Null); + auto ThenTerm = SplitBlockAndInsertIfThen(is_not_null, callInst, false); + + IRBuilder<> IRB(ThenTerm); std::vector<Value *> args; Value * v1Pcasted = IRB.CreatePointerCast(v1P, i8PtrTy); @@ -346,8 +379,15 @@ bool CmpLogRoutines::hookRtns(Module &M) { Value *v1P = callInst->getArgOperand(0), *v2P = callInst->getArgOperand(1); - IRBuilder<> IRB(callInst->getParent()); - IRB.SetInsertPoint(callInst); + IRBuilder<> IRB2(callInst->getParent()); + IRB2.SetInsertPoint(callInst); + + LoadInst *CmpPtr = IRB2.CreateLoad(AFLCmplogPtr); + CmpPtr->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + auto is_not_null = IRB2.CreateICmpNE(CmpPtr, Null); + auto ThenTerm = SplitBlockAndInsertIfThen(is_not_null, callInst, false); + + IRBuilder<> 
IRB(ThenTerm); std::vector<Value *> args; Value * v1Pcasted = IRB.CreatePointerCast(v1P, i8PtrTy); @@ -365,8 +405,15 @@ bool CmpLogRoutines::hookRtns(Module &M) { Value *v1P = callInst->getArgOperand(0), *v2P = callInst->getArgOperand(1); - IRBuilder<> IRB(callInst->getParent()); - IRB.SetInsertPoint(callInst); + IRBuilder<> IRB2(callInst->getParent()); + IRB2.SetInsertPoint(callInst); + + LoadInst *CmpPtr = IRB2.CreateLoad(AFLCmplogPtr); + CmpPtr->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + auto is_not_null = IRB2.CreateICmpNE(CmpPtr, Null); + auto ThenTerm = SplitBlockAndInsertIfThen(is_not_null, callInst, false); + + IRBuilder<> IRB(ThenTerm); std::vector<Value *> args; Value * v1Pcasted = IRB.CreatePointerCast(v1P, i8PtrTy); diff --git a/instrumentation/cmplog-switches-pass.cc b/instrumentation/cmplog-switches-pass.cc new file mode 100644 index 00000000..c42d44fe --- /dev/null +++ b/instrumentation/cmplog-switches-pass.cc @@ -0,0 +1,414 @@ +/* + american fuzzy lop++ - LLVM CmpLog instrumentation + -------------------------------------------------- + + Written by Andrea Fioraldi <andreafioraldi@gmail.com> + + Copyright 2015, 2016 Google Inc. All rights reserved. + Copyright 2019-2020 AFLplusplus Project. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at: + + http://www.apache.org/licenses/LICENSE-2.0 + +*/ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <iostream> +#include <list> +#include <string> +#include <fstream> +#include <sys/time.h> + +#include "llvm/Config/llvm-config.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/ValueTracking.h" + +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) + #include "llvm/IR/Verifier.h" + #include "llvm/IR/DebugInfo.h" +#else + #include "llvm/Analysis/Verifier.h" + #include "llvm/DebugInfo.h" + #define nullptr 0 +#endif + +#include <set> +#include "afl-llvm-common.h" + +using namespace llvm; + +namespace { + +class CmpLogInstructions : public ModulePass { + + public: + static char ID; + CmpLogInstructions() : ModulePass(ID) { + + initInstrumentList(); + + } + + bool runOnModule(Module &M) override; + +#if LLVM_VERSION_MAJOR < 4 + const char *getPassName() const override { + +#else + StringRef getPassName() const override { + +#endif + return "cmplog instructions"; + + } + + private: + bool hookInstrs(Module &M); + +}; + +} // namespace + +char CmpLogInstructions::ID = 0; + +template <class Iterator> +Iterator Unique(Iterator first, Iterator last) { + + while (first != last) { + + Iterator next(first); + last = std::remove(++next, last, *first); + first = next; + + } + + return last; + +} + +bool CmpLogInstructions::hookInstrs(Module &M) { + + std::vector<SwitchInst *> switches; + LLVMContext & C = M.getContext(); + + Type * VoidTy = Type::getVoidTy(C); + IntegerType *Int8Ty = IntegerType::getInt8Ty(C); + IntegerType *Int16Ty = IntegerType::getInt16Ty(C); + 
IntegerType *Int32Ty = IntegerType::getInt32Ty(C); + IntegerType *Int64Ty = IntegerType::getInt64Ty(C); + +#if LLVM_VERSION_MAJOR < 9 + Constant * +#else + FunctionCallee +#endif + c1 = M.getOrInsertFunction("__cmplog_ins_hook1", VoidTy, Int8Ty, Int8Ty, + Int8Ty +#if LLVM_VERSION_MAJOR < 5 + , + NULL +#endif + ); +#if LLVM_VERSION_MAJOR < 9 + Function *cmplogHookIns1 = cast<Function>(c1); +#else + FunctionCallee cmplogHookIns1 = c1; +#endif + +#if LLVM_VERSION_MAJOR < 9 + Constant * +#else + FunctionCallee +#endif + c2 = M.getOrInsertFunction("__cmplog_ins_hook2", VoidTy, Int16Ty, Int16Ty, + Int8Ty +#if LLVM_VERSION_MAJOR < 5 + , + NULL +#endif + ); +#if LLVM_VERSION_MAJOR < 9 + Function *cmplogHookIns2 = cast<Function>(c2); +#else + FunctionCallee cmplogHookIns2 = c2; +#endif + +#if LLVM_VERSION_MAJOR < 9 + Constant * +#else + FunctionCallee +#endif + c4 = M.getOrInsertFunction("__cmplog_ins_hook4", VoidTy, Int32Ty, Int32Ty, + Int8Ty +#if LLVM_VERSION_MAJOR < 5 + , + NULL +#endif + ); +#if LLVM_VERSION_MAJOR < 9 + Function *cmplogHookIns4 = cast<Function>(c4); +#else + FunctionCallee cmplogHookIns4 = c4; +#endif + +#if LLVM_VERSION_MAJOR < 9 + Constant * +#else + FunctionCallee +#endif + c8 = M.getOrInsertFunction("__cmplog_ins_hook8", VoidTy, Int64Ty, Int64Ty, + Int8Ty +#if LLVM_VERSION_MAJOR < 5 + , + NULL +#endif + ); +#if LLVM_VERSION_MAJOR < 9 + Function *cmplogHookIns8 = cast<Function>(c8); +#else + FunctionCallee cmplogHookIns8 = c8; +#endif + + GlobalVariable *AFLCmplogPtr = M.getNamedGlobal("__afl_cmp_map"); + + if (!AFLCmplogPtr) { + + AFLCmplogPtr = new GlobalVariable(M, PointerType::get(Int8Ty, 0), false, + GlobalValue::ExternalWeakLinkage, 0, + "__afl_cmp_map"); + + } + + Constant *Null = Constant::getNullValue(PointerType::get(Int8Ty, 0)); + + /* iterate over all functions, bbs and instruction and add suitable calls */ + for (auto &F : M) { + + if (!isInInstrumentList(&F)) continue; + + for (auto &BB : F) { + + SwitchInst *switchInst = nullptr; + if 
((switchInst = dyn_cast<SwitchInst>(BB.getTerminator()))) { + + if (switchInst->getNumCases() > 1) { switches.push_back(switchInst); } + + } + + } + + } + + // unique the collected switches + switches.erase(Unique(switches.begin(), switches.end()), switches.end()); + + // Instrument switch values for cmplog + if (switches.size()) { + + if (!be_quiet) + errs() << "Hooking " << switches.size() << " switch instructions\n"; + + for (auto &SI : switches) { + + Value * Val = SI->getCondition(); + unsigned int max_size = Val->getType()->getIntegerBitWidth(), cast_size; + unsigned char do_cast = 0; + + if (!SI->getNumCases() || max_size < 16) { + + // if (!be_quiet) errs() << "skip trivial switch..\n"; + continue; + + } + + if (max_size % 8) { + + max_size = (((max_size / 8) + 1) * 8); + do_cast = 1; + + } + + IRBuilder<> IRB2(SI->getParent()); + IRB2.SetInsertPoint(SI); + + LoadInst *CmpPtr = IRB2.CreateLoad(AFLCmplogPtr); + CmpPtr->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); + auto is_not_null = IRB2.CreateICmpNE(CmpPtr, Null); + auto ThenTerm = SplitBlockAndInsertIfThen(is_not_null, SI, false); + + IRBuilder<> IRB(ThenTerm); + + if (max_size > 128) { + + if (!be_quiet) { + + fprintf(stderr, + "Cannot handle this switch bit size: %u (truncating)\n", + max_size); + + } + + max_size = 128; + do_cast = 1; + + } + + // do we need to cast? 
+ switch (max_size) { + + case 8: + case 16: + case 32: + case 64: + case 128: + cast_size = max_size; + break; + default: + cast_size = 128; + do_cast = 1; + + } + + Value *CompareTo = Val; + + if (do_cast) { + + CompareTo = + IRB.CreateIntCast(CompareTo, IntegerType::get(C, cast_size), false); + + } + + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; + ++i) { + +#if LLVM_VERSION_MAJOR < 5 + ConstantInt *cint = i.getCaseValue(); +#else + ConstantInt *cint = i->getCaseValue(); +#endif + + if (cint) { + + std::vector<Value *> args; + args.push_back(CompareTo); + + Value *new_param = cint; + + if (do_cast) { + + new_param = + IRB.CreateIntCast(cint, IntegerType::get(C, cast_size), false); + + } + + if (new_param) { + + args.push_back(new_param); + ConstantInt *attribute = ConstantInt::get(Int8Ty, 1); + args.push_back(attribute); + if (cast_size != max_size) { + + ConstantInt *bitsize = + ConstantInt::get(Int8Ty, (max_size / 8) - 1); + args.push_back(bitsize); + + } + + switch (cast_size) { + + case 8: + IRB.CreateCall(cmplogHookIns1, args); + break; + case 16: + IRB.CreateCall(cmplogHookIns2, args); + break; + case 32: + IRB.CreateCall(cmplogHookIns4, args); + break; + case 64: + IRB.CreateCall(cmplogHookIns8, args); + break; + case 128: +#ifdef WORD_SIZE_64 + if (max_size == 128) { + + IRB.CreateCall(cmplogHookIns16, args); + + } else { + + IRB.CreateCall(cmplogHookInsN, args); + + } + +#endif + break; + default: + break; + + } + + } + + } + + } + + } + + } + + if (switches.size()) + return true; + else + return false; + +} + +bool CmpLogInstructions::runOnModule(Module &M) { + + if (getenv("AFL_QUIET") == NULL) + printf("Running cmplog-switches-pass by andreafioraldi@gmail.com\n"); + else + be_quiet = 1; + hookInstrs(M); + verifyModule(M); + + return true; + +} + +static void registerCmpLogInstructionsPass(const PassManagerBuilder &, + legacy::PassManagerBase &PM) { + + auto p = new CmpLogInstructions(); + PM.add(p); + +} + +static 
RegisterStandardPasses RegisterCmpLogInstructionsPass( + PassManagerBuilder::EP_OptimizerLast, registerCmpLogInstructionsPass); + +static RegisterStandardPasses RegisterCmpLogInstructionsPass0( + PassManagerBuilder::EP_EnabledOnOptLevel0, registerCmpLogInstructionsPass); + +#if LLVM_VERSION_MAJOR >= 11 +static RegisterStandardPasses RegisterCmpLogInstructionsPassLTO( + PassManagerBuilder::EP_FullLinkTimeOptimizationLast, + registerCmpLogInstructionsPass); +#endif + diff --git a/instrumentation/compare-transform-pass.so.cc b/instrumentation/compare-transform-pass.so.cc index 3ecba4e6..f5dd4a53 100644 --- a/instrumentation/compare-transform-pass.so.cc +++ b/instrumentation/compare-transform-pass.so.cc @@ -313,27 +313,18 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, ConstantInt *ilen = dyn_cast<ConstantInt>(op2); if (ilen) { - uint64_t len = ilen->getZExtValue(); // if len is zero this is a pointless call but allow real // implementation to worry about that - if (len < 2) continue; + if (ilen->getZExtValue() < 2) { continue; } - if (isMemcmp) { - - // if size of compare is larger than constant string this is - // likely a bug but allow real implementation to worry about - // that - uint64_t literalLength = HasStr1 ? 
Str1.size() : Str2.size(); - if (literalLength + 1 < ilen->getZExtValue()) continue; - - } - - } else if (isMemcmp) + } else if (isMemcmp) { // this *may* supply a len greater than the constant string at // runtime so similarly we don't want to have to handle that continue; + } + } calls.push_back(callInst); @@ -421,7 +412,7 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, } if (TmpConstStr.length() < 2 || - (TmpConstStr.length() == 2 && !TmpConstStr[1])) { + (TmpConstStr.length() == 2 && TmpConstStr[1] == 0)) { continue; diff --git a/src/afl-cc.c b/src/afl-cc.c index 980e5d86..9899f973 100644 --- a/src/afl-cc.c +++ b/src/afl-cc.c @@ -514,14 +514,14 @@ static void edit_params(u32 argc, char **argv, char **envp) { unsetenv("AFL_LD"); unsetenv("AFL_LD_CALLER"); + if (cmplog_mode) { if (lto_mode && !have_c) { cc_params[cc_par_cnt++] = alloc_printf( - "-Wl,-mllvm=-load=%s/cmplog-routines-pass.so", obj_path); - cc_params[cc_par_cnt++] = alloc_printf( - "-Wl,-mllvm=-load=%s/cmplog-instructions-pass.so", obj_path); + "-Wl,-mllvm=-load=%s/cmplog-switches-pass.so", obj_path); + cc_params[cc_par_cnt++] = alloc_printf( "-Wl,-mllvm=-load=%s/split-switches-pass.so", obj_path); @@ -531,13 +531,7 @@ static void edit_params(u32 argc, char **argv, char **envp) { cc_params[cc_par_cnt++] = "-load"; cc_params[cc_par_cnt++] = "-Xclang"; cc_params[cc_par_cnt++] = - alloc_printf("%s/cmplog-routines-pass.so", obj_path); - - cc_params[cc_par_cnt++] = "-Xclang"; - cc_params[cc_par_cnt++] = "-load"; - cc_params[cc_par_cnt++] = "-Xclang"; - cc_params[cc_par_cnt++] = - alloc_printf("%s/cmplog-instructions-pass.so", obj_path); + alloc_printf("%s/cmplog-switches-pass.so", obj_path); // reuse split switches from laf cc_params[cc_par_cnt++] = "-Xclang"; @@ -643,6 +637,33 @@ static void edit_params(u32 argc, char **argv, char **envp) { } + if (cmplog_mode) { + + if (lto_mode && !have_c) { + + cc_params[cc_par_cnt++] = alloc_printf( + 
"-Wl,-mllvm=-load=%s/cmplog-instructions-pass.so", obj_path); + cc_params[cc_par_cnt++] = alloc_printf( + "-Wl,-mllvm=-load=%s/cmplog-routines-pass.so", obj_path); + + } else { + + cc_params[cc_par_cnt++] = "-Xclang"; + cc_params[cc_par_cnt++] = "-load"; + cc_params[cc_par_cnt++] = "-Xclang"; + cc_params[cc_par_cnt++] = + alloc_printf("%s/cmplog-instructions-pass.so", obj_path); + + cc_params[cc_par_cnt++] = "-Xclang"; + cc_params[cc_par_cnt++] = "-load"; + cc_params[cc_par_cnt++] = "-Xclang"; + cc_params[cc_par_cnt++] = + alloc_printf("%s/cmplog-routines-pass.so", obj_path); + + } + + } + // cc_params[cc_par_cnt++] = "-Qunused-arguments"; // in case LLVM is installed not via a package manager or "make install" @@ -746,6 +767,11 @@ static void edit_params(u32 argc, char **argv, char **envp) { cc_params[cc_par_cnt++] = afllib; +#ifdef __APPLE__ + cc_params[cc_par_cnt++] = "-undefined"; + cc_params[cc_par_cnt++] = "dynamic_lookup"; +#endif + } continue; @@ -768,7 +794,9 @@ static void edit_params(u32 argc, char **argv, char **envp) { if (!strcmp(cur, "-E")) preprocessor_only = 1; if (!strcmp(cur, "-shared")) shared_linking = 1; if (!strcmp(cur, "-Wl,-r")) partial_linking = 1; - if (!strcmp(cur, "-Wl,-i")) partial_linking = 1; + if (!strcmp(cur, "-Wl,--relocatable")) partial_linking = 1; + if (!strcmp(cur, "-r")) partial_linking = 1; + if (!strcmp(cur, "--relocatable")) partial_linking = 1; if (!strcmp(cur, "-c")) have_c = 1; if (!strncmp(cur, "-O", 2)) have_o = 1; diff --git a/src/afl-fuzz-bitmap.c b/src/afl-fuzz-bitmap.c index 97f10e6f..0a9242a5 100644 --- a/src/afl-fuzz-bitmap.c +++ b/src/afl-fuzz-bitmap.c @@ -551,19 +551,18 @@ save_if_interesting(afl_state_t *afl, void *mem, u32 len, u8 fault) { } - if (cksum) - afl->queue_top->exec_cksum = cksum; - else - cksum = afl->queue_top->exec_cksum = - hash64(afl->fsrv.trace_bits, afl->fsrv.map_size, HASH_CONST); - - if (afl->schedule >= FAST && afl->schedule <= RARE) { + /* AFLFast schedule? 
update the new queue entry */ + if (cksum) { afl->queue_top->n_fuzz_entry = cksum % N_FUZZ_SIZE; afl->n_fuzz[afl->queue_top->n_fuzz_entry] = 1; } + /* due to classify counts we have to recalculate the checksum */ + cksum = afl->queue_top->exec_cksum = + hash64(afl->fsrv.trace_bits, afl->fsrv.map_size, HASH_CONST); + /* Try to calibrate inline; this also calls update_bitmap_score() when successful. */ diff --git a/src/afl-fuzz-mutators.c b/src/afl-fuzz-mutators.c index e27d6fae..79a47744 100644 --- a/src/afl-fuzz-mutators.c +++ b/src/afl-fuzz-mutators.c @@ -393,6 +393,7 @@ u8 trim_case_custom(afl_state_t *afl, struct queue_entry *q, u8 *in_buf, if (afl->stop_soon || fault == FSRV_RUN_ERROR) { goto abort_trimming; } + classify_counts(&afl->fsrv); cksum = hash64(afl->fsrv.trace_bits, afl->fsrv.map_size, HASH_CONST); } diff --git a/src/afl-fuzz-one.c b/src/afl-fuzz-one.c index f03249e9..7274f679 100644 --- a/src/afl-fuzz-one.c +++ b/src/afl-fuzz-one.c @@ -2057,7 +2057,7 @@ havoc_stage: temp_len = new_len; if (out_buf != custom_havoc_buf) { - afl_realloc(AFL_BUF_PARAM(out), temp_len); + out_buf = afl_realloc(AFL_BUF_PARAM(out), temp_len); if (unlikely(!afl->out_buf)) { PFATAL("alloc"); } memcpy(out_buf, custom_havoc_buf, temp_len); @@ -2102,7 +2102,7 @@ havoc_stage: case 8 ... 9: { - /* Set word to interesting value, randomly choosing endian. */ + /* Set word to interesting value, little endian. */ if (temp_len < 2) { break; } @@ -2119,7 +2119,7 @@ havoc_stage: case 10 ... 11: { - /* Set word to interesting value, randomly choosing endian. */ + /* Set word to interesting value, big endian. */ if (temp_len < 2) { break; } @@ -2136,7 +2136,7 @@ havoc_stage: case 12 ... 13: { - /* Set dword to interesting value, randomly choosing endian. */ + /* Set dword to interesting value, little endian. */ if (temp_len < 4) { break; } @@ -2153,7 +2153,7 @@ havoc_stage: case 14 ... 15: { - /* Set dword to interesting value, randomly choosing endian. 
*/ + /* Set dword to interesting value, big endian. */ if (temp_len < 4) { break; } diff --git a/src/afl-fuzz-queue.c b/src/afl-fuzz-queue.c index d2689c94..b759532c 100644 --- a/src/afl-fuzz-queue.c +++ b/src/afl-fuzz-queue.c @@ -1135,12 +1135,10 @@ inline u8 *queue_testcase_get(afl_state_t *afl, struct queue_entry *q) { do_once = 1; // release unneeded memory - u8 *ptr = ck_realloc( + afl->q_testcase_cache = ck_realloc( afl->q_testcase_cache, (afl->q_testcase_max_cache_entries + 1) * sizeof(size_t)); - if (ptr) { afl->q_testcase_cache = (struct queue_entry **)ptr; } - } /* Cache full. We neet to evict one or more to map one. diff --git a/src/afl-fuzz-run.c b/src/afl-fuzz-run.c index 49856a9f..e876beea 100644 --- a/src/afl-fuzz-run.c +++ b/src/afl-fuzz-run.c @@ -413,7 +413,7 @@ u8 calibrate_case(afl_state_t *afl, struct queue_entry *q, u8 *use_mem, // note: from_queue seems to only be set during initialization if (afl->afl_env.afl_no_ui || from_queue) { - WARNF("instability detected during calibration\n"); + WARNF("instability detected during calibration"); } else if (afl->debug) { diff --git a/src/afl-fuzz.c b/src/afl-fuzz.c index bd9b6691..0c7b6e42 100644 --- a/src/afl-fuzz.c +++ b/src/afl-fuzz.c @@ -143,7 +143,7 @@ static void usage(u8 *argv0, int more_help) { " -x dict_file - fuzzer dictionary (see README.md, specify up to 4 " "times)\n\n" - "Testing settings:\n" + "Test settings:\n" " -s seed - use a fixed seed for the RNG\n" " -V seconds - fuzz for a specified time then terminate\n" " -E execs - fuzz for an approx. no. 
of total executions then " @@ -158,7 +158,7 @@ static void usage(u8 *argv0, int more_help) { " -F path - sync to a foreign fuzzer queue directory (requires " "-M, can\n" " be specified up to %u times)\n" - " -d - skip deterministic fuzzing in -M mode\n" + // " -d - skip deterministic fuzzing in -M mode\n" " -T text - text banner to show on the screen\n" " -I command - execute this command/script when a new crash is " "found\n" diff --git a/src/afl-showmap.c b/src/afl-showmap.c index 936d3bc4..5c899e69 100644 --- a/src/afl-showmap.c +++ b/src/afl-showmap.c @@ -401,14 +401,23 @@ static u32 read_file(u8 *in_file) { if (fstat(fd, &st) || !st.st_size) { - WARNF("Zero-sized input file '%s'.", in_file); + if (!be_quiet && !quiet_mode) { + + WARNF("Zero-sized input file '%s'.", in_file); + + } } if (st.st_size > MAX_FILE) { - WARNF("Input file '%s' is too large, only reading %u bytes.", in_file, - MAX_FILE); + if (!be_quiet && !quiet_mode) { + + WARNF("Input file '%s' is too large, only reading %u bytes.", in_file, + MAX_FILE); + + } + in_len = MAX_FILE; } else { @@ -748,7 +757,7 @@ u32 execute_testcases(u8 *dir) { } - if (st.st_size > MAX_FILE && !be_quiet) { + if (st.st_size > MAX_FILE && !be_quiet && !quiet_mode) { WARNF("Test case '%s' is too big (%s, limit is %s), partial reading", fn2, stringify_mem_size(val_buf[0], sizeof(val_buf[0]), st.st_size), @@ -853,7 +862,8 @@ static void usage(u8 *argv0) { "AFL_PRELOAD: LD_PRELOAD / DYLD_INSERT_LIBRARIES settings for target\n" "AFL_PRINT_FILENAMES: If set, the filename currently processed will be " "printed to stdout\n" - "AFL_QUIET: do not print extra informational output\n", + "AFL_QUIET: do not print extra informational output\n" + "AFL_NO_FORKSRV: run target via execve instead of using the forkserver\n", argv0, MEM_LIMIT, doc_path); exit(1); @@ -1097,6 +1107,11 @@ int main(int argc, char **argv_orig, char **envp) { check_environment_vars(envp); + if (getenv("AFL_NO_FORKSRV")) { /* if set, use the fauxserver */ + 
fsrv->use_fauxsrv = true; + + } + if (getenv("AFL_DEBUG")) { DEBUGF(""); diff --git a/src/afl-tmin.c b/src/afl-tmin.c index 6656712a..2d80abe4 100644 --- a/src/afl-tmin.c +++ b/src/afl-tmin.c @@ -877,12 +877,13 @@ static void usage(u8 *argv0) { " the target was compiled for\n" "AFL_PRELOAD: LD_PRELOAD / DYLD_INSERT_LIBRARIES settings for target\n" "AFL_TMIN_EXACT: require execution paths to match for crashing inputs\n" + "AFL_NO_FORKSRV: run target via execve instead of using the forkserver\n" "ASAN_OPTIONS: custom settings for ASAN\n" " (must contain abort_on_error=1 and symbolize=0)\n" "MSAN_OPTIONS: custom settings for MSAN\n" " (must contain exitcode="STRINGIFY(MSAN_ERROR)" and symbolize=0)\n" - "TMPDIR: directory to use for temporary input files\n" - , argv0, EXEC_TIMEOUT, MEM_LIMIT, doc_path); + "TMPDIR: directory to use for temporary input files\n", + argv0, EXEC_TIMEOUT, MEM_LIMIT, doc_path); exit(1); @@ -1104,6 +1105,12 @@ int main(int argc, char **argv_orig, char **envp) { if (optind == argc || !in_file || !output_file) { usage(argv[0]); } check_environment_vars(envp); + + if (getenv("AFL_NO_FORKSRV")) { /* if set, use the fauxserver */ + fsrv->use_fauxsrv = true; + + } + setenv("AFL_NO_AUTODICT", "1", 1); /* initialize cmplog_mode */ diff --git a/unicorn_mode/helper_scripts/template_test_harness.py b/unicorn_mode/helper_scripts/template_test_harness.py deleted file mode 100644 index 93c526cc..00000000 --- a/unicorn_mode/helper_scripts/template_test_harness.py +++ /dev/null @@ -1,104 +0,0 @@ -""" - template_test_harness.py - - Template which loads the context of a process into a Unicorn Engine, - instance, loads a custom (mutated) inputs, and executes the - desired code. Designed to be used in conjunction with one of the - Unicorn Context Dumper scripts. 
- - Author: - Nathan Voss <njvoss299@gmail.com> -""" - -import argparse - -from unicorn import * -from unicorn.x86_const import * # TODO: Set correct architecture here as necessary - -import unicorn_loader - -# Simple stand-in heap to prevent OS/kernel issues -unicorn_heap = None - -# Start and end address of emulation -START_ADDRESS = # TODO: Set start address here -END_ADDRESS = # TODO: Set end address here - -""" - Implement target-specific hooks in here. - Stub out, skip past, and re-implement necessary functionality as appropriate -""" -def unicorn_hook_instruction(uc, address, size, user_data): - - # TODO: Setup hooks and handle anything you need to here - # - For example, hook malloc/free/etc. and handle it internally - pass - -#------------------------ -#---- Main test function - -def main(): - - parser = argparse.ArgumentParser() - parser.add_argument('context_dir', type=str, help="Directory containing process context") - parser.add_argument('input_file', type=str, help="Path to the file containing the mutated input content") - parser.add_argument('-d', '--debug', default=False, action="store_true", help="Dump trace info") - args = parser.parse_args() - - print("Loading context from {}".format(args.context_dir)) - uc = unicorn_loader.AflUnicornEngine(args.context_dir, enable_trace=args.debug, debug_print=False) - - # Instantiate the hook function to avoid emulation errors - global unicorn_heap - unicorn_heap = unicorn_loader.UnicornSimpleHeap(uc, debug_print=True) - uc.hook_add(UC_HOOK_CODE, unicorn_hook_instruction) - - # Execute 1 instruction just to startup the forkserver - # NOTE: This instruction will be executed again later, so be sure that - # there are no negative consequences to the overall execution state. - # If there are, change the later call to emu_start to no re-execute - # the first instruction. 
- print("Starting the forkserver by executing 1 instruction") - try: - uc.emu_start(START_ADDRESS, 0, 0, count=1) - except UcError as e: - print("ERROR: Failed to execute a single instruction (error: {})!".format(e)) - return - - # Allocate a buffer and load a mutated input and put it into the right spot - if args.input_file: - print("Loading input content from {}".format(args.input_file)) - input_file = open(args.input_file, 'rb') - input_content = input_file.read() - input_file.close() - - # TODO: Apply constraints to mutated input here - raise exceptions.NotImplementedError('No constraints on the mutated inputs have been set!') - - # Allocate a new buffer and put the input into it - buf_addr = unicorn_heap.malloc(len(input_content)) - uc.mem_write(buf_addr, input_content) - print("Allocated mutated input buffer @ 0x{0:016x}".format(buf_addr)) - - # TODO: Set the input into the state so it will be handled - raise exceptions.NotImplementedError('The mutated input was not loaded into the Unicorn state!') - - # Run the test - print("Executing from 0x{0:016x} to 0x{1:016x}".format(START_ADDRESS, END_ADDRESS)) - try: - result = uc.emu_start(START_ADDRESS, END_ADDRESS, timeout=0, count=0) - except UcError as e: - # If something went wrong during emulation a signal is raised to force this - # script to crash in a way that AFL can detect ('uc.force_crash()' should be - # called for any condition that you want AFL to treat as a crash). 
- print("Execution failed with error: {}".format(e)) - uc.dump_regs() - uc.force_crash(e) - - print("Final register state:") - uc.dump_regs() - - print("Done.") - -if __name__ == "__main__": - main() diff --git a/unicorn_mode/helper_scripts/unicorn_loader.py b/unicorn_mode/helper_scripts/unicorn_loader.py index 1914a83d..c48a7572 100644 --- a/unicorn_mode/helper_scripts/unicorn_loader.py +++ b/unicorn_mode/helper_scripts/unicorn_loader.py @@ -20,15 +20,16 @@ import time import zlib # Unicorn imports -from unicorn import * -from unicorn.arm_const import * -from unicorn.arm64_const import * -from unicorn.x86_const import * -from unicorn.mips_const import * +from unicornafl import * +from unicornafl.arm_const import * +from unicornafl.arm64_const import * +from unicornafl.x86_const import * +from unicornafl.mips_const import * # If Capstone libraries are availible (only check once) try: from capstone import * + CAPSTONE_EXISTS = 1 except: CAPSTONE_EXISTS = 0 @@ -44,29 +45,38 @@ MAX_ALLOWABLE_SEG_SIZE = 1024 * 1024 * 1024 # Alignment functions to align all memory segments to Unicorn page boundaries (4KB pages only) ALIGN_PAGE_DOWN = lambda x: x & ~(UNICORN_PAGE_SIZE - 1) -ALIGN_PAGE_UP = lambda x: (x + UNICORN_PAGE_SIZE - 1) & ~(UNICORN_PAGE_SIZE-1) +ALIGN_PAGE_UP = lambda x: (x + UNICORN_PAGE_SIZE - 1) & ~(UNICORN_PAGE_SIZE - 1) + +# --------------------------------------- +# ---- Unicorn-based heap implementation -#--------------------------------------- -#---- Unicorn-based heap implementation class UnicornSimpleHeap(object): - """ Use this class to provide a simple heap implementation. This should - be used if malloc/free calls break things during emulation. This heap also - implements basic guard-page capabilities which enable immediate notice of - heap overflow and underflows. + """Use this class to provide a simple heap implementation. This should + be used if malloc/free calls break things during emulation. 
This heap also + implements basic guard-page capabilities which enable immediate notice of + heap overflow and underflows. """ # Helper data-container used to track chunks class HeapChunk(object): def __init__(self, actual_addr, total_size, data_size): - self.total_size = total_size # Total size of the chunk (including padding and guard page) - self.actual_addr = actual_addr # Actual start address of the chunk - self.data_size = data_size # Size requested by the caller of actual malloc call - self.data_addr = actual_addr + UNICORN_PAGE_SIZE # Address where data actually starts + self.total_size = ( + total_size # Total size of the chunk (including padding and guard page) + ) + self.actual_addr = actual_addr # Actual start address of the chunk + self.data_size = ( + data_size # Size requested by the caller of actual malloc call + ) + self.data_addr = ( + actual_addr + UNICORN_PAGE_SIZE + ) # Address where data actually starts # Returns true if the specified buffer is completely within the chunk, else false def is_buffer_in_chunk(self, addr, size): - if addr >= self.data_addr and ((addr + size) <= (self.data_addr + self.data_size)): + if addr >= self.data_addr and ( + (addr + size) <= (self.data_addr + self.data_size) + ): return True else: return False @@ -75,9 +85,9 @@ class UnicornSimpleHeap(object): HEAP_MIN_ADDR = 0x00002000 HEAP_MAX_ADDR = 0xFFFFFFFF - _uc = None # Unicorn engine instance to interact with - _chunks = [] # List of all known chunks - _debug_print = False # True to print debug information + _uc = None # Unicorn engine instance to interact with + _chunks = [] # List of all known chunks + _debug_print = False # True to print debug information def __init__(self, uc, debug_print=False): self._uc = uc @@ -98,7 +108,11 @@ class UnicornSimpleHeap(object): self._uc.mem_map(addr, total_chunk_size, UC_PROT_READ | UC_PROT_WRITE) chunk = self.HeapChunk(addr, total_chunk_size, size) if self._debug_print: - print("Allocating 0x{0:x}-byte chunk @ 
0x{1:016x}".format(chunk.data_size, chunk.data_addr)) + print( + "Allocating 0x{0:x}-byte chunk @ 0x{1:016x}".format( + chunk.data_size, chunk.data_addr + ) + ) break except UcError as e: continue @@ -110,19 +124,26 @@ class UnicornSimpleHeap(object): def calloc(self, size, count): # Simple wrapper around malloc with calloc() args - return self.malloc(size*count) + return self.malloc(size * count) def realloc(self, ptr, new_size): # Wrapper around malloc(new_size) / memcpy(new, old, old_size) / free(old) if self._debug_print: - print("Reallocating chunk @ 0x{0:016x} to be 0x{1:x} bytes".format(ptr, new_size)) + print( + "Reallocating chunk @ 0x{0:016x} to be 0x{1:x} bytes".format( + ptr, new_size + ) + ) old_chunk = None for chunk in self._chunks: if chunk.data_addr == ptr: old_chunk = chunk new_chunk_addr = self.malloc(new_size) if old_chunk != None: - self._uc.mem_write(new_chunk_addr, str(self._uc.mem_read(old_chunk.data_addr, old_chunk.data_size))) + self._uc.mem_write( + new_chunk_addr, + str(self._uc.mem_read(old_chunk.data_addr, old_chunk.data_size)), + ) self.free(old_chunk.data_addr) return new_chunk_addr @@ -130,7 +151,11 @@ class UnicornSimpleHeap(object): for chunk in self._chunks: if chunk.is_buffer_in_chunk(addr, 1): if self._debug_print: - print("Freeing 0x{0:x}-byte chunk @ 0x{0:016x}".format(chunk.req_size, chunk.data_addr)) + print( + "Freeing 0x{0:x}-byte chunk @ 0x{0:016x}".format( + chunk.req_size, chunk.data_addr + ) + ) self._uc.mem_unmap(chunk.actual_addr, chunk.total_size) self._chunks.remove(chunk) return True @@ -139,19 +164,27 @@ class UnicornSimpleHeap(object): # Implements basic guard-page functionality def __check_mem_access(self, uc, access, address, size, value, user_data): for chunk in self._chunks: - if address >= chunk.actual_addr and ((address + size) <= (chunk.actual_addr + chunk.total_size)): + if address >= chunk.actual_addr and ( + (address + size) <= (chunk.actual_addr + chunk.total_size) + ): if 
chunk.is_buffer_in_chunk(address, size) == False: if self._debug_print: - print("Heap over/underflow attempting to {0} 0x{1:x} bytes @ {2:016x}".format( \ - "write" if access == UC_MEM_WRITE else "read", size, address)) + print( + "Heap over/underflow attempting to {0} 0x{1:x} bytes @ {2:016x}".format( + "write" if access == UC_MEM_WRITE else "read", + size, + address, + ) + ) # Force a memory-based crash uc.force_crash(UcError(UC_ERR_READ_PROT)) -#--------------------------- -#---- Loading function -class AflUnicornEngine(Uc): +# --------------------------- +# ---- Loading function + +class AflUnicornEngine(Uc): def __init__(self, context_directory, enable_trace=False, debug_print=False): """ Initializes an AflUnicornEngine instance, which extends standard the UnicornEngine @@ -166,51 +199,56 @@ class AflUnicornEngine(Uc): # Make sure the index file exists and load it index_file_path = os.path.join(context_directory, INDEX_FILE_NAME) if not os.path.isfile(index_file_path): - raise Exception("Index file not found. Expected it to be at {}".format(index_file_path)) + raise Exception( + "Index file not found. 
Expected it to be at {}".format(index_file_path) + ) # Load the process context from the index file if debug_print: print("Loading process context index from {}".format(index_file_path)) - index_file = open(index_file_path, 'r') + index_file = open(index_file_path, "r") context = json.load(index_file) index_file.close() # Check the context to make sure we have the basic essential components - if 'arch' not in context: + if "arch" not in context: raise Exception("Couldn't find architecture information in index file") - if 'regs' not in context: + if "regs" not in context: raise Exception("Couldn't find register information in index file") - if 'segments' not in context: + if "segments" not in context: raise Exception("Couldn't find segment/memory information in index file") # Set the UnicornEngine instance's architecture and mode - self._arch_str = context['arch']['arch'] + self._arch_str = context["arch"]["arch"] arch, mode = self.__get_arch_and_mode(self._arch_str) Uc.__init__(self, arch, mode) # Load the registers - regs = context['regs'] + regs = context["regs"] reg_map = self.__get_register_map(self._arch_str) self.__load_registers(regs, reg_map, debug_print) # If we have extra FLOATING POINT regs, load them in! - if 'regs_extended' in context: - if context['regs_extended']: - regs_extended = context['regs_extended'] - reg_map = self.__get_registers_extended(self._arch_str) - self.__load_registers(regs_extended, reg_map, debug_print) + if "regs_extended" in context: + if context["regs_extended"]: + regs_extended = context["regs_extended"] + reg_map = self.__get_registers_extended(self._arch_str) + self.__load_registers(regs_extended, reg_map, debug_print) # For ARM, sometimes the stack pointer is erased ??? 
(I think I fixed this (issue with ordering of dumper.py, I'll keep the write anyways) if self.__get_arch_and_mode(self.get_arch_str())[0] == UC_ARCH_ARM: - self.reg_write(UC_ARM_REG_SP, regs['sp']) + self.reg_write(UC_ARM_REG_SP, regs["sp"]) # Setup the memory map and load memory content - self.__map_segments(context['segments'], context_directory, debug_print) + self.__map_segments(context["segments"], context_directory, debug_print) if enable_trace: self.hook_add(UC_HOOK_BLOCK, self.__trace_block) self.hook_add(UC_HOOK_CODE, self.__trace_instruction) self.hook_add(UC_HOOK_MEM_WRITE | UC_HOOK_MEM_READ, self.__trace_mem_access) - self.hook_add(UC_HOOK_MEM_WRITE_UNMAPPED | UC_HOOK_MEM_READ_INVALID, self.__trace_mem_invalid_access) + self.hook_add( + UC_HOOK_MEM_WRITE_UNMAPPED | UC_HOOK_MEM_READ_INVALID, + self.__trace_mem_invalid_access, + ) if debug_print: print("Done loading context.") @@ -225,13 +263,19 @@ class AflUnicornEngine(Uc): return self._arch_str def force_crash(self, uc_error): - """ This function should be called to indicate to AFL that a crash occurred during emulation. - You can pass the exception received from Uc.emu_start + """This function should be called to indicate to AFL that a crash occurred during emulation. 
+ You can pass the exception received from Uc.emu_start """ mem_errors = [ - UC_ERR_READ_UNMAPPED, UC_ERR_READ_PROT, UC_ERR_READ_UNALIGNED, - UC_ERR_WRITE_UNMAPPED, UC_ERR_WRITE_PROT, UC_ERR_WRITE_UNALIGNED, - UC_ERR_FETCH_UNMAPPED, UC_ERR_FETCH_PROT, UC_ERR_FETCH_UNALIGNED, + UC_ERR_READ_UNMAPPED, + UC_ERR_READ_PROT, + UC_ERR_READ_UNALIGNED, + UC_ERR_WRITE_UNMAPPED, + UC_ERR_WRITE_PROT, + UC_ERR_WRITE_UNALIGNED, + UC_ERR_FETCH_UNMAPPED, + UC_ERR_FETCH_PROT, + UC_ERR_FETCH_UNALIGNED, ] if uc_error.errno in mem_errors: # Memory error - throw SIGSEGV @@ -245,13 +289,18 @@ class AflUnicornEngine(Uc): def dump_regs(self): """ Dumps the contents of all the registers to STDOUT """ - for reg in sorted(self.__get_register_map(self._arch_str).items(), key=lambda reg: reg[0]): + for reg in sorted( + self.__get_register_map(self._arch_str).items(), key=lambda reg: reg[0] + ): print(">>> {0:>4}: 0x{1:016x}".format(reg[0], self.reg_read(reg[1]))) def dump_regs_extended(self): """ Dumps the contents of all the registers to STDOUT """ try: - for reg in sorted(self.__get_registers_extended(self._arch_str).items(), key=lambda reg: reg[0]): + for reg in sorted( + self.__get_registers_extended(self._arch_str).items(), + key=lambda reg: reg[0], + ): print(">>> {0:>4}: 0x{1:016x}".format(reg[0], self.reg_read(reg[1]))) except Exception as e: print("ERROR: Are extended registers loaded?") @@ -290,8 +339,8 @@ class AflUnicornEngine(Uc): struct.unpack('<Q', self.mem_read(addr, 8))[0])) """ - #----------------------------- - #---- Loader Helper Functions + # ----------------------------- + # ---- Loader Helper Functions def __load_registers(self, regs, reg_map, debug_print): for register, value in regs.items(): @@ -307,7 +356,11 @@ class AflUnicornEngine(Uc): reg_write_retry = False except Exception as e: if debug_print: - print("ERROR writing register: {}, value: {} -- {}".format(register, value, repr(e))) + print( + "ERROR writing register: {}, value: {} -- {}".format( + register, value, 
repr(e) + ) + ) if reg_write_retry: if debug_print: @@ -316,7 +369,11 @@ class AflUnicornEngine(Uc): self.reg_write(reg_map[register.lower()], int(value, 16)) except Exception as e: if debug_print: - print("ERROR writing hex string register: {}, value: {} -- {}".format(register, value, repr(e))) + print( + "ERROR writing hex string register: {}, value: {} -- {}".format( + register, value, repr(e) + ) + ) def __map_segment(self, name, address, size, perms, debug_print=False): # - size is unsigned and must be != 0 @@ -330,24 +387,30 @@ class AflUnicornEngine(Uc): if mem_start_aligned != mem_start or mem_end_aligned != mem_end: print("Aligning segment to page boundary:") print(" name: {}".format(name)) - print(" start: {0:016x} -> {1:016x}".format(mem_start, mem_start_aligned)) + print( + " start: {0:016x} -> {1:016x}".format(mem_start, mem_start_aligned) + ) print(" end: {0:016x} -> {1:016x}".format(mem_end, mem_end_aligned)) - print("Mapping segment from {0:016x} - {1:016x} with perm={2}: {3}".format(mem_start_aligned, mem_end_aligned, perms, name)) - if(mem_start_aligned < mem_end_aligned): + print( + "Mapping segment from {0:016x} - {1:016x} with perm={2}: {3}".format( + mem_start_aligned, mem_end_aligned, perms, name + ) + ) + if mem_start_aligned < mem_end_aligned: self.mem_map(mem_start_aligned, mem_end_aligned - mem_start_aligned, perms) - def __map_segments(self, segment_list, context_directory, debug_print=False): for segment in segment_list: # Get the segment information from the index - name = segment['name'] - seg_start = segment['start'] - seg_end = segment['end'] - perms = \ - (UC_PROT_READ if segment['permissions']['r'] == True else 0) | \ - (UC_PROT_WRITE if segment['permissions']['w'] == True else 0) | \ - (UC_PROT_EXEC if segment['permissions']['x'] == True else 0) + name = segment["name"] + seg_start = segment["start"] + seg_end = segment["end"] + perms = ( + (UC_PROT_READ if segment["permissions"]["r"] == True else 0) + | (UC_PROT_WRITE if 
segment["permissions"]["w"] == True else 0) + | (UC_PROT_EXEC if segment["permissions"]["x"] == True else 0) + ) if debug_print: print("Handling segment {}".format(name)) @@ -376,48 +439,86 @@ class AflUnicornEngine(Uc): # Map memory into the address space if it is of an acceptable size. if (seg_end - seg_start) > MAX_ALLOWABLE_SEG_SIZE: if debug_print: - print("Skipping segment (LARGER THAN {0}) from {1:016x} - {2:016x} with perm={3}: {4}".format(MAX_ALLOWABLE_SEG_SIZE, seg_start, seg_end, perms, name)) + print( + "Skipping segment (LARGER THAN {0}) from {1:016x} - {2:016x} with perm={3}: {4}".format( + MAX_ALLOWABLE_SEG_SIZE, seg_start, seg_end, perms, name + ) + ) continue - elif not found: # Make sure it's not already mapped - if overlap_start: # Partial overlap (start) + elif not found: # Make sure it's not already mapped + if overlap_start: # Partial overlap (start) self.__map_segment(name, tmp, seg_end - tmp, perms, debug_print) - elif overlap_end: # Patrial overlap (end) - self.__map_segment(name, seg_start, tmp - seg_start, perms, debug_print) - else: # Not found - self.__map_segment(name, seg_start, seg_end - seg_start, perms, debug_print) + elif overlap_end: # Patrial overlap (end) + self.__map_segment( + name, seg_start, tmp - seg_start, perms, debug_print + ) + else: # Not found + self.__map_segment( + name, seg_start, seg_end - seg_start, perms, debug_print + ) else: if debug_print: print("Segment {} already mapped. Moving on.".format(name)) # Load the content (if available) - if 'content_file' in segment and len(segment['content_file']) > 0: - content_file_path = os.path.join(context_directory, segment['content_file']) + if "content_file" in segment and len(segment["content_file"]) > 0: + content_file_path = os.path.join( + context_directory, segment["content_file"] + ) if not os.path.isfile(content_file_path): - raise Exception("Unable to find segment content file. 
Expected it to be at {}".format(content_file_path)) - #if debug_print: + raise Exception( + "Unable to find segment content file. Expected it to be at {}".format( + content_file_path + ) + ) + # if debug_print: # print("Loading content for segment {} from {}".format(name, segment['content_file'])) - content_file = open(content_file_path, 'rb') + content_file = open(content_file_path, "rb") compressed_content = content_file.read() content_file.close() self.mem_write(seg_start, zlib.decompress(compressed_content)) else: if debug_print: - print("No content found for segment {0} @ {1:016x}".format(name, seg_start)) - self.mem_write(seg_start, b'\x00' * (seg_end - seg_start)) + print( + "No content found for segment {0} @ {1:016x}".format( + name, seg_start + ) + ) + self.mem_write(seg_start, b"\x00" * (seg_end - seg_start)) def __get_arch_and_mode(self, arch_str): arch_map = { - "x64" : [ UC_X86_REG_RIP, UC_ARCH_X86, UC_MODE_64 ], - "x86" : [ UC_X86_REG_EIP, UC_ARCH_X86, UC_MODE_32 ], - "arm64be" : [ UC_ARM64_REG_PC, UC_ARCH_ARM64, UC_MODE_ARM | UC_MODE_BIG_ENDIAN ], - "arm64le" : [ UC_ARM64_REG_PC, UC_ARCH_ARM64, UC_MODE_ARM | UC_MODE_LITTLE_ENDIAN ], - "armbe" : [ UC_ARM_REG_PC, UC_ARCH_ARM, UC_MODE_ARM | UC_MODE_BIG_ENDIAN ], - "armle" : [ UC_ARM_REG_PC, UC_ARCH_ARM, UC_MODE_ARM | UC_MODE_LITTLE_ENDIAN ], - "armbethumb": [ UC_ARM_REG_PC, UC_ARCH_ARM, UC_MODE_THUMB | UC_MODE_BIG_ENDIAN ], - "armlethumb": [ UC_ARM_REG_PC, UC_ARCH_ARM, UC_MODE_THUMB | UC_MODE_LITTLE_ENDIAN ], - "mips" : [ UC_MIPS_REG_PC, UC_ARCH_MIPS, UC_MODE_MIPS32 | UC_MODE_BIG_ENDIAN ], - "mipsel" : [ UC_MIPS_REG_PC, UC_ARCH_MIPS, UC_MODE_MIPS32 | UC_MODE_LITTLE_ENDIAN ], + "x64": [UC_X86_REG_RIP, UC_ARCH_X86, UC_MODE_64], + "x86": [UC_X86_REG_EIP, UC_ARCH_X86, UC_MODE_32], + "arm64be": [ + UC_ARM64_REG_PC, + UC_ARCH_ARM64, + UC_MODE_ARM | UC_MODE_BIG_ENDIAN, + ], + "arm64le": [ + UC_ARM64_REG_PC, + UC_ARCH_ARM64, + UC_MODE_ARM | UC_MODE_LITTLE_ENDIAN, + ], + "armbe": [UC_ARM_REG_PC, UC_ARCH_ARM, 
UC_MODE_ARM | UC_MODE_BIG_ENDIAN], + "armle": [UC_ARM_REG_PC, UC_ARCH_ARM, UC_MODE_ARM | UC_MODE_LITTLE_ENDIAN], + "armbethumb": [ + UC_ARM_REG_PC, + UC_ARCH_ARM, + UC_MODE_THUMB | UC_MODE_BIG_ENDIAN, + ], + "armlethumb": [ + UC_ARM_REG_PC, + UC_ARCH_ARM, + UC_MODE_THUMB | UC_MODE_LITTLE_ENDIAN, + ], + "mips": [UC_MIPS_REG_PC, UC_ARCH_MIPS, UC_MODE_MIPS32 | UC_MODE_BIG_ENDIAN], + "mipsel": [ + UC_MIPS_REG_PC, + UC_ARCH_MIPS, + UC_MODE_MIPS32 | UC_MODE_LITTLE_ENDIAN, + ], } return (arch_map[arch_str][1], arch_map[arch_str][2]) @@ -430,140 +531,140 @@ class AflUnicornEngine(Uc): arch = "mips" registers = { - "x64" : { - "rax": UC_X86_REG_RAX, - "rbx": UC_X86_REG_RBX, - "rcx": UC_X86_REG_RCX, - "rdx": UC_X86_REG_RDX, - "rsi": UC_X86_REG_RSI, - "rdi": UC_X86_REG_RDI, - "rbp": UC_X86_REG_RBP, - "rsp": UC_X86_REG_RSP, - "r8": UC_X86_REG_R8, - "r9": UC_X86_REG_R9, - "r10": UC_X86_REG_R10, - "r11": UC_X86_REG_R11, - "r12": UC_X86_REG_R12, - "r13": UC_X86_REG_R13, - "r14": UC_X86_REG_R14, - "r15": UC_X86_REG_R15, - "rip": UC_X86_REG_RIP, - "efl": UC_X86_REG_EFLAGS, - "cs": UC_X86_REG_CS, - "ds": UC_X86_REG_DS, - "es": UC_X86_REG_ES, - "fs": UC_X86_REG_FS, - "gs": UC_X86_REG_GS, - "ss": UC_X86_REG_SS, + "x64": { + "rax": UC_X86_REG_RAX, + "rbx": UC_X86_REG_RBX, + "rcx": UC_X86_REG_RCX, + "rdx": UC_X86_REG_RDX, + "rsi": UC_X86_REG_RSI, + "rdi": UC_X86_REG_RDI, + "rbp": UC_X86_REG_RBP, + "rsp": UC_X86_REG_RSP, + "r8": UC_X86_REG_R8, + "r9": UC_X86_REG_R9, + "r10": UC_X86_REG_R10, + "r11": UC_X86_REG_R11, + "r12": UC_X86_REG_R12, + "r13": UC_X86_REG_R13, + "r14": UC_X86_REG_R14, + "r15": UC_X86_REG_R15, + "rip": UC_X86_REG_RIP, + "efl": UC_X86_REG_EFLAGS, + "cs": UC_X86_REG_CS, + "ds": UC_X86_REG_DS, + "es": UC_X86_REG_ES, + "fs": UC_X86_REG_FS, + "gs": UC_X86_REG_GS, + "ss": UC_X86_REG_SS, }, - "x86" : { - "eax": UC_X86_REG_EAX, - "ebx": UC_X86_REG_EBX, - "ecx": UC_X86_REG_ECX, - "edx": UC_X86_REG_EDX, - "esi": UC_X86_REG_ESI, - "edi": UC_X86_REG_EDI, - "ebp": UC_X86_REG_EBP, 
- "eip": UC_X86_REG_EIP, - "esp": UC_X86_REG_ESP, - "efl": UC_X86_REG_EFLAGS, + "x86": { + "eax": UC_X86_REG_EAX, + "ebx": UC_X86_REG_EBX, + "ecx": UC_X86_REG_ECX, + "edx": UC_X86_REG_EDX, + "esi": UC_X86_REG_ESI, + "edi": UC_X86_REG_EDI, + "ebp": UC_X86_REG_EBP, + "eip": UC_X86_REG_EIP, + "esp": UC_X86_REG_ESP, + "efl": UC_X86_REG_EFLAGS, # Segment registers removed... # They caused segfaults (from unicorn?) when they were here }, - "arm" : { - "r0": UC_ARM_REG_R0, - "r1": UC_ARM_REG_R1, - "r2": UC_ARM_REG_R2, - "r3": UC_ARM_REG_R3, - "r4": UC_ARM_REG_R4, - "r5": UC_ARM_REG_R5, - "r6": UC_ARM_REG_R6, - "r7": UC_ARM_REG_R7, - "r8": UC_ARM_REG_R8, - "r9": UC_ARM_REG_R9, - "r10": UC_ARM_REG_R10, - "r11": UC_ARM_REG_R11, - "r12": UC_ARM_REG_R12, - "pc": UC_ARM_REG_PC, - "sp": UC_ARM_REG_SP, - "lr": UC_ARM_REG_LR, - "cpsr": UC_ARM_REG_CPSR + "arm": { + "r0": UC_ARM_REG_R0, + "r1": UC_ARM_REG_R1, + "r2": UC_ARM_REG_R2, + "r3": UC_ARM_REG_R3, + "r4": UC_ARM_REG_R4, + "r5": UC_ARM_REG_R5, + "r6": UC_ARM_REG_R6, + "r7": UC_ARM_REG_R7, + "r8": UC_ARM_REG_R8, + "r9": UC_ARM_REG_R9, + "r10": UC_ARM_REG_R10, + "r11": UC_ARM_REG_R11, + "r12": UC_ARM_REG_R12, + "pc": UC_ARM_REG_PC, + "sp": UC_ARM_REG_SP, + "lr": UC_ARM_REG_LR, + "cpsr": UC_ARM_REG_CPSR, }, - "arm64" : { - "x0": UC_ARM64_REG_X0, - "x1": UC_ARM64_REG_X1, - "x2": UC_ARM64_REG_X2, - "x3": UC_ARM64_REG_X3, - "x4": UC_ARM64_REG_X4, - "x5": UC_ARM64_REG_X5, - "x6": UC_ARM64_REG_X6, - "x7": UC_ARM64_REG_X7, - "x8": UC_ARM64_REG_X8, - "x9": UC_ARM64_REG_X9, - "x10": UC_ARM64_REG_X10, - "x11": UC_ARM64_REG_X11, - "x12": UC_ARM64_REG_X12, - "x13": UC_ARM64_REG_X13, - "x14": UC_ARM64_REG_X14, - "x15": UC_ARM64_REG_X15, - "x16": UC_ARM64_REG_X16, - "x17": UC_ARM64_REG_X17, - "x18": UC_ARM64_REG_X18, - "x19": UC_ARM64_REG_X19, - "x20": UC_ARM64_REG_X20, - "x21": UC_ARM64_REG_X21, - "x22": UC_ARM64_REG_X22, - "x23": UC_ARM64_REG_X23, - "x24": UC_ARM64_REG_X24, - "x25": UC_ARM64_REG_X25, - "x26": UC_ARM64_REG_X26, - "x27": 
UC_ARM64_REG_X27, - "x28": UC_ARM64_REG_X28, - "pc": UC_ARM64_REG_PC, - "sp": UC_ARM64_REG_SP, - "fp": UC_ARM64_REG_FP, - "lr": UC_ARM64_REG_LR, - "nzcv": UC_ARM64_REG_NZCV, + "arm64": { + "x0": UC_ARM64_REG_X0, + "x1": UC_ARM64_REG_X1, + "x2": UC_ARM64_REG_X2, + "x3": UC_ARM64_REG_X3, + "x4": UC_ARM64_REG_X4, + "x5": UC_ARM64_REG_X5, + "x6": UC_ARM64_REG_X6, + "x7": UC_ARM64_REG_X7, + "x8": UC_ARM64_REG_X8, + "x9": UC_ARM64_REG_X9, + "x10": UC_ARM64_REG_X10, + "x11": UC_ARM64_REG_X11, + "x12": UC_ARM64_REG_X12, + "x13": UC_ARM64_REG_X13, + "x14": UC_ARM64_REG_X14, + "x15": UC_ARM64_REG_X15, + "x16": UC_ARM64_REG_X16, + "x17": UC_ARM64_REG_X17, + "x18": UC_ARM64_REG_X18, + "x19": UC_ARM64_REG_X19, + "x20": UC_ARM64_REG_X20, + "x21": UC_ARM64_REG_X21, + "x22": UC_ARM64_REG_X22, + "x23": UC_ARM64_REG_X23, + "x24": UC_ARM64_REG_X24, + "x25": UC_ARM64_REG_X25, + "x26": UC_ARM64_REG_X26, + "x27": UC_ARM64_REG_X27, + "x28": UC_ARM64_REG_X28, + "pc": UC_ARM64_REG_PC, + "sp": UC_ARM64_REG_SP, + "fp": UC_ARM64_REG_FP, + "lr": UC_ARM64_REG_LR, + "nzcv": UC_ARM64_REG_NZCV, "cpsr": UC_ARM_REG_CPSR, }, - "mips" : { - "0" : UC_MIPS_REG_ZERO, - "at": UC_MIPS_REG_AT, - "v0": UC_MIPS_REG_V0, - "v1": UC_MIPS_REG_V1, - "a0": UC_MIPS_REG_A0, - "a1": UC_MIPS_REG_A1, - "a2": UC_MIPS_REG_A2, - "a3": UC_MIPS_REG_A3, - "t0": UC_MIPS_REG_T0, - "t1": UC_MIPS_REG_T1, - "t2": UC_MIPS_REG_T2, - "t3": UC_MIPS_REG_T3, - "t4": UC_MIPS_REG_T4, - "t5": UC_MIPS_REG_T5, - "t6": UC_MIPS_REG_T6, - "t7": UC_MIPS_REG_T7, - "t8": UC_MIPS_REG_T8, - "t9": UC_MIPS_REG_T9, - "s0": UC_MIPS_REG_S0, - "s1": UC_MIPS_REG_S1, - "s2": UC_MIPS_REG_S2, - "s3": UC_MIPS_REG_S3, - "s4": UC_MIPS_REG_S4, - "s5": UC_MIPS_REG_S5, - "s6": UC_MIPS_REG_S6, - "s7": UC_MIPS_REG_S7, - "s8": UC_MIPS_REG_S8, - "k0": UC_MIPS_REG_K0, - "k1": UC_MIPS_REG_K1, - "gp": UC_MIPS_REG_GP, - "pc": UC_MIPS_REG_PC, - "sp": UC_MIPS_REG_SP, - "fp": UC_MIPS_REG_FP, - "ra": UC_MIPS_REG_RA, - "hi": UC_MIPS_REG_HI, - "lo": UC_MIPS_REG_LO - } + "mips": 
{ + "0": UC_MIPS_REG_ZERO, + "at": UC_MIPS_REG_AT, + "v0": UC_MIPS_REG_V0, + "v1": UC_MIPS_REG_V1, + "a0": UC_MIPS_REG_A0, + "a1": UC_MIPS_REG_A1, + "a2": UC_MIPS_REG_A2, + "a3": UC_MIPS_REG_A3, + "t0": UC_MIPS_REG_T0, + "t1": UC_MIPS_REG_T1, + "t2": UC_MIPS_REG_T2, + "t3": UC_MIPS_REG_T3, + "t4": UC_MIPS_REG_T4, + "t5": UC_MIPS_REG_T5, + "t6": UC_MIPS_REG_T6, + "t7": UC_MIPS_REG_T7, + "t8": UC_MIPS_REG_T8, + "t9": UC_MIPS_REG_T9, + "s0": UC_MIPS_REG_S0, + "s1": UC_MIPS_REG_S1, + "s2": UC_MIPS_REG_S2, + "s3": UC_MIPS_REG_S3, + "s4": UC_MIPS_REG_S4, + "s5": UC_MIPS_REG_S5, + "s6": UC_MIPS_REG_S6, + "s7": UC_MIPS_REG_S7, + "s8": UC_MIPS_REG_S8, + "k0": UC_MIPS_REG_K0, + "k1": UC_MIPS_REG_K1, + "gp": UC_MIPS_REG_GP, + "pc": UC_MIPS_REG_PC, + "sp": UC_MIPS_REG_SP, + "fp": UC_MIPS_REG_FP, + "ra": UC_MIPS_REG_RA, + "hi": UC_MIPS_REG_HI, + "lo": UC_MIPS_REG_LO, + }, } return registers[arch] @@ -577,51 +678,50 @@ class AflUnicornEngine(Uc): arch = "mips" registers = { - "arm": { - "d0": UC_ARM_REG_D0, - "d1": UC_ARM_REG_D1, - "d2": UC_ARM_REG_D2, - "d3": UC_ARM_REG_D3, - "d4": UC_ARM_REG_D4, - "d5": UC_ARM_REG_D5, - "d6": UC_ARM_REG_D6, - "d7": UC_ARM_REG_D7, - "d8": UC_ARM_REG_D8, - "d9": UC_ARM_REG_D9, - "d10": UC_ARM_REG_D10, - "d11": UC_ARM_REG_D11, - "d12": UC_ARM_REG_D12, - "d13": UC_ARM_REG_D13, - "d14": UC_ARM_REG_D14, - "d15": UC_ARM_REG_D15, - "d16": UC_ARM_REG_D16, - "d17": UC_ARM_REG_D17, - "d18": UC_ARM_REG_D18, - "d19": UC_ARM_REG_D19, - "d20": UC_ARM_REG_D20, - "d21": UC_ARM_REG_D21, - "d22": UC_ARM_REG_D22, - "d23": UC_ARM_REG_D23, - "d24": UC_ARM_REG_D24, - "d25": UC_ARM_REG_D25, - "d26": UC_ARM_REG_D26, - "d27": UC_ARM_REG_D27, - "d28": UC_ARM_REG_D28, - "d29": UC_ARM_REG_D29, - "d30": UC_ARM_REG_D30, - "d31": UC_ARM_REG_D31, - "fpscr": UC_ARM_REG_FPSCR + "arm": { + "d0": UC_ARM_REG_D0, + "d1": UC_ARM_REG_D1, + "d2": UC_ARM_REG_D2, + "d3": UC_ARM_REG_D3, + "d4": UC_ARM_REG_D4, + "d5": UC_ARM_REG_D5, + "d6": UC_ARM_REG_D6, + "d7": UC_ARM_REG_D7, + "d8": 
UC_ARM_REG_D8, + "d9": UC_ARM_REG_D9, + "d10": UC_ARM_REG_D10, + "d11": UC_ARM_REG_D11, + "d12": UC_ARM_REG_D12, + "d13": UC_ARM_REG_D13, + "d14": UC_ARM_REG_D14, + "d15": UC_ARM_REG_D15, + "d16": UC_ARM_REG_D16, + "d17": UC_ARM_REG_D17, + "d18": UC_ARM_REG_D18, + "d19": UC_ARM_REG_D19, + "d20": UC_ARM_REG_D20, + "d21": UC_ARM_REG_D21, + "d22": UC_ARM_REG_D22, + "d23": UC_ARM_REG_D23, + "d24": UC_ARM_REG_D24, + "d25": UC_ARM_REG_D25, + "d26": UC_ARM_REG_D26, + "d27": UC_ARM_REG_D27, + "d28": UC_ARM_REG_D28, + "d29": UC_ARM_REG_D29, + "d30": UC_ARM_REG_D30, + "d31": UC_ARM_REG_D31, + "fpscr": UC_ARM_REG_FPSCR, } } - return registers[arch]; - #--------------------------- - # Callbacks for tracing + return registers[arch] + # --------------------------- + # Callbacks for tracing # TODO: Extra mode for Capstone (i.e. Cs(cs_arch, cs_mode + cs_extra) not implemented - def __trace_instruction(self, uc, address, size, user_data): if CAPSTONE_EXISTS == 1: # If Capstone is installed then we'll dump disassembly, otherwise just dump the binary. 
@@ -651,11 +751,23 @@ class AflUnicornEngine(Uc): cs = Cs(cs_arch, cs_mode) mem = uc.mem_read(address, size) if bit_size == 4: - for (cs_address, cs_size, cs_mnemonic, cs_opstr) in cs.disasm_lite(bytes(mem), size): - print(" Instr: {:#08x}:\t{}\t{}".format(address, cs_mnemonic, cs_opstr)) + for (cs_address, cs_size, cs_mnemonic, cs_opstr) in cs.disasm_lite( + bytes(mem), size + ): + print( + " Instr: {:#08x}:\t{}\t{}".format( + address, cs_mnemonic, cs_opstr + ) + ) else: - for (cs_address, cs_size, cs_mnemonic, cs_opstr) in cs.disasm_lite(bytes(mem), size): - print(" Instr: {:#16x}:\t{}\t{}".format(address, cs_mnemonic, cs_opstr)) + for (cs_address, cs_size, cs_mnemonic, cs_opstr) in cs.disasm_lite( + bytes(mem), size + ): + print( + " Instr: {:#16x}:\t{}\t{}".format( + address, cs_mnemonic, cs_opstr + ) + ) else: print(" Instr: addr=0x{0:016x}, size=0x{1:016x}".format(address, size)) @@ -664,15 +776,27 @@ class AflUnicornEngine(Uc): def __trace_mem_access(self, uc, access, address, size, value, user_data): if access == UC_MEM_WRITE: - print(" >>> Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(address, size, value)) + print( + " >>> Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format( + address, size, value + ) + ) else: print(" >>> Read: addr=0x{0:016x} size={1}".format(address, size)) def __trace_mem_invalid_access(self, uc, access, address, size, value, user_data): if access == UC_MEM_WRITE_UNMAPPED: - print(" >>> INVALID Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(address, size, value)) + print( + " >>> INVALID Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format( + address, size, value + ) + ) else: - print(" >>> INVALID Read: addr=0x{0:016x} size={1}".format(address, size)) + print( + " >>> INVALID Read: addr=0x{0:016x} size={1}".format( + address, size + ) + ) def bit_size_arch(self): arch = self.get_arch() diff --git a/unicorn_mode/unicornafl b/unicorn_mode/unicornafl -Subproject 0d82727f2b477de82fa355edef9bc158bd25d37 
+Subproject 019b871539fe9ed3f41d882385a8b02c243d49a diff --git a/utils/aflpp_driver/GNUmakefile b/utils/aflpp_driver/GNUmakefile index ad99b893..c282a9f3 100644 --- a/utils/aflpp_driver/GNUmakefile +++ b/utils/aflpp_driver/GNUmakefile @@ -15,28 +15,28 @@ aflpp_driver.o: aflpp_driver.c -$(LLVM_BINDIR)clang -I. -I../../include $(CFLAGS) -c aflpp_driver.c libAFLDriver.a: aflpp_driver.o - ar ru libAFLDriver.a aflpp_driver.o - cp -vf libAFLDriver.a ../../ + @ar rc libAFLDriver.a aflpp_driver.o + @cp -vf libAFLDriver.a ../../ debug: $(LLVM_BINDIR)clang -Wno-deprecated -I../../include $(CFLAGS) -D_DEBUG=\"1\" -c -o afl-performance.o ../../src/afl-performance.c $(LLVM_BINDIR)clang -I../../include -D_DEBUG=\"1\" -g -funroll-loops -c aflpp_driver.c #$(LLVM_BINDIR)clang -S -emit-llvm -Wno-deprecated -I../../include $(CFLAGS) -D_DEBUG=\"1\" -c -o afl-performance.ll ../../src/afl-performance.c #$(LLVM_BINDIR)clang -S -emit-llvm -I../../include -D_DEBUG=\"1\" -g -funroll-loops -c aflpp_driver.c - ar ru libAFLDriver.a afl-performance.o aflpp_driver.o + ar rc libAFLDriver.a afl-performance.o aflpp_driver.o aflpp_qemu_driver.o: aflpp_qemu_driver.c -$(LLVM_BINDIR)clang $(CFLAGS) -O0 -funroll-loops -c aflpp_qemu_driver.c libAFLQemuDriver.a: aflpp_qemu_driver.o - -ar ru libAFLQemuDriver.a aflpp_qemu_driver.o - -cp -vf libAFLQemuDriver.a ../../ + @-ar rc libAFLQemuDriver.a aflpp_qemu_driver.o + @-cp -vf libAFLQemuDriver.a ../../ aflpp_qemu_driver_hook.so: aflpp_qemu_driver_hook.o - -test -e aflpp_qemu_driver_hook.o && $(LLVM_BINDIR)clang -shared aflpp_qemu_driver_hook.o -o aflpp_qemu_driver_hook.so || echo "Note: Optional aflpp_qemu_driver_hook.so not built." + @-test -e aflpp_qemu_driver_hook.o && $(LLVM_BINDIR)clang -shared aflpp_qemu_driver_hook.o -o aflpp_qemu_driver_hook.so || echo "Note: Optional aflpp_qemu_driver_hook.so not built." 
aflpp_qemu_driver_hook.o: aflpp_qemu_driver_hook.c - -test -e ../../qemu_mode/qemuafl/qemuafl/api.h && $(LLVM_BINDIR)clang $(CFLAGS) -funroll-loops -c aflpp_qemu_driver_hook.c || echo "Note: Optional aflpp_qemu_driver_hook.o not built." + @-test -e ../../qemu_mode/qemuafl/qemuafl/api.h && $(LLVM_BINDIR)clang $(CFLAGS) -funroll-loops -c aflpp_qemu_driver_hook.c || echo "Note: Optional aflpp_qemu_driver_hook.o not built." test: debug #clang -S -emit-llvm -D_DEBUG=\"1\" -I../../include -Wl,--allow-multiple-definition -funroll-loops -o aflpp_driver_test.ll aflpp_driver_test.c diff --git a/utils/aflpp_driver/README.md b/utils/aflpp_driver/README.md index f03c2fe3..4ca59776 100644 --- a/utils/aflpp_driver/README.md +++ b/utils/aflpp_driver/README.md @@ -22,6 +22,8 @@ or `@@` as command line parameters. ## aflpp_qemu_driver +Note that you can also use the driver for frida_mode (`-O`). + aflpp_qemu_driver is used for libfuzzer `LLVMFuzzerTestOneInput()` targets that are to be fuzzed in qemu_mode. So we compile them with clang/clang++, without -fsantize=fuzzer or afl-clang-fast, and link in libAFLQemuDriver.a: @@ -34,3 +36,8 @@ Then just do (where the name of the binary is `fuzz`): AFL_QEMU_PERSISTENT_ADDR=0x$(nm fuzz | grep "T LLVMFuzzerTestOneInput" | awk '{print $1}') AFL_QEMU_PERSISTENT_HOOK=/path/to/aflpp_qemu_driver_hook.so afl-fuzz -Q ... -- ./fuzz` ``` + +If you use afl-cmin or `afl-showmap -C` with the aflpp_qemu_driver you need to +set the same AFL_QEMU_... (or AFL_FRIDA_...) environment variables. +If you want to use afl-showmap (without -C) or afl-cmin.bash then you must not +set these environment variables but instead set `AFL_QEMU_DRIVER_NO_HOOK=1`.
diff --git a/utils/aflpp_driver/aflpp_qemu_driver.c b/utils/aflpp_driver/aflpp_qemu_driver.c index 79de5af6..99a4c9a8 100644 --- a/utils/aflpp_driver/aflpp_qemu_driver.c +++ b/utils/aflpp_driver/aflpp_qemu_driver.c @@ -1,3 +1,4 @@ +#include <stdio.h> #include <stdint.h> #include <stdlib.h> #include <unistd.h> @@ -27,6 +28,9 @@ int main(int argc, char **argv) { } else { + fprintf(stderr, + "Using shared-memory testcases. To read via stdin, set " + "AFL_QEMU_DRIVER_NO_HOOK=1.\n"); uint8_t dummy_input[1024000] = {0}; LLVMFuzzerTestOneInput(dummy_input, 1); diff --git a/utils/qbdi_mode/README.md b/utils/qbdi_mode/README.md index 641a6e85..cf5d3359 100755 --- a/utils/qbdi_mode/README.md +++ b/utils/qbdi_mode/README.md @@ -1,5 +1,9 @@ # qbdi-based binary-only instrumentation for afl-fuzz +NOTE: this code is outdated and would need to be adapted to the current +afl++ versions first. +Try afl_frida or fpicker [https://github.com/ttdennis/fpicker/](https://github.com/ttdennis/fpicker/) first; maybe they suit your needs. + ## 1) Introduction The code in ./qbdi_mode allows you to build a standalone feature that |