diff options
70 files changed, 2638 insertions, 859 deletions
diff --git a/Dockerfile b/Dockerfile index 1947f211..7bb60610 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,6 +9,9 @@ RUN apt-get update && apt-get install -y \ clang \ clang-9 \ flex \ + git \ + python3.7 \ + python3.7-dev \ gcc-9 \ gcc-9-plugin-dev \ gcc-9-multilib \ @@ -23,10 +26,12 @@ RUN apt-get update && apt-get install -y \ ca-certificates \ libpixman-1-dev \ && rm -rf /var/lib/apt/lists/* + ARG CC=gcc-9 ARG CXX=g++-9 ARG LLVM_CONFIG=llvm-config-9 -COPY . /app -RUN cd /app && make clean && make distrib && \ - make install && cd .. && rm -rf /app -WORKDIR /work + +RUN git clone https://github.com/vanhauser-thc/AFLplusplus + +RUN cd AFLplusplus && make clean && make distrib && \ + make install && cd .. && rm -rf AFLplusplus diff --git a/Makefile b/Makefile index 703ed673..459cae5f 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ VERSION = $(shell grep '^\#define VERSION ' ../config.h | cut -d '"' -f2) # PROGS intentionally omit afl-as, which gets installed elsewhere. PROGS = afl-gcc afl-fuzz afl-showmap afl-tmin afl-gotcpu afl-analyze -SH_PROGS = afl-plot afl-cmin afl-whatsup afl-system-config +SH_PROGS = afl-plot afl-cmin afl-cmin.bash afl-whatsup afl-system-config MANPAGES=$(foreach p, $(PROGS) $(SH_PROGS), $(p).8) ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -flto=full -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1" @@ -48,6 +48,14 @@ ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -march=native -o .te CFLAGS_OPT = -march=native endif +ifneq "$(shell uname -m)" "x86_64" + ifneq "$(shell uname -m)" "i386" + ifneq "$(shell uname -m)" "amd64" + AFL_NO_X86=1 + endif + endif +endif + CFLAGS ?= -O3 -funroll-loops $(CFLAGS_OPT) CFLAGS += -Wall -g -Wno-pointer-sign -I include/ \ -DAFL_PATH=\"$(HELPER_PATH)\" -DBIN_PATH=\"$(BIN_PATH)\" \ @@ -55,17 +63,17 @@ CFLAGS += -Wall -g -Wno-pointer-sign -I include/ \ AFL_FUZZ_FILES = $(wildcard src/afl-fuzz*.c) -ifneq "($filter %3.7m, $(shell python3.7m-config --includes 
2>/dev/null)" "" +ifneq "$(filter %3.7m, $(shell python3.7m-config --includes 2>/dev/null))" "" PYTHON_INCLUDE ?= $(shell python3.7m-config --includes) PYTHON_LIB ?= $(shell python3.7m-config --ldflags) PYTHON_VERSION = 3.7m else - ifneq "($filter %3.7, $(shell python3.7-config --includes) 2> /dev/null" "" + ifneq "$(filter %3.7, $(shell python3.7-config --includes 2>/dev/null))" "" PYTHON_INCLUDE ?= $(shell python3.7-config --includes) PYTHON_LIB ?= $(shell python3.7-config --ldflags) PYTHON_VERSION = 3.7 else - ifneq "($filter %2.7, $(shell python2.7-config --includes) 2> /dev/null" "" + ifneq "$(filter %2.7, $(shell python2.7-config --includes 2>/dev/null))" "" PYTHON_INCLUDE ?= $(shell python2.7-config --includes) PYTHON_LIB ?= $(shell python2.7-config --ldflags) PYTHON_VERSION = 2.7 @@ -77,14 +85,14 @@ PYTHON_INCLUDE ?= $(shell test -e /usr/include/python3.7m && echo /usr/include/p PYTHON_INCLUDE ?= $(shell test -e /usr/include/python3.7 && echo /usr/include/python3.7) PYTHON_INCLUDE ?= $(shell test -e /usr/include/python2.7 && echo /usr/include/python2.7) -ifneq "($filter %3.7m, $(PYTHON_INCLUDE))" "" +ifneq "$(filter %3.7m, $(PYTHON_INCLUDE))" "" PYTHON_VERSION ?= 3.7m PYTHON_LIB ?= -lpython3.7m else - ifneq "($filter %3.7, $(PYTHON_INCLUDE))" "" + ifneq "$(filter %3.7, $(PYTHON_INCLUDE))" "" PYTHON_VERSION ?= 3.7 else - ifneq "($filter %2.7, $(PYTHON_INCLUDE))" "" + ifneq "$(filter %2.7, $(PYTHON_INCLUDE))" "" PYTHON_VERSION ?= 2.7 PYTHON_LIB ?= -lpython2.7 else diff --git a/README.md b/README.md index 2edca8af..dc43d5d2 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ get any feature improvements since November 2017. Among other changes afl++ has a more performant llvm_mode, supports - llvm up to version 10, QEMU 3.1, more speed and crashfixes for QEMU, + llvm up to version 11, QEMU 3.1, more speed and crashfixes for QEMU, better *BSD and Android support and much, much more. 
Additionally the following features and patches have been integrated: @@ -204,7 +204,7 @@ superior to blind fuzzing or coverage-only tools. PLEASE NOTE: llvm_mode compilation with afl-clang-fast/afl-clang-fast++ instead of afl-gcc/afl-g++ is much faster and has a few cool features. See llvm_mode/ - however few code does not compile with llvm. -We support llvm versions 3.8.0 to 10. +We support llvm versions 3.8.0 to 11. When source code is available, instrumentation can be injected by a companion tool that works as a drop-in replacement for gcc or clang in any standard build @@ -227,7 +227,7 @@ For C++ programs, you'd would also want to set `CXX=/path/to/afl/afl-g++`. The clang wrappers (afl-clang and afl-clang++) can be used in the same way; clang users may also opt to leverage a higher-performance instrumentation mode, as described in [llvm_mode/README.md](llvm_mode/README.md). -Clang/LLVM has a much better performance and works with LLVM version 3.8.0 to 10. +Clang/LLVM has a much better performance and works with LLVM version 3.8.0 to 11. Using the LAF Intel performance enhancements are also recommended, see [llvm_mode/README.laf-intel.md](llvm_mode/README.laf-intel.md) @@ -272,7 +272,7 @@ $ ./build_qemu_support.sh For additional instructions and caveats, see [qemu_mode/README.md](qemu_mode/README.md). The mode is approximately 2-5x slower than compile-time instrumentation, is -less conductive to parallelization, and may have some other quirks. +less conducive to parallelization, and may have some other quirks. 
If [afl-dyninst](https://github.com/vanhauser-thc/afl-dyninst) works for your binary, then you can use afl-fuzz normally and it will have twice diff --git a/afl-cmin b/afl-cmin index 1dd782d8..9179628e 100755 --- a/afl-cmin +++ b/afl-cmin @@ -1,470 +1,464 @@ -#!/usr/bin/env bash -# -# american fuzzy lop++ - corpus minimization tool -# --------------------------------------------- -# -# Originally written by Michal Zalewski -# -# Copyright 2014, 2015 Google Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# This tool tries to find the smallest subset of files in the input directory -# that still trigger the full range of instrumentation data points seen in -# the starting corpus. This has two uses: -# -# - Screening large corpora of input files before using them as a seed for -# afl-fuzz. The tool will remove functionally redundant files and likely -# leave you with a much smaller set. -# -# (In this case, you probably also want to consider running afl-tmin on -# the individual files later on to reduce their size.) -# -# - Minimizing the corpus generated organically by afl-fuzz, perhaps when -# planning to feed it to more resource-intensive tools. The tool achieves -# this by removing all entries that used to trigger unique behaviors in the -# past, but have been made obsolete by later finds. +#!/usr/bin/env sh +THISPATH=`dirname ${0}` +export PATH=${THISPATH}:$PATH +awk -f - -- ${@+"$@"} <<'EOF' +#!/usr/bin/awk -f + +# awk script to minimize a test corpus of input files # -# Note that the tool doesn't modify the files themselves. For that, you want -# afl-tmin. 
+# based on afl-cmin bash script written by Michal Zalewski +# rewritten by Heiko Eißfeldt (hexcoder-) +# tested with: +# gnu awk (x86 Linux) +# bsd awk (x86 *BSD) +# mawk (arm32 raspbian) # -# This script must use bash because other shells may have hardcoded limits on -# array sizes. +# uses getopt.awk package from Arnold Robbins # - -echo "corpus minimization tool for afl-fuzz by Michal Zalewski" -echo - -######### -# SETUP # -######### - -# Process command-line options... - -MEM_LIMIT=200 -TIMEOUT=none - -unset IN_DIR OUT_DIR STDIN_FILE EXTRA_PAR MEM_LIMIT_GIVEN \ - AFL_CMIN_CRASHES_ONLY AFL_CMIN_ALLOW_ANY QEMU_MODE UNICORN_MODE - -while getopts "+i:o:f:m:t:eQUCh" opt; do - - case "$opt" in - - "h") - ;; - - "i") - IN_DIR="$OPTARG" - ;; - - "o") - OUT_DIR="$OPTARG" - ;; - "f") - STDIN_FILE="$OPTARG" - ;; - "m") - MEM_LIMIT="$OPTARG" - MEM_LIMIT_GIVEN=1 - ;; - "t") - TIMEOUT="$OPTARG" - ;; - "e") - EXTRA_PAR="$EXTRA_PAR -e" - ;; - "C") - export AFL_CMIN_CRASHES_ONLY=1 - ;; - "Q") - EXTRA_PAR="$EXTRA_PAR -Q" - test "$MEM_LIMIT_GIVEN" = "" && MEM_LIMIT=250 - QEMU_MODE=1 - ;; - "U") - EXTRA_PAR="$EXTRA_PAR -U" - test "$MEM_LIMIT_GIVEN" = "" && MEM_LIMIT=250 - UNICORN_MODE=1 - ;; - "?") - exit 1 - ;; - - esac - -done - -shift $((OPTIND-1)) - -TARGET_BIN="$1" - -if [ "$TARGET_BIN" = "" -o "$IN_DIR" = "" -o "$OUT_DIR" = "" ]; then - - cat 1>&2 <<_EOF_ -Usage: $0 [ options ] -- /path/to/target_app [ ... 
] - -Required parameters: - - -i dir - input directory with the starting corpus - -o dir - output directory for minimized files - -Execution control settings: - - -f file - location read by the fuzzed program (stdin) - -m megs - memory limit for child process ($MEM_LIMIT MB) - -t msec - run time limit for child process (none) - -Q - use binary-only instrumentation (QEMU mode) - -U - use unicorn-based instrumentation (Unicorn mode) - -Minimization settings: - - -C - keep crashing inputs, reject everything else - -e - solve for edge coverage only, ignore hit counts - -For additional tips, please consult docs/README. - -_EOF_ - exit 1 -fi - -# Do a sanity check to discourage the use of /tmp, since we can't really -# handle this safely from a shell script. - -if [ "$AFL_ALLOW_TMP" = "" ]; then - - echo "$IN_DIR" | grep -qE '^(/var)?/tmp/' - T1="$?" - - echo "$TARGET_BIN" | grep -qE '^(/var)?/tmp/' - T2="$?" - - echo "$OUT_DIR" | grep -qE '^(/var)?/tmp/' - T3="$?" - - echo "$STDIN_FILE" | grep -qE '^(/var)?/tmp/' - T4="$?" - - echo "$PWD" | grep -qE '^(/var)?/tmp/' - T5="$?" - - if [ "$T1" = "0" -o "$T2" = "0" -o "$T3" = "0" -o "$T4" = "0" -o "$T5" = "0" ]; then - echo "[-] Error: do not use this script in /tmp or /var/tmp." 1>&2 +# external tools used by this script: +# test +# grep +# rm +# mkdir +# ln +# cp +# pwd +# which +# cd +# find +# stat +# sort +# cut +# and afl-showmap from this project :-) + +# getopt.awk --- Do C library getopt(3) function in awk + +# External variables: +# Optind -- index in ARGV of first nonoption argument +# Optarg -- string value of argument to current option +# Opterr -- if nonzero, print our own diagnostic +# Optopt -- current option letter + +# Returns: +# -1 at end of options +# "?" 
for unrecognized option +# <c> a character representing the current option + +# Private Data: +# _opti -- index in multiflag option, e.g., -abc + +function getopt(argc, argv, options, thisopt, i) +{ + if (length(options) == 0) # no options given + return -1 + + if (argv[Optind] == "--") { # all done + Optind++ + _opti = 0 + return -1 + } else if (argv[Optind] !~ /^-[^:\t ]/) { + _opti = 0 + return -1 + } + if (_opti == 0) + _opti = 2 + thisopt = substr(argv[Optind], _opti, 1) + Optopt = thisopt + i = index(options, thisopt) + if (i == 0) { + if (Opterr) + printf("%c -- invalid option\n", thisopt) > "/dev/stderr" + if (_opti >= length(argv[Optind])) { + Optind++ + _opti = 0 + } else + _opti++ + return "?" + } + if (substr(options, i + 1, 1) == ":") { + # get option argument + if (length(substr(argv[Optind], _opti + 1)) > 0) + Optarg = substr(argv[Optind], _opti + 1) + else + Optarg = argv[++Optind] + _opti = 0 + } else + Optarg = "" + if (_opti == 0 || _opti >= length(argv[Optind])) { + Optind++ + _opti = 0 + } else + _opti++ + return thisopt +} + +function usage() { + print \ +"Usage: afl-cmin [ options ] -- /path/to/target_app [ ... 
]\n" \ +"\n" \ +"Required parameters:\n" \ +"\n" \ +" -i dir - input directory with starting corpus\n" \ +" -o dir - output directory for minimized files\n" \ +"\n" \ +"Execution control settings:\n" \ +"\n" \ +" -f file - location read by the fuzzed program (stdin)\n" \ +" -m megs - memory limit for child process ("mem_limit" MB)\n" \ +" -t msec - run time limit for child process (none)\n" \ +" -Q - use binary-only instrumentation (QEMU mode)\n" \ +" -U - use unicorn-based instrumentation (unicorn mode)\n" \ +"\n" \ +"Minimization settings:\n" \ +" -C - keep crashing inputs, reject everything else\n" \ +" -e - solve for edge coverage only, ignore hit counts\n" \ +"\n" \ +"For additional tips, please consult docs/README.md\n" \ +"\n" \ + > "/dev/stderr" + exit 1 +} + +function exists_and_is_executable(binarypath) { + return 0 == system("test -f "binarypath" -a -x "binarypath) +} + +BEGIN { + print "corpus minimization tool for afl++ (awk version)\n" + + # defaults + extra_par = "" + # process options + Opterr = 1 # default is to diagnose + Optind = 1 # skip ARGV[0] + while ((_go_c = getopt(ARGC, ARGV, "hi:o:f:m:t:eCQU?")) != -1) { + if (_go_c == "i") { + if (!Optarg) usage() + if (in_dir) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} + in_dir = Optarg + continue + } else + if (_go_c == "o") { + if (!Optarg) usage() + if (out_dir) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} + out_dir = Optarg + continue + } else + if (_go_c == "f") { + if (!Optarg) usage() + if (stdin_file) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} + stdin_file = Optarg + continue + } else + if (_go_c == "m") { + if (!Optarg) usage() + if (mem_limit) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} + mem_limit = Optarg + mem_limit_given = 1 + continue + } else + if (_go_c == "t") { + if (!Optarg) usage() + if (timeout) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} + timeout = Optarg + continue + } else + if 
(_go_c == "C") { + ENVIRON["AFL_CMIN_CRASHES_ONLY"] = 1 + continue + } else + if (_go_c == "e") { + extra_par = extra_par " -e" + continue + } else + if (_go_c == "Q") { + if (qemu_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} + extra_par = extra_par " -Q" + if ( !mem_limit_given ) mem_limit = "250" + qemu_mode = 1 + continue + } else + if (_go_c == "U") { + if (unicorn_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"} + extra_par = extra_par " -U" + if ( !mem_limit_given ) mem_limit = "250" + unicorn_mode = 1 + continue + } else + if (_go_c == "?") { + exit 1 + } else + usage() + } # while options + + if (!mem_limit) mem_limit = 200 + if (!timeout) timeout = "none" + + # get program args + i = 0 + prog_args_string = "" + for (; Optind < ARGC; Optind++) { + prog_args[i++] = ARGV[Optind] + if (i > 1) + prog_args_string = prog_args_string" "ARGV[Optind] + } + + # sanity checks + if (!prog_args[0] || !in_dir || !out_dir) usage() + + target_bin = prog_args[0] + + # Do a sanity check to discourage the use of /tmp, since we can't really + # handle this safely from an awk script. + + if (!ENVIRON["AFL_ALLOW_TMP"]) { + dirlist[0] = in_dir + dirlist[1] = target_bin + dirlist[2] = out_dir + dirlist[3] = stdin_file + "pwd" | getline dirlist[4] # current directory + for (dirind in dirlist) { + dir = dirlist[dirind] + + if (dir ~ /^(\/var)?\/tmp/) { + print "[-] Error: do not use this script in /tmp or /var/tmp." > "/dev/stderr" + exit 1 + } + } + delete dirlist + } + + # If @@ is specified, but there's no -f, let's come up with a temporary input + # file name. + + trace_dir = out_dir "/.traces" + + if (!stdin_file) { + found_atat = 0 + for (prog_args_ind in prog_args) { + if ("@@" == prog_args[prog_args_ind]) { + found_atat = 1 + break + } + } + if (found_atat) { + stdin_file = trace_dir "/.cur_input" + } + } + + # Check for obvious errors. 
+ + if (mem_limit && mem_limit != "none" && mem_limit < 5) { + print "[-] Error: dangerously low memory limit." > "/dev/stderr" exit 1 - fi - -fi - -# If @@ is specified, but there's no -f, let's come up with a temporary input -# file name. - -TRACE_DIR="$OUT_DIR/.traces" + } -if [ "$STDIN_FILE" = "" ]; then - - if echo "$*" | grep -qF '@@'; then - STDIN_FILE="$TRACE_DIR/.cur_input" - fi - -fi - -# Check for obvious errors. - -if [ ! "$MEM_LIMIT" = "none" ]; then - - if [ "$MEM_LIMIT" -lt "5" ]; then - echo "[-] Error: dangerously low memory limit." 1>&2 + if (timeout && timeout != "none" && timeout < 10) { + print "[-] Error: dangerously low timeout." > "/dev/stderr" exit 1 - fi - -fi - -if [ ! "$TIMEOUT" = "none" ]; then - - if [ "$TIMEOUT" -lt "10" ]; then - echo "[-] Error: dangerously low timeout." 1>&2 + } + + if (target_bin && !exists_and_is_executable(target_bin)) { + + "which "target_bin" 2>/dev/null" | getline tnew + if (!tnew || !exists_and_is_executable(tnew)) { + print "[-] Error: binary '"target_bin"' not found or not executable." > "/dev/stderr" + exit 1 + } + target_bin = tnew + } + + if (!ENVIRON["AFL_SKIP_BIN_CHECK"] && !qemu_mode && !unicorn_mode) { + if (0 != system( "grep -q __AFL_SHM_ID "target_bin )) { + print "[-] Error: binary '"target_bin"' doesn't appear to be instrumented." > "/dev/stderr" + exit 1 + } + } + + if (0 != system( "test -d "in_dir )) { + print "[-] Error: directory '"in_dir"' not found." > "/dev/stderr" exit 1 - fi + } -fi + if (0 == system( "test -d "in_dir"/queue" )) { + in_dir = in_dir "/queue" + } -if [ ! -f "$TARGET_BIN" -o ! -x "$TARGET_BIN" ]; then + system("rm -rf "trace_dir" 2>/dev/null"); + system("rm "out_dir"/id[:_]* 2>/dev/null") - TNEW="`which "$TARGET_BIN" 2>/dev/null`" - - if [ ! -f "$TNEW" -o ! -x "$TNEW" ]; then - echo "[-] Error: binary '$TARGET_BIN' not found or not executable." 
1>&2 + if (0 == system( "test -d "out_dir" -a -e "out_dir"/*" )) { + print "[-] Error: directory '"out_dir"' exists and is not empty - delete it first." > "/dev/stderr" exit 1 - fi - - TARGET_BIN="$TNEW" - -fi + } -if [ "$AFL_SKIP_BIN_CHECK" = "" -a "$QEMU_MODE" = "" -a "$UNICORN_MODE" = "" ]; then - - if ! grep -qF "__AFL_SHM_ID" "$TARGET_BIN"; then - echo "[-] Error: binary '$TARGET_BIN' doesn't appear to be instrumented." 1>&2 + # Check for the more efficient way to copy files... + if (0 != system("mkdir -p -m 0700 "trace_dir)) { + print "[-] Error: Cannot create directory "trace_dir > "/dev/stderr" exit 1 - fi - -fi - -if [ ! -d "$IN_DIR" ]; then - echo "[-] Error: directory '$IN_DIR' not found." 1>&2 - exit 1 -fi - -test -d "$IN_DIR/queue" && IN_DIR="$IN_DIR/queue" - -find "$OUT_DIR" -name 'id[:_]*' -maxdepth 1 -exec rm -- {} \; 2>/dev/null -rm -rf "$TRACE_DIR" 2>/dev/null - -rmdir "$OUT_DIR" 2>/dev/null - -if [ -d "$OUT_DIR" ]; then - echo "[-] Error: directory '$OUT_DIR' exists and is not empty - delete it first." 1>&2 - exit 1 -fi - -mkdir -m 700 -p "$TRACE_DIR" || exit 1 - -if [ ! "$STDIN_FILE" = "" ]; then - rm -f "$STDIN_FILE" || exit 1 - touch "$STDIN_FILE" || exit 1 -fi - -if [ "$AFL_PATH" = "" ]; then - SHOWMAP="${0%/afl-cmin}/afl-showmap" -else - SHOWMAP="$AFL_PATH/afl-showmap" -fi - -if [ ! -x "$SHOWMAP" ]; then - echo "[-] Error: can't find 'afl-showmap' - please set AFL_PATH." 1>&2 - rm -rf "$TRACE_DIR" - exit 1 -fi - -IN_COUNT=$((`ls -- "$IN_DIR" 2>/dev/null | wc -l`)) - -if [ "$IN_COUNT" = "0" ]; then - echo "[+] Hmm, no inputs in the target directory. Nothing to be done." - rm -rf "$TRACE_DIR" - exit 1 -fi - -FIRST_FILE=`ls "$IN_DIR" | head -1` - -# Make sure that we're not dealing with a directory. - -if [ -d "$IN_DIR/$FIRST_FILE" ]; then - echo "[-] Error: The target directory contains subdirectories - please fix." 1>&2 - rm -rf "$TRACE_DIR" - exit 1 -fi - -# Check for the more efficient way to copy files... 
- -if ln "$IN_DIR/$FIRST_FILE" "$TRACE_DIR/.link_test" 2>/dev/null; then - CP_TOOL=ln -else - CP_TOOL=cp -fi - -# Make sure that we can actually get anything out of afl-showmap before we -# waste too much time. - -echo "[*] Testing the target binary..." - -if [ "$STDIN_FILE" = "" ]; then - - AFL_CMIN_ALLOW_ANY=1 "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/.run_test" -Z $EXTRA_PAR -- "$@" <"$IN_DIR/$FIRST_FILE" - -else - - cp "$IN_DIR/$FIRST_FILE" "$STDIN_FILE" - AFL_CMIN_ALLOW_ANY=1 "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/.run_test" -Z $EXTRA_PAR -A "$STDIN_FILE" -- "$@" </dev/null - -fi - -FIRST_COUNT=$((`grep -c . "$TRACE_DIR/.run_test"`)) - -if [ "$FIRST_COUNT" -gt "0" ]; then - - echo "[+] OK, $FIRST_COUNT tuples recorded." - -else - - echo "[-] Error: no instrumentation output detected (perhaps crash or timeout)." 1>&2 - test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR" - exit 1 - -fi - -# Let's roll! - -############################# -# STEP 1: COLLECTING TRACES # -############################# - -echo "[*] Obtaining traces for input files in '$IN_DIR'..." - -( - - CUR=0 - - if [ "$STDIN_FILE" = "" ]; then - - ls "$IN_DIR" | while read -r fn; do - - CUR=$((CUR+1)) - printf "\\r Processing file $CUR/$IN_COUNT... " - - "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/$fn" -Z $EXTRA_PAR -- "$@" <"$IN_DIR/$fn" - - done - - else - - ls "$IN_DIR" | while read -r fn; do - - CUR=$((CUR+1)) - printf "\\r Processing file $CUR/$IN_COUNT... " - - cp "$IN_DIR/$fn" "$STDIN_FILE" - - "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/$fn" -Z $EXTRA_PAR -A "$STDIN_FILE" -- "$@" </dev/null - - done - - - fi - -) - -echo - -########################## -# STEP 2: SORTING TUPLES # -########################## - -# With this out of the way, we sort all tuples by popularity across all -# datasets. 
The reasoning here is that we won't be able to avoid the files -# that trigger unique tuples anyway, so we will want to start with them and -# see what's left. - -echo "[*] Sorting trace sets (this may take a while)..." - -ls "$IN_DIR" | sed "s#^#$TRACE_DIR/#" | tr '\n' '\0' | xargs -0 -n 1 cat | \ - sort | uniq -c | sort -k 1,1 -n >"$TRACE_DIR/.all_uniq" - -TUPLE_COUNT=$((`grep -c . "$TRACE_DIR/.all_uniq"`)) - -echo "[+] Found $TUPLE_COUNT unique tuples across $IN_COUNT files." - -##################################### -# STEP 3: SELECTING CANDIDATE FILES # -##################################### - -# The next step is to find the best candidate for each tuple. The "best" -# part is understood simply as the smallest input that includes a particular -# tuple in its trace. Empirical evidence suggests that this produces smaller -# datasets than more involved algorithms that could be still pulled off in -# a shell script. - -echo "[*] Finding best candidates for each tuple..." - -CUR=0 - -ls -rS "$IN_DIR" | while read -r fn; do - - CUR=$((CUR+1)) - printf "\\r Processing file $CUR/$IN_COUNT... " - - sed "s#\$# $fn#" "$TRACE_DIR/$fn" >>"$TRACE_DIR/.candidate_list" - -done - -echo - -############################## -# STEP 4: LOADING CANDIDATES # -############################## - -# At this point, we have a file of tuple-file pairs, sorted by file size -# in ascending order (as a consequence of ls -rS). By doing sort keyed -# only by tuple (-k 1,1) and configured to output only the first line for -# every key (-s -u), we end up with the smallest file for each tuple. - -echo "[*] Sorting candidate list (be patient)..." - -sort -k1,1 -s -u "$TRACE_DIR/.candidate_list" | \ - sed 's/^/BEST_FILE[/;s/ /]="/;s/$/"/' >"$TRACE_DIR/.candidate_script" - -if [ ! -s "$TRACE_DIR/.candidate_script" ]; then - echo "[-] Error: no traces obtained from test cases, check syntax!" 
1>&2 - test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR" - exit 1 -fi - -# The sed command converted the sorted list to a shell script that populates -# BEST_FILE[tuple]="fname". Let's load that! - -. "$TRACE_DIR/.candidate_script" - -########################## -# STEP 5: WRITING OUTPUT # -########################## - -# The final trick is to grab the top pick for each tuple, unless said tuple is -# already set due to the inclusion of an earlier candidate; and then put all -# tuples associated with the newly-added file to the "already have" list. The -# loop works from least popular tuples and toward the most common ones. - -echo "[*] Processing candidates and writing output files..." - -CUR=0 - -touch "$TRACE_DIR/.already_have" - -while read -r cnt tuple; do - - CUR=$((CUR+1)) - printf "\\r Processing tuple $CUR/$TUPLE_COUNT... " - - # If we already have this tuple, skip it. - - grep -q "^$tuple\$" "$TRACE_DIR/.already_have" && continue - - FN=${BEST_FILE[tuple]} - - $CP_TOOL "$IN_DIR/$FN" "$OUT_DIR/$FN" - - if [ "$((CUR % 5))" = "0" ]; then - sort -u "$TRACE_DIR/$FN" "$TRACE_DIR/.already_have" >"$TRACE_DIR/.tmp" - mv -f "$TRACE_DIR/.tmp" "$TRACE_DIR/.already_have" - else - cat "$TRACE_DIR/$FN" >>"$TRACE_DIR/.already_have" - fi - -done <"$TRACE_DIR/.all_uniq" - -echo - -OUT_COUNT=`ls -- "$OUT_DIR" | wc -l` - -if [ "$OUT_COUNT" = "1" ]; then - echo "[!] WARNING: All test cases had the same traces, check syntax!" -fi - -echo "[+] Narrowed down to $OUT_COUNT files, saved in '$OUT_DIR'." 
-echo - -test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR" + } + + if (stdin_file) { + # truncate input file + printf "" > stdin_file + close( stdin_file ) + } + + if (!ENVIRON["AFL_PATH"]) { + if (0 == system("test -f afl-cmin")) { + showmap = "./afl-showmap" + } else { + "which afl-showmap 2>/dev/null" | getline showmap + } + } else { + showmap = ENVIRON["AFL_PATH"] "/afl-showmap" + } + + if (!showmap || 0 != system("test -x "showmap )) { + print "[-] Error: can't find 'afl-showmap' - please set AFL_PATH." > "/dev/stderr" + exit 1 + } + + # get list of input filenames sorted by size + i = 0 + # yuck, gnu stat is option incompatible to bsd stat + # we use a heuristic to differentiate between + # GNU stat and other stats + "stat --version 2>/dev/null" | getline statversion + if (statversion ~ /GNU coreutils/) { + stat_format = "-c '%s %n'" # GNU + } else { + stat_format = "-f '%z %N'" # *BSD, MacOS + } + cmdline = "cd "in_dir" && find . \\( ! -name . -a -type d -prune \\) -o -type f -exec stat "stat_format" \\{\\} \\; | sort -n | cut -d' ' -f2-" + while (cmdline | getline) { + infilesSmallToBig[i++] = $0 + } + in_count = i + + first_file = infilesSmallToBig[0] + + # Make sure that we're not dealing with a directory. -exit 0 + if (0 == system("test -d "in_dir"/"first_file)) { + print "[-] Error: The input directory contains subdirectories - please fix." > "/dev/stderr" + exit 1 + } + + if (0 == system("ln "in_dir"/"first_file" "trace_dir"/.link_test")) { + cp_tool = "ln" + } else { + cp_tool = "cp" + } + + # Make sure that we can actually get anything out of afl-showmap before we + # waste too much time. + + print "[*] Testing the target binary..." 
+ + if (!stdin_file) { + system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -- \""target_bin"\" "prog_args_string" <\""in_dir"/"first_file"\"") + } else { + system("cp "in_dir"/"first_file" "stdin_file) + system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" </dev/null") + } + + first_count = 0 + + runtest = trace_dir"/.run_test" + while ((getline < runtest) > 0) { + ++first_count + } + + if (first_count) { + print "[+] OK, "first_count" tuples recorded." + } else { + print "[-] Error: no instrumentation output detected (perhaps crash or timeout)." > "/dev/stderr" + if (!ENVIRON["AFL_KEEP_TRACES"]) { + system("rm -rf "trace_dir" 2>/dev/null") + } + exit 1 + } + + # Let's roll! + + ############################# + # STEP 1: Collecting traces # + ############################# + + print "[*] Obtaining traces for "in_count" input files in '"in_dir"'." 
+ + cur = 0; + if (!stdin_file) { + while (cur < in_count) { + fn = infilesSmallToBig[cur] + ++cur; + printf "\r Processing file "cur"/"in_count + system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/"fn"\" -Z "extra_par" -- \""target_bin"\" "prog_args_string" <\""in_dir"/"fn"\"") + } + } else { + while (cur < in_count) { + fn = infilesSmallToBig[cur] + ++cur + printf "\r Processing file "cur"/"in_count + system("cp "in_dir"/"fn" "stdin_file) + system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/"fn"\" -Z "extra_par" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" </dev/null") + } + } + + print "" + + + ####################################################### + # STEP 2: register smallest input file for each tuple # + # STEP 3: copy that file (at most once) # + ####################################################### + + print "[*] Processing traces for input files in '"in_dir"'." + + cur = 0 + out_count = 0 + tuple_count = 0 + + while (cur < in_count) { + fn = infilesSmallToBig[cur] + ++cur + printf "\r Processing file "cur"/"in_count + # create path for the trace file from afl-showmap + tracefile_path = trace_dir"/"fn + # gather all keys, and count them + while ((getline line < tracefile_path) > 0) { + key = line + if (!(key in key_count)) { + ++tuple_count + } + ++key_count[key] + if (! (key in best_file)) { + # this is the best file for this key + best_file[key] = fn + # copy file unless already done + if (! (fn in file_already_copied)) { + system(cp_tool" "in_dir"/"fn" "out_dir"/"fn) + file_already_copied[fn] = "" + ++out_count + } + } + } + close(tracefile_path) + } + + print "" + print "[+] Found "tuple_count" unique tuples across "in_count" files." + + if (out_count == 1) { + print "[!] WARNING: All test cases had the same traces, check syntax!" + } + print "[+] Narrowed down to "out_count" files, saved in '"out_dir"'." 
+ + if (!ENVIRON["AFL_KEEP_TRACES"]) { + system("rm -rf "trace_dir" 2>/dev/null") + } + + exit 0 +} +EOF diff --git a/afl-cmin.bash b/afl-cmin.bash new file mode 100755 index 00000000..1dd782d8 --- /dev/null +++ b/afl-cmin.bash @@ -0,0 +1,470 @@ +#!/usr/bin/env bash +# +# american fuzzy lop++ - corpus minimization tool +# --------------------------------------------- +# +# Originally written by Michal Zalewski +# +# Copyright 2014, 2015 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# This tool tries to find the smallest subset of files in the input directory +# that still trigger the full range of instrumentation data points seen in +# the starting corpus. This has two uses: +# +# - Screening large corpora of input files before using them as a seed for +# afl-fuzz. The tool will remove functionally redundant files and likely +# leave you with a much smaller set. +# +# (In this case, you probably also want to consider running afl-tmin on +# the individual files later on to reduce their size.) +# +# - Minimizing the corpus generated organically by afl-fuzz, perhaps when +# planning to feed it to more resource-intensive tools. The tool achieves +# this by removing all entries that used to trigger unique behaviors in the +# past, but have been made obsolete by later finds. +# +# Note that the tool doesn't modify the files themselves. For that, you want +# afl-tmin. +# +# This script must use bash because other shells may have hardcoded limits on +# array sizes. +# + +echo "corpus minimization tool for afl-fuzz by Michal Zalewski" +echo + +######### +# SETUP # +######### + +# Process command-line options... 
+ +MEM_LIMIT=200 +TIMEOUT=none + +unset IN_DIR OUT_DIR STDIN_FILE EXTRA_PAR MEM_LIMIT_GIVEN \ + AFL_CMIN_CRASHES_ONLY AFL_CMIN_ALLOW_ANY QEMU_MODE UNICORN_MODE + +while getopts "+i:o:f:m:t:eQUCh" opt; do + + case "$opt" in + + "h") + ;; + + "i") + IN_DIR="$OPTARG" + ;; + + "o") + OUT_DIR="$OPTARG" + ;; + "f") + STDIN_FILE="$OPTARG" + ;; + "m") + MEM_LIMIT="$OPTARG" + MEM_LIMIT_GIVEN=1 + ;; + "t") + TIMEOUT="$OPTARG" + ;; + "e") + EXTRA_PAR="$EXTRA_PAR -e" + ;; + "C") + export AFL_CMIN_CRASHES_ONLY=1 + ;; + "Q") + EXTRA_PAR="$EXTRA_PAR -Q" + test "$MEM_LIMIT_GIVEN" = "" && MEM_LIMIT=250 + QEMU_MODE=1 + ;; + "U") + EXTRA_PAR="$EXTRA_PAR -U" + test "$MEM_LIMIT_GIVEN" = "" && MEM_LIMIT=250 + UNICORN_MODE=1 + ;; + "?") + exit 1 + ;; + + esac + +done + +shift $((OPTIND-1)) + +TARGET_BIN="$1" + +if [ "$TARGET_BIN" = "" -o "$IN_DIR" = "" -o "$OUT_DIR" = "" ]; then + + cat 1>&2 <<_EOF_ +Usage: $0 [ options ] -- /path/to/target_app [ ... ] + +Required parameters: + + -i dir - input directory with the starting corpus + -o dir - output directory for minimized files + +Execution control settings: + + -f file - location read by the fuzzed program (stdin) + -m megs - memory limit for child process ($MEM_LIMIT MB) + -t msec - run time limit for child process (none) + -Q - use binary-only instrumentation (QEMU mode) + -U - use unicorn-based instrumentation (Unicorn mode) + +Minimization settings: + + -C - keep crashing inputs, reject everything else + -e - solve for edge coverage only, ignore hit counts + +For additional tips, please consult docs/README. + +_EOF_ + exit 1 +fi + +# Do a sanity check to discourage the use of /tmp, since we can't really +# handle this safely from a shell script. + +if [ "$AFL_ALLOW_TMP" = "" ]; then + + echo "$IN_DIR" | grep -qE '^(/var)?/tmp/' + T1="$?" + + echo "$TARGET_BIN" | grep -qE '^(/var)?/tmp/' + T2="$?" + + echo "$OUT_DIR" | grep -qE '^(/var)?/tmp/' + T3="$?" + + echo "$STDIN_FILE" | grep -qE '^(/var)?/tmp/' + T4="$?" 
+ + echo "$PWD" | grep -qE '^(/var)?/tmp/' + T5="$?" + + if [ "$T1" = "0" -o "$T2" = "0" -o "$T3" = "0" -o "$T4" = "0" -o "$T5" = "0" ]; then + echo "[-] Error: do not use this script in /tmp or /var/tmp." 1>&2 + exit 1 + fi + +fi + +# If @@ is specified, but there's no -f, let's come up with a temporary input +# file name. + +TRACE_DIR="$OUT_DIR/.traces" + +if [ "$STDIN_FILE" = "" ]; then + + if echo "$*" | grep -qF '@@'; then + STDIN_FILE="$TRACE_DIR/.cur_input" + fi + +fi + +# Check for obvious errors. + +if [ ! "$MEM_LIMIT" = "none" ]; then + + if [ "$MEM_LIMIT" -lt "5" ]; then + echo "[-] Error: dangerously low memory limit." 1>&2 + exit 1 + fi + +fi + +if [ ! "$TIMEOUT" = "none" ]; then + + if [ "$TIMEOUT" -lt "10" ]; then + echo "[-] Error: dangerously low timeout." 1>&2 + exit 1 + fi + +fi + +if [ ! -f "$TARGET_BIN" -o ! -x "$TARGET_BIN" ]; then + + TNEW="`which "$TARGET_BIN" 2>/dev/null`" + + if [ ! -f "$TNEW" -o ! -x "$TNEW" ]; then + echo "[-] Error: binary '$TARGET_BIN' not found or not executable." 1>&2 + exit 1 + fi + + TARGET_BIN="$TNEW" + +fi + +if [ "$AFL_SKIP_BIN_CHECK" = "" -a "$QEMU_MODE" = "" -a "$UNICORN_MODE" = "" ]; then + + if ! grep -qF "__AFL_SHM_ID" "$TARGET_BIN"; then + echo "[-] Error: binary '$TARGET_BIN' doesn't appear to be instrumented." 1>&2 + exit 1 + fi + +fi + +if [ ! -d "$IN_DIR" ]; then + echo "[-] Error: directory '$IN_DIR' not found." 1>&2 + exit 1 +fi + +test -d "$IN_DIR/queue" && IN_DIR="$IN_DIR/queue" + +find "$OUT_DIR" -name 'id[:_]*' -maxdepth 1 -exec rm -- {} \; 2>/dev/null +rm -rf "$TRACE_DIR" 2>/dev/null + +rmdir "$OUT_DIR" 2>/dev/null + +if [ -d "$OUT_DIR" ]; then + echo "[-] Error: directory '$OUT_DIR' exists and is not empty - delete it first." 1>&2 + exit 1 +fi + +mkdir -m 700 -p "$TRACE_DIR" || exit 1 + +if [ ! 
"$STDIN_FILE" = "" ]; then + rm -f "$STDIN_FILE" || exit 1 + touch "$STDIN_FILE" || exit 1 +fi + +if [ "$AFL_PATH" = "" ]; then + SHOWMAP="${0%/afl-cmin.bash}/afl-showmap" # strip this script's own file name (afl-cmin.bash) from $0 to get its directory +else + SHOWMAP="$AFL_PATH/afl-showmap" +fi + +if [ ! -x "$SHOWMAP" ]; then + echo "[-] Error: can't find 'afl-showmap' - please set AFL_PATH." 1>&2 + rm -rf "$TRACE_DIR" + exit 1 +fi + +IN_COUNT=$((`ls -- "$IN_DIR" 2>/dev/null | wc -l`)) + +if [ "$IN_COUNT" = "0" ]; then + echo "[+] Hmm, no inputs in the target directory. Nothing to be done." + rm -rf "$TRACE_DIR" + exit 1 +fi + +FIRST_FILE=`ls "$IN_DIR" | head -1` + +# Make sure that we're not dealing with a directory. + +if [ -d "$IN_DIR/$FIRST_FILE" ]; then + echo "[-] Error: The target directory contains subdirectories - please fix." 1>&2 + rm -rf "$TRACE_DIR" + exit 1 +fi + +# Check for the more efficient way to copy files... + +if ln "$IN_DIR/$FIRST_FILE" "$TRACE_DIR/.link_test" 2>/dev/null; then + CP_TOOL=ln +else + CP_TOOL=cp +fi + +# Make sure that we can actually get anything out of afl-showmap before we +# waste too much time. + +echo "[*] Testing the target binary..." + +if [ "$STDIN_FILE" = "" ]; then + + AFL_CMIN_ALLOW_ANY=1 "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/.run_test" -Z $EXTRA_PAR -- "$@" <"$IN_DIR/$FIRST_FILE" + +else + + cp "$IN_DIR/$FIRST_FILE" "$STDIN_FILE" + AFL_CMIN_ALLOW_ANY=1 "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/.run_test" -Z $EXTRA_PAR -A "$STDIN_FILE" -- "$@" </dev/null + +fi + +FIRST_COUNT=$((`grep -c . "$TRACE_DIR/.run_test"`)) + +if [ "$FIRST_COUNT" -gt "0" ]; then + + echo "[+] OK, $FIRST_COUNT tuples recorded." + +else + + echo "[-] Error: no instrumentation output detected (perhaps crash or timeout)." 1>&2 + test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR" + exit 1 + +fi + +# Let's roll! + +############################# +# STEP 1: COLLECTING TRACES # +############################# + +echo "[*] Obtaining traces for input files in '$IN_DIR'..." 
+ +( + + CUR=0 + + if [ "$STDIN_FILE" = "" ]; then + + ls "$IN_DIR" | while read -r fn; do + + CUR=$((CUR+1)) + printf "\\r Processing file $CUR/$IN_COUNT... " + + "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/$fn" -Z $EXTRA_PAR -- "$@" <"$IN_DIR/$fn" + + done + + else + + ls "$IN_DIR" | while read -r fn; do + + CUR=$((CUR+1)) + printf "\\r Processing file $CUR/$IN_COUNT... " + + cp "$IN_DIR/$fn" "$STDIN_FILE" + + "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/$fn" -Z $EXTRA_PAR -A "$STDIN_FILE" -- "$@" </dev/null + + done + + + fi + +) + +echo + +########################## +# STEP 2: SORTING TUPLES # +########################## + +# With this out of the way, we sort all tuples by popularity across all +# datasets. The reasoning here is that we won't be able to avoid the files +# that trigger unique tuples anyway, so we will want to start with them and +# see what's left. + +echo "[*] Sorting trace sets (this may take a while)..." + +ls "$IN_DIR" | sed "s#^#$TRACE_DIR/#" | tr '\n' '\0' | xargs -0 -n 1 cat | \ + sort | uniq -c | sort -k 1,1 -n >"$TRACE_DIR/.all_uniq" + +TUPLE_COUNT=$((`grep -c . "$TRACE_DIR/.all_uniq"`)) + +echo "[+] Found $TUPLE_COUNT unique tuples across $IN_COUNT files." + +##################################### +# STEP 3: SELECTING CANDIDATE FILES # +##################################### + +# The next step is to find the best candidate for each tuple. The "best" +# part is understood simply as the smallest input that includes a particular +# tuple in its trace. Empirical evidence suggests that this produces smaller +# datasets than more involved algorithms that could be still pulled off in +# a shell script. + +echo "[*] Finding best candidates for each tuple..." + +CUR=0 + +ls -rS "$IN_DIR" | while read -r fn; do + + CUR=$((CUR+1)) + printf "\\r Processing file $CUR/$IN_COUNT... 
" + + sed "s#\$# $fn#" "$TRACE_DIR/$fn" >>"$TRACE_DIR/.candidate_list" + +done + +echo + +############################## +# STEP 4: LOADING CANDIDATES # +############################## + +# At this point, we have a file of tuple-file pairs, sorted by file size +# in ascending order (as a consequence of ls -rS). By doing sort keyed +# only by tuple (-k 1,1) and configured to output only the first line for +# every key (-s -u), we end up with the smallest file for each tuple. + +echo "[*] Sorting candidate list (be patient)..." + +sort -k1,1 -s -u "$TRACE_DIR/.candidate_list" | \ + sed 's/^/BEST_FILE[/;s/ /]="/;s/$/"/' >"$TRACE_DIR/.candidate_script" + +if [ ! -s "$TRACE_DIR/.candidate_script" ]; then + echo "[-] Error: no traces obtained from test cases, check syntax!" 1>&2 + test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR" + exit 1 +fi + +# The sed command converted the sorted list to a shell script that populates +# BEST_FILE[tuple]="fname". Let's load that! + +. "$TRACE_DIR/.candidate_script" + +########################## +# STEP 5: WRITING OUTPUT # +########################## + +# The final trick is to grab the top pick for each tuple, unless said tuple is +# already set due to the inclusion of an earlier candidate; and then put all +# tuples associated with the newly-added file to the "already have" list. The +# loop works from least popular tuples and toward the most common ones. + +echo "[*] Processing candidates and writing output files..." + +CUR=0 + +touch "$TRACE_DIR/.already_have" + +while read -r cnt tuple; do + + CUR=$((CUR+1)) + printf "\\r Processing tuple $CUR/$TUPLE_COUNT... " + + # If we already have this tuple, skip it. 
+ + grep -q "^$tuple\$" "$TRACE_DIR/.already_have" && continue + + FN=${BEST_FILE[tuple]} + + $CP_TOOL "$IN_DIR/$FN" "$OUT_DIR/$FN" + + if [ "$((CUR % 5))" = "0" ]; then + sort -u "$TRACE_DIR/$FN" "$TRACE_DIR/.already_have" >"$TRACE_DIR/.tmp" + mv -f "$TRACE_DIR/.tmp" "$TRACE_DIR/.already_have" + else + cat "$TRACE_DIR/$FN" >>"$TRACE_DIR/.already_have" + fi + +done <"$TRACE_DIR/.all_uniq" + +echo + +OUT_COUNT=`ls -- "$OUT_DIR" | wc -l` + +if [ "$OUT_COUNT" = "1" ]; then + echo "[!] WARNING: All test cases had the same traces, check syntax!" +fi + +echo "[+] Narrowed down to $OUT_COUNT files, saved in '$OUT_DIR'." +echo + +test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR" + +exit 0 diff --git a/afl-system-config b/afl-system-config index 2a7df17f..1e180d8b 100755 --- a/afl-system-config +++ b/afl-system-config @@ -1,6 +1,6 @@ #!/bin/sh test "$1" = "-h" && { - echo afl-system-config by Marc Heuse + echo 'afl-system-config by Marc Heuse <mh@mh-sec.de>' echo echo $0 echo @@ -12,55 +12,72 @@ test "$1" = "-h" && { exit 1 } +DONE= PLATFORM=`uname -s` -echo This reconfigures the system to have a better fuzzing performance +echo This reconfigures the system to have a better fuzzing performance. if [ '!' "$EUID" = 0 ] && [ '!' `id -u` = 0 ] ; then - echo Error you need to be root to run this - exit 1 + echo "Warning: you need to be root to run this!" + # we do not exit as other mechanisms exist that allows to do this than + # being root. let the errors speak for themselves. 
fi if [ "$PLATFORM" = "Linux" ] ; then -sysctl -w kernel.core_pattern=core -sysctl -w kernel.randomize_va_space=0 -sysctl -w kernel.sched_child_runs_first=1 -sysctl -w kernel.sched_autogroup_enabled=1 -sysctl -w kernel.sched_migration_cost_ns=50000000 -sysctl -w kernel.sched_latency_ns=250000000 -echo never > /sys/kernel/mm/transparent_hugepage/enabled -test -e /sys/devices/system/cpu/cpufreq/scaling_governor && echo performance | tee /sys/devices/system/cpu/cpufreq/scaling_governor -test -e /sys/devices/system/cpu/cpufreq/policy0/scaling_governor && echo performance | tee /sys/devices/system/cpu/cpufreq/policy*/scaling_governor -test -e /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor && echo performance | tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor -test -e /sys/devices/system/cpu/intel_pstate/no_turbo && echo 0 > /sys/devices/system/cpu/intel_pstate/no_turbo -test -e /sys/devices/system/cpu/cpufreq/boost && echo 1 > /sys/devices/system/cpu/cpufreq/boost -echo -echo It is recommended to boot the kernel with lots of security off - if you are running a machine that is in a secured network - so set this: -echo '/etc/default/grub:GRUB_CMDLINE_LINUX_DEFAULT="ibpb=off ibrs=off kpti=off l1tf=off mds=off mitigations=off no_stf_barrier noibpb noibrs nopcid nopti nospec_store_bypass_disable nospectre_v1 nospectre_v2 pcid=off pti=off spec_store_bypass_disable=off spectre_v2=off stf_barrier=off"' +{ + sysctl -w kernel.core_pattern=core + sysctl -w kernel.randomize_va_space=0 + sysctl -w kernel.sched_child_runs_first=1 + sysctl -w kernel.sched_autogroup_enabled=1 + sysctl -w kernel.sched_migration_cost_ns=50000000 + sysctl -w kernel.sched_latency_ns=250000000 + echo never > /sys/kernel/mm/transparent_hugepage/enabled + test -e /sys/devices/system/cpu/cpufreq/scaling_governor && echo performance | tee /sys/devices/system/cpu/cpufreq/scaling_governor + test -e /sys/devices/system/cpu/cpufreq/policy0/scaling_governor && echo performance | tee 
/sys/devices/system/cpu/cpufreq/policy*/scaling_governor + test -e /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor && echo performance | tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor + test -e /sys/devices/system/cpu/intel_pstate/no_turbo && echo 0 > /sys/devices/system/cpu/intel_pstate/no_turbo + test -e /sys/devices/system/cpu/cpufreq/boost && echo 1 > /sys/devices/system/cpu/cpufreq/boost +} > /dev/null + echo Settings applied. + dmesg | egrep -q 'nospectre_v2|spectre_v2=off' || { + echo It is recommended to boot the kernel with lots of security off - if you are running a machine that is in a secured network - so set this: + echo ' /etc/default/grub:GRUB_CMDLINE_LINUX_DEFAULT="ibpb=off ibrs=off kpti=off l1tf=off mds=off mitigations=off no_stf_barrier noibpb noibrs nopcid nopti nospec_store_bypass_disable nospectre_v1 nospectre_v2 pcid=off pti=off spec_store_bypass_disable=off spectre_v2=off stf_barrier=off"' + } + DONE=1 fi if [ "$PLATFORM" = "FreeBSD" ] ; then -sysctl kern.elf32.aslr.enable=0 -sysctl kern.elf64.aslr.enable=0 -echo -echo It is recommended to boot the kernel with lots of security off - if you are running a machine that is in a secured network - so set this: -echo 'sysctl hw.ibrs_disable=1' -echo -echo 'Setting kern.pmap.pg_ps_enabled=0 into /boot/loader.conf might be helpful too.' +{ + sysctl kern.elf32.aslr.enable=0 + sysctl kern.elf64.aslr.enable=0 +} > /dev/null + echo Settings applied. + echo It is recommended to boot the kernel with lots of security off - if you are running a machine that is in a secured network - so set this: + echo ' sysctl hw.ibrs_disable=1' + echo 'Setting kern.pmap.pg_ps_enabled=0 into /boot/loader.conf might be helpful too.' + DONE=1 fi if [ "$PLATFORM" = "OpenBSD" ] ; then -echo -echo 'System security features cannot be disabled on OpenBSD.' + echo + echo 'System security features cannot be disabled on OpenBSD.' 
+ DONE=1 fi if [ "$PLATFORM" = "NetBSD" ] ; then -echo -echo It is recommended to enable unprivileged users to set cpu affinity -echo to be able to use afl-gotcpu meaningfully. -/sbin/sysctl -w security.models.extensions.user_set_cpu_affinity=1 +{ + #echo It is recommended to enable unprivileged users to set cpu affinity + #echo to be able to use afl-gotcpu meaningfully. + /sbin/sysctl -w security.models.extensions.user_set_cpu_affinity=1 +} > /dev/null + echo Settings applied. + DONE=1 fi if [ "$PLATFORM" = "Darwin" ] ; then if [ $(launchctl list 2>/dev/null | grep -q '\.ReportCrash$') ] ; then -echo We unload the default crash reporter here -SL=/System/Library; PL=com.apple.ReportCrash -launchctl unload -w ${SL}/LaunchAgents/${PL}.plist -sudo launchctl unload -w ${SL}/LaunchDaemons/${PL}.Root.plist + echo We unload the default crash reporter here + SL=/System/Library; PL=com.apple.ReportCrash + launchctl unload -w ${SL}/LaunchAgents/${PL}.plist + sudo launchctl unload -w ${SL}/LaunchDaemons/${PL}.Root.plist + echo Settings applied. + else + echo Nothing to do. fi + DONE=1 fi -echo -echo Also use AFL_TMPDIR to use a tmpfs for the input file +test -z "$DONE" && echo Error: Unknown platform: $PLATFORM +test -z "$AFL_TMPDIR" && echo Also use AFL_TMPDIR and point it to a tmpfs for the input file caching diff --git a/docs/ChangeLog b/docs/ChangeLog index 5347d244..5017a803 100644 --- a/docs/ChangeLog +++ b/docs/ChangeLog @@ -21,9 +21,15 @@ Version ++2.60d (develop): - afl-fuzz: - now prints the real python version support compiled in - set stronger performance compile options and little tweaks - - afl-clang-fast now shows in the help output for which llvm version it - was compiled for - - added blacklisted function check in llvm_mode + - Android: prefer bigcores when selecting a CPU + - afl-clang-fast: + - show in the help output for which llvm version it was compiled for + - now does not need to be recompiled between trace-pc and pass + instrumentation. 
compile normally and set AFL_LLVM_USE_TRACE_PC :) + - llvm 11 is supported + - afl-cmin is now a sh script (invoking awk) instead of bash for portability + the original script is still present as afl-cmin.bash + - added blacklist and whitelisting function check in all modules of llvm_mode - added fix from Debian project to compile libdislocator and libtokencap diff --git a/docs/binaryonly_fuzzing.md b/docs/binaryonly_fuzzing.md new file mode 100644 index 00000000..6eff30d7 --- /dev/null +++ b/docs/binaryonly_fuzzing.md @@ -0,0 +1,161 @@ +# Fuzzing binary-only programs with afl++ + + afl++, libfuzzer and others are great if you have the source code, and + it allows for very fast and coverage guided fuzzing. + + However, if there is only the binary program and no source code available, + then standard `afl-fuzz -n` (dumb mode) is not effective. + + The following is a description of how these binaries can be fuzzed with afl++ + + !!!!! + TL;DR: try DYNINST with afl-dyninst. If it produces too many crashes then + use afl -Q qemu_mode, or better: use both in parallel. + !!!!! + + +## QEMU + + Qemu is the "native" solution to the program. + It is available in the ./qemu_mode/ directory and once compiled it can + be accessed by the afl-fuzz -Q command line option. + The speed decrease is at about 50%. + It is the easiest to use alternative and even works for cross-platform binaries. + + Note that there is also honggfuzz: [https://github.com/google/honggfuzz](https://github.com/google/honggfuzz) + which now has a qemu_mode, but its performance is just 1.5%! + + As it is included in afl++ this needs no URL. + + +## WINE+QEMU + + Wine mode can run Win32 PE binaries with the QEMU instrumentation. + It needs Wine, python3 and the pefile python package installed. + + As it is included in afl++ this needs no URL. + + +## UNICORN + + Unicorn is a fork of QEMU. The instrumentation is, therefore, very similar. 
+ In contrast to QEMU, Unicorn does not offer a full system or even userland + emulation. Runtime environment and/or loaders have to be written from scratch, + if needed. On top, block chaining has been removed. This means the speed boost + introduced in the patched QEMU Mode of afl++ cannot simply be ported over to + Unicorn. For further information, check out ./unicorn_mode.txt. + + As it is included in afl++ this needs no URL. + + +## DYNINST + + Dyninst is a binary instrumentation framework similar to Pintool and + Dynamorio (see far below). However, whereas Pintool and Dynamorio work at + runtime, dyninst instruments the target at load time, and then lets it run - + or saves the binary with the changes. + This is great for some things, e.g. fuzzing, and not so effective for others, + e.g. malware analysis. + + So what we can do with dyninst is to take every basic block, and put afl's + instrumentation code in there - and then save the binary. + Afterwards we can just fuzz the newly saved target binary with afl-fuzz. + Sounds great? It is. The issue though - it is a non-trivial problem to + insert instructions, which change addresses in the process space, so that + everything is still working afterwards. Hence more often than not binaries + crash when they are run. + + The speed decrease is about 15-35%, depending on the optimization options + used with afl-dyninst. + + So if Dyninst works, it is the best option available. Otherwise it just + doesn't work well. + + [https://github.com/vanhauser-thc/afl-dyninst](https://github.com/vanhauser-thc/afl-dyninst) + + +## INTEL-PT + + If you have a newer Intel CPU, you can make use of Intel's Processor Trace. + The big issue with Intel's PT is the small buffer size and the complex + encoding of the debug information collected through PT. + This makes the decoding very CPU intensive and hence slow. + As a result, the overall speed decrease is about 70-90% (depending on + the implementation and other factors). 
+ + There are two afl intel-pt implementations: + + 1. [https://github.com/junxzm1990/afl-pt](https://github.com/junxzm1990/afl-pt) + => this needs Ubuntu 14.04.05 without any updates and the 4.4 kernel. + + 2. [https://github.com/hunter-ht-2018/ptfuzzer](https://github.com/hunter-ht-2018/ptfuzzer) + => this needs a 4.14 or 4.15 kernel. The "nopti" kernel boot option must + be used. This one is faster than the other. + + Note that there is also honggfuzz: https://github.com/google/honggfuzz + But its IPT performance is just 6%! + + +## CORESIGHT + + Coresight is ARM's answer to Intel's PT. + There is no implementation so far which handles coresight, and getting + it working on ARM Linux is very difficult because custom kernel building + on embedded systems is difficult. And finding one that has coresight in + the ARM chip is difficult too. + My guess is that it is slower than Qemu, but faster than Intel PT. + + If anyone finds any coresight implementation for afl please ping me: vh@thc.org + + +## FRIDA + + Frida is a dynamic instrumentation engine like Pintool, Dyninst and Dynamorio. + What is special is that it is written in Python, and scripted with Javascript. + It is mostly used to reverse binaries on mobile phones, however it can be used + everywhere. + + There is a WIP fuzzer available at [https://github.com/andreafioraldi/frida-fuzzer](https://github.com/andreafioraldi/frida-fuzzer) + + +## PIN & DYNAMORIO + + Pintool and Dynamorio are dynamic instrumentation engines, and they can be + used for getting basic block information at runtime. + Pintool is only available for Intel x32/x64 on Linux, Mac OS and Windows + whereas Dynamorio is additionally available for ARM and AARCH64. + Dynamorio is also 10x faster than Pintool. + + The big issue with Dynamorio (and therefore Pintool too) is speed. 
+ Dynamorio has a speed decrease of 98-99% + Pintool has a speed decrease of 99.5% + + Hence Dynamorio is the option to go for if everything fails, and Pintool + only if Dynamorio fails too. + + Dynamorio solutions: + * [https://github.com/vanhauser-thc/afl-dynamorio](https://github.com/vanhauser-thc/afl-dynamorio) + * [https://github.com/mxmssh/drAFL](https://github.com/mxmssh/drAFL) + * [https://github.com/googleprojectzero/winafl/](https://github.com/googleprojectzero/winafl/) <= very good but windows only + + Pintool solutions: + * [https://github.com/vanhauser-thc/afl-pin](https://github.com/vanhauser-thc/afl-pin) + * [https://github.com/mothran/aflpin](https://github.com/mothran/aflpin) + * [https://github.com/spinpx/afl_pin_mode](https://github.com/spinpx/afl_pin_mode) <= only old Pintool version supported + + +## Non-AFL solutions + + There are many binary-only fuzzing frameworks. + Some are great for CTFs but don't work with large binaries, others are very + slow but have good path discovery, some are very hard to set-up ... + + * QSYM: [https://github.com/sslab-gatech/qsym](https://github.com/sslab-gatech/qsym) + * Manticore: [https://github.com/trailofbits/manticore](https://github.com/trailofbits/manticore) + * S2E: [https://github.com/S2E](https://github.com/S2E) + * ... please send me any missing that are good + + +## Closing words + + That's it! News, corrections, updates? Send an email to vh@thc.org diff --git a/docs/binaryonly_fuzzing.txt b/docs/binaryonly_fuzzing.txt deleted file mode 100644 index 239fb4b0..00000000 --- a/docs/binaryonly_fuzzing.txt +++ /dev/null @@ -1,144 +0,0 @@ - -Fuzzing binary-only programs with afl++ -======================================= - -afl++, libfuzzer and others are great if you have the source code, and -it allows for very fast and coverage guided fuzzing. - -However, if there is only the binary program and not source code available, -then standard afl++ (dumb mode) is not effective. 
- -The following is a description of how these can be fuzzed with afl++ - -!!!!! -TL;DR: try DYNINST with afl-dyninst. If it produces too many crashes then - use afl -Q qemu_mode, or better: use both in parallel. -!!!!! - - -QEMU ----- -Qemu is the "native" solution to the program. -It is available in the ./qemu_mode/ directory and once compiled it can -be accessed by the afl-fuzz -Q command line option. -The speed decrease is at about 50% -It is the easiest to use alternative and even works for cross-platform binaries. - -As it is included in afl++ this needs no URL. - -WINE+QEMU ---------- -Wine mode can run Win32 PE with the QEMU instrumentation. -It needs Wine, python3 and the pefile python package installed. - -UNICORN -------- -Unicorn is a fork of QEMU. The instrumentation is, therefore, very similar. -In contrast to QEMU, Unicorn does not offer a full system or even userland emulation. -Runtime environment and/or loaders have to be written from scratch, if needed. -On top, block chaining has been removed. This means the speed boost introduced in -to the patched QEMU Mode of afl++ cannot simply be ported over to Unicorn. -For further information, check out ./unicorn_mode.txt. - - -DYNINST -------- -Dyninst is a binary instrumentation framework similar to Pintool and Dynamorio -(see far below). However whereas Pintool and Dynamorio work at runtime, dyninst -instruments the target at load time, and then let it run. -This is great for some things, e.g. fuzzing, and not so effective for others, -e.g. malware analysis. - -So what we can do with dyninst is taking every basic block, and put afl's -instrumention code in there - and then save the binary. -Afterwards we can just fuzz the newly saved target binary with afl-fuzz. -Sounds great? It is. The issue though - it is a non-trivial problem to -insert instructions, which change addresses in the process space, so -everything is still working afterwards. 
Hence more often than not binaries -crash when they are run (because of instrumentation). - -The speed decrease is about 15-35%, depending on the optimization options -used with afl-dyninst. - -So if dyninst works, it is the best option available. Otherwise it just doesn't -work well. - -https://github.com/vanhauser-thc/afl-dyninst - - -INTEL-PT --------- -If you have a newer Intel CPU, you can make use of Intels processor trace. -The big issue with Intel's PT is the small buffer size and the complex -encoding of the debug information collected through PT. -This makes the decoding very CPU intensive and hence slow. -As a result, the overall speed decrease is about 70-90% (depending on -the implementation and other factors). - -There are two afl intel-pt implementations: - -1. https://github.com/junxzm1990/afl-pt - => this needs Ubuntu 14.04.05 without any updates and the 4.4 kernel. - -2. https://github.com/hunter-ht-2018/ptfuzzer - => this needs a 4.14 or 4.15 kernel. the "nopti" kernel boot option must - be used. This one is faster than the other. - - -CORESIGHT ---------- - -Coresight is ARM's answer to Intel's PT. -There is no implementation so far which handle coresight and getting -it working on an ARM Linux is very difficult due to custom kernel building -on embedded systems is difficult. And finding one that has coresight in -the ARM chip is difficult too. -My guess is that it is slower than Qemu, but faster than Intel PT. -If anyone finds any coresight implementation for afl please ping me: -vh@thc.org - - -PIN & DYNAMORIO ---------------- - -Pintool and Dynamorio are dynamic instrumentation engines, and they can be -used for getting basic block information at runtime. -Pintool is only available for Intel x32/x64 on Linux, Mac OS and Windows -whereas Dynamorio is additionally available for ARM and AARCH64. -Dynamorio is also 10x faster than Pintool. - -The big issue with Dynamorio (and therefore Pintool too) is speed. 
-Dynamorio has a speed decrease of 98-99% -Pintool has a speed decrease of 99.5% - -Hence Dynamorio is the option to go for if everything fails, and Pintool -only if Dynamorio fails too. - -Dynamorio solutions: - https://github.com/vanhauser-thc/afl-dynamorio - https://github.com/mxmssh/drAFL - https://github.com/googleprojectzero/winafl/ <= very good but windows only - -Pintool solutions: - https://github.com/vanhauser-thc/afl-pin - https://github.com/mothran/aflpin - https://github.com/spinpx/afl_pin_mode <= only old Pintool version supported - - -Non-AFL solutions ------------------ - -There are many binary-only fuzzing frameworks. Some are great for CTFs but don't -work with large binaries, others are very slow but have good path discovery, -some are very hard to set-up ... - -QSYM: https://github.com/sslab-gatech/qsym -Manticore: https://github.com/trailofbits/manticore -S2E: https://github.com/S2E -<please send me any missing that are good> - - - -That's it! -News, corrections, updates? -Email vh@thc.org diff --git a/experimental/README.experiments b/experimental/README.experiments index af9739bd..543c078c 100644 --- a/experimental/README.experiments +++ b/experimental/README.experiments @@ -28,6 +28,9 @@ Here's a quick overview of the stuff you can find in this directory: mode to speed up certain fuzzing jobs. - post_library - an example of how to build postprocessors for AFL. + + - socket_fuzzing - a LD_PRELOAD library 'redirects' a socket to stdin + for fuzzing access with afl++ Note that the minimize_corpus.sh tool has graduated from the experimental/ directory and is now available as ../afl-cmin. 
The LLVM mode has likewise diff --git a/include/afl-as.h b/include/afl-as.h index 3af42205..bd5e734a 100644 --- a/include/afl-as.h +++ b/include/afl-as.h @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse <mh@mh-sec.de>, + Now maintained by Marc Heuse <mh@mh-sec.de>, Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and Andrea Fioraldi <andreafioraldi@gmail.com> diff --git a/include/afl-fuzz.h b/include/afl-fuzz.h index cd53c703..967e16fe 100644 --- a/include/afl-fuzz.h +++ b/include/afl-fuzz.h @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse <mh@mh-sec.de>, + Now maintained by Marc Heuse <mh@mh-sec.de>, Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and Andrea Fioraldi <andreafioraldi@gmail.com> diff --git a/include/alloc-inl.h b/include/alloc-inl.h index 48598ed3..5592b295 100644 --- a/include/alloc-inl.h +++ b/include/alloc-inl.h @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse <mh@mh-sec.de>, + Now maintained by Marc Heuse <mh@mh-sec.de>, Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and Andrea Fioraldi <andreafioraldi@gmail.com> diff --git a/include/android-ashmem.h b/include/android-ashmem.h index 35a5ba5e..adddc05f 100755 --- a/include/android-ashmem.h +++ b/include/android-ashmem.h @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse <mh@mh-sec.de>, + Now maintained by Marc Heuse <mh@mh-sec.de>, Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and Andrea Fioraldi <andreafioraldi@gmail.com> diff --git a/include/common.h b/include/common.h index 8ab78b41..3b953470 100644 --- a/include/common.h +++ b/include/common.h @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse <mh@mh-sec.de>, + Now maintained by Marc Heuse <mh@mh-sec.de>, Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and Andrea Fioraldi <andreafioraldi@gmail.com> diff --git a/include/config.h b/include/config.h index c5a48df0..8b8924f5 100644 --- 
a/include/config.h +++ b/include/config.h @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse <mh@mh-sec.de>, + Now maintained by Marc Heuse <mh@mh-sec.de>, Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and Andrea Fioraldi <andreafioraldi@gmail.com> @@ -67,7 +67,8 @@ #else #define MEM_LIMIT 50 #endif /* ^!WORD_SIZE_64 */ -#else +#else /* NetBSD's kernel needs more space for stack, see discussion for issue \ + #165 */ #define MEM_LIMIT 200 #endif /* Default memory limit when running in QEMU mode (MB): */ diff --git a/include/debug.h b/include/debug.h index 68109927..d6c04935 100644 --- a/include/debug.h +++ b/include/debug.h @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse <mh@mh-sec.de>, + Now maintained by Marc Heuse <mh@mh-sec.de>, Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and Andrea Fioraldi <andreafioraldi@gmail.com> diff --git a/include/forkserver.h b/include/forkserver.h index 17bc65af..0fdcba48 100644 --- a/include/forkserver.h +++ b/include/forkserver.h @@ -6,7 +6,7 @@ Forkserver design by Jann Horn <jannhorn@googlemail.com> - Now maintained by by Marc Heuse <mh@mh-sec.de>, + Now maintained by Marc Heuse <mh@mh-sec.de>, Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and Andrea Fioraldi <andreafioraldi@gmail.com> diff --git a/include/sharedmem.h b/include/sharedmem.h index 3540386d..f92fd8be 100644 --- a/include/sharedmem.h +++ b/include/sharedmem.h @@ -6,7 +6,7 @@ Forkserver design by Jann Horn <jannhorn@googlemail.com> - Now maintained by by Marc Heuse <mh@mh-sec.de>, + Now maintained by Marc Heuse <mh@mh-sec.de>, Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and Andrea Fioraldi <andreafioraldi@gmail.com> diff --git a/include/types.h b/include/types.h index d5be5920..6aad9762 100644 --- a/include/types.h +++ b/include/types.h @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse <mh@mh-sec.de>, + Now maintained by Marc Heuse <mh@mh-sec.de>, Heiko Eißfeldt 
<heiko.eissfeldt@hexco.de> and Andrea Fioraldi <andreafioraldi@gmail.com> diff --git a/libdislocator/libdislocator.so.c b/libdislocator/libdislocator.so.c index 20649470..221a629b 100644 --- a/libdislocator/libdislocator.so.c +++ b/libdislocator/libdislocator.so.c @@ -397,6 +397,29 @@ void* aligned_alloc(size_t align, size_t len) { } +/* BSD-specific API: realloc() for an array of elem_cnt elements of elem_len bytes each. Returns NULL, without calling realloc(), when elem_len * elem_cnt would overflow size_t. NOTE(review): BSD reallocarray(3) also sets errno to ENOMEM in that case; this wrapper does not - confirm whether that is intended. */ + +void* reallocarray(void* ptr, size_t elem_len, size_t elem_cnt) { + + const size_t elem_lim = 1UL << (sizeof(size_t) * 4); /* 2^(half the bits of size_t); the product of two values below it cannot overflow */ + const size_t elem_tot = elem_len * elem_cnt; /* may wrap; handed to realloc() only when the check below rules out overflow */ + void* ret = NULL; + + if ((elem_len >= elem_lim || elem_cnt >= elem_lim) && elem_len > 0 && + elem_cnt > (SIZE_MAX / elem_len)) { + + DEBUGF("reallocarray size overflow (%zu)", elem_tot); + + } else { + + ret = realloc(ptr, elem_tot); + + } + + return ret; + +} + __attribute__((constructor)) void __dislocator_init(void) { u8* tmp = (u8*)getenv("AFL_LD_LIMIT_MB"); diff --git a/llvm_mode/LLVMInsTrim.so.cc b/llvm_mode/LLVMInsTrim.so.cc index 11451b43..5b7b79e1 100644 --- a/llvm_mode/LLVMInsTrim.so.cc +++ b/llvm_mode/LLVMInsTrim.so.cc @@ -3,10 +3,23 @@ #include <stdarg.h> #include <unistd.h> +#include "llvm/Config/llvm-config.h" +#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 5 +typedef long double max_align_t; +#endif + #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) #include "llvm/IR/CFG.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/DebugInfo.h" +#else +#include "llvm/Support/CFG.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/DebugInfo.h" +#endif #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LegacyPassManager.h" @@ -16,9 +29,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/BasicBlock.h" 
-#include "llvm/IR/CFG.h" #include <unordered_set> #include <random> #include <list> @@ -97,7 +108,7 @@ struct InsTrim : public ModulePass { // ripped from aflgo static bool isBlacklisted(const Function *F) { - static const SmallVector<std::string, 4> Blacklist = { + static const char *Blacklist[] = { "asan.", "llvm.", @@ -144,19 +155,6 @@ struct InsTrim : public ModulePass { // this is our default MarkSetOpt = true; - /* // I dont think this makes sense to port into LLVMInsTrim - char* inst_ratio_str = getenv("AFL_INST_RATIO"); - unsigned int inst_ratio = 100; - if (inst_ratio_str) { - - if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || !inst_ratio || - inst_ratio > 100) FATAL("Bad value of AFL_INST_RATIO (must be between 1 - and 100)"); - - } - - */ - LLVMContext &C = M.getContext(); IntegerType *Int8Ty = IntegerType::getInt8Ty(C); IntegerType *Int32Ty = IntegerType::getInt32Ty(C); @@ -186,6 +184,8 @@ struct InsTrim : public ModulePass { StringRef instFilename; unsigned int instLine = 0; +#if LLVM_VERSION_MAJOR >= 4 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7) for (auto &BB : F) { BasicBlock::iterator IP = BB.getFirstInsertionPt(); @@ -240,6 +240,48 @@ struct InsTrim : public ModulePass { } +#else + for (auto &BB : F) { + + BasicBlock::iterator IP = BB.getFirstInsertionPt(); + IRBuilder<> IRB(&(*IP)); + if (Loc.isUnknown()) Loc = IP->getDebugLoc(); + + } + + if (!Loc.isUnknown()) { + + DILocation cDILoc(Loc.getAsMDNode(C)); + + instLine = cDILoc.getLineNumber(); + instFilename = cDILoc.getFilename(); + + /* Continue only if we know where we actually are */ + if (!instFilename.str().empty()) { + + for (std::list<std::string>::iterator it = myWhitelist.begin(); + it != myWhitelist.end(); ++it) { + + if (instFilename.str().length() >= it->length()) { + + if (instFilename.str().compare( + instFilename.str().length() - it->length(), + it->length(), *it) == 0) { + + instrumentBlock = true; + break; + + } + + } + + } + + } + + } + +#endif /* Either we 
couldn't figure out our location or the location is * not whitelisted, so we skip instrumentation. */ if (!instrumentBlock) { @@ -432,28 +474,19 @@ struct InsTrim : public ModulePass { IRB.CreateStore(Incr, MapPtrIdx) ->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None)); - /* Set prev_loc to cur_loc >> 1 */ - /* - StoreInst *Store = IRB.CreateStore(ConstantInt::get(Int32Ty, L >> 1), - OldPrev); Store->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, - None)); - */ - total_instr++; } } - OKF("Instrumented %u locations (%llu, %llu) (%s mode)\n" /*", ratio - %u%%)."*/ - , - total_instr, total_rs, total_hs, + OKF("Instrumented %u locations (%llu, %llu) (%s mode)\n", total_instr, + total_rs, total_hs, getenv("AFL_HARDEN") ? "hardened" : ((getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN")) ? "ASAN/MSAN" - : "non-hardened") /*, inst_ratio*/); + : "non-hardened")); return false; } diff --git a/llvm_mode/Makefile b/llvm_mode/Makefile index 9ee6fc8b..6e0a27af 100644 --- a/llvm_mode/Makefile +++ b/llvm_mode/Makefile @@ -29,14 +29,14 @@ ifeq "$(shell uname)" "OpenBSD" LLVM_CONFIG ?= $(BIN_PATH)/llvm-config HAS_OPT = $(shell test -x $(BIN_PATH)/opt && echo 0 || echo 1) ifeq "$(HAS_OPT)" "1" - $(error llvm_mode needs a complete llvm installation (versions 3.8.0 up to 10) -> e.g. "pkg_add llvm-7.0.1p9") + $(error llvm_mode needs a complete llvm installation (versions 3.8.0 up to 11) -> e.g. 
"pkg_add llvm-7.0.1p9") endif else LLVM_CONFIG ?= llvm-config endif LLVMVER = $(shell $(LLVM_CONFIG) --version 2>/dev/null ) -LLVM_UNSUPPORTED = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^3\.[0-7]|^1[1-9]' && echo 1 || echo 0 ) +LLVM_UNSUPPORTED = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^3\.[0-7]|^1[2-9]' && echo 1 || echo 0 ) LLVM_NEW_API = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^1[0-9]' && echo 1 || echo 0 ) LLVM_MAJOR = $(shell $(LLVM_CONFIG) --version 2>/dev/null | sed 's/\..*//') LLVM_BINDIR = $(shell $(LLVM_CONFIG) --bindir 2>/dev/null) @@ -48,7 +48,7 @@ ifeq "$(LLVMVER)" "" endif ifeq "$(LLVM_UNSUPPORTED)" "1" - $(warning llvm_mode only supports llvm versions 3.8.0 up to 10) + $(warning llvm_mode only supports llvm versions 3.8.0 up to 11) endif ifeq "$(LLVM_MAJOR)" "9" @@ -201,7 +201,7 @@ endif ln -sf afl-clang-fast ../afl-clang-fast++ ../libLLVMInsTrim.so: LLVMInsTrim.so.cc MarkNodes.cc | test_deps - $(CXX) $(CLANG_CFL) -DLLVMInsTrim_EXPORTS -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< MarkNodes.cc -o $@ $(CLANG_LFL) + -$(CXX) $(CLANG_CFL) -DLLVMInsTrim_EXPORTS -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< MarkNodes.cc -o $@ $(CLANG_LFL) ../afl-llvm-pass.so: afl-llvm-pass.so.cc | test_deps $(CXX) $(CLANG_CFL) -DLLVMInsTrim_EXPORTS -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< -o $@ $(CLANG_LFL) diff --git a/llvm_mode/MarkNodes.cc b/llvm_mode/MarkNodes.cc index 2aeeda8d..7b22bac0 100644 --- a/llvm_mode/MarkNodes.cc +++ b/llvm_mode/MarkNodes.cc @@ -3,11 +3,22 @@ #include <queue> #include <set> #include <vector> + +#include "llvm/Config/llvm-config.h" +#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 5 +typedef long double max_align_t; +#endif + #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/BasicBlock.h" +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) #include "llvm/IR/CFG.h" +#else +#include 
"llvm/Support/CFG.h" +#endif #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" @@ -65,16 +76,11 @@ void buildCFG(Function *F) { } - // uint32_t FakeID = 0; for (auto S = F->begin(), E = F->end(); S != E; ++S) { BasicBlock *BB = &*S; uint32_t MyID = LMap[BB]; - // if (succ_begin(BB) == succ_end(BB)) { - // Succs[MyID].push_back(FakeID); - // Marked.insert(MyID); - //} for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { Succs[MyID].push_back(LMap[*I]); @@ -113,7 +119,7 @@ void DFStree(size_t now_id) { } -void turnCFGintoDAG(Function *F) { +void turnCFGintoDAG() { tSuccs = Succs; tag.resize(Blocks.size()); @@ -176,7 +182,7 @@ void DFS(uint32_t now) { } -void DominatorTree(Function *F) { +void DominatorTree() { if (Blocks.empty()) return; uint32_t s = start_point; @@ -390,7 +396,7 @@ void MarkSubGraph(uint32_t ss, uint32_t tt) { } -void MarkVertice(Function *F) { +void MarkVertice() { uint32_t s = start_point; @@ -411,8 +417,6 @@ void MarkVertice(Function *F) { timeStamp = 0; uint32_t t = 0; - // MarkSubGraph(s, t); - // return; while (s != t) { @@ -432,9 +436,9 @@ std::pair<std::vector<BasicBlock *>, std::vector<BasicBlock *> > markNodes( reset(); labelEachBlock(F); buildCFG(F); - turnCFGintoDAG(F); - DominatorTree::DominatorTree(F); - MarkVertice(F); + turnCFGintoDAG(); + DominatorTree::DominatorTree(); + MarkVertice(); std::vector<BasicBlock *> Result, ResultAbove; for (uint32_t x : Markabove) { diff --git a/llvm_mode/README.md b/llvm_mode/README.md index 5afa4dfd..54788aba 100644 --- a/llvm_mode/README.md +++ b/llvm_mode/README.md @@ -5,7 +5,7 @@ ## 1) Introduction -! llvm_mode works with llvm versions 3.8.0 up to 10 ! +! llvm_mode works with llvm versions 3.8.0 up to 11 ! The code in this directory allows you to instrument programs for AFL using true compiler-level instrumentation, instead of the more crude @@ -198,24 +198,23 @@ PS. 
Because there are task switches still involved, the mode isn't as fast as faster than the normal fork() model, and compared to in-process fuzzing, should be a lot more robust. -## 8) Bonus feature #3: new 'trace-pc-guard' mode +## 8) Bonus feature #3: 'trace-pc-guard' mode -Recent versions of LLVM are shipping with a built-in execution tracing feature +LLVM is shipping with a built-in execution tracing feature that provides AFL with the necessary tracing data without the need to post-process the assembly or install any compiler plugins. See: http://clang.llvm.org/docs/SanitizerCoverage.html#tracing-pcs-with-guards -If you have a sufficiently recent compiler and want to give it a try, build -afl-clang-fast this way: +If you have not an outdated compiler and want to give it a try, build +targets this way: ``` - AFL_TRACE_PC=1 make clean all + libtarget-1.0 $ AFL_LLVM_USE_TRACE_PC=1 make ``` -Note that this mode is currently about 20% slower than "vanilla" afl-clang-fast, +Note that this mode is about 20% slower than "vanilla" afl-clang-fast, and about 5-10% slower than afl-clang. This is likely because the -instrumentation is not inlined, and instead involves a function call. On systems -that support it, compiling your target with -flto should help. - - +instrumentation is not inlined, and instead involves a function call. +On systems that support it, compiling your target with -flto can help +a bit. 
diff --git a/llvm_mode/afl-clang-fast.c b/llvm_mode/afl-clang-fast.c index ddaa2388..4fbaf9ec 100644 --- a/llvm_mode/afl-clang-fast.c +++ b/llvm_mode/afl-clang-fast.c @@ -212,13 +212,24 @@ static void edit_params(u32 argc, char** argv) { // "-fsanitize-coverage=trace-cmp,trace-div,trace-gep"; // cc_params[cc_par_cnt++] = "-sanitizer-coverage-block-threshold=0"; #else - cc_params[cc_par_cnt++] = "-Xclang"; - cc_params[cc_par_cnt++] = "-load"; - cc_params[cc_par_cnt++] = "-Xclang"; - if (getenv("AFL_LLVM_INSTRIM") != NULL || getenv("INSTRIM_LIB") != NULL) - cc_params[cc_par_cnt++] = alloc_printf("%s/libLLVMInsTrim.so", obj_path); - else - cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-pass.so", obj_path); + if (getenv("USE_TRACE_PC") || getenv("AFL_USE_TRACE_PC") || + getenv("AFL_LLVM_USE_TRACE_PC") || getenv("AFL_TRACE_PC")) { + + cc_params[cc_par_cnt++] = + "-fsanitize-coverage=trace-pc-guard"; // edge coverage by default + + } else { + + cc_params[cc_par_cnt++] = "-Xclang"; + cc_params[cc_par_cnt++] = "-load"; + cc_params[cc_par_cnt++] = "-Xclang"; + if (getenv("AFL_LLVM_INSTRIM") != NULL || getenv("INSTRIM_LIB") != NULL) + cc_params[cc_par_cnt++] = alloc_printf("%s/libLLVMInsTrim.so", obj_path); + else + cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-pass.so", obj_path); + + } + #endif /* ^USE_TRACE_PC */ } @@ -292,8 +303,10 @@ static void edit_params(u32 argc, char** argv) { #ifdef USE_TRACE_PC - if (getenv("AFL_INST_RATIO")) - FATAL("AFL_INST_RATIO not available at compile time with 'trace-pc'."); + if (getenv("USE_TRACE_PC") || getenv("AFL_USE_TRACE_PC") || + getenv("AFL_LLVM_USE_TRACE_PC") || getenv("AFL_TRACE_PC")) + if (getenv("AFL_INST_RATIO")) + FATAL("AFL_INST_RATIO not available at compile time with 'trace-pc'."); #endif /* USE_TRACE_PC */ @@ -474,6 +487,8 @@ int main(int argc, char** argv) { #ifdef USE_TRACE_PC SAYF(cCYA "afl-clang-fast" VERSION cRST " [tpcg] by <lszekeres@google.com>\n"); +#warning \ + "You do not need to specifically 
compile with USE_TRACE_PC anymore, setting the environment variable AFL_LLVM_USE_TRACE_PC is enough." #else SAYF(cCYA "afl-clang-fast" VERSION cRST " by <lszekeres@google.com>\n"); #endif /* ^USE_TRACE_PC */ diff --git a/llvm_mode/afl-llvm-pass.so.cc b/llvm_mode/afl-llvm-pass.so.cc index 15cc6127..2cd23adf 100644 --- a/llvm_mode/afl-llvm-pass.so.cc +++ b/llvm_mode/afl-llvm-pass.so.cc @@ -37,14 +37,26 @@ #include <fstream> #include <sys/time.h> -#include "llvm/IR/DebugInfo.h" -#include "llvm/IR/BasicBlock.h" +#include "llvm/Config/llvm-config.h" +#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 5 +typedef long double max_align_t; +#endif + #include "llvm/IR/IRBuilder.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" + +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/CFG.h" +#else +#include "llvm/DebugInfo.h" +#include "llvm/Support/CFG.h" +#endif using namespace llvm; @@ -78,7 +90,7 @@ class AFLCoverage : public ModulePass { // ripped from aflgo static bool isBlacklisted(const Function *F) { - static const SmallVector<std::string, 4> Blacklist = { + static const char *Blacklist[] = { "asan.", "llvm.", @@ -197,6 +209,8 @@ bool AFLCoverage::runOnModule(Module &M) { * For now, just instrument the block if we are not able * to determine our location. 
*/ DebugLoc Loc = IP->getDebugLoc(); +#if LLVM_VERSION_MAJOR >= 4 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7) if (Loc) { DILocation *cDILoc = dyn_cast<DILocation>(Loc.getAsMDNode()); @@ -249,6 +263,47 @@ bool AFLCoverage::runOnModule(Module &M) { } +#else + if (!Loc.isUnknown()) { + + DILocation cDILoc(Loc.getAsMDNode(C)); + + unsigned int instLine = cDILoc.getLineNumber(); + StringRef instFilename = cDILoc.getFilename(); + + (void)instLine; + + /* Continue only if we know where we actually are */ + if (!instFilename.str().empty()) { + + for (std::list<std::string>::iterator it = myWhitelist.begin(); + it != myWhitelist.end(); ++it) { + + /* We don't check for filename equality here because + * filenames might actually be full paths. Instead we + * check that the actual filename ends in the filename + * specified in the list. */ + if (instFilename.str().length() >= it->length()) { + + if (instFilename.str().compare( + instFilename.str().length() - it->length(), + it->length(), *it) == 0) { + + instrumentBlock = true; + break; + + } + + } + + } + + } + + } + +#endif + /* Either we couldn't figure out our location or the location is * not whitelisted, so we skip instrumentation. 
*/ if (!instrumentBlock) continue; @@ -273,13 +328,19 @@ bool AFLCoverage::runOnModule(Module &M) { // result: a little more speed and less map pollution int more_than_one = -1; // fprintf(stderr, "BB %u: ", cur_loc); - for (BasicBlock *Pred : predecessors(&BB)) { + for (pred_iterator PI = pred_begin(&BB), E = pred_end(&BB); PI != E; + ++PI) { + + BasicBlock *Pred = *PI; int count = 0; if (more_than_one == -1) more_than_one = 0; // fprintf(stderr, " %p=>", Pred); - for (BasicBlock *Succ : successors(Pred)) { + for (succ_iterator SI = succ_begin(Pred), E = succ_end(Pred); SI != E; + ++SI) { + + BasicBlock *Succ = *SI; // if (count > 0) // fprintf(stderr, "|"); diff --git a/llvm_mode/compare-transform-pass.so.cc b/llvm_mode/compare-transform-pass.so.cc index 0ccce875..e1332a9d 100644 --- a/llvm_mode/compare-transform-pass.so.cc +++ b/llvm_mode/compare-transform-pass.so.cc @@ -18,6 +18,12 @@ #include <stdlib.h> #include <unistd.h> +#include <list> +#include <string> +#include <fstream> +#include <sys/time.h> +#include "llvm/Config/llvm-config.h" + #include "llvm/ADT/Statistic.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/LegacyPassManager.h" @@ -26,10 +32,19 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/IR/Verifier.h" #include "llvm/Pass.h" #include "llvm/Analysis/ValueTracking.h" +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) +#include "llvm/IR/Verifier.h" +#include "llvm/IR/DebugInfo.h" +#else +#include "llvm/Analysis/Verifier.h" +#include "llvm/DebugInfo.h" +#define nullptr 0 +#endif + #include <set> using namespace llvm; @@ -42,6 +57,23 @@ class CompareTransform : public ModulePass { static char ID; CompareTransform() : ModulePass(ID) { + char *instWhiteListFilename = getenv("AFL_LLVM_WHITELIST"); + if (instWhiteListFilename) { + + std::string line; + std::ifstream fileStream; + 
fileStream.open(instWhiteListFilename); + if (!fileStream) report_fatal_error("Unable to open AFL_LLVM_WHITELIST"); + getline(fileStream, line); + while (fileStream) { + + myWhitelist.push_back(line); + getline(fileStream, line); + + } + + } + } bool runOnModule(Module &M) override; @@ -57,6 +89,9 @@ class CompareTransform : public ModulePass { } + protected: + std::list<std::string> myWhitelist; + private: bool transformCmps(Module &M, const bool processStrcmp, const bool processMemcmp, const bool processStrncmp, @@ -89,7 +124,7 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, c = M.getOrInsertFunction("tolower", Int32Ty, Int32Ty #if LLVM_VERSION_MAJOR < 5 , - nullptr + NULL #endif ); #if LLVM_VERSION_MAJOR < 9 @@ -104,6 +139,117 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp, for (auto &BB : F) { + if (!myWhitelist.empty()) { + + BasicBlock::iterator IP = BB.getFirstInsertionPt(); + + bool instrumentBlock = false; + + /* Get the current location using debug information. + * For now, just instrument the block if we are not able + * to determine our location. 
*/ + DebugLoc Loc = IP->getDebugLoc(); +#if LLVM_VERSION_MAJOR >= 4 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7) + if (Loc) { + + DILocation *cDILoc = dyn_cast<DILocation>(Loc.getAsMDNode()); + + unsigned int instLine = cDILoc->getLine(); + StringRef instFilename = cDILoc->getFilename(); + + if (instFilename.str().empty()) { + + /* If the original location is empty, try using the inlined location + */ + DILocation *oDILoc = cDILoc->getInlinedAt(); + if (oDILoc) { + + instFilename = oDILoc->getFilename(); + instLine = oDILoc->getLine(); + + } + + } + + (void)instLine; + + /* Continue only if we know where we actually are */ + if (!instFilename.str().empty()) { + + for (std::list<std::string>::iterator it = myWhitelist.begin(); + it != myWhitelist.end(); ++it) { + + /* We don't check for filename equality here because + * filenames might actually be full paths. Instead we + * check that the actual filename ends in the filename + * specified in the list. */ + if (instFilename.str().length() >= it->length()) { + + if (instFilename.str().compare( + instFilename.str().length() - it->length(), + it->length(), *it) == 0) { + + instrumentBlock = true; + break; + + } + + } + + } + + } + + } + +#else + if (!Loc.isUnknown()) { + + DILocation cDILoc(Loc.getAsMDNode(C)); + + unsigned int instLine = cDILoc.getLineNumber(); + StringRef instFilename = cDILoc.getFilename(); + + (void)instLine; + + /* Continue only if we know where we actually are */ + if (!instFilename.str().empty()) { + + for (std::list<std::string>::iterator it = myWhitelist.begin(); + it != myWhitelist.end(); ++it) { + + /* We don't check for filename equality here because + * filenames might actually be full paths. Instead we + * check that the actual filename ends in the filename + * specified in the list. 
*/ + if (instFilename.str().length() >= it->length()) { + + if (instFilename.str().compare( + instFilename.str().length() - it->length(), + it->length(), *it) == 0) { + + instrumentBlock = true; + break; + + } + + } + + } + + } + + } + +#endif + + /* Either we couldn't figure out our location or the location is + * not whitelisted, so we skip instrumentation. */ + if (!instrumentBlock) continue; + + } + for (auto &IN : BB) { CallInst *callInst = nullptr; diff --git a/llvm_mode/split-compares-pass.so.cc b/llvm_mode/split-compares-pass.so.cc index eeac4a55..e16993d6 100644 --- a/llvm_mode/split-compares-pass.so.cc +++ b/llvm_mode/split-compares-pass.so.cc @@ -15,15 +15,34 @@ * limitations under the License. */ +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <list> +#include <string> +#include <fstream> +#include <sys/time.h> + +#include "llvm/Config/llvm-config.h" + #include "llvm/Pass.h" #include "llvm/Support/raw_ostream.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/IR/Verifier.h" #include "llvm/IR/Module.h" #include "llvm/IR/IRBuilder.h" +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) +#include "llvm/IR/Verifier.h" +#include "llvm/IR/DebugInfo.h" +#else +#include "llvm/Analysis/Verifier.h" +#include "llvm/DebugInfo.h" +#define nullptr 0 +#endif using namespace llvm; @@ -35,6 +54,41 @@ class SplitComparesTransform : public ModulePass { static char ID; SplitComparesTransform() : ModulePass(ID) { + char *instWhiteListFilename = getenv("AFL_LLVM_WHITELIST"); + if (instWhiteListFilename) { + + std::string line; + std::ifstream fileStream; + fileStream.open(instWhiteListFilename); + if (!fileStream) report_fatal_error("Unable to open AFL_LLVM_WHITELIST"); + getline(fileStream, line); + while (fileStream) { + + myWhitelist.push_back(line); + getline(fileStream, line); + + } + + } + + } + + 
static bool isBlacklisted(const Function *F) { + + static const char *Blacklist[] = { + + "asan.", "llvm.", "sancov.", "__ubsan_handle_", "ign." + + }; + + for (auto const &BlacklistFunc : Blacklist) { + + if (F->getName().startswith(BlacklistFunc)) { return true; } + + } + + return false; + } bool runOnModule(Module &M) override; @@ -49,6 +103,9 @@ class SplitComparesTransform : public ModulePass { } + protected: + std::list<std::string> myWhitelist; + private: int enableFPSplit; @@ -77,8 +134,121 @@ bool SplitComparesTransform::simplifyCompares(Module &M) { * all integer comparisons with >= and <= predicates to the icomps vector */ for (auto &F : M) { + if (isBlacklisted(&F)) continue; + for (auto &BB : F) { + if (!myWhitelist.empty()) { + + bool instrumentBlock = false; + + BasicBlock::iterator IP = BB.getFirstInsertionPt(); + + /* Get the current location using debug information. + * For now, just instrument the block if we are not able + * to determine our location. */ + DebugLoc Loc = IP->getDebugLoc(); +#if LLVM_VERSION_MAJOR >= 4 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7) + if (Loc) { + + DILocation *cDILoc = dyn_cast<DILocation>(Loc.getAsMDNode()); + + unsigned int instLine = cDILoc->getLine(); + StringRef instFilename = cDILoc->getFilename(); + + if (instFilename.str().empty()) { + + /* If the original location is empty, try using the inlined location + */ + DILocation *oDILoc = cDILoc->getInlinedAt(); + if (oDILoc) { + + instFilename = oDILoc->getFilename(); + instLine = oDILoc->getLine(); + + } + + } + + (void)instLine; + + /* Continue only if we know where we actually are */ + if (!instFilename.str().empty()) { + + for (std::list<std::string>::iterator it = myWhitelist.begin(); + it != myWhitelist.end(); ++it) { + + /* We don't check for filename equality here because + * filenames might actually be full paths. Instead we + * check that the actual filename ends in the filename + * specified in the list. 
*/ + if (instFilename.str().length() >= it->length()) { + + if (instFilename.str().compare( + instFilename.str().length() - it->length(), + it->length(), *it) == 0) { + + instrumentBlock = true; + break; + + } + + } + + } + + } + + } + +#else + if (!Loc.isUnknown()) { + + DILocation cDILoc(Loc.getAsMDNode(C)); + + unsigned int instLine = cDILoc.getLineNumber(); + StringRef instFilename = cDILoc.getFilename(); + + (void)instLine; + + /* Continue only if we know where we actually are */ + if (!instFilename.str().empty()) { + + for (std::list<std::string>::iterator it = myWhitelist.begin(); + it != myWhitelist.end(); ++it) { + + /* We don't check for filename equality here because + * filenames might actually be full paths. Instead we + * check that the actual filename ends in the filename + * specified in the list. */ + if (instFilename.str().length() >= it->length()) { + + if (instFilename.str().compare( + instFilename.str().length() - it->length(), + it->length(), *it) == 0) { + + instrumentBlock = true; + break; + + } + + } + + } + + } + + } + +#endif + + /* Either we couldn't figure out our location or the location is + * not whitelisted, so we skip instrumentation. 
*/ + if (!instrumentBlock) continue; + + } + for (auto &IN : BB) { CmpInst *selectcmpInst = nullptr; @@ -165,7 +335,8 @@ bool SplitComparesTransform::simplifyCompares(Module &M) { * block bb it is now at the position where the old IcmpInst was */ Instruction *icmp_np; icmp_np = CmpInst::Create(Instruction::ICmp, new_pred, op0, op1); - bb->getInstList().insert(bb->getTerminator()->getIterator(), icmp_np); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), + icmp_np); /* create a new basic block which holds the new EQ icmp */ Instruction *icmp_eq; @@ -230,7 +401,8 @@ bool SplitComparesTransform::simplifyCompares(Module &M) { * block bb it is now at the position where the old IcmpInst was */ Instruction *fcmp_np; fcmp_np = CmpInst::Create(Instruction::FCmp, new_pred, op0, op1); - bb->getInstList().insert(bb->getTerminator()->getIterator(), fcmp_np); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), + fcmp_np); /* create a new basic block which holds the new EQ fcmp */ Instruction *fcmp_eq; @@ -351,20 +523,21 @@ bool SplitComparesTransform::simplifyIntSignedness(Module &M) { s_op0 = BinaryOperator::Create(Instruction::LShr, op0, ConstantInt::get(IntType, bitw - 1)); - bb->getInstList().insert(bb->getTerminator()->getIterator(), s_op0); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op0); t_op0 = new TruncInst(s_op0, Int1Ty); - bb->getInstList().insert(bb->getTerminator()->getIterator(), t_op0); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), t_op0); s_op1 = BinaryOperator::Create(Instruction::LShr, op1, ConstantInt::get(IntType, bitw - 1)); - bb->getInstList().insert(bb->getTerminator()->getIterator(), s_op1); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op1); t_op1 = new TruncInst(s_op1, Int1Ty); - bb->getInstList().insert(bb->getTerminator()->getIterator(), t_op1); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), t_op1); /* compare of 
the sign bits */ icmp_sign_bit = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, t_op0, t_op1); - bb->getInstList().insert(bb->getTerminator()->getIterator(), icmp_sign_bit); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), + icmp_sign_bit); /* create a new basic block which is executed if the signedness bit is * different */ @@ -439,6 +612,8 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { LLVMContext &C = M.getContext(); +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 7) const DataLayout &dl = M.getDataLayout(); /* define unions with floating point and (sign, exponent, mantissa) triples @@ -453,6 +628,8 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { } +#endif + std::vector<CmpInst *> fcomps; /* get all EQ, NE, GT, and LT fcmps. if the other two @@ -551,11 +728,11 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { Instruction *b_op0, *b_op1; b_op0 = CastInst::Create(Instruction::BitCast, op0, IntegerType::get(C, op_size)); - bb->getInstList().insert(bb->getTerminator()->getIterator(), b_op0); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), b_op0); b_op1 = CastInst::Create(Instruction::BitCast, op1, IntegerType::get(C, op_size)); - bb->getInstList().insert(bb->getTerminator()->getIterator(), b_op1); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), b_op1); /* isolate signs of value of floating point type */ @@ -566,21 +743,22 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { s_s0 = BinaryOperator::Create(Instruction::LShr, b_op0, ConstantInt::get(b_op0->getType(), op_size - 1)); - bb->getInstList().insert(bb->getTerminator()->getIterator(), s_s0); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_s0); t_s0 = new TruncInst(s_s0, Int1Ty); - bb->getInstList().insert(bb->getTerminator()->getIterator(), t_s0); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), t_s0); s_s1 = 
BinaryOperator::Create(Instruction::LShr, b_op1, ConstantInt::get(b_op1->getType(), op_size - 1)); - bb->getInstList().insert(bb->getTerminator()->getIterator(), s_s1); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_s1); t_s1 = new TruncInst(s_s1, Int1Ty); - bb->getInstList().insert(bb->getTerminator()->getIterator(), t_s1); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), t_s1); /* compare of the sign bits */ icmp_sign_bit = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, t_s0, t_s1); - bb->getInstList().insert(bb->getTerminator()->getIterator(), icmp_sign_bit); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), + icmp_sign_bit); /* create a new basic block which is executed if the signedness bits are * equal */ @@ -612,16 +790,16 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { Instruction::LShr, b_op1, ConstantInt::get(b_op1->getType(), shiftR_exponent)); signequal_bb->getInstList().insert( - signequal_bb->getTerminator()->getIterator(), s_e0); + BasicBlock::iterator(signequal_bb->getTerminator()), s_e0); signequal_bb->getInstList().insert( - signequal_bb->getTerminator()->getIterator(), s_e1); + BasicBlock::iterator(signequal_bb->getTerminator()), s_e1); t_e0 = new TruncInst(s_e0, IntExponentTy); t_e1 = new TruncInst(s_e1, IntExponentTy); signequal_bb->getInstList().insert( - signequal_bb->getTerminator()->getIterator(), t_e0); + BasicBlock::iterator(signequal_bb->getTerminator()), t_e0); signequal_bb->getInstList().insert( - signequal_bb->getTerminator()->getIterator(), t_e1); + BasicBlock::iterator(signequal_bb->getTerminator()), t_e1); if (sizeInBits - precision < exTySizeBytes * 8) { @@ -632,9 +810,9 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { Instruction::And, t_e1, ConstantInt::get(t_e1->getType(), mask_exponent)); signequal_bb->getInstList().insert( - signequal_bb->getTerminator()->getIterator(), m_e0); + 
BasicBlock::iterator(signequal_bb->getTerminator()), m_e0); signequal_bb->getInstList().insert( - signequal_bb->getTerminator()->getIterator(), m_e1); + BasicBlock::iterator(signequal_bb->getTerminator()), m_e1); } else { @@ -662,7 +840,7 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { icmp_exponent = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_UGT, m_e0, m_e1); signequal_bb->getInstList().insert( - signequal_bb->getTerminator()->getIterator(), icmp_exponent); + BasicBlock::iterator(signequal_bb->getTerminator()), icmp_exponent); icmp_exponent_result = BinaryOperator::Create(Instruction::Xor, icmp_exponent, t_s0); break; @@ -671,7 +849,7 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { icmp_exponent = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, m_e0, m_e1); signequal_bb->getInstList().insert( - signequal_bb->getTerminator()->getIterator(), icmp_exponent); + BasicBlock::iterator(signequal_bb->getTerminator()), icmp_exponent); icmp_exponent_result = BinaryOperator::Create(Instruction::Xor, icmp_exponent, t_s0); break; @@ -680,7 +858,8 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { } signequal_bb->getInstList().insert( - signequal_bb->getTerminator()->getIterator(), icmp_exponent_result); + BasicBlock::iterator(signequal_bb->getTerminator()), + icmp_exponent_result); { @@ -704,19 +883,19 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { m_f1 = BinaryOperator::Create( Instruction::And, b_op1, ConstantInt::get(b_op1->getType(), mask_fraction)); - middle_bb->getInstList().insert(middle_bb->getTerminator()->getIterator(), - m_f0); - middle_bb->getInstList().insert(middle_bb->getTerminator()->getIterator(), - m_f1); + middle_bb->getInstList().insert( + BasicBlock::iterator(middle_bb->getTerminator()), m_f0); + middle_bb->getInstList().insert( + BasicBlock::iterator(middle_bb->getTerminator()), m_f1); if (needTrunc) { t_f0 = new TruncInst(m_f0, IntFractionTy); t_f1 = new TruncInst(m_f1, IntFractionTy); 
middle_bb->getInstList().insert( - middle_bb->getTerminator()->getIterator(), t_f0); + BasicBlock::iterator(middle_bb->getTerminator()), t_f0); middle_bb->getInstList().insert( - middle_bb->getTerminator()->getIterator(), t_f1); + BasicBlock::iterator(middle_bb->getTerminator()), t_f1); } else { @@ -732,9 +911,9 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { t_f0 = new TruncInst(b_op0, IntFractionTy); t_f1 = new TruncInst(b_op1, IntFractionTy); middle_bb->getInstList().insert( - middle_bb->getTerminator()->getIterator(), t_f0); + BasicBlock::iterator(middle_bb->getTerminator()), t_f0); middle_bb->getInstList().insert( - middle_bb->getTerminator()->getIterator(), t_f1); + BasicBlock::iterator(middle_bb->getTerminator()), t_f1); } else { @@ -764,7 +943,7 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { icmp_fraction = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_UGT, t_f0, t_f1); middle_bb->getInstList().insert( - middle_bb->getTerminator()->getIterator(), icmp_fraction); + BasicBlock::iterator(middle_bb->getTerminator()), icmp_fraction); icmp_fraction_result = BinaryOperator::Create(Instruction::Xor, icmp_fraction, t_s0); break; @@ -773,7 +952,7 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { icmp_fraction = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, t_f0, t_f1); middle_bb->getInstList().insert( - middle_bb->getTerminator()->getIterator(), icmp_fraction); + BasicBlock::iterator(middle_bb->getTerminator()), icmp_fraction); icmp_fraction_result = BinaryOperator::Create(Instruction::Xor, icmp_fraction, t_s0); break; @@ -781,8 +960,8 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) { } - middle_bb->getInstList().insert(middle_bb->getTerminator()->getIterator(), - icmp_fraction_result); + middle_bb->getInstList().insert( + BasicBlock::iterator(middle_bb->getTerminator()), icmp_fraction_result); PHINode *PN = PHINode::Create(Int1Ty, 3, ""); @@ -919,18 +1098,21 @@ size_t 
SplitComparesTransform::splitIntCompares(Module &M, unsigned bitw) { s_op0 = BinaryOperator::Create(Instruction::LShr, op0, ConstantInt::get(OldIntType, bitw / 2)); - bb->getInstList().insert(bb->getTerminator()->getIterator(), s_op0); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op0); op0_high = new TruncInst(s_op0, NewIntType); - bb->getInstList().insert(bb->getTerminator()->getIterator(), op0_high); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), + op0_high); s_op1 = BinaryOperator::Create(Instruction::LShr, op1, ConstantInt::get(OldIntType, bitw / 2)); - bb->getInstList().insert(bb->getTerminator()->getIterator(), s_op1); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), s_op1); op1_high = new TruncInst(s_op1, NewIntType); - bb->getInstList().insert(bb->getTerminator()->getIterator(), op1_high); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), + op1_high); icmp_high = CmpInst::Create(Instruction::ICmp, pred, op0_high, op1_high); - bb->getInstList().insert(bb->getTerminator()->getIterator(), icmp_high); + bb->getInstList().insert(BasicBlock::iterator(bb->getTerminator()), + icmp_high); /* now we have to destinguish between == != and > < */ if (pred == CmpInst::ICMP_EQ || pred == CmpInst::ICMP_NE) { @@ -1076,13 +1258,19 @@ bool SplitComparesTransform::runOnModule(Module &M) { << "bit: " << splitIntCompares(M, bitw) << " splitted\n"; bitw >>= 1; +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 7) [[clang::fallthrough]]; /*FALLTHRU*/ /* FALLTHROUGH */ +#endif case 32: errs() << "Split-integer-compare-pass " << bitw << "bit: " << splitIntCompares(M, bitw) << " splitted\n"; bitw >>= 1; +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 7) [[clang::fallthrough]]; /*FALLTHRU*/ /* FALLTHROUGH */ +#endif case 16: errs() << "Split-integer-compare-pass " << bitw << "bit: " << splitIntCompares(M, bitw) << " 
splitted\n"; diff --git a/llvm_mode/split-switches-pass.so.cc b/llvm_mode/split-switches-pass.so.cc index 2743a71a..9101dc26 100644 --- a/llvm_mode/split-switches-pass.so.cc +++ b/llvm_mode/split-switches-pass.so.cc @@ -18,6 +18,13 @@ #include <stdlib.h> #include <unistd.h> +#include <list> +#include <string> +#include <fstream> +#include <sys/time.h> + +#include "llvm/Config/llvm-config.h" + #include "llvm/ADT/Statistic.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/LegacyPassManager.h" @@ -26,10 +33,20 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/IR/Verifier.h" #include "llvm/Pass.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/IRBuilder.h" +#if LLVM_VERSION_MAJOR > 3 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4) +#include "llvm/IR/Verifier.h" +#include "llvm/IR/DebugInfo.h" +#else +#include "llvm/Analysis/Verifier.h" +#include "llvm/DebugInfo.h" +#define nullptr 0 +#endif + #include <set> using namespace llvm; @@ -42,6 +59,41 @@ class SplitSwitchesTransform : public ModulePass { static char ID; SplitSwitchesTransform() : ModulePass(ID) { + char *instWhiteListFilename = getenv("AFL_LLVM_WHITELIST"); + if (instWhiteListFilename) { + + std::string line; + std::ifstream fileStream; + fileStream.open(instWhiteListFilename); + if (!fileStream) report_fatal_error("Unable to open AFL_LLVM_WHITELIST"); + getline(fileStream, line); + while (fileStream) { + + myWhitelist.push_back(line); + getline(fileStream, line); + + } + + } + + } + + static bool isBlacklisted(const Function *F) { + + static const char *Blacklist[] = { + + "asan.", "llvm.", "sancov.", "__ubsan_handle_", "ign." 
+ + }; + + for (auto const &BlacklistFunc : Blacklist) { + + if (F->getName().startswith(BlacklistFunc)) { return true; } + + } + + return false; + } bool runOnModule(Module &M) override; @@ -71,6 +123,9 @@ class SplitSwitchesTransform : public ModulePass { typedef std::vector<CaseExpr> CaseVector; + protected: + std::list<std::string> myWhitelist; + private: bool splitSwitches(Module &M); bool transformCmps(Module &M, const bool processStrcmp, @@ -96,7 +151,7 @@ BasicBlock *SplitSwitchesTransform::switchConvert( IntegerType * ByteType = IntegerType::get(OrigBlock->getContext(), 8); unsigned BytesInValue = bytesChecked.size(); std::vector<uint8_t> setSizes; - std::vector<std::set<uint8_t>> byteSets(BytesInValue, std::set<uint8_t>()); + std::vector<std::set<uint8_t> > byteSets(BytesInValue, std::set<uint8_t>()); assert(ValTypeBitWidth >= 8 && ValTypeBitWidth <= 64); @@ -169,8 +224,25 @@ BasicBlock *SplitSwitchesTransform::switchConvert( NewNode->getInstList().push_back(Comp); bytesChecked[smallestIndex] = true; - if (std::all_of(bytesChecked.begin(), bytesChecked.end(), - [](bool b) { return b; })) { + bool allBytesAreChecked = true; + + for (std::vector<bool>::iterator BCI = bytesChecked.begin(), + E = bytesChecked.end(); + BCI != E; ++BCI) { + + if (!*BCI) { + + allBytesAreChecked = false; + break; + + } + + } + + // if (std::all_of(bytesChecked.begin(), bytesChecked.end(), + // [](bool b) { return b; })) { + + if (allBytesAreChecked) { assert(Cases.size() == 1); BranchInst::Create(Cases[0].BB, NewDefault, Comp, NewNode); @@ -262,16 +334,132 @@ BasicBlock *SplitSwitchesTransform::switchConvert( bool SplitSwitchesTransform::splitSwitches(Module &M) { +#if (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 7) + LLVMContext &C = M.getContext(); +#endif + std::vector<SwitchInst *> switches; /* iterate over all functions, bbs and instruction and add * all switches to switches vector for later processing */ for (auto &F : M) { + if (isBlacklisted(&F)) continue; + for 
(auto &BB : F) { SwitchInst *switchInst = nullptr; + if (!myWhitelist.empty()) { + + bool instrumentBlock = false; + BasicBlock::iterator IP = BB.getFirstInsertionPt(); + + /* Get the current location using debug information. + * For now, just instrument the block if we are not able + * to determine our location. */ + DebugLoc Loc = IP->getDebugLoc(); +#if LLVM_VERSION_MAJOR >= 4 || \ + (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7) + if (Loc) { + + DILocation *cDILoc = dyn_cast<DILocation>(Loc.getAsMDNode()); + + unsigned int instLine = cDILoc->getLine(); + StringRef instFilename = cDILoc->getFilename(); + + if (instFilename.str().empty()) { + + /* If the original location is empty, try using the inlined location + */ + DILocation *oDILoc = cDILoc->getInlinedAt(); + if (oDILoc) { + + instFilename = oDILoc->getFilename(); + instLine = oDILoc->getLine(); + + } + + } + + (void)instLine; + + /* Continue only if we know where we actually are */ + if (!instFilename.str().empty()) { + + for (std::list<std::string>::iterator it = myWhitelist.begin(); + it != myWhitelist.end(); ++it) { + + /* We don't check for filename equality here because + * filenames might actually be full paths. Instead we + * check that the actual filename ends in the filename + * specified in the list. 
*/ + if (instFilename.str().length() >= it->length()) { + + if (instFilename.str().compare( + instFilename.str().length() - it->length(), + it->length(), *it) == 0) { + + instrumentBlock = true; + break; + + } + + } + + } + + } + + } + +#else + if (!Loc.isUnknown()) { + + DILocation cDILoc(Loc.getAsMDNode(C)); + + unsigned int instLine = cDILoc.getLineNumber(); + StringRef instFilename = cDILoc.getFilename(); + + (void)instLine; + + /* Continue only if we know where we actually are */ + if (!instFilename.str().empty()) { + + for (std::list<std::string>::iterator it = myWhitelist.begin(); + it != myWhitelist.end(); ++it) { + + /* We don't check for filename equality here because + * filenames might actually be full paths. Instead we + * check that the actual filename ends in the filename + * specified in the list. */ + if (instFilename.str().length() >= it->length()) { + + if (instFilename.str().compare( + instFilename.str().length() - it->length(), + it->length(), *it) == 0) { + + instrumentBlock = true; + break; + + } + + } + + } + + } + + } + +#endif + + /* Either we couldn't figure out our location or the location is + * not whitelisted, so we skip instrumentation. */ + if (!instrumentBlock) continue; + + } + if ((switchInst = dyn_cast<SwitchInst>(BB.getTerminator()))) { if (switchInst->getNumCases() < 1) continue; @@ -313,8 +501,7 @@ bool SplitSwitchesTransform::splitSwitches(Module &M) { * if the default block is set as an unreachable we avoid creating one * because will never be a valid target.*/ BasicBlock *NewDefault = nullptr; - NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault"); - NewDefault->insertInto(F, Default); + NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault", F, Default); BranchInst::Create(Default, NewDefault); /* Prepare cases vector. 
*/ diff --git a/src/afl-analyze.c b/src/afl-analyze.c index 3d4e636e..3de8c037 100644 --- a/src/afl-analyze.c +++ b/src/afl-analyze.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse <mh@mh-sec.de>, + Now maintained by Marc Heuse <mh@mh-sec.de>, Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and Andrea Fioraldi <andreafioraldi@gmail.com> diff --git a/src/afl-as.c b/src/afl-as.c index 77ac2f97..8d689385 100644 --- a/src/afl-as.c +++ b/src/afl-as.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse <mh@mh-sec.de>, + Now maintained by Marc Heuse <mh@mh-sec.de>, Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and Andrea Fioraldi <andreafioraldi@gmail.com> diff --git a/src/afl-common.c b/src/afl-common.c index 8c2f2b9a..6cb97cdf 100644 --- a/src/afl-common.c +++ b/src/afl-common.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse <mh@mh-sec.de>, + Now maintained by Marc Heuse <mh@mh-sec.de>, Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and Andrea Fioraldi <andreafioraldi@gmail.com> diff --git a/src/afl-forkserver.c b/src/afl-forkserver.c index de50c73c..77e1d648 100644 --- a/src/afl-forkserver.c +++ b/src/afl-forkserver.c @@ -6,7 +6,7 @@ Forkserver design by Jann Horn <jannhorn@googlemail.com> - Now maintained by by Marc Heuse <mh@mh-sec.de>, + Now maintained by Marc Heuse <mh@mh-sec.de>, Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and Andrea Fioraldi <andreafioraldi@gmail.com> diff --git a/src/afl-fuzz-bitmap.c b/src/afl-fuzz-bitmap.c index 3f8256b4..3ffda284 100644 --- a/src/afl-fuzz-bitmap.c +++ b/src/afl-fuzz-bitmap.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse <mh@mh-sec.de>, + Now maintained by Marc Heuse <mh@mh-sec.de>, Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and Andrea Fioraldi <andreafioraldi@gmail.com> diff --git a/src/afl-fuzz-extras.c b/src/afl-fuzz-extras.c index fcc7749d..6c6dc28c 100644 --- a/src/afl-fuzz-extras.c 
+++ b/src/afl-fuzz-extras.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse <mh@mh-sec.de>, + Now maintained by Marc Heuse <mh@mh-sec.de>, Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and Andrea Fioraldi <andreafioraldi@gmail.com> diff --git a/src/afl-fuzz-globals.c b/src/afl-fuzz-globals.c index 863ee9ad..e92558d3 100644 --- a/src/afl-fuzz-globals.c +++ b/src/afl-fuzz-globals.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse <mh@mh-sec.de>, + Now maintained by Marc Heuse <mh@mh-sec.de>, Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and Andrea Fioraldi <andreafioraldi@gmail.com> diff --git a/src/afl-fuzz-init.c b/src/afl-fuzz-init.c index 5fe3689e..6efa6227 100644 --- a/src/afl-fuzz-init.c +++ b/src/afl-fuzz-init.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse <mh@mh-sec.de>, + Now maintained by Marc Heuse <mh@mh-sec.de>, Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and Andrea Fioraldi <andreafioraldi@gmail.com> @@ -184,11 +184,21 @@ void bind_to_free_cpu(void) { "For this platform we do not have free CPU binding code yet. If possible, please supply a PR to https://github.com/vanhauser-thc/AFLplusplus" #endif - for (i = 0; i < cpu_core_count; ++i) - if (!cpu_used[i]) break; + size_t cpu_start = 0; + try: +#ifndef __ANDROID__ + for (i = cpu_start; i < cpu_core_count; i++) + if (!cpu_used[i]) break; if (i == cpu_core_count) { +#else + for (i = cpu_core_count - cpu_start - 1; i > -1; i--) + if (!cpu_used[i]) break; + if (i == -1) { + +#endif + SAYF("\n" cLRD "[-] " cRST "Uh-oh, looks like all %d CPU cores on your system are allocated to\n" " other instances of afl-fuzz (or similar CPU-locked tasks). 
" @@ -197,12 +207,11 @@ void bind_to_free_cpu(void) { "you are\n" " absolutely sure, you can set AFL_NO_AFFINITY and try again.\n", cpu_core_count); - FATAL("No more free CPU cores"); } - OKF("Found a free CPU core, binding to #%u.", i); + OKF("Found a free CPU core, try binding to #%u.", i); cpu_aff = i; @@ -212,22 +221,31 @@ void bind_to_free_cpu(void) { #elif defined(__NetBSD__) c = cpuset_create(); if (c == NULL) PFATAL("cpuset_create failed"); - cpuset_set(i, c); #endif #if defined(__linux__) - if (sched_setaffinity(0, sizeof(c), &c)) PFATAL("sched_setaffinity failed"); + if (sched_setaffinity(0, sizeof(c), &c)) { + + if (cpu_start == cpu_core_count) + PFATAL("sched_setaffinity failed for CPU %d, exit", i); + WARNF("sched_setaffinity failed to CPU %d, trying next CPU", i); + cpu_start++; + goto try + ; + + } + #elif defined(__FreeBSD__) || defined(__DragonFly__) if (pthread_setaffinity_np(pthread_self(), sizeof(c), &c)) PFATAL("pthread_setaffinity failed"); #elif defined(__NetBSD__) - if (pthread_setaffinity_np(pthread_self(), cpuset_size(c), c)) - PFATAL("pthread_setaffinity failed"); +if (pthread_setaffinity_np(pthread_self(), cpuset_size(c), c)) + PFATAL("pthread_setaffinity failed"); - cpuset_destroy(c); +cpuset_destroy(c); #else - // this will need something for other platforms +// this will need something for other platforms #endif } @@ -1940,17 +1958,17 @@ void check_binary(u8* fname) { } - if ((qemu_mode || unicorn_mode) && + if ((qemu_mode) && memmem(f_data, f_len, SHM_ENV_VAR, strlen(SHM_ENV_VAR) + 1)) { SAYF("\n" cLRD "[-] " cRST "This program appears to be instrumented with afl-gcc, but is being " "run in\n" - " QEMU or Unicorn mode (-Q or -U). This is probably not what you " + " QEMU mode (-Q). 
This is probably not what you " "want -\n" " this setup will be slow and offer no practical benefits.\n"); - FATAL("Instrumentation found in -Q or -U mode"); + FATAL("Instrumentation found in -Q mode"); } diff --git a/src/afl-fuzz-misc.c b/src/afl-fuzz-misc.c index b8f376be..0da0cb0a 100644 --- a/src/afl-fuzz-misc.c +++ b/src/afl-fuzz-misc.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse <mh@mh-sec.de>, + Now maintained by Marc Heuse <mh@mh-sec.de>, Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and Andrea Fioraldi <andreafioraldi@gmail.com> diff --git a/src/afl-fuzz-one.c b/src/afl-fuzz-one.c index 4c3a5b95..199b3ea8 100644 --- a/src/afl-fuzz-one.c +++ b/src/afl-fuzz-one.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse <mh@mh-sec.de>, + Now maintained by Marc Heuse <mh@mh-sec.de>, Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and Andrea Fioraldi <andreafioraldi@gmail.com> diff --git a/src/afl-fuzz-python.c b/src/afl-fuzz-python.c index f1cdecde..f06c8e25 100644 --- a/src/afl-fuzz-python.c +++ b/src/afl-fuzz-python.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse <mh@mh-sec.de>, + Now maintained by Marc Heuse <mh@mh-sec.de>, Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and Andrea Fioraldi <andreafioraldi@gmail.com> diff --git a/src/afl-fuzz-queue.c b/src/afl-fuzz-queue.c index 1b51e3aa..0880de75 100644 --- a/src/afl-fuzz-queue.c +++ b/src/afl-fuzz-queue.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse <mh@mh-sec.de>, + Now maintained by Marc Heuse <mh@mh-sec.de>, Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and Andrea Fioraldi <andreafioraldi@gmail.com> diff --git a/src/afl-fuzz-run.c b/src/afl-fuzz-run.c index 78708402..79573932 100644 --- a/src/afl-fuzz-run.c +++ b/src/afl-fuzz-run.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse <mh@mh-sec.de>, + Now maintained by Marc Heuse 
<mh@mh-sec.de>, Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and Andrea Fioraldi <andreafioraldi@gmail.com> diff --git a/src/afl-fuzz-stats.c b/src/afl-fuzz-stats.c index 14ffd41a..d00c6750 100644 --- a/src/afl-fuzz-stats.c +++ b/src/afl-fuzz-stats.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse <mh@mh-sec.de>, + Now maintained by Marc Heuse <mh@mh-sec.de>, Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and Andrea Fioraldi <andreafioraldi@gmail.com> diff --git a/src/afl-fuzz.c b/src/afl-fuzz.c index 50356315..74bc0ee2 100644 --- a/src/afl-fuzz.c +++ b/src/afl-fuzz.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse <mh@mh-sec.de>, + Now maintained by Marc Heuse <mh@mh-sec.de>, Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and Andrea Fioraldi <andreafioraldi@gmail.com> diff --git a/src/afl-gcc.c b/src/afl-gcc.c index 301e2034..e46fe5cd 100644 --- a/src/afl-gcc.c +++ b/src/afl-gcc.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse <mh@mh-sec.de>, + Now maintained by Marc Heuse <mh@mh-sec.de>, Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and Andrea Fioraldi <andreafioraldi@gmail.com> diff --git a/src/afl-gotcpu.c b/src/afl-gotcpu.c index 9a56159c..5be30238 100644 --- a/src/afl-gotcpu.c +++ b/src/afl-gotcpu.c @@ -4,7 +4,7 @@ Originally written by Michal Zalewski - Now maintained by by Marc Heuse <mh@mh-sec.de>, + Now maintained by Marc Heuse <mh@mh-sec.de>, Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and Andrea Fioraldi <andreafioraldi@gmail.com> diff --git a/src/afl-sharedmem.c b/src/afl-sharedmem.c index f8ed4e51..3f552881 100644 --- a/src/afl-sharedmem.c +++ b/src/afl-sharedmem.c @@ -6,7 +6,7 @@ Forkserver design by Jann Horn <jannhorn@googlemail.com> - Now maintained by by Marc Heuse <mh@mh-sec.de>, + Now maintained by Marc Heuse <mh@mh-sec.de>, Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and Andrea Fioraldi <andreafioraldi@gmail.com> diff --git a/src/afl-showmap.c 
b/src/afl-showmap.c index 8c899c9d..b9da3208 100644 --- a/src/afl-showmap.c +++ b/src/afl-showmap.c @@ -6,7 +6,7 @@ Forkserver design by Jann Horn <jannhorn@googlemail.com> - Now maintained by by Marc Heuse <mh@mh-sec.de>, + Now maintained by Marc Heuse <mh@mh-sec.de>, Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and Andrea Fioraldi <andreafioraldi@gmail.com> diff --git a/src/afl-tmin.c b/src/afl-tmin.c index 3e33b72f..7ce0ccaa 100644 --- a/src/afl-tmin.c +++ b/src/afl-tmin.c @@ -6,7 +6,7 @@ Forkserver design by Jann Horn <jannhorn@googlemail.com> - Now maintained by by Marc Heuse <mh@mh-sec.de>, + Now maintained by Marc Heuse <mh@mh-sec.de>, Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and Andrea Fioraldi <andreafioraldi@gmail.com> diff --git a/test/test-unsigaction.c b/test/test-unsigaction.c new file mode 100644 index 00000000..1a5e4b26 --- /dev/null +++ b/test/test-unsigaction.c @@ -0,0 +1,25 @@ +#include <signal.h> /* sigemptyset(), sigaction(), kill(), SIGUSR1 */ +#include <stdlib.h> /* exit() */ +#include <unistd.h> /* getpid() */ +#include <errno.h> /* errno */ +#include <stdio.h> /* fprintf() */ + +static void mysig_handler(int sig) +{ + exit(2); +} + +int main() +{ + /* setup sig handler */ + struct sigaction sa; + sa.sa_handler = mysig_handler; + sigemptyset(&sa.sa_mask); + sa.sa_flags = 0; + if (sigaction(SIGCHLD, &sa, NULL)) { + fprintf(stderr, "could not set signal handler %d, aborted\n", errno); + exit(1); + } + kill(getpid(), SIGCHLD); + return 0; +} diff --git a/test/test.sh b/test/test.sh index 8f40773c..9676d22d 100755 --- a/test/test.sh +++ b/test/test.sh @@ -150,13 +150,13 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" && { } echo 000000000000000000000000 > in/in2 mkdir -p in2 - ../afl-cmin -i in -o in2 -- ./test-instr.plain > /dev/null 2>&1 + ../afl-cmin -i in -o in2 -- ./test-instr.plain > /dev/null CNT=`ls in2/ | wc -l` case "$CNT" in -1| *1) $ECHO "$GREEN[+] afl-cmin correctly minimized testcase numbers" ;; -*) $ECHO "$RED[!] 
afl-cmin did not correctly minimize testcase numbers" - CODE=1 - ;; + *1) $ECHO "$GREEN[+] afl-cmin correctly minimized the number of testcases" ;; + *) $ECHO "$RED[!] afl-cmin did not correctly minimize the number of testcases" + CODE=1 + ;; esac ../afl-tmin -i in/in2 -o in2/in2 -- ./test-instr.plain > /dev/null 2>&1 SIZE=`ls -l in2/in2 2> /dev/null | awk '{print$5}'` @@ -176,14 +176,16 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" && { $ECHO "$YELLOW[-] not an intel platform, cannot test afl-gcc" } -$ECHO "$BLUE[*] Testing: llvm_mode" +$ECHO "$BLUE[*] Testing: llvm_mode, afl-showmap, afl-fuzz, afl-cmin and afl-tmin" test -e ../afl-clang-fast -a -e ../split-switches-pass.so && { # on FreeBSD need to set AFL_CC - if which clang >/dev/null; then - export AFL_CC=`which clang` - else - export AFL_CC=`$LLVM_CONFIG --bindir`/clang - fi + test `uname -s` = 'FreeBSD' && { + if which clang >/dev/null; then + export AFL_CC=`which clang` + else + export AFL_CC=`$LLVM_CONFIG --bindir`/clang + fi + } ../afl-clang-fast -o test-instr.plain ../test-instr.c > /dev/null 2>&1 AFL_HARDEN=1 ../afl-clang-fast -o test-compcov.harden test-compcov.c > /dev/null 2>&1 test -e test-instr.plain && { @@ -251,6 +253,26 @@ test -e ../afl-clang-fast -a -e ../split-switches-pass.so && { $ECHO "$RED[!] afl-fuzz is not working correctly with llvm_mode" CODE=1 } + test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" || { + echo 000000000000000000000000 > in/in2 + mkdir -p in2 + ../afl-cmin -i in -o in2 -- ./test-instr.plain > /dev/null + CNT=`ls in2/ | wc -l` + case "$CNT" in + *1) $ECHO "$GREEN[+] afl-cmin correctly minimized the number of testcases" ;; + *) $ECHO "$RED[!] 
afl-cmin did not correctly minimize the number of testcases" + CODE=1 + ;; + esac + ../afl-tmin -i in/in2 -o in2/in2 -- ./test-instr.plain > /dev/null 2>&1 + SIZE=`ls -l in2/in2 2> /dev/null | awk '{print$5}'` + test "$SIZE" = 1 && $ECHO "$GREEN[+] afl-tmin correctly minimized the testcase" + test "$SIZE" = 1 || { + $ECHO "$RED[!] afl-tmin did incorrectly minimize the testcase to $SIZE" + CODE=1 + } + rm -rf in2 + } rm -rf in out errors } rm -f test-instr.plain @@ -334,7 +356,7 @@ test -e ../afl-gcc-fast -a -e ../afl-gcc-rt.o && { $ECHO "$GREEN[+] gcc_plugin run reported $TUPLES instrumented locations which is fine" } || { $ECHO "$RED[!] gcc_plugin instrumentation produces a weird number of instrumented locations: $TUPLES" - $ECHO "$YELLOW[-] the gcc_plugin instrumentation issue is not flagged as an error because travis builds would all fail otherwise :-(" + $ECHO "$YELLOW[-] this is a known issue in gcc, not afl++. It is not flagged as an error because travis builds would all fail otherwise :-(" #CODE=1 } } @@ -457,6 +479,15 @@ test -e ../libdislocator.so && { } rm -f test-compcov test -e ../libradamsa.so && { + # on FreeBSD need to set AFL_CC + + test `uname -s` = 'FreeBSD' && { + if which clang >/dev/null; then + export AFL_CC=`which clang` + else + export AFL_CC=`$LLVM_CONFIG --bindir`/clang + fi + } test -e test-instr.plain || ../afl-clang-fast -o test-instr.plain ../test-instr.c > /dev/null 2>&1 test -e test-instr.plain || ../afl-gcc-fast -o test-instr.plain ../test-instr.c > /dev/null 2>&1 test -e test-instr.plain || ../${AFL_GCC} -o test-instr.plain ../test-instr.c > /dev/null 2>&1 @@ -560,8 +591,64 @@ test -e ../afl-qemu-trace && { CODE=1 exit 1 } - $ECHO "$YELLOW[-] we need a test case for qemu_mode unsigaction library" rm -rf in out errors + test -e ../qemu_mode/unsigaction/unsigaction32.so && { + ${AFL_CC} -o test-unsigaction32 -m32 test-unsigaction.c >> errors 2>&1 && { + ./test-unsigaction32 + RETVAL_NORMAL32=$? 
+ LD_PRELOAD=../qemu_mode/unsigaction/unsigaction32.so ./test-unsigaction32 + RETVAL_LIBUNSIGACTION32=$? + test $RETVAL_NORMAL32 = "2" -a $RETVAL_LIBUNSIGACTION32 = "0" && { + $ECHO "$GREEN[+] qemu_mode unsigaction library (32 bit) ignores signals" + } || { + test $RETVAL_NORMAL32 != "2" && { + $ECHO "$RED[!] cannot trigger signal in test program (32 bit)" + } + test $RETVAL_LIBUNSIGACTION32 != "0" && { + $ECHO "$RED[!] signal in test program (32 bit) is not ignored with unsigaction" + } + CODE=1 + } + } || { + echo CUT------------------------------------------------------------------CUT + cat errors + echo CUT------------------------------------------------------------------CUT + $ECHO "$RED[!] cannot compile test program (32 bit) for unsigaction library" + CODE=1 + } + } || { + $ECHO "$YELLOW[-] we cannot test qemu_mode unsigaction library (32 bit) because it is not present" + INCOMPLETE=1 + } + test -e ../qemu_mode/unsigaction/unsigaction64.so && { + ${AFL_CC} -o test-unsigaction64 -m64 test-unsigaction.c >> errors 2>&1 && { + ./test-unsigaction64 + RETVAL_NORMAL64=$? + LD_PRELOAD=../qemu_mode/unsigaction/unsigaction64.so ./test-unsigaction64 + RETVAL_LIBUNSIGACTION64=$? + test $RETVAL_NORMAL64 = "2" -a $RETVAL_LIBUNSIGACTION64 = "0" && { + $ECHO "$GREEN[+] qemu_mode unsigaction library (64 bit) ignores signals" + } || { + test $RETVAL_NORMAL64 != "2" && { + $ECHO "$RED[!] cannot trigger signal in test program (64 bit)" + } + test $RETVAL_LIBUNSIGACTION64 != "0" && { + $ECHO "$RED[!] signal in test program (64 bit) is not ignored with unsigaction" + } + CODE=1 + } + } || { + echo CUT------------------------------------------------------------------CUT + cat errors + echo CUT------------------------------------------------------------------CUT + $ECHO "$RED[!] 
cannot compile test program (64 bit) for unsigaction library" + CODE=1 + } + } || { + $ECHO "$YELLOW[-] we cannot test qemu_mode unsigaction library (64 bit) because it is not present" + INCOMPLETE=1 + } + rm -rf errors test-unsigaction32 test-unsigaction64 } } || { $ECHO "$RED[!] gcc compilation of test targets failed - what is going on??" diff --git a/unicorn_mode/samples/c/a.out b/unicorn_mode/samples/c/a.out deleted file mode 100644 index 176c25e1..00000000 --- a/unicorn_mode/samples/c/a.out +++ /dev/null Binary files differdiff --git a/unicorn_mode/samples/c/harness.c b/unicorn_mode/samples/c/harness.c index a987b8e1..31416ae2 100644 --- a/unicorn_mode/samples/c/harness.c +++ b/unicorn_mode/samples/c/harness.c @@ -28,35 +28,56 @@ #include <unicorn/unicorn.h> // Path to the file containing the binary to emulate -#define BINARY_FILE ("simple_target_x86_64") +#define BINARY_FILE ("persistent_target_x86_64") // Memory map for the code to be tested // Arbitrary address where code to test will be loaded -#define BASE_ADDRESS (0x100000) -#define CODE_ADDRESS (0x101119) -#define END_ADDRESS (0x1011d7) +static const int64_t BASE_ADDRESS = 0x100000; +static const int64_t CODE_ADDRESS = 0x101139; +static const int64_t END_ADDRESS = 0x10120d; // Address of the stack (Some random address again) -#define STACK_ADDRESS (((int64_t) 0x01) << 58) +static const int64_t STACK_ADDRESS = (((int64_t) 0x01) << 58); // Size of the stack (arbitrarily chosen, just make it big enough) -#define STACK_SIZE (0x10000) +static const int64_t STACK_SIZE = 0x10000; // Location where the input will be placed (make sure the emulated program knows this somehow, too ;) ) -#define INPUT_LOCATION (0x10000) +static const int64_t INPUT_LOCATION = 0x10000; // Inside the location, we have an ofset in our special case -#define INPUT_OFFSET (0x16) +static const int64_t INPUT_OFFSET = 0x16; // Maximum allowable size of mutated data from AFL -#define INPUT_SIZE_MAX (0x10000) +static const int64_t 
INPUT_SIZE_MAX = 0x10000; // Alignment for unicorn mappings (seems to be needed) -#define ALIGNMENT ((uint64_t) 0x1000) +static const int64_t ALIGNMENT = 0x1000; + +// In our special case, we emulate main(), so argc is needed. +static const uint64_t EMULATED_ARGC = 2; + +// The return from our fake strlen +static size_t current_input_len = 0; static void hook_block(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) { printf(">>> Tracing basic block at 0x%"PRIx64 ", block size = 0x%x\n", address, size); } -static void hook_code(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) -{ +static void hook_code(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) { printf(">>> Tracing instruction at 0x%"PRIx64 ", instruction size = 0x%x\n", address, size); } +/* +The sample uses strlen, since we don't have a loader or libc, we'll fake it. +We know the strlen will return the lenght of argv[1] that we just planted. +It will be a lot faster than an actual strlen for this specific purpose. 
+*/ +static void hook_strlen(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) { + //Hook + //116b: e8 c0 fe ff ff call 1030 <strlen@plt> + // We place the return at RAX + //printf("Strlen hook at addr 0x%lx (size: 0x%x), result: %ld\n", address, size, current_input_len); + uc_reg_write(uc, UC_X86_REG_RAX, &current_input_len); + // We skip the actual call by updating RIP + uint64_t next_addr = address + size; + uc_reg_write(uc, UC_X86_REG_RIP, &next_addr); +} + /* Unicorn page needs to be 0x1000 aligned, apparently */ static uint64_t pad(uint64_t size) { if (size % ALIGNMENT == 0) return size; @@ -99,11 +120,25 @@ static bool place_input_callback( void *data ){ // printf("Placing input with len %ld to %x\n", input_len, DATA_ADDRESS); - if (input_len >= INPUT_SIZE_MAX - INPUT_OFFSET) { - // Test input too long, ignore this testcase + if (input_len < 1 || input_len >= INPUT_SIZE_MAX - INPUT_OFFSET) { + // Test input too short or too long, ignore this testcase return false; } + + // For persistent mode, we have to set up stack and memory each time. + uc_reg_write(uc, UC_X86_REG_RIP, &CODE_ADDRESS); // Set the instruction pointer back + // Set up the function parameters accordingly RSI, RDI (see calling convention/disassembly) + uc_reg_write(uc, UC_X86_REG_RSI, &INPUT_LOCATION); // argv + uc_reg_write(uc, UC_X86_REG_RDI, &EMULATED_ARGC); // argc == 2 + + // We need a valid c string, make sure it never goes out of bounds. + input[input_len-1] = '\0'; + // Write the testcase to unicorn.
uc_mem_write(uc, INPUT_LOCATION + INPUT_OFFSET, input, input_len); + + // store input_len for the faux strlen hook + current_input_len = input_len; + return true; } @@ -187,12 +222,7 @@ int main(int argc, char **argv, char **envp) { uc_mem_write(uc, 0x10008, "\x16\x00\x01", 3); // little endian of 0x10016, see above - // Set up the function parameters accordingly RSI, RDI (see calling convention/disassembly) - uint64_t input_location = INPUT_LOCATION; - uc_reg_write(uc, UC_X86_REG_RSI, &input_location); // argv - uint64_t emulated_argc = 2; - uc_reg_write(uc, UC_X86_REG_RDI, &emulated_argc); // argc == 2 - + // If we want tracing output, set the callbacks here if (tracing) { // tracing all basic blocks with customized callback @@ -200,6 +230,11 @@ int main(int argc, char **argv, char **envp) { uc_hook_add(uc, &hooks[1], UC_HOOK_CODE, hook_code, NULL, BASE_ADDRESS, BASE_ADDRESS + len - 1); } + // Add our strlen hook (for this specific testcase only) + int strlen_hook_pos = BASE_ADDRESS + 0x116b; + uc_hook strlen_hook; + uc_hook_add(uc, &strlen_hook, UC_HOOK_CODE, hook_strlen, NULL, strlen_hook_pos, strlen_hook_pos); + printf("Starting to fuzz :)\n"); fflush(stdout); @@ -211,9 +246,9 @@ int main(int argc, char **argv, char **envp) { &end_address, // Where to exit (this is an array) 1, // Count of end addresses NULL, // Optional calback to run after each exec - false, - 1, // For persistent mode: How many rounds to run - NULL + false, // true, if the optional callback should be run also for non-crashes + 100, // For persistent mode: How many rounds to run + NULL // additional data pointer ); switch(afl_ret) { case UC_AFL_RET_ERROR: diff --git a/unicorn_mode/samples/c/persistent_target.c b/unicorn_mode/samples/c/persistent_target.c new file mode 100644 index 00000000..5b866f86 --- /dev/null +++ b/unicorn_mode/samples/c/persistent_target.c @@ -0,0 +1,39 @@ +/* + * Sample target file to test afl-unicorn fuzzing capabilities. 
+ * This is a very trivial example that will crash pretty easily + * in several different exciting ways. + * + * Input is assumed to come from a buffer located at DATA_ADDRESS + * (0x00300000), so make sure that your Unicorn emulation of this + * puts user data there. + * + * Written by Nathan Voss <njvoss99@gmail.com> + * Adapted by Lukas Seidel <seidel.1@campus.tu-berlin.de> + */ +#include <stdint.h> +#include <string.h> + + +int main(int argc, char** argv) { + if (argc < 2) return -1; + + char *data_buf = argv[1]; + uint64_t data_len = strlen(data_buf); + if (data_len < 20) return -2; + + for (; data_len --> 0 ;) { + if (data_len >= 18) continue; + if (data_len > 2 && data_len < 18) { + ((char *)data_len)[(uint64_t)data_buf] = data_buf[data_len + 1]; + } else if (data_buf[9] == 0x90 && data_buf[10] != 0x00 && data_buf[11] == 0x90) { + // Cause a crash if data[10] is not zero, but [9] and [11] are zero + unsigned char invalid_read = *(unsigned char *) 0x00000000; + } + } + if (data_buf[0] > 0x10 && data_buf[0] < 0x20 && data_buf[1] > data_buf[2]) { + // Cause an 'invalid read' crash if (0x10 < data[0] < 0x20) and data[1] > data[2] + unsigned char invalid_read = *(unsigned char *) 0x00000000; + } + + return 0; +} diff --git a/unicorn_mode/samples/c/persistent_target_x86_64 b/unicorn_mode/samples/c/persistent_target_x86_64 new file mode 100644 index 00000000..22e04357 --- /dev/null +++ b/unicorn_mode/samples/c/persistent_target_x86_64 Binary files differdiff --git a/unicorn_mode/samples/persistent/.gitignore b/unicorn_mode/samples/persistent/.gitignore new file mode 100644 index 00000000..3e446132 --- /dev/null +++ b/unicorn_mode/samples/persistent/.gitignore @@ -0,0 +1,3 @@ +harness +harness-debug +out diff --git a/unicorn_mode/samples/persistent/COMPILE.md b/unicorn_mode/samples/persistent/COMPILE.md new file mode 100644 index 00000000..781f15c0 --- /dev/null +++ b/unicorn_mode/samples/persistent/COMPILE.md @@ -0,0 +1,24 @@ +# C Sample + +This shows a simple 
persistent harness for unicornafl in C. +In contrast to the normal C harness, this harness manually resets the unicorn state on each new input. +Thanks to this, we can rerun the testcase in unicorn multiple times, without the need to fork again. + +## Compiling harness.c + +The target can be built using the `make` command. +Just make sure you have built unicorn support first: +```bash +cd /path/to/afl/unicorn_mode +./build_unicorn_support.sh +``` + +## Compiling persistent_target.c + +You don't need to compile persistent_target.c since an X86_64 binary version is +pre-built and shipped in this sample folder. This file documents how the binary +was built in case you want to rebuild it or recompile it for any reason. + +The pre-built binary (persistent_target_x86_64) was built using -g -O0 in gcc. + +We then load the binary and execute its main function directly. diff --git a/unicorn_mode/samples/persistent/Makefile b/unicorn_mode/samples/persistent/Makefile new file mode 100644 index 00000000..fe100490 --- /dev/null +++ b/unicorn_mode/samples/persistent/Makefile @@ -0,0 +1,42 @@ +# UnicornAFL Usage +# Original Unicorn Example Makefile by Nguyen Anh Quynh <aquynh@gmail.com>, 2015 +# Adapted for AFL++ by domenukk <domenukk@gmail.com>, 2020 + +UNAME_S := $(shell uname -s) + +LIBDIR = ../../unicornafl +BIN_EXT = +AR_EXT = a + +# Verbose output? 
+V ?= 0 + +CFLAGS += -Wall -Werror -I../../unicornafl/include + +LDFLAGS += -L$(LIBDIR) -lpthread -lm +ifeq ($(UNAME_S), Linux) +LDFLAGS += -lrt +endif + +ifneq ($(CROSS),) +CC = $(CROSS)gcc +endif + +.PHONY: all clean + +all: harness + +clean: + rm -rf *.o harness harness-debug + +harness.o: harness.c ../../unicornafl/include/unicorn/*.h + ${CC} ${CFLAGS} -O3 -c $< + +harness-debug.o: harness.c ../../unicornafl/include/unicorn/*.h + ${CC} ${CFLAGS} -g -c $< -o $@ + +harness: harness.o + ${CC} -L${LIBDIR} $< ../../unicornafl/libunicornafl.a $(LDFLAGS) -o $@ + +debug: harness-debug.o + ${CC} -L${LIBDIR} $< ../../unicornafl/libunicornafl.a $(LDFLAGS) -o harness-debug diff --git a/unicorn_mode/samples/persistent/harness.c b/unicorn_mode/samples/persistent/harness.c new file mode 100644 index 00000000..d8ebffbc --- /dev/null +++ b/unicorn_mode/samples/persistent/harness.c @@ -0,0 +1,269 @@ +/* + Persistent test harness for AFL++'s unicornafl c mode. + + This loads the persistent_target_x86_64 binary (precompiled as X86_64 code) into + Unicorn's memory map for emulation, places the specified input into + the argv buffer (handed in as first parameter), and executes 'main()'. + Any crashes during emulation will automatically be handled by the uc_afl_fuzz() function. + + Run under AFL as follows: + + $ cd <afl_path>/unicorn_mode/samples/persistent/ + $ make + $ ../../../afl-fuzz -m none -i sample_inputs -o out -- ./harness @@ + + (Re)run a single input with block tracing using: + + $ ./harness -t [inputfile] +*/ + +// This is not your everyday Unicorn. 
+#define UNICORN_AFL + +#include <string.h> +#include <inttypes.h> +#include <stdint.h> +#include <stdbool.h> +#include <unistd.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <sys/mman.h> + +#include <unicorn/unicorn.h> + +// Path to the file containing the binary to emulate +#define BINARY_FILE ("persistent_target_x86_64") + +// Memory map for the code to be tested +// Arbitrary address where code to test will be loaded +static const int64_t BASE_ADDRESS = 0x100000; +static const int64_t CODE_ADDRESS = 0x101139; +static const int64_t END_ADDRESS = 0x10120d; +// Address of the stack (Some random address again) +static const int64_t STACK_ADDRESS = (((int64_t) 0x01) << 58); +// Size of the stack (arbitrarily chosen, just make it big enough) +static const int64_t STACK_SIZE = 0x10000; +// Location where the input will be placed (make sure the emulated program knows this somehow, too ;) ) +static const int64_t INPUT_LOCATION = 0x10000; +// Inside the location, we have an offset in our special case +static const int64_t INPUT_OFFSET = 0x16; +// Maximum allowable size of mutated data from AFL +static const int64_t INPUT_SIZE_MAX = 0x10000; +// Alignment for unicorn mappings (seems to be needed) +static const int64_t ALIGNMENT = 0x1000; + +// In our special case, we emulate main(), so argc is needed. +static const uint64_t EMULATED_ARGC = 2; + +// The return from our fake strlen +static size_t current_input_len = 0; + +static void hook_block(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) { + printf(">>> Tracing basic block at 0x%"PRIx64 ", block size = 0x%x\n", address, size); +} + +static void hook_code(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) { + printf(">>> Tracing instruction at 0x%"PRIx64 ", instruction size = 0x%x\n", address, size); +} + +/* +The sample uses strlen, since we don't have a loader or libc, we'll fake it. +We know the strlen will return the length of argv[1] that we just planted. 
+It will be a lot faster than an actual strlen for this specific purpose. +*/ +static void hook_strlen(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) { + //Hook + //116b: e8 c0 fe ff ff call 1030 <strlen@plt> + // We place the return at RAX + uc_reg_write(uc, UC_X86_REG_RAX, &current_input_len); + // We skip the actual call by updating RIP + //printf("Strlen hook at addr 0x%lx (size: 0x%x), result: %ld\n", address, size, current_input_len); + uint64_t next_addr = address + size; + uc_reg_write(uc, UC_X86_REG_RIP, &next_addr); +} + +/* Unicorn page needs to be 0x1000 aligned, apparently */ +static uint64_t pad(uint64_t size) { + if (size % ALIGNMENT == 0) return size; + return ((size / ALIGNMENT) + 1) * ALIGNMENT; +} + +/* returns the filesize in bytes, -1 on error. */ +static off_t afl_mmap_file(char *filename, char **buf_ptr) { + + off_t ret = -1; + + int fd = open(filename, O_RDONLY); + + struct stat st = {0}; + if (fstat(fd, &st)) goto exit; + + off_t in_len = st.st_size; + if (in_len == -1) { + /* This can only ever happen on 32 bit if the file is exactly 4gb. */ + fprintf(stderr, "Filesize of %s too large", filename); + goto exit; + } + + *buf_ptr = mmap(0, in_len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + + if (*buf_ptr != MAP_FAILED) ret = in_len; + +exit: + close(fd); + return ret; + +} + +/* Place the input at the right spot inside unicorn */ +static bool place_input_callback( + uc_engine *uc, + char *input, + size_t input_len, + uint32_t persistent_round, + void *data +){ + // printf("Placing input with len %ld to %x\n", input_len, DATA_ADDRESS); + if (input_len < 1 || input_len >= INPUT_SIZE_MAX - INPUT_OFFSET) { + // Test input too short or too long, ignore this testcase + return false; + } + + // For persistent mode, we have to set up stack and memory each time. 
+ uc_reg_write(uc, UC_X86_REG_RIP, &CODE_ADDRESS); // Set the instruction pointer back + // Set up the function parameters accordingly RSI, RDI (see calling convention/disassembly) + uc_reg_write(uc, UC_X86_REG_RSI, &INPUT_LOCATION); // argv + uc_reg_write(uc, UC_X86_REG_RDI, &EMULATED_ARGC); // argc == 2 + + // We need a valid c string, make sure it never goes out of bounds. + input[input_len-1] = '\0'; + // Write the testcase to unicorn. + uc_mem_write(uc, INPUT_LOCATION + INPUT_OFFSET, input, input_len); + + // store input_len for the faux strlen hook + current_input_len = input_len; + + return true; +} + +static void mem_map_checked(uc_engine *uc, uint64_t addr, size_t size, uint32_t mode) { + size = pad(size); + //printf("SIZE %lx, align: %lx\n", size, ALIGNMENT); + uc_err err = uc_mem_map(uc, addr, size, mode); + if (err != UC_ERR_OK) { + printf("Error mapping %ld bytes at 0x%lx: %s (mode: %d)\n", size, addr, uc_strerror(err), mode); + exit(1); + } +} + +int main(int argc, char **argv, char **envp) { + if (argc == 1) { + printf("Test harness for simple_target.bin. Usage: harness [-t] <inputfile>\n"); + exit(1); + } + bool tracing = false; + char *filename = argv[1]; + if (argc > 2 && !strcmp(argv[1], "-t")) { + tracing = true; + filename = argv[2]; + } + + uc_engine *uc; + uc_err err; + uc_hook hooks[2]; + char *file_contents; + + // Initialize emulator in X86_64 mode + err = uc_open(UC_ARCH_X86, UC_MODE_64, &uc); + if (err) { + printf("Failed on uc_open() with error returned: %u (%s)\n", + err, uc_strerror(err)); + return -1; + } + + printf("Loading data input from %s\n", BINARY_FILE); + off_t len = afl_mmap_file(BINARY_FILE, &file_contents); + if (len < 0) { + perror("Could not read binary to emulate"); + return -2; + } + if (len == 0) { + fprintf(stderr, "File at '%s' is empty\n", BINARY_FILE); + return -3; + } + + // Map memory. 
+ mem_map_checked(uc, BASE_ADDRESS, len, UC_PROT_ALL); + printf("Len: %lx", len); + fflush(stdout); + + // write machine code to be emulated to memory + if (uc_mem_write(uc, BASE_ADDRESS, file_contents, len) != UC_ERR_OK) { + printf("Error writing to CODE"); + } + + // Release copied contents + munmap(file_contents, len); + + // Set the program counter to the start of the code + uint64_t start_address = CODE_ADDRESS; // address of entry point of main() + uint64_t end_address = END_ADDRESS; // Address of last instruction in main() + uc_reg_write(uc, UC_X86_REG_RIP, &start_address); // address of entry point of main() + + // Setup the Stack + mem_map_checked(uc, STACK_ADDRESS - STACK_SIZE, STACK_SIZE, UC_PROT_READ | UC_PROT_WRITE); + uint64_t stack_val = STACK_ADDRESS; + printf("%ld", stack_val); + uc_reg_write(uc, UC_X86_REG_RSP, &stack_val); + + // reserve some space for our input data + mem_map_checked(uc, INPUT_LOCATION, INPUT_SIZE_MAX, UC_PROT_READ); + + // build a "dummy" argv with length 2 at 0x10000: + // 0x10000 argv[0] NULL + // 0x10008 argv[1] (char *)0x10016 --. points to the next offset. + // 0x10016 argv[1][0], ... <-^ contains the actual input data. 
(INPUT_LOCATION + INPUT_OFFSET) + + uc_mem_write(uc, 0x10008, "\x16\x00\x01", 3); // little endian of 0x10016, see above + + + // If we want tracing output, set the callbacks here + if (tracing) { + // tracing all basic blocks with customized callback + uc_hook_add(uc, &hooks[0], UC_HOOK_BLOCK, hook_block, NULL, 1, 0); + uc_hook_add(uc, &hooks[1], UC_HOOK_CODE, hook_code, NULL, BASE_ADDRESS, BASE_ADDRESS + len - 1); + } + + // Add our strlen hook (for this specific testcase only) + int strlen_hook_pos = BASE_ADDRESS + 0x116b; + uc_hook strlen_hook; + uc_hook_add(uc, &strlen_hook, UC_HOOK_CODE, hook_strlen, NULL, strlen_hook_pos, strlen_hook_pos); + + printf("Starting to fuzz :)\n"); + fflush(stdout); + + // let's gooo + uc_afl_ret afl_ret = uc_afl_fuzz( + uc, // The unicorn instance we prepared + filename, // Filename of the input to process. In AFL this is usually the '@@' placeholder, outside it's any input file. + place_input_callback, // Callback that places the input (automatically loaded from the file at filename) in the unicorninstance + &end_address, // Where to exit (this is an array) + 1, // Count of end addresses + NULL, // Optional calback to run after each exec + false, // true, if the optional callback should be run also for non-crashes + 1000, // For persistent mode: How many rounds to run + NULL // additional data pointer + ); + switch(afl_ret) { + case UC_AFL_RET_ERROR: + printf("Error starting to fuzz"); + return -3; + break; + case UC_AFL_RET_NO_AFL: + printf("No AFL attached - We are done with a single run."); + break; + default: + break; + } + return 0; +} diff --git a/unicorn_mode/samples/persistent/persistent_target.c b/unicorn_mode/samples/persistent/persistent_target.c new file mode 100644 index 00000000..5b866f86 --- /dev/null +++ b/unicorn_mode/samples/persistent/persistent_target.c @@ -0,0 +1,39 @@ +/* + * Sample target file to test afl-unicorn fuzzing capabilities. 
+ * This is a very trivial example that will crash pretty easily + * in several different exciting ways. + * + * Input is assumed to come from a buffer located at DATA_ADDRESS + * (0x00300000), so make sure that your Unicorn emulation of this + * puts user data there. + * + * Written by Nathan Voss <njvoss99@gmail.com> + * Adapted by Lukas Seidel <seidel.1@campus.tu-berlin.de> + */ +#include <stdint.h> +#include <string.h> + + +int main(int argc, char** argv) { + if (argc < 2) return -1; + + char *data_buf = argv[1]; + uint64_t data_len = strlen(data_buf); + if (data_len < 20) return -2; + + for (; data_len --> 0 ;) { + if (data_len >= 18) continue; + if (data_len > 2 && data_len < 18) { + ((char *)data_len)[(uint64_t)data_buf] = data_buf[data_len + 1]; + } else if (data_buf[9] == 0x90 && data_buf[10] != 0x00 && data_buf[11] == 0x90) { + // Cause a crash if data[10] is not zero, but [9] and [11] are zero + unsigned char invalid_read = *(unsigned char *) 0x00000000; + } + } + if (data_buf[0] > 0x10 && data_buf[0] < 0x20 && data_buf[1] > data_buf[2]) { + // Cause an 'invalid read' crash if (0x10 < data[0] < 0x20) and data[1] > data[2] + unsigned char invalid_read = *(unsigned char *) 0x00000000; + } + + return 0; +} diff --git a/unicorn_mode/samples/persistent/persistent_target_x86_64 b/unicorn_mode/samples/persistent/persistent_target_x86_64 new file mode 100644 index 00000000..22e04357 --- /dev/null +++ b/unicorn_mode/samples/persistent/persistent_target_x86_64 Binary files differdiff --git a/unicorn_mode/samples/persistent/sample_all.sh b/unicorn_mode/samples/persistent/sample_all.sh new file mode 100644 index 00000000..01daf365 --- /dev/null +++ b/unicorn_mode/samples/persistent/sample_all.sh @@ -0,0 +1,18 @@ +#!/bin/sh + +[ -z "${UNAME}" ] && UNAME=$(uname) + +DIR=`dirname $0` + +if [ "$UNAME" = Darwin ]; then + export DYLD_LIBRARY_PATH=../../unicorn +else + export LD_LIBRARY_PATH=../../unicorn +fi + + + +if [ ! test -e $DIR/harness]; then + echo "[!] 
harness not found in $DIR" + exit 1 +fi \ No newline at end of file diff --git a/unicorn_mode/samples/persistent/sample_inputs/sample1.bin b/unicorn_mode/samples/persistent/sample_inputs/sample1.bin new file mode 100644 index 00000000..85df5078 --- /dev/null +++ b/unicorn_mode/samples/persistent/sample_inputs/sample1.bin @@ -0,0 +1 @@ +abcd \ No newline at end of file diff --git a/unicorn_mode/samples/persistent/sample_inputs/sample2.bin b/unicorn_mode/samples/persistent/sample_inputs/sample2.bin new file mode 100644 index 00000000..f76dd238 --- /dev/null +++ b/unicorn_mode/samples/persistent/sample_inputs/sample2.bin Binary files differdiff --git a/unicorn_mode/samples/persistent/sample_inputs/sample3.bin b/unicorn_mode/samples/persistent/sample_inputs/sample3.bin new file mode 100644 index 00000000..6b2aaa76 --- /dev/null +++ b/unicorn_mode/samples/persistent/sample_inputs/sample3.bin @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/unicorn_mode/samples/persistent/sample_inputs/sample4.bin b/unicorn_mode/samples/persistent/sample_inputs/sample4.bin new file mode 100644 index 00000000..71bd63e6 --- /dev/null +++ b/unicorn_mode/samples/persistent/sample_inputs/sample4.bin @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/unicorn_mode/samples/persistent/sample_inputs/sample5.bin b/unicorn_mode/samples/persistent/sample_inputs/sample5.bin new file mode 100644 index 00000000..aed2973e --- /dev/null +++ b/unicorn_mode/samples/persistent/sample_inputs/sample5.bin @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/unicorn_mode/samples/c/simple_target.c b/unicorn_mode/samples/persistent/simple_target_noncrashing.c index dbf10911..00764473 100644 --- a/unicorn_mode/samples/c/simple_target.c +++ b/unicorn_mode/samples/persistent/simple_target_noncrashing.c @@ -19,12 +19,11 @@ int main(int argc, char** argv) { char *data_buf = argv[1]; + if len(data_buf < 20) { if (data_buf[20] != 0) { - // Cause an 'invalid read' crash if data[0..3] == 
'\x01\x02\x03\x04' - unsigned char invalid_read = *(unsigned char *) 0x00000000; + printf("Not crashing"); } else if (data_buf[0] > 0x10 && data_buf[0] < 0x20 && data_buf[1] > data_buf[2]) { - // Cause an 'invalid read' crash if (0x10 < data[0] < 0x20) and data[1] > data[2] - unsigned char invalid_read = *(unsigned char *) 0x00000000; + printf("Also not crashing with databuf[0] == %c", data_buf[0]) } else if (data_buf[9] == 0x00 && data_buf[10] != 0x00 && data_buf[11] == 0x00) { // Cause a crash if data[10] is not zero, but [9] and [11] are zero unsigned char invalid_read = *(unsigned char *) 0x00000000; diff --git a/unicorn_mode/samples/persistent/simple_target_x86_64 b/unicorn_mode/samples/persistent/simple_target_x86_64 new file mode 100644 index 00000000..560264fd --- /dev/null +++ b/unicorn_mode/samples/persistent/simple_target_x86_64 Binary files differ |