From 00d086f816d6b517a6817d6093a83ed8a65b18fa Mon Sep 17 00:00:00 2001
From: van Hauser <vh@thc.org>
Date: Tue, 21 Jan 2020 12:53:36 +0100
Subject: USE_TRACE_PC unnecessary, set env AFL_LLVM_USE_TRACE_PC instead

---
 docs/ChangeLog | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'docs')

diff --git a/docs/ChangeLog b/docs/ChangeLog
index 5347d244..bb3537dd 100644
--- a/docs/ChangeLog
+++ b/docs/ChangeLog
@@ -21,9 +21,11 @@ Version ++2.60d (develop):
   - afl-fuzz:
      - now prints the real python version support compiled in
      - set stronger performance compile options and little tweaks
-  - afl-clang-fast now shows in the help output for which llvm version it
-    was compiled for
-  - added blacklisted function check in llvm_mode
+  - afl-clang-fast:
+     - show in the help output for which llvm version it was compiled for
+     - now does not need to be recompiled between trace-pc and pass
+       instrumentation. compile normally and set AFL_LLVM_USE_TRACE_PC :)
+  - added blacklisted function check in all modules of llvm_mode
   - added fix from Debian project to compile libdislocator and libtokencap
 
 
-- 
cgit 1.4.1


From 7e7ab8f5415409fd1bb643f4dfef44c5a3935006 Mon Sep 17 00:00:00 2001
From: hexcoder <hexcoder-@users.noreply.github.com>
Date: Wed, 22 Jan 2020 22:24:00 +0100
Subject: Update binaryonly_fuzzing.txt

---
 docs/binaryonly_fuzzing.txt | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'docs')

diff --git a/docs/binaryonly_fuzzing.txt b/docs/binaryonly_fuzzing.txt
index 239fb4b0..f8d68cd8 100644
--- a/docs/binaryonly_fuzzing.txt
+++ b/docs/binaryonly_fuzzing.txt
@@ -5,10 +5,10 @@ Fuzzing binary-only programs with afl++
 afl++, libfuzzer and others are great if you have the source code, and
 it allows for very fast and coverage guided fuzzing.
 
-However, if there is only the binary program and not source code available,
-then standard afl++ (dumb mode) is not effective.
+However, if there is only the binary program and no source code available,
+then standard `afl-fuzz -n` (dumb mode) is not effective.
 
-The following is a description of how these can be fuzzed with afl++
+The following is a description of how these binaries can be fuzzed with afl++
 
 !!!!!
 TL;DR: try DYNINST with afl-dyninst. If it produces too many crashes then
@@ -28,7 +28,7 @@ As it is included in afl++ this needs no URL.
 
 WINE+QEMU
 ---------
-Wine mode can run Win32 PE with the QEMU instrumentation.
+Wine mode can run Win32 PE binaries with the QEMU instrumentation.
 It needs Wine, python3 and the pefile python package installed.
 
 UNICORN
@@ -37,7 +37,7 @@ Unicorn is a fork of QEMU. The instrumentation is, therefore, very similar.
 In contrast to QEMU, Unicorn does not offer a full system or even userland emulation.
 Runtime environment and/or loaders have to be written from scratch, if needed.
 On top, block chaining has been removed. This means the speed boost introduced in 
-to the patched QEMU Mode of afl++ cannot simply be ported over to Unicorn.
+the patched QEMU Mode of afl++ cannot simply be ported over to Unicorn.
 For further information, check out ./unicorn_mode.txt.
 
 
-- 
cgit 1.4.1


From e7c95ebf5a4828b662252b10052a89923dd25030 Mon Sep 17 00:00:00 2001
From: van Hauser <vh@thc.org>
Date: Thu, 23 Jan 2020 10:15:33 +0100
Subject: afl-cmin final touches

---
 Makefile       |   2 +-
 afl-cmin       | 473 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 afl-cmin.awk   | 470 --------------------------------------------------------
 docs/ChangeLog |   2 +
 test/test.sh   |   2 +-
 5 files changed, 476 insertions(+), 473 deletions(-)
 delete mode 100755 afl-cmin.awk

(limited to 'docs')

diff --git a/Makefile b/Makefile
index 7260ee47..459cae5f 100644
--- a/Makefile
+++ b/Makefile
@@ -29,7 +29,7 @@ VERSION     = $(shell grep '^\#define VERSION ' ../config.h | cut -d '"' -f2)
 # PROGS intentionally omit afl-as, which gets installed elsewhere.
 
 PROGS       = afl-gcc afl-fuzz afl-showmap afl-tmin afl-gotcpu afl-analyze
-SH_PROGS    = afl-plot afl-cmin afl-whatsup afl-system-config
+SH_PROGS    = afl-plot afl-cmin afl-cmin.bash afl-whatsup afl-system-config
 MANPAGES=$(foreach p, $(PROGS) $(SH_PROGS), $(p).8)
 
 ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -flto=full -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
diff --git a/afl-cmin b/afl-cmin
index 75dc63a7..a072a62a 100755
--- a/afl-cmin
+++ b/afl-cmin
@@ -1,4 +1,475 @@
 #!/usr/bin/env sh
 THISPATH=`dirname ${0}`
 export PATH=${THISPATH}:$PATH
-awk -f ${0}.awk -- ${@+"$@"}
+awk -f - -- ${@+"$@"} <<'EOF'
+#!/usr/bin/awk -f
+
+# awk script to minimize a test corpus of input files
+#
+# based on afl-cmin bash script written by Michal Zalewski
+# rewritten by Heiko Eißfeldt (hexcoder-)
+#
+# uses getopt.awk package from Arnold Robbins
+#
+# external tools used by this script:
+# test
+# grep
+# rm
+# mkdir
+# ln
+# cp
+# pwd
+# which
+# cd
+# find
+# stat
+# sort
+# cut
+# and afl-showmap from this project :-)
+
+# getopt.awk --- Do C library getopt(3) function in awk
+
+# External variables:
+#    Optind -- index in ARGV of first nonoption argument
+#    Optarg -- string value of argument to current option
+#    Opterr -- if nonzero, print our own diagnostic
+#    Optopt -- current option letter
+
+# Returns:
+#    -1     at end of options
+#    "?"    for unrecognized option
+#    <c>    a character representing the current option
+
+# Private Data:
+#    _opti  -- index in multiflag option, e.g., -abc
+
+function getopt(argc, argv, options,    thisopt, i)
+{
+    if (length(options) == 0)    # no options given
+        return -1
+
+    if (argv[Optind] == "--") {  # all done
+        Optind++
+        _opti = 0
+        return -1
+    } else if (argv[Optind] !~ /^-[^:[:space:]]/) {
+        _opti = 0
+        return -1
+    }
+    if (_opti == 0)
+        _opti = 2
+    thisopt = substr(argv[Optind], _opti, 1)
+    Optopt = thisopt
+    i = index(options, thisopt)
+    if (i == 0) {
+        if (Opterr)
+            printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
+        if (_opti >= length(argv[Optind])) {
+            Optind++
+            _opti = 0
+        } else
+            _opti++
+        return "?"
+    }
+    if (substr(options, i + 1, 1) == ":") {
+        # get option argument
+        if (length(substr(argv[Optind], _opti + 1)) > 0)
+            Optarg = substr(argv[Optind], _opti + 1)
+        else
+            Optarg = argv[++Optind]
+        _opti = 0
+    } else
+        Optarg = ""
+    if (_opti == 0 || _opti >= length(argv[Optind])) {
+        Optind++
+        _opti = 0
+    } else
+        _opti++
+    return thisopt
+}
+
+BEGIN {
+    Opterr = 1    # default is to diagnose
+    Optind = 1    # skip ARGV[0]
+
+    # test program
+    if (_getopt_test) {
+        while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1)
+            printf("c = <%c>, Optarg = <%s>\n",
+                                       _go_c, Optarg)
+        printf("non-option arguments:\n")
+        for (; Optind < ARGC; Optind++)
+            printf("\tARGV[%d] = <%s>\n",
+                                    Optind, ARGV[Optind])
+    }
+}
+
+function usage() {
+   print \
+"Usage: afl-cmin [ options ] -- /path/to/target_app [ ... ]\n" \
+"\n" \
+"Required parameters:\n" \
+"\n" \
+"  -i dir        - input directory with starting corpus\n" \
+"  -o dir        - output directory for minimized files\n" \
+"\n" \
+"Execution control settings:\n" \
+"\n" \
+"  -f file       - location read by the fuzzed program (stdin)\n" \
+"  -m megs       - memory limit for child process ("mem_limit" MB)\n" \
+"  -t msec       - run time limit for child process (none)\n" \
+"  -Q            - use binary-only instrumentation (QEMU mode)\n" \
+"  -U            - use unicorn-based instrumentation (unicorn mode)\n" \
+"\n" \
+"Minimization settings:\n" \
+"  -C            - keep crashing inputs, reject everything else\n" \
+"  -e            - solve for edge coverage only, ignore hit counts\n" \
+"\n" \
+"For additional tips, please consult docs/README.md\n" \
+"\n" \
+      > "/dev/stderr"
+   exit 1
+}
+
+function exists_and_is_executable(binarypath) {  # true (1) when test(1) reports binarypath as a regular file that is executable
+  return 0 == system("test -f "binarypath" -a -x "binarypath)  # NOTE(review): path is spliced unquoted into the shell command
+}
+
+BEGIN {
+  print "corpus minimization tool for afl++ (awk version)\n"
+
+  # defaults
+  extra_par = ""
+  # process options
+  Opterr = 1    # default is to diagnose
+  Optind = 1    # skip ARGV[0]
+  while ((_go_c = getopt(ARGC, ARGV, "hi:o:f:m:t:eCQU?")) != -1) {
+    if (_go_c == "i") {
+      if (!Optarg) usage()
+      if (in_dir) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
+      in_dir = Optarg
+      continue
+    } else 
+    if (_go_c == "o") {
+      if (!Optarg) usage()
+      if (out_dir) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
+      out_dir = Optarg
+      continue
+    } else 
+    if (_go_c == "f") {
+      if (!Optarg) usage()
+      if (stdin_file) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
+      stdin_file = Optarg
+      continue
+    } else 
+    if (_go_c == "m") {
+      if (!Optarg) usage()
+      if (mem_limit) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
+      mem_limit = Optarg
+      mem_limit_given = 1
+      continue
+    } else 
+    if (_go_c == "t") {
+      if (!Optarg) usage()
+      if (timeout) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
+      timeout = Optarg
+      continue
+    } else 
+    if (_go_c == "C") {
+      ENVIRON["AFL_CMIN_CRASHES_ONLY"] = 1
+      continue
+    } else 
+    if (_go_c == "e") {
+      extra_par = extra_par " -e"
+      continue
+    } else 
+    if (_go_c == "Q") {
+      if (qemu_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
+      extra_par = extra_par " -Q"
+      if ( !mem_limit_given ) mem_limit = "250"
+      qemu_mode = 1
+      continue
+    } else 
+    if (_go_c == "U") {
+      if (unicorn_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
+      extra_par = extra_par " -U"
+      if ( !mem_limit_given ) mem_limit = "250"
+      unicorn_mode = 1
+      continue
+    } else 
+    if (_go_c == "?") {
+      exit 1
+    } else 
+      usage()
+  } # while options
+
+  if (!mem_limit) mem_limit = 200
+  if (!timeout) timeout = "none"
+
+  # get program args
+  i = 0
+  prog_args_string = ""
+  for (; Optind < ARGC; Optind++) {
+    prog_args[i++] = ARGV[Optind]
+    if (i > 1)
+      prog_args_string = prog_args_string" "ARGV[Optind]
+  }
+
+  # sanity checks
+  if (!prog_args[0] || !in_dir || !out_dir) usage()
+
+  target_bin = prog_args[0] 
+
+  # Do a sanity check to discourage the use of /tmp, since we can't really
+  # handle this safely from an awk script.
+
+  if (!ENVIRON["AFL_ALLOW_TMP"]) {
+    dirlist[0] = in_dir
+    dirlist[1] = target_bin
+    dirlist[2] = out_dir
+    dirlist[3] = stdin_file
+    "pwd" | getline dirlist[4] # current directory
+    for (dirind in dirlist) {
+      dir = dirlist[dirind]
+
+      if (dir ~ /^(\/var)?\/tmp/) {
+        print "[-] Error: do not use this script in /tmp or /var/tmp." > "/dev/stderr"
+        exit 1
+      }
+    }
+    delete dirlist
+  }
+
+  # If @@ is specified, but there's no -f, let's come up with a temporary input
+  # file name.
+
+  trace_dir = out_dir "/.traces"
+
+  if (!stdin_file) {
+    found_atat = 0
+    for (prog_args_ind in prog_args) {
+      if ("@@" == prog_args[prog_args_ind]) {
+        found_atat = 1
+        break
+      }
+    }
+    if (found_atat) {
+      stdin_file = trace_dir "/.cur_input"
+    }
+  }
+
+  # Check for obvious errors.
+
+  if (mem_limit && mem_limit != "none" && mem_limit < 5) {
+    print "[-] Error: dangerously low memory limit." > "/dev/stderr"
+    exit 1
+  }
+
+  if (timeout && timeout != "none" && timeout < 10) {
+    print "[-] Error: dangerously low timeout." > "/dev/stderr"
+    exit 1
+  }
+
+  if (target_bin && !exists_and_is_executable(target_bin)) {
+
+    "which "target_bin" 2>/dev/null" | getline tnew
+    if (!tnew || !exists_and_is_executable(tnew)) {
+      print "[-] Error: binary '"target_bin"' not found or not executable." > "/dev/stderr"
+      exit 1
+    }
+    target_bin = tnew
+  }
+
+  if (!ENVIRON["AFL_SKIP_BIN_CHECK"] && !qemu_mode && !unicorn_mode) {
+    if (0 != system( "grep -q __AFL_SHM_ID "target_bin )) {
+      print "[-] Error: binary '"target_bin"' doesn't appear to be instrumented." > "/dev/stderr"
+      exit 1
+    }
+  }
+
+  if (0 != system( "test -d "in_dir )) {
+    print "[-] Error: directory '"in_dir"' not found." > "/dev/stderr"
+    exit 1
+  }
+
+  if (0 == system( "test -d "in_dir"/queue" )) {
+    in_dir = in_dir "/queue"
+  }
+
+  system("rm -rf "trace_dir" 2>/dev/null");
+  system("rm "out_dir"/id[:_]* 2>/dev/null")
+
+  if (0 == system( "test -d "out_dir" -a -e "out_dir"/*" )) {
+    print "[-] Error: directory '"out_dir"' exists and is not empty - delete it first." > "/dev/stderr"
+    exit 1
+  }
+
+  if (stdin_file) {
+    # truncate input file
+    printf "" > stdin_file
+    close( stdin_file )
+  }
+
+  if (!ENVIRON["AFL_PATH"]) {  # AFL_PATH unset: probe the current directory, else search PATH
+    if (0 == system("test -f afl-cmin")) {  # afl-cmin.awk no longer exists; this script itself is the marker
+      showmap = "./afl-showmap"
+    } else {
+      "which afl-showmap 2>/dev/null" | getline showmap
+    }
+  } else {
+    showmap = ENVIRON["AFL_PATH"] "/afl-showmap"
+  }
+
+  if (!showmap || 0 != system("test -x "showmap )) {
+    print "[-] Error: can't find 'afl-showmap' - please set AFL_PATH." > "/dev/stderr"
+    exit 1
+  }
+  
+  # get list of input filenames sorted by size
+  i = 0
+  # yuck, gnu stat is option incompatible to bsd stat
+  # we use a heuristic to differentiate between
+  # GNU stat and other stats
+  "stat --version 2>/dev/null" | getline statversion
+  if (statversion ~ /GNU coreutils/) {
+    stat_format = "-c '%s %n'" # GNU
+  } else {
+    stat_format = "-f '%z %N'" # *BSD, MacOS
+  }
+  while ("cd "in_dir" && find . -type f -exec stat "stat_format" \\{\\} \\; | sort -n | cut -d' ' -f2-" | getline) {
+    infilesSmallToBig[i++] = $0
+  }
+  in_count = i
+
+  first_file = infilesSmallToBig[0]
+  
+  # Make sure that we're not dealing with a directory.
+
+  if (0 == system("test -d "in_dir"/"first_file)) {
+    print "[-] Error: The input directory contains subdirectories - please fix." > "/dev/stderr"
+    exit 1
+  }
+
+  # Check for the more efficient way to copy files...
+  if (0 != system("mkdir -p -m 0700 "trace_dir)) {
+    print "[-] Error: Cannot create directory "trace_dir > "/dev/stderr"
+    exit 1
+  }
+
+  if (0 == system("ln "in_dir"/"first_file" "trace_dir"/.link_test")) {
+    cp_tool = "ln"
+  } else {
+    cp_tool = "cp"
+  }
+
+  # Make sure that we can actually get anything out of afl-showmap before we
+  # waste too much time.
+
+  print "[*] Testing the target binary..."
+
+  if (!stdin_file) {  # target reads stdin: feed the first input file directly
+    system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -- \""target_bin"\" "prog_args_string" <\""in_dir"/"first_file"\"")
+  } else {  # target reads stdin_file: copy the input there and give it an empty stdin
+    system("cp "in_dir"/"first_file" "stdin_file)
+    system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" </dev/null")
+  }
+
+  first_count = 0
+
+  runtest = trace_dir"/.run_test"
+  while ((getline < runtest) > 0) {
+    ++first_count
+  }
+
+  if (first_count) {
+    print "[+] OK, "first_count" tuples recorded."
+  } else {
+    print "[-] Error: no instrumentation output detected (perhaps crash or timeout)." > "/dev/stderr"
+    if (!ENVIRON["AFL_KEEP_TRACES"]) {
+      system("rm -rf "trace_dir" 2>/dev/null")
+    }
+    exit 1
+  }
+
+  # Let's roll!
+
+  #############################
+  # STEP 1: Collecting traces #
+  #############################
+
+  print "[*] Obtaining traces for "in_count" input files in '"in_dir"'."
+
+  cur = 0;
+  if (!stdin_file) {  # target reads stdin: redirect each input file into it
+    while (cur < in_count) {
+      fn = infilesSmallToBig[cur]
+      ++cur;
+      printf "\r    Processing file "cur"/"in_count
+      system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/"fn"\" -Z "extra_par" -- \""target_bin"\" "prog_args_string" <\""in_dir"/"fn"\"")
+    }
+  } else {  # target reads stdin_file: copy each input there and give it an empty stdin
+    while (cur < in_count) {
+      fn = infilesSmallToBig[cur]
+      ++cur
+      printf "\r    Processing file "cur"/"in_count
+      system("cp "in_dir"/"fn" "stdin_file)
+      system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/"fn"\" -Z "extra_par" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" </dev/null")
+    }
+  }
+
+  print ""
+
+
+  #######################################################
+  # STEP 2: register smallest input file for each tuple #
+  # STEP 3: copy that file (at most once)               #
+  #######################################################
+
+  print "[*] Processing traces for input files in '"in_dir"'."
+
+  cur = 0
+  out_count = 0
+  tuple_count = 0
+
+  while (cur < in_count) {
+    fn = infilesSmallToBig[cur]
+    ++cur
+    printf "\r    Processing file "cur"/"in_count
+    # create path for the trace file from afl-showmap
+    tracefile_path = trace_dir"/"fn
+    # gather all keys, and count them
+    while ((getline line < tracefile_path) > 0) {
+        key = line
+        if (!(key in key_count)) {
+          ++tuple_count
+        }
+        ++key_count[key]
+        if (! (key in best_file)) {
+            # this is the best file for this key
+            best_file[key] = fn
+            # copy file unless already done
+            if (! (fn in file_already_copied)) {
+                system(cp_tool" "in_dir"/"fn" "out_dir"/"fn)
+                file_already_copied[fn] = ""
+                ++out_count
+            }
+        }
+    }
+    close(tracefile_path)
+  }
+
+  print ""
+  print "[+] Found "tuple_count" unique tuples across "in_count" files."
+
+  if (out_count == 1) {
+    print "[!] WARNING: All test cases had the same traces, check syntax!"
+  }
+  print "[+] Narrowed down to "out_count" files, saved in '"out_dir"'."
+
+  if (!ENVIRON["AFL_KEEP_TRACES"]) {
+    system("rm -rf "trace_dir" 2>/dev/null")
+  }
+
+  exit 0
+}
+EOF
diff --git a/afl-cmin.awk b/afl-cmin.awk
deleted file mode 100755
index 967c4e87..00000000
--- a/afl-cmin.awk
+++ /dev/null
@@ -1,470 +0,0 @@
-#!/usr/bin/awk -f
-
-# awk script to minimize a test corpus of input files
-#
-# based on afl-cmin bash script written by Michal Zalewski
-# rewritten by Heiko Eißfeldt (hexcoder-)
-#
-# uses getopt.awk package from Arnold Robbins
-#
-# external tools used by this script:
-# test
-# grep
-# rm
-# mkdir
-# ln
-# cp
-# pwd
-# which
-# cd
-# find
-# stat
-# sort
-# cut
-# and afl-showmap from this project :-)
-
-# getopt.awk --- Do C library getopt(3) function in awk
-
-# External variables:
-#    Optind -- index in ARGV of first nonoption argument
-#    Optarg -- string value of argument to current option
-#    Opterr -- if nonzero, print our own diagnostic
-#    Optopt -- current option letter
-
-# Returns:
-#    -1     at end of options
-#    "?"    for unrecognized option
-#    <c>    a character representing the current option
-
-# Private Data:
-#    _opti  -- index in multiflag option, e.g., -abc
-
-function getopt(argc, argv, options,    thisopt, i)
-{
-    if (length(options) == 0)    # no options given
-        return -1
-
-    if (argv[Optind] == "--") {  # all done
-        Optind++
-        _opti = 0
-        return -1
-    } else if (argv[Optind] !~ /^-[^:[:space:]]/) {
-        _opti = 0
-        return -1
-    }
-    if (_opti == 0)
-        _opti = 2
-    thisopt = substr(argv[Optind], _opti, 1)
-    Optopt = thisopt
-    i = index(options, thisopt)
-    if (i == 0) {
-        if (Opterr)
-            printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
-        if (_opti >= length(argv[Optind])) {
-            Optind++
-            _opti = 0
-        } else
-            _opti++
-        return "?"
-    }
-    if (substr(options, i + 1, 1) == ":") {
-        # get option argument
-        if (length(substr(argv[Optind], _opti + 1)) > 0)
-            Optarg = substr(argv[Optind], _opti + 1)
-        else
-            Optarg = argv[++Optind]
-        _opti = 0
-    } else
-        Optarg = ""
-    if (_opti == 0 || _opti >= length(argv[Optind])) {
-        Optind++
-        _opti = 0
-    } else
-        _opti++
-    return thisopt
-}
-
-BEGIN {
-    Opterr = 1    # default is to diagnose
-    Optind = 1    # skip ARGV[0]
-
-    # test program
-    if (_getopt_test) {
-        while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1)
-            printf("c = <%c>, Optarg = <%s>\n",
-                                       _go_c, Optarg)
-        printf("non-option arguments:\n")
-        for (; Optind < ARGC; Optind++)
-            printf("\tARGV[%d] = <%s>\n",
-                                    Optind, ARGV[Optind])
-    }
-}
-
-function usage() {
-   print \
-"Usage: afl-cmin [ options ] -- /path/to/target_app [ ... ]\n" \
-"\n" \
-"Required parameters:\n" \
-"\n" \
-"  -i dir        - input directory with starting corpus\n" \
-"  -o dir        - output directory for minimized files\n" \
-"\n" \
-"Execution control settings:\n" \
-"\n" \
-"  -f file       - location read by the fuzzed program (stdin)\n" \
-"  -m megs       - memory limit for child process ("mem_limit" MB)\n" \
-"  -t msec       - run time limit for child process (none)\n" \
-"  -Q            - use binary-only instrumentation (QEMU mode)\n" \
-"  -U            - use unicorn-based instrumentation (unicorn mode)\n" \
-"\n" \
-"Minimization settings:\n" \
-"  -C            - keep crashing inputs, reject everything else\n" \
-"  -e            - solve for edge coverage only, ignore hit counts\n" \
-"\n" \
-"For additional tips, please consult docs/README.md\n" \
-"\n" \
-      > "/dev/stderr"
-   exit 1
-}
-
-function exists_and_is_executable(binarypath) {
-  return 0 == system("test -f "binarypath" -a -x "binarypath)
-}
-
-BEGIN {
-  print "corpus minimization tool for afl++ (awk version)\n"
-
-  # defaults
-  extra_par = ""
-  # process options
-  Opterr = 1    # default is to diagnose
-  Optind = 1    # skip ARGV[0]
-  while ((_go_c = getopt(ARGC, ARGV, "hi:o:f:m:t:eCQU?")) != -1) {
-    if (_go_c == "i") {
-      if (!Optarg) usage()
-      if (in_dir) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
-      in_dir = Optarg
-      continue
-    } else 
-    if (_go_c == "o") {
-      if (!Optarg) usage()
-      if (out_dir) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
-      out_dir = Optarg
-      continue
-    } else 
-    if (_go_c == "f") {
-      if (!Optarg) usage()
-      if (stdin_file) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
-      stdin_file = Optarg
-      continue
-    } else 
-    if (_go_c == "m") {
-      if (!Optarg) usage()
-      if (mem_limit) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
-      mem_limit = Optarg
-      mem_limit_given = 1
-      continue
-    } else 
-    if (_go_c == "t") {
-      if (!Optarg) usage()
-      if (timeout) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
-      timeout = Optarg
-      continue
-    } else 
-    if (_go_c == "C") {
-      ENVIRON["AFL_CMIN_CRASHES_ONLY"] = 1
-      continue
-    } else 
-    if (_go_c == "e") {
-      extra_par = extra_par " -e"
-      continue
-    } else 
-    if (_go_c == "Q") {
-      if (qemu_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
-      extra_par = extra_par " -Q"
-      if ( !mem_limit_given ) mem_limit = "250"
-      qemu_mode = 1
-      continue
-    } else 
-    if (_go_c == "U") {
-      if (unicorn_mode) { print "Option "_go_c" is only allowed once" > "/dev/stderr"}
-      extra_par = extra_par " -U"
-      if ( !mem_limit_given ) mem_limit = "250"
-      unicorn_mode = 1
-      continue
-    } else 
-    if (_go_c == "?") {
-      exit 1
-    } else 
-      usage()
-  } # while options
-
-  if (!mem_limit) mem_limit = 200
-  if (!timeout) timeout = "none"
-
-  # get program args
-  i = 0
-  prog_args_string = ""
-  for (; Optind < ARGC; Optind++) {
-    prog_args[i++] = ARGV[Optind]
-    if (i > 1)
-      prog_args_string = prog_args_string" "ARGV[Optind]
-  }
-
-  # sanity checks
-  if (!prog_args[0] || !in_dir || !out_dir) usage()
-
-  target_bin = prog_args[0] 
-
-  # Do a sanity check to discourage the use of /tmp, since we can't really
-  # handle this safely from an awk script.
-
-  if (!ENVIRON["AFL_ALLOW_TMP"]) {
-    dirlist[0] = in_dir
-    dirlist[1] = target_bin
-    dirlist[2] = out_dir
-    dirlist[3] = stdin_file
-    "pwd" | getline dirlist[4] # current directory
-    for (dirind in dirlist) {
-      dir = dirlist[dirind]
-
-      if (dir ~ /^(\/var)?\/tmp/) {
-        print "[-] Error: do not use this script in /tmp or /var/tmp." > "/dev/stderr"
-        exit 1
-      }
-    }
-    delete dirlist
-  }
-
-  # If @@ is specified, but there's no -f, let's come up with a temporary input
-  # file name.
-
-  trace_dir = out_dir "/.traces"
-
-  if (!stdin_file) {
-    found_atat = 0
-    for (prog_args_ind in prog_args) {
-      if ("@@" == prog_args[prog_args_ind]) {
-        found_atat = 1
-        break
-      }
-    }
-    if (found_atat) {
-      stdin_file = trace_dir "/.cur_input"
-    }
-  }
-
-  # Check for obvious errors.
-
-  if (mem_limit && mem_limit != "none" && mem_limit < 5) {
-    print "[-] Error: dangerously low memory limit." > "/dev/stderr"
-    exit 1
-  }
-
-  if (timeout && timeout != "none" && timeout < 10) {
-    print "[-] Error: dangerously low timeout." > "/dev/stderr"
-    exit 1
-  }
-
-  if (target_bin && !exists_and_is_executable(target_bin)) {
-
-    "which "target_bin" 2>/dev/null" | getline tnew
-    if (!tnew || !exists_and_is_executable(tnew)) {
-      print "[-] Error: binary '"target_bin"' not found or not executable." > "/dev/stderr"
-      exit 1
-    }
-    target_bin = tnew
-  }
-
-  if (!ENVIRON["AFL_SKIP_BIN_CHECK"] && !qemu_mode && !unicorn_mode) {
-    if (0 != system( "grep -q __AFL_SHM_ID "target_bin )) {
-      print "[-] Error: binary '"target_bin"' doesn't appear to be instrumented." > "/dev/stderr"
-      exit 1
-    }
-  }
-
-  if (0 != system( "test -d "in_dir )) {
-    print "[-] Error: directory '"in_dir"' not found." > "/dev/stderr"
-    exit 1
-  }
-
-  if (0 == system( "test -d "in_dir"/queue" )) {
-    in_dir = in_dir "/queue"
-  }
-
-  system("rm -rf "trace_dir" 2>/dev/null");
-  system("rm "out_dir"/id[:_]* 2>/dev/null")
-
-  if (0 == system( "test -d "out_dir" -a -e "out_dir"/*" )) {
-    print "[-] Error: directory '"out_dir"' exists and is not empty - delete it first." > "/dev/stderr"
-    exit 1
-  }
-
-  if (stdin_file) {
-    # truncate input file
-    printf "" > stdin_file
-    close( stdin_file )
-  }
-
-  if (!ENVIRON["AFL_PATH"]) {
-    if (0 == system("test -f afl-cmin.awk")) {
-      showmap = "./afl-showmap"
-    } else {
-      "which afl-showmap 2>/dev/null" | getline showmap
-    }
-  } else {
-    showmap = ENVIRON["AFL_PATH"] "/afl-showmap"
-  }
-
-  if (!showmap || 0 != system("test -x "showmap )) {
-    print "[-] Error: can't find 'afl-showmap' - please set AFL_PATH." > "/dev/stderr"
-    exit 1
-  }
-  
-  # get list of input filenames sorted by size
-  i = 0
-  # yuck, gnu stat is option incompatible to bsd stat
-  # we use a heuristic to differentiate between
-  # GNU stat and other stats
-  "stat --version 2>/dev/null" | getline statversion
-  if (statversion ~ /GNU coreutils/) {
-    stat_format = "-c '%s %n'" # GNU
-  } else {
-    stat_format = "-f '%z %N'" # *BSD, MacOS
-  }
-  while ("cd "in_dir" && find . -type f -exec stat "stat_format" \\{\\} \\; | sort -n | cut -d' ' -f2-" | getline) {
-    infilesSmallToBig[i++] = $0
-  }
-  in_count = i
-
-  first_file = infilesSmallToBig[0]
-  
-  # Make sure that we're not dealing with a directory.
-
-  if (0 == system("test -d "in_dir"/"first_file)) {
-    print "[-] Error: The input directory contains subdirectories - please fix." > "/dev/stderr"
-    exit 1
-  }
-
-  # Check for the more efficient way to copy files...
-  if (0 != system("mkdir -p -m 0700 "trace_dir)) {
-    print "[-] Error: Cannot create directory "trace_dir > "/dev/stderr"
-    exit 1
-  }
-
-  if (0 == system("ln "in_dir"/"first_file" "trace_dir"/.link_test")) {
-    cp_tool = "ln"
-  } else {
-    cp_tool = "cp"
-  }
-
-  # Make sure that we can actually get anything out of afl-showmap before we
-  # waste too much time.
-
-  print "[*] Testing the target binary..."
-
-  if (!stdin_file) {
-    system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -- \""target_bin"\" "prog_args_string" <\""in_dir"/"first_file"\"")
-  } else {
-    system("cp "in_dir"/"first_file" "stdin_file)
-    system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" <dev/null")
-  }
-
-  first_count = 0
-
-  runtest = trace_dir"/.run_test"
-  while ((getline < runtest) > 0) {
-    ++first_count
-  }
-
-  if (first_count) {
-    print "[+] OK, "first_count" tuples recorded."
-  } else {
-    print "[-] Error: no instrumentation output detected (perhaps crash or timeout)." > "/dev/stderr"
-    if (!ENVIRON["AFL_KEEP_TRACES"]) {
-      system("rm -rf "trace_dir" 2>/dev/null")
-    }
-    exit 1
-  }
-
-  # Let's roll!
-
-  #############################
-  # STEP 1: Collecting traces #
-  #############################
-
-  print "[*] Obtaining traces for "in_count" input files in '"in_dir"'."
-
-  cur = 0;
-  if (!stdin_file) {
-    while (cur < in_count) {
-      fn = infilesSmallToBig[cur]
-      ++cur;
-      printf "\r    Processing file "cur"/"in_count
-      system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/"fn"\" -Z "extra_par" -- \""target_bin"\" "prog_args_string" <\""in_dir"/"fn"\"")
-    }
-  } else {
-    while (cur < in_count) {
-      fn = infilesSmallToBig[cur]
-      ++cur
-      printf "\r    Processing file "cur"/"in_count
-      system("cp "in_dir"/"fn" "stdin_file)
-      system( "AFL_CMIN_ALLOW_ANY=1 \""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/"fn"\" -Z "extra_par" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" <dev/null")
-    }
-  }
-
-  print ""
-
-
-  #######################################################
-  # STEP 2: register smallest input file for each tuple #
-  # STEP 3: copy that file (at most once)               #
-  #######################################################
-
-  print "[*] Processing traces for input files in '"in_dir"'."
-
-  cur = 0
-  out_count = 0
-  tuple_count = 0
-
-  while (cur < in_count) {
-    fn = infilesSmallToBig[cur]
-    ++cur
-    printf "\r    Processing file "cur"/"in_count
-    # create path for the trace file from afl-showmap
-    tracefile_path = trace_dir"/"fn
-    # gather all keys, and count them
-    while ((getline line < tracefile_path) > 0) {
-        key = line
-        if (!(key in key_count)) {
-          ++tuple_count
-        }
-        ++key_count[key]
-        if (! (key in best_file)) {
-            # this is the best file for this key
-            best_file[key] = fn
-            # copy file unless already done
-            if (! (fn in file_already_copied)) {
-                system(cp_tool" "in_dir"/"fn" "out_dir"/"fn)
-                file_already_copied[fn] = ""
-                ++out_count
-            }
-        }
-    }
-    close(tracefile_path)
-  }
-
-  print ""
-  print "[+] Found "tuple_count" unique tuples across "in_count" files."
-
-  if (out_count == 1) {
-    print "[!] WARNING: All test cases had the same traces, check syntax!"
-  }
-  print "[+] Narrowed down to "out_count" files, saved in '"out_dir"'."
-
-  if (!ENVIRON["AFL_KEEP_TRACES"]) {
-    system("rm -rf "trace_dir" 2>/dev/null")
-  }
-
-  exit 0
-}
diff --git a/docs/ChangeLog b/docs/ChangeLog
index bb3537dd..33c6f618 100644
--- a/docs/ChangeLog
+++ b/docs/ChangeLog
@@ -25,6 +25,8 @@ Version ++2.60d (develop):
      - show in the help output for which llvm version it was compiled for
      - now does not need to be recompiled between trace-pc and pass
        instrumentation. compile normally and set AFL_LLVM_USE_TRACE_PC :)
+  - afl-cmin is now a sh script (invoking awk) instead of bash for portability
+    the original script is still present as afl-cmin.bash
   - added blacklisted function check in all modules of llvm_mode
   - added fix from Debian project to compile libdislocator and libtokencap
 
diff --git a/test/test.sh b/test/test.sh
index 3473155f..0ae6fd09 100755
--- a/test/test.sh
+++ b/test/test.sh
@@ -150,7 +150,7 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" && {
     }
     echo 000000000000000000000000 > in/in2
     mkdir -p in2
-    ../afl-cmin -i in -o in2 -- ./test-instr.plain > /dev/null
+    ../afl-cmin -i in -o in2 -- ./test-instr.plain > /dev/null 2>&1
     CNT=`ls in2/ | wc -l`
     case "$CNT" in
 1| *1) $ECHO "$GREEN[+] afl-cmin correctly minimized testcase numbers" ;;
-- 
cgit 1.4.1


From 2c6847bfa0b57f3330b1aab9b91d935757db51b7 Mon Sep 17 00:00:00 2001
From: van Hauser <vh@thc.org>
Date: Sat, 25 Jan 2020 16:11:42 +0100
Subject: added whitelist+blacklist to all llvm_mode passes

---
 docs/ChangeLog                         |   2 +-
 llvm_mode/LLVMInsTrim.so.cc            |  29 +-------
 llvm_mode/MarkNodes.cc                 |  19 ++----
 llvm_mode/compare-transform-pass.so.cc |  94 ++++++++++++++++++++++++++
 llvm_mode/split-compares-pass.so.cc    | 118 +++++++++++++++++++++++++++++++++
 llvm_mode/split-switches-pass.so.cc    | 113 +++++++++++++++++++++++++++++++
 test/test.sh                           |  16 ++---
 7 files changed, 343 insertions(+), 48 deletions(-)

(limited to 'docs')

diff --git a/docs/ChangeLog b/docs/ChangeLog
index 33c6f618..c1d53379 100644
--- a/docs/ChangeLog
+++ b/docs/ChangeLog
@@ -27,7 +27,7 @@ Version ++2.60d (develop):
        instrumentation. compile normally and set AFL_LLVM_USE_TRACE_PC :)
   - afl-cmin is now a sh script (invoking awk) instead of bash for portability
     the original script is still present as afl-cmin.bash
-  - added blacklisted function check in all modules of llvm_mode
+  - added blacklist and whitelisting function check in all modules of llvm_mode
   - added fix from Debian project to compile libdislocator and libtokencap
 
 
diff --git a/llvm_mode/LLVMInsTrim.so.cc b/llvm_mode/LLVMInsTrim.so.cc
index 11451b43..24df6d42 100644
--- a/llvm_mode/LLVMInsTrim.so.cc
+++ b/llvm_mode/LLVMInsTrim.so.cc
@@ -144,19 +144,6 @@ struct InsTrim : public ModulePass {
     // this is our default
     MarkSetOpt = true;
 
-    /*    // I dont think this makes sense to port into LLVMInsTrim
-          char* inst_ratio_str = getenv("AFL_INST_RATIO");
-          unsigned int inst_ratio = 100;
-          if (inst_ratio_str) {
-
-           if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || !inst_ratio ||
-       inst_ratio > 100) FATAL("Bad value of AFL_INST_RATIO (must be between 1
-       and 100)");
-
-          }
-
-    */
-
     LLVMContext &C = M.getContext();
     IntegerType *Int8Ty = IntegerType::getInt8Ty(C);
     IntegerType *Int32Ty = IntegerType::getInt32Ty(C);
@@ -203,8 +190,7 @@ struct InsTrim : public ModulePass {
 
           if (instFilename.str().empty()) {
 
-            /* If the original location is empty, try using the inlined location
-             */
+            /* If the original location is empty, try using the inlined location */
             DILocation *oDILoc = cDILoc->getInlinedAt();
             if (oDILoc) {
 
@@ -432,28 +418,19 @@ struct InsTrim : public ModulePass {
         IRB.CreateStore(Incr, MapPtrIdx)
             ->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
 
-        /* Set prev_loc to cur_loc >> 1 */
-        /*
-        StoreInst *Store = IRB.CreateStore(ConstantInt::get(Int32Ty, L >> 1),
-        OldPrev); Store->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C,
-        None));
-        */
-
         total_instr++;
 
       }
 
     }
 
-    OKF("Instrumented %u locations (%llu, %llu) (%s mode)\n" /*", ratio
-                                                                %u%%)."*/
-        ,
+    OKF("Instrumented %u locations (%llu, %llu) (%s mode)\n",
         total_instr, total_rs, total_hs,
         getenv("AFL_HARDEN")
             ? "hardened"
             : ((getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN"))
                    ? "ASAN/MSAN"
-                   : "non-hardened") /*, inst_ratio*/);
+                   : "non-hardened"));
     return false;
 
   }
diff --git a/llvm_mode/MarkNodes.cc b/llvm_mode/MarkNodes.cc
index 2aeeda8d..caa8cede 100644
--- a/llvm_mode/MarkNodes.cc
+++ b/llvm_mode/MarkNodes.cc
@@ -65,16 +65,11 @@ void buildCFG(Function *F) {
 
   }
 
-  // uint32_t FakeID = 0;
   for (auto S = F->begin(), E = F->end(); S != E; ++S) {
 
     BasicBlock *BB = &*S;
     uint32_t    MyID = LMap[BB];
-    // if (succ_begin(BB) == succ_end(BB)) {
 
-    // Succs[MyID].push_back(FakeID);
-    // Marked.insert(MyID);
-    //}
     for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
 
       Succs[MyID].push_back(LMap[*I]);
@@ -113,7 +108,7 @@ void DFStree(size_t now_id) {
 
 }
 
-void turnCFGintoDAG(Function *F) {
+void turnCFGintoDAG() {
 
   tSuccs = Succs;
   tag.resize(Blocks.size());
@@ -176,7 +171,7 @@ void DFS(uint32_t now) {
 
 }
 
-void DominatorTree(Function *F) {
+void DominatorTree() {
 
   if (Blocks.empty()) return;
   uint32_t s = start_point;
@@ -390,7 +385,7 @@ void MarkSubGraph(uint32_t ss, uint32_t tt) {
 
 }
 
-void MarkVertice(Function *F) {
+void MarkVertice() {
 
   uint32_t s = start_point;
 
@@ -411,8 +406,6 @@ void MarkVertice(Function *F) {
 
   timeStamp = 0;
   uint32_t t = 0;
-  // MarkSubGraph(s, t);
-  // return;
 
   while (s != t) {
 
@@ -432,9 +425,9 @@ std::pair<std::vector<BasicBlock *>, std::vector<BasicBlock *> > markNodes(
   reset();
   labelEachBlock(F);
   buildCFG(F);
-  turnCFGintoDAG(F);
-  DominatorTree::DominatorTree(F);
-  MarkVertice(F);
+  turnCFGintoDAG();
+  DominatorTree::DominatorTree();
+  MarkVertice();
 
   std::vector<BasicBlock *> Result, ResultAbove;
   for (uint32_t x : Markabove) {
diff --git a/llvm_mode/compare-transform-pass.so.cc b/llvm_mode/compare-transform-pass.so.cc
index 0ccce875..5d924b63 100644
--- a/llvm_mode/compare-transform-pass.so.cc
+++ b/llvm_mode/compare-transform-pass.so.cc
@@ -18,7 +18,13 @@
 #include <stdlib.h>
 #include <unistd.h>
 
+#include <list>
+#include <string>
+#include <fstream>
+#include <sys/time.h>
+
 #include "llvm/ADT/Statistic.h"
+#include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/Module.h"
@@ -42,6 +48,23 @@ class CompareTransform : public ModulePass {
   static char ID;
   CompareTransform() : ModulePass(ID) {
 
+    char *instWhiteListFilename = getenv("AFL_LLVM_WHITELIST");
+    if (instWhiteListFilename) {
+
+      std::string   line;
+      std::ifstream fileStream;
+      fileStream.open(instWhiteListFilename);
+      if (!fileStream) report_fatal_error("Unable to open AFL_LLVM_WHITELIST");
+      getline(fileStream, line);
+      while (fileStream) {
+
+        myWhitelist.push_back(line);
+        getline(fileStream, line);
+
+      }
+
+    }
+
   }
 
   bool runOnModule(Module &M) override;
@@ -57,6 +80,9 @@ class CompareTransform : public ModulePass {
 
   }
 
+ protected:
+  std::list<std::string> myWhitelist;
+
  private:
   bool transformCmps(Module &M, const bool processStrcmp,
                      const bool processMemcmp, const bool processStrncmp,
@@ -104,6 +130,74 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
 
     for (auto &BB : F) {
 
+      if (!myWhitelist.empty()) {
+
+        BasicBlock::iterator IP = BB.getFirstInsertionPt();
+
+        bool instrumentBlock = false;
+
+        /* Get the current location using debug information.
+         * If we are not able to determine our location, the block
+         * is skipped (instrumentBlock stays false). */
+        DebugLoc Loc = IP->getDebugLoc();
+        if (Loc) {
+
+          DILocation *cDILoc = dyn_cast<DILocation>(Loc.getAsMDNode());
+
+          unsigned int instLine = cDILoc->getLine();
+          StringRef    instFilename = cDILoc->getFilename();
+
+          if (instFilename.str().empty()) {
+
+            /* If the original location is empty, try using the inlined location
+             */
+            DILocation *oDILoc = cDILoc->getInlinedAt();
+            if (oDILoc) {
+
+              instFilename = oDILoc->getFilename();
+              instLine = oDILoc->getLine();
+
+            }
+
+          }
+
+          (void)instLine;
+
+          /* Continue only if we know where we actually are */
+          if (!instFilename.str().empty()) {
+
+            for (std::list<std::string>::iterator it = myWhitelist.begin();
+                 it != myWhitelist.end(); ++it) {
+
+              /* We don't check for filename equality here because
+               * filenames might actually be full paths. Instead we
+               * check that the actual filename ends in the filename
+               * specified in the list. */
+              if (instFilename.str().length() >= it->length()) {
+
+                if (instFilename.str().compare(
+                        instFilename.str().length() - it->length(),
+                        it->length(), *it) == 0) {
+
+                  instrumentBlock = true;
+                  break;
+
+                }
+
+              }
+
+            }
+
+          }
+
+        }
+
+        /* Either we couldn't figure out our location or the location is
+         * not whitelisted, so we skip instrumentation. */
+        if (!instrumentBlock) continue;
+
+      }
+
       for (auto &IN : BB) {
 
         CallInst *callInst = nullptr;
diff --git a/llvm_mode/split-compares-pass.so.cc b/llvm_mode/split-compares-pass.so.cc
index eeac4a55..bc25b322 100644
--- a/llvm_mode/split-compares-pass.so.cc
+++ b/llvm_mode/split-compares-pass.so.cc
@@ -15,7 +15,17 @@
  * limitations under the License.
  */
 
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <list>
+#include <string>
+#include <fstream>
+#include <sys/time.h>
+
 #include "llvm/Pass.h"
+#include "llvm/IR/DebugInfo.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/IR/LegacyPassManager.h"
 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
@@ -35,6 +45,41 @@ class SplitComparesTransform : public ModulePass {
   static char ID;
   SplitComparesTransform() : ModulePass(ID) {
 
+    char *instWhiteListFilename = getenv("AFL_LLVM_WHITELIST");
+    if (instWhiteListFilename) {
+
+      std::string   line;
+      std::ifstream fileStream;
+      fileStream.open(instWhiteListFilename);
+      if (!fileStream) report_fatal_error("Unable to open AFL_LLVM_WHITELIST");
+      getline(fileStream, line);
+      while (fileStream) {
+
+        myWhitelist.push_back(line);
+        getline(fileStream, line);
+
+      }
+
+    }
+
+  }
+
+  static bool isBlacklisted(const Function *F) {
+
+    static const SmallVector<std::string, 5> Blacklist = {
+
+        "asan.", "llvm.", "sancov.", "__ubsan_handle_", "ign."
+
+    };
+
+    for (auto const &BlacklistFunc : Blacklist) {
+
+      if (F->getName().startswith(BlacklistFunc)) { return true; }
+
+    }
+
+    return false;
+
   }
 
   bool runOnModule(Module &M) override;
@@ -49,6 +94,9 @@ class SplitComparesTransform : public ModulePass {
 
   }
 
+ protected:
+  std::list<std::string> myWhitelist;
+
  private:
   int enableFPSplit;
 
@@ -77,8 +125,78 @@ bool SplitComparesTransform::simplifyCompares(Module &M) {
    * all integer comparisons with >= and <= predicates to the icomps vector */
   for (auto &F : M) {
 
+    if (isBlacklisted(&F)) continue;
+
     for (auto &BB : F) {
 
+      if (!myWhitelist.empty()) {
+
+        bool instrumentBlock = false;
+
+        BasicBlock::iterator IP = BB.getFirstInsertionPt();
+
+        /* Get the current location using debug information.
+         * If we are not able to determine our location, the block
+         * is skipped (instrumentBlock stays false). */
+        DebugLoc Loc = IP->getDebugLoc();
+        if (Loc) {
+
+          DILocation *cDILoc = dyn_cast<DILocation>(Loc.getAsMDNode());
+
+          unsigned int instLine = cDILoc->getLine();
+          StringRef    instFilename = cDILoc->getFilename();
+
+          if (instFilename.str().empty()) {
+
+            /* If the original location is empty, try using the inlined location
+             */
+            DILocation *oDILoc = cDILoc->getInlinedAt();
+            if (oDILoc) {
+
+              instFilename = oDILoc->getFilename();
+              instLine = oDILoc->getLine();
+
+            }
+
+          }
+
+          (void)instLine;
+
+          /* Continue only if we know where we actually are */
+          if (!instFilename.str().empty()) {
+
+            for (std::list<std::string>::iterator it = myWhitelist.begin();
+                 it != myWhitelist.end(); ++it) {
+
+              /* We don't check for filename equality here because
+               * filenames might actually be full paths. Instead we
+               * check that the actual filename ends in the filename
+               * specified in the list. */
+              if (instFilename.str().length() >= it->length()) {
+
+                if (instFilename.str().compare(
+                        instFilename.str().length() - it->length(),
+                        it->length(), *it) == 0) {
+
+                  instrumentBlock = true;
+                  break;
+
+                }
+
+              }
+
+            }
+
+          }
+
+        }
+
+        /* Either we couldn't figure out our location or the location is
+         * not whitelisted, so we skip instrumentation. */
+        if (!instrumentBlock) continue;
+
+      }
+
       for (auto &IN : BB) {
 
         CmpInst *selectcmpInst = nullptr;
diff --git a/llvm_mode/split-switches-pass.so.cc b/llvm_mode/split-switches-pass.so.cc
index 2743a71a..3a2838c0 100644
--- a/llvm_mode/split-switches-pass.so.cc
+++ b/llvm_mode/split-switches-pass.so.cc
@@ -18,7 +18,13 @@
 #include <stdlib.h>
 #include <unistd.h>
 
+#include <list>
+#include <string>
+#include <fstream>
+#include <sys/time.h>
+
 #include "llvm/ADT/Statistic.h"
+#include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/Module.h"
@@ -42,6 +48,41 @@ class SplitSwitchesTransform : public ModulePass {
   static char ID;
   SplitSwitchesTransform() : ModulePass(ID) {
 
+    char *instWhiteListFilename = getenv("AFL_LLVM_WHITELIST");
+    if (instWhiteListFilename) {
+
+      std::string   line;
+      std::ifstream fileStream;
+      fileStream.open(instWhiteListFilename);
+      if (!fileStream) report_fatal_error("Unable to open AFL_LLVM_WHITELIST");
+      getline(fileStream, line);
+      while (fileStream) {
+
+        myWhitelist.push_back(line);
+        getline(fileStream, line);
+
+      }
+
+    }
+
+  }
+
+  static bool isBlacklisted(const Function *F) {
+
+    static const SmallVector<std::string, 5> Blacklist = {
+
+        "asan.", "llvm.", "sancov.", "__ubsan_handle_", "ign."
+
+    };
+
+    for (auto const &BlacklistFunc : Blacklist) {
+
+      if (F->getName().startswith(BlacklistFunc)) { return true; }
+
+    }
+
+    return false;
+
   }
 
   bool runOnModule(Module &M) override;
@@ -71,6 +112,9 @@ class SplitSwitchesTransform : public ModulePass {
 
   typedef std::vector<CaseExpr> CaseVector;
 
+ protected:
+  std::list<std::string> myWhitelist;
+
  private:
   bool        splitSwitches(Module &M);
   bool        transformCmps(Module &M, const bool processStrcmp,
@@ -268,10 +312,79 @@ bool SplitSwitchesTransform::splitSwitches(Module &M) {
    * all switches to switches vector for later processing */
   for (auto &F : M) {
 
+    if (isBlacklisted(&F)) continue;
+
     for (auto &BB : F) {
 
       SwitchInst *switchInst = nullptr;
 
+      if (!myWhitelist.empty()) {
+
+        bool                 instrumentBlock = false;
+        BasicBlock::iterator IP = BB.getFirstInsertionPt();
+
+        /* Get the current location using debug information.
+         * If we are not able to determine our location, the block
+         * is skipped (instrumentBlock stays false). */
+        DebugLoc Loc = IP->getDebugLoc();
+        if (Loc) {
+
+          DILocation *cDILoc = dyn_cast<DILocation>(Loc.getAsMDNode());
+
+          unsigned int instLine = cDILoc->getLine();
+          StringRef    instFilename = cDILoc->getFilename();
+
+          if (instFilename.str().empty()) {
+
+            /* If the original location is empty, try using the inlined location
+             */
+            DILocation *oDILoc = cDILoc->getInlinedAt();
+            if (oDILoc) {
+
+              instFilename = oDILoc->getFilename();
+              instLine = oDILoc->getLine();
+
+            }
+
+          }
+
+          (void)instLine;
+
+          /* Continue only if we know where we actually are */
+          if (!instFilename.str().empty()) {
+
+            for (std::list<std::string>::iterator it = myWhitelist.begin();
+                 it != myWhitelist.end(); ++it) {
+
+              /* We don't check for filename equality here because
+               * filenames might actually be full paths. Instead we
+               * check that the actual filename ends in the filename
+               * specified in the list. */
+              if (instFilename.str().length() >= it->length()) {
+
+                if (instFilename.str().compare(
+                        instFilename.str().length() - it->length(),
+                        it->length(), *it) == 0) {
+
+                  instrumentBlock = true;
+                  break;
+
+                }
+
+              }
+
+            }
+
+          }
+
+        }
+
+        /* Either we couldn't figure out our location or the location is
+         * not whitelisted, so we skip instrumentation. */
+        if (!instrumentBlock) continue;
+
+      }
+
       if ((switchInst = dyn_cast<SwitchInst>(BB.getTerminator()))) {
 
         if (switchInst->getNumCases() < 1) continue;
diff --git a/test/test.sh b/test/test.sh
index 93a4e008..9676d22d 100755
--- a/test/test.sh
+++ b/test/test.sh
@@ -153,10 +153,10 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" && {
     ../afl-cmin -i in -o in2 -- ./test-instr.plain > /dev/null
     CNT=`ls in2/ | wc -l`
     case "$CNT" in
-1| *1) $ECHO "$GREEN[+] afl-cmin correctly minimized the number of testcases" ;;
-*) $ECHO "$RED[!] afl-cmin did not correctly minimizethe  number of testcases"
-       CODE=1
-       ;;
+      *1) $ECHO "$GREEN[+] afl-cmin correctly minimized the number of testcases" ;;
+      *)  $ECHO "$RED[!] afl-cmin did not correctly minimize the number of testcases"
+          CODE=1
+          ;;
     esac
     ../afl-tmin -i in/in2 -o in2/in2 -- ./test-instr.plain > /dev/null 2>&1
     SIZE=`ls -l in2/in2 2> /dev/null | awk '{print$5}'`
@@ -259,10 +259,10 @@ test -e ../afl-clang-fast -a -e ../split-switches-pass.so && {
       ../afl-cmin -i in -o in2 -- ./test-instr.plain > /dev/null
       CNT=`ls in2/ | wc -l`
       case "$CNT" in
-1| *1) $ECHO "$GREEN[+] afl-cmin correctly minimized the number of testcases" ;;
-*) $ECHO "$RED[!] afl-cmin did not correctly minimize the number of testcases"
-         CODE=1
-         ;;
+        *1) $ECHO "$GREEN[+] afl-cmin correctly minimized the number of testcases" ;;
+        *)  $ECHO "$RED[!] afl-cmin did not correctly minimize the number of testcases"
+            CODE=1
+            ;;
       esac
       ../afl-tmin -i in/in2 -o in2/in2 -- ./test-instr.plain > /dev/null 2>&1
       SIZE=`ls -l in2/in2 2> /dev/null | awk '{print$5}'`
-- 
cgit 1.4.1


From 17f0aad0f0322a0c56040b3bd93d2bf020a3f3fb Mon Sep 17 00:00:00 2001
From: van Hauser <vh@thc.org>
Date: Mon, 27 Jan 2020 13:06:00 +0100
Subject: updated binary_fuzzing document

---
 docs/binaryonly_fuzzing.md  | 153 ++++++++++++++++++++++++++++++++++++++++++++
 docs/binaryonly_fuzzing.txt | 144 -----------------------------------------
 2 files changed, 153 insertions(+), 144 deletions(-)
 create mode 100644 docs/binaryonly_fuzzing.md
 delete mode 100644 docs/binaryonly_fuzzing.txt

(limited to 'docs')

diff --git a/docs/binaryonly_fuzzing.md b/docs/binaryonly_fuzzing.md
new file mode 100644
index 00000000..d22e4ce2
--- /dev/null
+++ b/docs/binaryonly_fuzzing.md
@@ -0,0 +1,153 @@
+#Fuzzing binary-only programs with afl++
+
+afl++, libfuzzer and others are great if you have the source code, and
+it allows for very fast and coverage guided fuzzing.
+
+However, if there is only the binary program and no source code available,
+then standard `afl-fuzz -n` (dumb mode) is not effective.
+
+The following is a description of how these binaries can be fuzzed with afl++
+
+!!!!!
+TL;DR: try DYNINST with afl-dyninst. If it produces too many crashes then
+      use afl -Q qemu_mode, or better: use both in parallel.
+!!!!!
+
+
+##QEMU
+Qemu is the "native" solution to the program.
+It is available in the ./qemu_mode/ directory and once compiled it can
+be accessed by the afl-fuzz -Q command line option.
+The speed decrease is at about 50%
+It is the easiest to use alternative and even works for cross-platform binaries.
+
+Note that there is also honggfuzz: [https://github.com/google/honggfuzz](https://github.com/google/honggfuzz)
+which now has a qemu_mode, but its performance is just 1.5%!
+
+As it is included in afl++ this needs no URL.
+
+
+##WINE+QEMU
+Wine mode can run Win32 PE binaries with the QEMU instrumentation.
+It needs Wine, python3 and the pefile python package installed.
+
+As it is included in afl++ this needs no URL.
+
+
+##UNICORN
+Unicorn is a fork of QEMU. The instrumentation is, therefore, very similar.
+In contrast to QEMU, Unicorn does not offer a full system or even userland
+emulation. Runtime environment and/or loaders have to be written from scratch,
+if needed. On top, block chaining has been removed. This means the speed boost
+introduced in  the patched QEMU Mode of afl++ cannot simply be ported over to
+Unicorn. For further information, check out ./unicorn_mode.txt.
+
+As it is included in afl++ this needs no URL.
+
+
+##DYNINST
+Dyninst is a binary instrumentation framework similar to Pintool and Dynamorio
+(see far below). However whereas Pintool and Dynamorio work at runtime, dyninst
+instruments the target at load time, and then let it run - or save the
+binary with the changes.
+This is great for some things, e.g. fuzzing, and not so effective for others,
+e.g. malware analysis.
+
+So what we can do with dyninst is taking every basic block, and put afl's
+instrumention code in there - and then save the binary.
+Afterwards we can just fuzz the newly saved target binary with afl-fuzz.
+Sounds great? It is. The issue though - it is a non-trivial problem to
+insert instructions, which change addresses in the process space, so that
+everything is still working afterwards. Hence more often than not binaries
+crash when they are run.
+
+The speed decrease is about 15-35%, depending on the optimization options
+used with afl-dyninst.
+
+So if dyninst works, it is the best option available. Otherwise it just doesn't
+work well.
+
+[https://github.com/vanhauser-thc/afl-dyninst](https://github.com/vanhauser-thc/afl-dyninst)
+
+
+##INTEL-PT
+If you have a newer Intel CPU, you can make use of Intels processor trace.
+The big issue with Intel's PT is the small buffer size and the complex
+encoding of the debug information collected through PT.
+This makes the decoding very CPU intensive and hence slow.
+As a result, the overall speed decrease is about 70-90% (depending on
+the implementation and other factors).
+
+There are two afl intel-pt implementations:
+
+1. [https://github.com/junxzm1990/afl-pt](https://github.com/junxzm1990/afl-pt)
+ => this needs Ubuntu 14.04.05 without any updates and the 4.4 kernel.
+
+2. [https://github.com/hunter-ht-2018/ptfuzzer](https://github.com/hunter-ht-2018/ptfuzzer)
+ => this needs a 4.14 or 4.15 kernel. the "nopti" kernel boot option must
+    be used. This one is faster than the other.
+
+Note that there is also honggfuzz: https://github.com/google/honggfuzz
+But its IPT performance is just 6%!
+
+
+##CORESIGHT
+Coresight is ARM's answer to Intel's PT.
+There is no implementation so far which handle coresight and getting
+it working on an ARM Linux is very difficult due to custom kernel building
+on embedded systems is difficult. And finding one that has coresight in
+the ARM chip is difficult too.
+My guess is that it is slower than Qemu, but faster than Intel PT.
+
+If anyone finds any coresight implementation for afl please ping me:
+vh@thc.org
+
+
+##FRIDA
+Frida is a dynamic instrumentation engine like Pintool, Dyninst and Dynamorio.
+What is special is that it is written Python, and scripted with Javascript.
+It is mostly used to reverse binaries on mobile phones however can be used
+everywhere.
+
+There is a WIP fuzzer available at [https://github.com/andreafioraldi/frida-fuzzer](https://github.com/andreafioraldi/frida-fuzzer)
+
+
+##PIN & DYNAMORIO
+Pintool and Dynamorio are dynamic instrumentation engines, and they can be
+used for getting basic block information at runtime.
+Pintool is only available for Intel x32/x64 on Linux, Mac OS and Windows
+whereas Dynamorio is additionally available for ARM and AARCH64.
+Dynamorio is also 10x faster than Pintool.
+
+The big issue with Dynamorio (and therefore Pintool too) is speed.
+Dynamorio has a speed decrease of 98-99%
+Pintool has a speed decrease of 99.5%
+
+Hence Dynamorio is the option to go for if everything fails, and Pintool
+only if Dynamorio fails too.
+
+Dynamorio solutions:
+  * [https://github.com/vanhauser-thc/afl-dynamorio](https://github.com/vanhauser-thc/afl-dynamorio)
+  * [https://github.com/mxmssh/drAFL](https://github.com/mxmssh/drAFL)
+  * [https://github.com/googleprojectzero/winafl/](https://github.com/googleprojectzero/winafl/) <= very good but windows only
+
+Pintool solutions:
+  * [https://github.com/vanhauser-thc/afl-pin](https://github.com/vanhauser-thc/afl-pin)
+  * [https://github.com/mothran/aflpin](https://github.com/mothran/aflpin)
+  * [https://github.com/spinpx/afl_pin_mode](https://github.com/spinpx/afl_pin_mode) <= only old Pintool version supported
+
+
+##Non-AFL solutions
+There are many binary-only fuzzing frameworks.
+Some are great for CTFs but don't work with large binaries, others are very
+slow but have good path discovery, some are very hard to set-up ...
+
+* QSYM: [https://github.com/sslab-gatech/qsym](https://github.com/sslab-gatech/qsym)
+* Manticore: [https://github.com/trailofbits/manticore](https://github.com/trailofbits/manticore)
+* S2E: [https://github.com/S2E](https://github.com/S2E)
+* <please send me any missing that are good>
+
+
+## Closing words
+
+That's it! News, corrections, updates? Send an email to vh@thc.org
diff --git a/docs/binaryonly_fuzzing.txt b/docs/binaryonly_fuzzing.txt
deleted file mode 100644
index f8d68cd8..00000000
--- a/docs/binaryonly_fuzzing.txt
+++ /dev/null
@@ -1,144 +0,0 @@
-
-Fuzzing binary-only programs with afl++
-=======================================
-
-afl++, libfuzzer and others are great if you have the source code, and
-it allows for very fast and coverage guided fuzzing.
-
-However, if there is only the binary program and no source code available,
-then standard `afl-fuzz -n` (dumb mode) is not effective.
-
-The following is a description of how these binaries can be fuzzed with afl++
-
-!!!!!
-TL;DR: try DYNINST with afl-dyninst. If it produces too many crashes then
-      use afl -Q qemu_mode, or better: use both in parallel.
-!!!!!
-
-
-QEMU
-----
-Qemu is the "native" solution to the program.
-It is available in the ./qemu_mode/ directory and once compiled it can
-be accessed by the afl-fuzz -Q command line option.
-The speed decrease is at about 50%
-It is the easiest to use alternative and even works for cross-platform binaries.
-
-As it is included in afl++ this needs no URL.
-
-WINE+QEMU
----------
-Wine mode can run Win32 PE binaries with the QEMU instrumentation.
-It needs Wine, python3 and the pefile python package installed.
-
-UNICORN
--------
-Unicorn is a fork of QEMU. The instrumentation is, therefore, very similar.
-In contrast to QEMU, Unicorn does not offer a full system or even userland emulation.
-Runtime environment and/or loaders have to be written from scratch, if needed.
-On top, block chaining has been removed. This means the speed boost introduced in 
-the patched QEMU Mode of afl++ cannot simply be ported over to Unicorn.
-For further information, check out ./unicorn_mode.txt.
-
-
-DYNINST
--------
-Dyninst is a binary instrumentation framework similar to Pintool and Dynamorio
-(see far below). However whereas Pintool and Dynamorio work at runtime, dyninst
-instruments the target at load time, and then let it run.
-This is great for some things, e.g. fuzzing, and not so effective for others,
-e.g. malware analysis.
-
-So what we can do with dyninst is taking every basic block, and put afl's
-instrumention code in there - and then save the binary.
-Afterwards we can just fuzz the newly saved target binary with afl-fuzz.
-Sounds great? It is. The issue though - it is a non-trivial problem to
-insert instructions, which change addresses in the process space, so
-everything is still working afterwards. Hence more often than not binaries
-crash when they are run (because of instrumentation).
-
-The speed decrease is about 15-35%, depending on the optimization options
-used with afl-dyninst.
-
-So if dyninst works, it is the best option available. Otherwise it just doesn't
-work well.
-
-https://github.com/vanhauser-thc/afl-dyninst
-
-
-INTEL-PT
---------
-If you have a newer Intel CPU, you can make use of Intels processor trace.
-The big issue with Intel's PT is the small buffer size and the complex
-encoding of the debug information collected through PT.
-This makes the decoding very CPU intensive and hence slow.
-As a result, the overall speed decrease is about 70-90% (depending on
-the implementation and other factors).
-
-There are two afl intel-pt implementations:
-
-1. https://github.com/junxzm1990/afl-pt
- => this needs Ubuntu 14.04.05 without any updates and the 4.4 kernel.
-
-2. https://github.com/hunter-ht-2018/ptfuzzer
- => this needs a 4.14 or 4.15 kernel. the "nopti" kernel boot option must
-    be used. This one is faster than the other.
-
-
-CORESIGHT
----------
-
-Coresight is ARM's answer to Intel's PT.
-There is no implementation so far which handle coresight and getting
-it working on an ARM Linux is very difficult due to custom kernel building
-on embedded systems is difficult. And finding one that has coresight in
-the ARM chip is difficult too.
-My guess is that it is slower than Qemu, but faster than Intel PT.
-If anyone finds any coresight implementation for afl please ping me:
-vh@thc.org
-
-
-PIN & DYNAMORIO
----------------
-
-Pintool and Dynamorio are dynamic instrumentation engines, and they can be
-used for getting basic block information at runtime.
-Pintool is only available for Intel x32/x64 on Linux, Mac OS and Windows
-whereas Dynamorio is additionally available for ARM and AARCH64.
-Dynamorio is also 10x faster than Pintool.
-
-The big issue with Dynamorio (and therefore Pintool too) is speed.
-Dynamorio has a speed decrease of 98-99%
-Pintool has a speed decrease of 99.5%
-
-Hence Dynamorio is the option to go for if everything fails, and Pintool
-only if Dynamorio fails too.
-
-Dynamorio solutions:
-  https://github.com/vanhauser-thc/afl-dynamorio
-  https://github.com/mxmssh/drAFL
-  https://github.com/googleprojectzero/winafl/ <= very good but windows only
-
-Pintool solutions:
-  https://github.com/vanhauser-thc/afl-pin
-  https://github.com/mothran/aflpin
-  https://github.com/spinpx/afl_pin_mode  <= only old Pintool version supported
-
-
-Non-AFL solutions
------------------
-
-There are many binary-only fuzzing frameworks. Some are great for CTFs but don't
-work with large binaries, others are very slow but have good path discovery,
-some are very hard to set-up ...
-
-QSYM: https://github.com/sslab-gatech/qsym
-Manticore: https://github.com/trailofbits/manticore
-S2E: https://github.com/S2E
-<please send me any missing that are good>
-
-
-
-That's it!
-News, corrections, updates?
-Email vh@thc.org
-- 
cgit 1.4.1


From 83481f9460f684883a66fdb38b55f6240a687f85 Mon Sep 17 00:00:00 2001
From: van Hauser <vh@thc.org>
Date: Mon, 27 Jan 2020 13:34:59 +0100
Subject: update binary_fuzzing doc

---
 docs/binaryonly_fuzzing.md | 212 +++++++++++++++++++++++----------------------
 1 file changed, 110 insertions(+), 102 deletions(-)

(limited to 'docs')

diff --git a/docs/binaryonly_fuzzing.md b/docs/binaryonly_fuzzing.md
index d22e4ce2..6eff30d7 100644
--- a/docs/binaryonly_fuzzing.md
+++ b/docs/binaryonly_fuzzing.md
@@ -1,153 +1,161 @@
-#Fuzzing binary-only programs with afl++
+# Fuzzing binary-only programs with afl++
 
-afl++, libfuzzer and others are great if you have the source code, and
-it allows for very fast and coverage guided fuzzing.
+  afl++, libfuzzer and others are great if you have the source code, and
+  it allows for very fast and coverage guided fuzzing.
 
-However, if there is only the binary program and no source code available,
-then standard `afl-fuzz -n` (dumb mode) is not effective.
+  However, if there is only the binary program and no source code available,
+  then standard `afl-fuzz -n` (dumb mode) is not effective.
 
-The following is a description of how these binaries can be fuzzed with afl++
+  The following is a description of how these binaries can be fuzzed with afl++
 
-!!!!!
-TL;DR: try DYNINST with afl-dyninst. If it produces too many crashes then
-      use afl -Q qemu_mode, or better: use both in parallel.
-!!!!!
+  !!!!!
+  TL;DR: try DYNINST with afl-dyninst. If it produces too many crashes then
+         use afl -Q qemu_mode, or better: use both in parallel.
+  !!!!!
 
 
-##QEMU
-Qemu is the "native" solution to the program.
-It is available in the ./qemu_mode/ directory and once compiled it can
-be accessed by the afl-fuzz -Q command line option.
-The speed decrease is at about 50%
-It is the easiest to use alternative and even works for cross-platform binaries.
+## QEMU
 
-Note that there is also honggfuzz: [https://github.com/google/honggfuzz](https://github.com/google/honggfuzz)
-which now has a qemu_mode, but its performance is just 1.5%!
+  Qemu is the "native" solution to the problem.
+  It is available in the ./qemu_mode/ directory and once compiled it can
+  be accessed by the afl-fuzz -Q command line option.
+  The speed decrease is at about 50%.
+  It is the easiest to use alternative and even works for cross-platform binaries.
 
-As it is included in afl++ this needs no URL.
+  Note that there is also honggfuzz: [https://github.com/google/honggfuzz](https://github.com/google/honggfuzz)
+  which now has a qemu_mode, but its performance is just 1.5%!
 
+  As it is included in afl++ this needs no URL.
 
-##WINE+QEMU
-Wine mode can run Win32 PE binaries with the QEMU instrumentation.
-It needs Wine, python3 and the pefile python package installed.
 
-As it is included in afl++ this needs no URL.
+## WINE+QEMU
 
+  Wine mode can run Win32 PE binaries with the QEMU instrumentation.
+  It needs Wine, python3 and the pefile python package installed.
 
-##UNICORN
-Unicorn is a fork of QEMU. The instrumentation is, therefore, very similar.
-In contrast to QEMU, Unicorn does not offer a full system or even userland
-emulation. Runtime environment and/or loaders have to be written from scratch,
-if needed. On top, block chaining has been removed. This means the speed boost
-introduced in  the patched QEMU Mode of afl++ cannot simply be ported over to
-Unicorn. For further information, check out ./unicorn_mode.txt.
+  As it is included in afl++ this needs no URL.
 
-As it is included in afl++ this needs no URL.
 
+## UNICORN
 
-##DYNINST
-Dyninst is a binary instrumentation framework similar to Pintool and Dynamorio
-(see far below). However whereas Pintool and Dynamorio work at runtime, dyninst
-instruments the target at load time, and then let it run - or save the
-binary with the changes.
-This is great for some things, e.g. fuzzing, and not so effective for others,
-e.g. malware analysis.
+  Unicorn is a fork of QEMU. The instrumentation is, therefore, very similar.
+  In contrast to QEMU, Unicorn does not offer a full system or even userland
+  emulation. Runtime environment and/or loaders have to be written from scratch,
+  if needed. On top, block chaining has been removed. This means the speed boost
+  introduced in the patched QEMU Mode of afl++ cannot simply be ported over to
+  Unicorn. For further information, check out ./unicorn_mode.txt.
 
-So what we can do with dyninst is taking every basic block, and put afl's
-instrumention code in there - and then save the binary.
-Afterwards we can just fuzz the newly saved target binary with afl-fuzz.
-Sounds great? It is. The issue though - it is a non-trivial problem to
-insert instructions, which change addresses in the process space, so that
-everything is still working afterwards. Hence more often than not binaries
-crash when they are run.
+  As it is included in afl++ this needs no URL.
 
-The speed decrease is about 15-35%, depending on the optimization options
-used with afl-dyninst.
 
-So if dyninst works, it is the best option available. Otherwise it just doesn't
-work well.
+## DYNINST
 
-[https://github.com/vanhauser-thc/afl-dyninst](https://github.com/vanhauser-thc/afl-dyninst)
+  Dyninst is a binary instrumentation framework similar to Pintool and
+  Dynamorio (see far below). However whereas Pintool and Dynamorio work at
+  runtime, dyninst instruments the target at load time, and then lets it run -
+  or saves the binary with the changes.
+  This is great for some things, e.g. fuzzing, and not so effective for others,
+  e.g. malware analysis.
 
+  So what we can do with dyninst is take every basic block, put afl's
+  instrumentation code in there - and then save the binary.
+  Afterwards we can just fuzz the newly saved target binary with afl-fuzz.
+  Sounds great? It is. The issue though - it is a non-trivial problem to
+  insert instructions, which change addresses in the process space, so that
+  everything is still working afterwards. Hence more often than not binaries
+  crash when they are run.
 
-##INTEL-PT
-If you have a newer Intel CPU, you can make use of Intels processor trace.
-The big issue with Intel's PT is the small buffer size and the complex
-encoding of the debug information collected through PT.
-This makes the decoding very CPU intensive and hence slow.
-As a result, the overall speed decrease is about 70-90% (depending on
-the implementation and other factors).
+  The speed decrease is about 15-35%, depending on the optimization options
+  used with afl-dyninst.
 
-There are two afl intel-pt implementations:
+  So if Dyninst works, it is the best option available. Otherwise it just
+  doesn't work well.
 
-1. [https://github.com/junxzm1990/afl-pt](https://github.com/junxzm1990/afl-pt)
- => this needs Ubuntu 14.04.05 without any updates and the 4.4 kernel.
+  [https://github.com/vanhauser-thc/afl-dyninst](https://github.com/vanhauser-thc/afl-dyninst)
 
-2. [https://github.com/hunter-ht-2018/ptfuzzer](https://github.com/hunter-ht-2018/ptfuzzer)
- => this needs a 4.14 or 4.15 kernel. the "nopti" kernel boot option must
-    be used. This one is faster than the other.
 
-Note that there is also honggfuzz: https://github.com/google/honggfuzz
-But its IPT performance is just 6%!
+## INTEL-PT
 
+  If you have a newer Intel CPU, you can make use of Intel's Processor Trace.
+  The big issue with Intel's PT is the small buffer size and the complex
+  encoding of the debug information collected through PT.
+  This makes the decoding very CPU intensive and hence slow.
+  As a result, the overall speed decrease is about 70-90% (depending on
+  the implementation and other factors).
 
-##CORESIGHT
-Coresight is ARM's answer to Intel's PT.
-There is no implementation so far which handle coresight and getting
-it working on an ARM Linux is very difficult due to custom kernel building
-on embedded systems is difficult. And finding one that has coresight in
-the ARM chip is difficult too.
-My guess is that it is slower than Qemu, but faster than Intel PT.
+  There are two afl intel-pt implementations:
 
-If anyone finds any coresight implementation for afl please ping me:
-vh@thc.org
+  1. [https://github.com/junxzm1990/afl-pt](https://github.com/junxzm1990/afl-pt)
+     => this needs Ubuntu 14.04.05 without any updates and the 4.4 kernel.
 
+  2. [https://github.com/hunter-ht-2018/ptfuzzer](https://github.com/hunter-ht-2018/ptfuzzer)
+     => this needs a 4.14 or 4.15 kernel. The "nopti" kernel boot option must
+        be used. This one is faster than the other.
 
-##FRIDA
-Frida is a dynamic instrumentation engine like Pintool, Dyninst and Dynamorio.
-What is special is that it is written Python, and scripted with Javascript.
-It is mostly used to reverse binaries on mobile phones however can be used
-everywhere.
+  Note that there is also honggfuzz: https://github.com/google/honggfuzz
+  But its IPT performance is just 6%!
 
-There is a WIP fuzzer available at [https://github.com/andreafioraldi/frida-fuzzer](https://github.com/andreafioraldi/frida-fuzzer)
 
+## CORESIGHT
 
-##PIN & DYNAMORIO
-Pintool and Dynamorio are dynamic instrumentation engines, and they can be
-used for getting basic block information at runtime.
-Pintool is only available for Intel x32/x64 on Linux, Mac OS and Windows
-whereas Dynamorio is additionally available for ARM and AARCH64.
-Dynamorio is also 10x faster than Pintool.
+  Coresight is ARM's answer to Intel's PT.
+  There is no implementation so far which handles coresight, and getting
+  it working on an ARM Linux is very difficult because custom kernel building
+  on embedded systems is difficult. And finding one that has coresight in
+  the ARM chip is difficult too.
+  My guess is that it is slower than Qemu, but faster than Intel PT.
 
-The big issue with Dynamorio (and therefore Pintool too) is speed.
-Dynamorio has a speed decrease of 98-99%
-Pintool has a speed decrease of 99.5%
+  If anyone finds any coresight implementation for afl please ping me: vh@thc.org
 
-Hence Dynamorio is the option to go for if everything fails, and Pintool
-only if Dynamorio fails too.
 
-Dynamorio solutions:
+## FRIDA
+
+  Frida is a dynamic instrumentation engine like Pintool, Dyninst and Dynamorio.
+  What is special is that it is written in Python, and scripted with Javascript.
+  It is mostly used to reverse binaries on mobile phones, however it can be
+  used everywhere.
+
+  There is a WIP fuzzer available at [https://github.com/andreafioraldi/frida-fuzzer](https://github.com/andreafioraldi/frida-fuzzer)
+
+
+## PIN & DYNAMORIO
+
+  Pintool and Dynamorio are dynamic instrumentation engines, and they can be
+  used for getting basic block information at runtime.
+  Pintool is only available for Intel x32/x64 on Linux, Mac OS and Windows
+  whereas Dynamorio is additionally available for ARM and AARCH64.
+  Dynamorio is also 10x faster than Pintool.
+
+  The big issue with Dynamorio (and therefore Pintool too) is speed.
+  Dynamorio has a speed decrease of 98-99%
+  Pintool has a speed decrease of 99.5%
+
+  Hence Dynamorio is the option to go for if everything fails, and Pintool
+  only if Dynamorio fails too.
+
+  Dynamorio solutions:
   * [https://github.com/vanhauser-thc/afl-dynamorio](https://github.com/vanhauser-thc/afl-dynamorio)
   * [https://github.com/mxmssh/drAFL](https://github.com/mxmssh/drAFL)
   * [https://github.com/googleprojectzero/winafl/](https://github.com/googleprojectzero/winafl/) <= very good but windows only
 
-Pintool solutions:
+  Pintool solutions:
   * [https://github.com/vanhauser-thc/afl-pin](https://github.com/vanhauser-thc/afl-pin)
   * [https://github.com/mothran/aflpin](https://github.com/mothran/aflpin)
   * [https://github.com/spinpx/afl_pin_mode](https://github.com/spinpx/afl_pin_mode) <= only old Pintool version supported
 
 
-##Non-AFL solutions
-There are many binary-only fuzzing frameworks.
-Some are great for CTFs but don't work with large binaries, others are very
-slow but have good path discovery, some are very hard to set-up ...
+## Non-AFL solutions
+
+  There are many binary-only fuzzing frameworks.
+  Some are great for CTFs but don't work with large binaries, others are very
+  slow but have good path discovery, some are very hard to set-up ...
 
-* QSYM: [https://github.com/sslab-gatech/qsym](https://github.com/sslab-gatech/qsym)
-* Manticore: [https://github.com/trailofbits/manticore](https://github.com/trailofbits/manticore)
-* S2E: [https://github.com/S2E](https://github.com/S2E)
-* <please send me any missing that are good>
+  * QSYM: [https://github.com/sslab-gatech/qsym](https://github.com/sslab-gatech/qsym)
+  * Manticore: [https://github.com/trailofbits/manticore](https://github.com/trailofbits/manticore)
+  * S2E: [https://github.com/S2E](https://github.com/S2E)
+  * ... please send me any good ones that are missing
 
 
 ## Closing words
 
-That's it! News, corrections, updates? Send an email to vh@thc.org
+  That's it! News, corrections, updates? Send an email to vh@thc.org
-- 
cgit 1.4.1


From bb88d98ff8f8f1b1a434643ccd30dcd48b529a64 Mon Sep 17 00:00:00 2001
From: van Hauser <vh@thc.org>
Date: Tue, 28 Jan 2020 19:23:04 +0100
Subject: android: prefer bigcores

---
 docs/ChangeLog              |  2 ++
 llvm_mode/LLVMInsTrim.so.cc |  7 ++++---
 src/afl-fuzz-init.c         | 38 ++++++++++++++++++++++++++++----------
 3 files changed, 34 insertions(+), 13 deletions(-)

(limited to 'docs')

diff --git a/docs/ChangeLog b/docs/ChangeLog
index c1d53379..5017a803 100644
--- a/docs/ChangeLog
+++ b/docs/ChangeLog
@@ -21,10 +21,12 @@ Version ++2.60d (develop):
   - afl-fuzz:
      - now prints the real python version support compiled in
      - set stronger performance compile options and little tweaks
+     - Android: prefer bigcores when selecting a CPU
   - afl-clang-fast:
      - show in the help output for which llvm version it was compiled for
      - now does not need to be recompiled between trace-pc and pass
        instrumentation. compile normally and set AFL_LLVM_USE_TRACE_PC :)
+     - llvm 11 is supported
   - afl-cmin is now a sh script (invoking awk) instead of bash for portability
     the original script is still present as afl-cmin.bash
   - added blacklist and whitelisting function check in all modules of llvm_mode
diff --git a/llvm_mode/LLVMInsTrim.so.cc b/llvm_mode/LLVMInsTrim.so.cc
index 24df6d42..39b2dedd 100644
--- a/llvm_mode/LLVMInsTrim.so.cc
+++ b/llvm_mode/LLVMInsTrim.so.cc
@@ -190,7 +190,8 @@ struct InsTrim : public ModulePass {
 
           if (instFilename.str().empty()) {
 
-            /* If the original location is empty, try using the inlined location */
+            /* If the original location is empty, try using the inlined location
+             */
             DILocation *oDILoc = cDILoc->getInlinedAt();
             if (oDILoc) {
 
@@ -424,8 +425,8 @@ struct InsTrim : public ModulePass {
 
     }
 
-    OKF("Instrumented %u locations (%llu, %llu) (%s mode)\n",
-        total_instr, total_rs, total_hs,
+    OKF("Instrumented %u locations (%llu, %llu) (%s mode)\n", total_instr,
+        total_rs, total_hs,
         getenv("AFL_HARDEN")
             ? "hardened"
             : ((getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN"))
diff --git a/src/afl-fuzz-init.c b/src/afl-fuzz-init.c
index 2ef2c4e7..6efa6227 100644
--- a/src/afl-fuzz-init.c
+++ b/src/afl-fuzz-init.c
@@ -184,11 +184,21 @@ void bind_to_free_cpu(void) {
     "For this platform we do not have free CPU binding code yet. If possible, please supply a PR to https://github.com/vanhauser-thc/AFLplusplus"
 #endif
 
-  for (i = 0; i < cpu_core_count; ++i)
-    if (!cpu_used[i]) break;
+  size_t cpu_start = 0;
 
+  try:
+#ifndef __ANDROID__
+    for (i = cpu_start; i < cpu_core_count; i++)
+      if (!cpu_used[i]) break;
   if (i == cpu_core_count) {
 
+#else
+    for (i = cpu_core_count - cpu_start - 1; i > -1; i--)
+      if (!cpu_used[i]) break;
+  if (i == -1) {
+
+#endif
+
     SAYF("\n" cLRD "[-] " cRST
          "Uh-oh, looks like all %d CPU cores on your system are allocated to\n"
          "    other instances of afl-fuzz (or similar CPU-locked tasks). "
@@ -197,12 +207,11 @@ void bind_to_free_cpu(void) {
          "you are\n"
          "    absolutely sure, you can set AFL_NO_AFFINITY and try again.\n",
          cpu_core_count);
-
     FATAL("No more free CPU cores");
 
   }
 
-  OKF("Found a free CPU core, binding to #%u.", i);
+  OKF("Found a free CPU core, try binding to #%u.", i);
 
   cpu_aff = i;
 
@@ -212,22 +221,31 @@ void bind_to_free_cpu(void) {
 #elif defined(__NetBSD__)
   c = cpuset_create();
   if (c == NULL) PFATAL("cpuset_create failed");
-
   cpuset_set(i, c);
 #endif
 
 #if defined(__linux__)
-  if (sched_setaffinity(0, sizeof(c), &c)) PFATAL("sched_setaffinity failed");
+  if (sched_setaffinity(0, sizeof(c), &c)) {
+
+    if (cpu_start == cpu_core_count)
+      PFATAL("sched_setaffinity failed for CPU %d, exit", i);
+    WARNF("sched_setaffinity failed to CPU %d, trying next CPU", i);
+    cpu_start++;
+    goto try
+      ;
+
+  }
+
 #elif defined(__FreeBSD__) || defined(__DragonFly__)
   if (pthread_setaffinity_np(pthread_self(), sizeof(c), &c))
     PFATAL("pthread_setaffinity failed");
 #elif defined(__NetBSD__)
-  if (pthread_setaffinity_np(pthread_self(), cpuset_size(c), c))
-    PFATAL("pthread_setaffinity failed");
+if (pthread_setaffinity_np(pthread_self(), cpuset_size(c), c))
+  PFATAL("pthread_setaffinity failed");
 
-  cpuset_destroy(c);
+cpuset_destroy(c);
 #else
-  // this will need something for other platforms
+// this will need something for other platforms
 #endif
 
 }
-- 
cgit 1.4.1