about summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-x  afl-cmin       474
-rwxr-xr-x  afl-cmin.bash  470
2 files changed, 474 insertions, 470 deletions
diff --git a/afl-cmin b/afl-cmin
index 1dd782d8..75dc63a7 100755
--- a/afl-cmin
+++ b/afl-cmin
@@ -1,470 +1,4 @@
-#!/usr/bin/env bash
-#
-# american fuzzy lop++ - corpus minimization tool
-# ---------------------------------------------
-#
-# Originally written by Michal Zalewski
-#
-# Copyright 2014, 2015 Google Inc. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at:
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# This tool tries to find the smallest subset of files in the input directory
-# that still trigger the full range of instrumentation data points seen in
-# the starting corpus. This has two uses:
-#
-#   - Screening large corpora of input files before using them as a seed for
-#     afl-fuzz. The tool will remove functionally redundant files and likely
-#     leave you with a much smaller set.
-#
-#     (In this case, you probably also want to consider running afl-tmin on
-#     the individual files later on to reduce their size.)
-#
-#   - Minimizing the corpus generated organically by afl-fuzz, perhaps when
-#     planning to feed it to more resource-intensive tools. The tool achieves
-#     this by removing all entries that used to trigger unique behaviors in the
-#     past, but have been made obsolete by later finds.
-#
-# Note that the tool doesn't modify the files themselves. For that, you want
-# afl-tmin.
-#
-# This script must use bash because other shells may have hardcoded limits on
-# array sizes.
-#
-
-echo "corpus minimization tool for afl-fuzz by Michal Zalewski"
-echo
-
-#########
-# SETUP #
-#########
-
-# Process command-line options...
-
-MEM_LIMIT=200
-TIMEOUT=none
-
-unset IN_DIR OUT_DIR STDIN_FILE EXTRA_PAR MEM_LIMIT_GIVEN \
-  AFL_CMIN_CRASHES_ONLY AFL_CMIN_ALLOW_ANY QEMU_MODE UNICORN_MODE
-
-while getopts "+i:o:f:m:t:eQUCh" opt; do
-
-  case "$opt" in 
-
-    "h")
-	;;
-
-    "i")
-         IN_DIR="$OPTARG"
-         ;;
-
-    "o")
-         OUT_DIR="$OPTARG"
-         ;;
-    "f")
-         STDIN_FILE="$OPTARG"
-         ;;
-    "m")
-         MEM_LIMIT="$OPTARG"
-         MEM_LIMIT_GIVEN=1
-         ;;
-    "t")
-         TIMEOUT="$OPTARG"
-         ;;
-    "e")
-         EXTRA_PAR="$EXTRA_PAR -e"
-         ;;
-    "C")
-         export AFL_CMIN_CRASHES_ONLY=1
-         ;;
-    "Q")
-         EXTRA_PAR="$EXTRA_PAR -Q"
-         test "$MEM_LIMIT_GIVEN" = "" && MEM_LIMIT=250
-         QEMU_MODE=1
-         ;;
-    "U")
-         EXTRA_PAR="$EXTRA_PAR -U"
-         test "$MEM_LIMIT_GIVEN" = "" && MEM_LIMIT=250
-         UNICORN_MODE=1
-         ;;    
-    "?")
-         exit 1
-         ;;
-
-   esac
-
-done
-
-shift $((OPTIND-1))
-
-TARGET_BIN="$1"
-
-if [ "$TARGET_BIN" = "" -o "$IN_DIR" = "" -o "$OUT_DIR" = "" ]; then
-
-  cat 1>&2 <<_EOF_
-Usage: $0 [ options ] -- /path/to/target_app [ ... ]
-
-Required parameters:
-
-  -i dir        - input directory with the starting corpus
-  -o dir        - output directory for minimized files
-
-Execution control settings:
-
-  -f file       - location read by the fuzzed program (stdin)
-  -m megs       - memory limit for child process ($MEM_LIMIT MB)
-  -t msec       - run time limit for child process (none)
-  -Q            - use binary-only instrumentation (QEMU mode)
-  -U            - use unicorn-based instrumentation (Unicorn mode)
-  
-Minimization settings:
-
-  -C            - keep crashing inputs, reject everything else
-  -e            - solve for edge coverage only, ignore hit counts
-
-For additional tips, please consult docs/README.
-
-_EOF_
-  exit 1
-fi
-
-# Do a sanity check to discourage the use of /tmp, since we can't really
-# handle this safely from a shell script.
-
-if [ "$AFL_ALLOW_TMP" = "" ]; then
-
-  echo "$IN_DIR" | grep -qE '^(/var)?/tmp/'
-  T1="$?"
-
-  echo "$TARGET_BIN" | grep -qE '^(/var)?/tmp/'
-  T2="$?"
-
-  echo "$OUT_DIR" | grep -qE '^(/var)?/tmp/'
-  T3="$?"
-
-  echo "$STDIN_FILE" | grep -qE '^(/var)?/tmp/'
-  T4="$?"
-
-  echo "$PWD" | grep -qE '^(/var)?/tmp/'
-  T5="$?"
-
-  if [ "$T1" = "0" -o "$T2" = "0" -o "$T3" = "0" -o "$T4" = "0" -o "$T5" = "0" ]; then
-    echo "[-] Error: do not use this script in /tmp or /var/tmp." 1>&2
-    exit 1
-  fi
-
-fi
-
-# If @@ is specified, but there's no -f, let's come up with a temporary input
-# file name.
-
-TRACE_DIR="$OUT_DIR/.traces"
-
-if [ "$STDIN_FILE" = "" ]; then
-
-  if echo "$*" | grep -qF '@@'; then
-    STDIN_FILE="$TRACE_DIR/.cur_input"
-  fi
-
-fi
-
-# Check for obvious errors.
-
-if [ ! "$MEM_LIMIT" = "none" ]; then
-
-  if [ "$MEM_LIMIT" -lt "5" ]; then
-    echo "[-] Error: dangerously low memory limit." 1>&2
-    exit 1
-  fi
-
-fi
-
-if [ ! "$TIMEOUT" = "none" ]; then
-
-  if [ "$TIMEOUT" -lt "10" ]; then
-    echo "[-] Error: dangerously low timeout." 1>&2
-    exit 1
-  fi
-
-fi
-
-if [ ! -f "$TARGET_BIN" -o ! -x "$TARGET_BIN" ]; then
-
-  TNEW="`which "$TARGET_BIN" 2>/dev/null`"
-
-  if [ ! -f "$TNEW" -o ! -x "$TNEW" ]; then
-    echo "[-] Error: binary '$TARGET_BIN' not found or not executable." 1>&2
-    exit 1
-  fi
-
-  TARGET_BIN="$TNEW"
-
-fi
-
-if [ "$AFL_SKIP_BIN_CHECK" = "" -a "$QEMU_MODE" = "" -a "$UNICORN_MODE" = "" ]; then
-
-  if ! grep -qF "__AFL_SHM_ID" "$TARGET_BIN"; then
-    echo "[-] Error: binary '$TARGET_BIN' doesn't appear to be instrumented." 1>&2
-    exit 1
-  fi
-
-fi
-
-if [ ! -d "$IN_DIR" ]; then
-  echo "[-] Error: directory '$IN_DIR' not found." 1>&2
-  exit 1
-fi
-
-test -d "$IN_DIR/queue" && IN_DIR="$IN_DIR/queue"
-
-find "$OUT_DIR" -name 'id[:_]*' -maxdepth 1 -exec rm -- {} \; 2>/dev/null
-rm -rf "$TRACE_DIR" 2>/dev/null
-
-rmdir "$OUT_DIR" 2>/dev/null
-
-if [ -d "$OUT_DIR" ]; then
-  echo "[-] Error: directory '$OUT_DIR' exists and is not empty - delete it first." 1>&2
-  exit 1
-fi
-
-mkdir -m 700 -p "$TRACE_DIR" || exit 1
-
-if [ ! "$STDIN_FILE" = "" ]; then
-  rm -f "$STDIN_FILE" || exit 1
-  touch "$STDIN_FILE" || exit 1
-fi
-
-if [ "$AFL_PATH" = "" ]; then
-  SHOWMAP="${0%/afl-cmin}/afl-showmap"
-else
-  SHOWMAP="$AFL_PATH/afl-showmap"
-fi
-
-if [ ! -x "$SHOWMAP" ]; then
-  echo "[-] Error: can't find 'afl-showmap' - please set AFL_PATH." 1>&2
-  rm -rf "$TRACE_DIR"
-  exit 1
-fi
-
-IN_COUNT=$((`ls -- "$IN_DIR" 2>/dev/null | wc -l`))
-
-if [ "$IN_COUNT" = "0" ]; then
-  echo "[+] Hmm, no inputs in the target directory. Nothing to be done."
-  rm -rf "$TRACE_DIR"
-  exit 1
-fi
-
-FIRST_FILE=`ls "$IN_DIR" | head -1`
-
-# Make sure that we're not dealing with a directory.
-
-if [ -d "$IN_DIR/$FIRST_FILE" ]; then
-  echo "[-] Error: The target directory contains subdirectories - please fix." 1>&2
-  rm -rf "$TRACE_DIR"
-  exit 1
-fi
-
-# Check for the more efficient way to copy files...
-
-if ln "$IN_DIR/$FIRST_FILE" "$TRACE_DIR/.link_test" 2>/dev/null; then
-  CP_TOOL=ln
-else
-  CP_TOOL=cp
-fi
-
-# Make sure that we can actually get anything out of afl-showmap before we
-# waste too much time.
-
-echo "[*] Testing the target binary..."
-
-if [ "$STDIN_FILE" = "" ]; then
-
-  AFL_CMIN_ALLOW_ANY=1 "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/.run_test" -Z $EXTRA_PAR -- "$@" <"$IN_DIR/$FIRST_FILE"
-
-else
-
-  cp "$IN_DIR/$FIRST_FILE" "$STDIN_FILE"
-  AFL_CMIN_ALLOW_ANY=1 "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/.run_test" -Z $EXTRA_PAR -A "$STDIN_FILE" -- "$@" </dev/null
-
-fi
-
-FIRST_COUNT=$((`grep -c . "$TRACE_DIR/.run_test"`))
-
-if [ "$FIRST_COUNT" -gt "0" ]; then
-
-  echo "[+] OK, $FIRST_COUNT tuples recorded."
-
-else
-
-  echo "[-] Error: no instrumentation output detected (perhaps crash or timeout)." 1>&2
-  test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR"
-  exit 1
-
-fi
-
-# Let's roll!
-
-#############################
-# STEP 1: COLLECTING TRACES #
-#############################
-
-echo "[*] Obtaining traces for input files in '$IN_DIR'..."
-
-(
-
-  CUR=0
-
-  if [ "$STDIN_FILE" = "" ]; then
-
-    ls "$IN_DIR" | while read -r fn; do
-
-      CUR=$((CUR+1))
-      printf "\\r    Processing file $CUR/$IN_COUNT... "
-
-      "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/$fn" -Z $EXTRA_PAR -- "$@" <"$IN_DIR/$fn"
-
-    done
-
-  else
-
-    ls "$IN_DIR" | while read -r fn; do
-
-      CUR=$((CUR+1))
-      printf "\\r    Processing file $CUR/$IN_COUNT... "
-
-      cp "$IN_DIR/$fn" "$STDIN_FILE"
-
-      "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/$fn" -Z $EXTRA_PAR -A "$STDIN_FILE" -- "$@" </dev/null
-
-    done
-
-
-  fi
-
-)
-
-echo
-
-##########################
-# STEP 2: SORTING TUPLES #
-##########################
-
-# With this out of the way, we sort all tuples by popularity across all
-# datasets. The reasoning here is that we won't be able to avoid the files
-# that trigger unique tuples anyway, so we will want to start with them and
-# see what's left.
-
-echo "[*] Sorting trace sets (this may take a while)..."
-
-ls "$IN_DIR" | sed "s#^#$TRACE_DIR/#" | tr '\n' '\0' | xargs -0 -n 1 cat | \
-  sort | uniq -c | sort -k 1,1 -n >"$TRACE_DIR/.all_uniq"
-
-TUPLE_COUNT=$((`grep -c . "$TRACE_DIR/.all_uniq"`))
-
-echo "[+] Found $TUPLE_COUNT unique tuples across $IN_COUNT files."
-
-#####################################
-# STEP 3: SELECTING CANDIDATE FILES #
-#####################################
-
-# The next step is to find the best candidate for each tuple. The "best"
-# part is understood simply as the smallest input that includes a particular
-# tuple in its trace. Empirical evidence suggests that this produces smaller
-# datasets than more involved algorithms that could be still pulled off in
-# a shell script.
-
-echo "[*] Finding best candidates for each tuple..."
-
-CUR=0
-
-ls -rS "$IN_DIR" | while read -r fn; do
-
-  CUR=$((CUR+1))
-  printf "\\r    Processing file $CUR/$IN_COUNT... "
-
-  sed "s#\$# $fn#" "$TRACE_DIR/$fn" >>"$TRACE_DIR/.candidate_list"
-
-done
-
-echo
-
-##############################
-# STEP 4: LOADING CANDIDATES #
-##############################
-
-# At this point, we have a file of tuple-file pairs, sorted by file size
-# in ascending order (as a consequence of ls -rS). By doing sort keyed
-# only by tuple (-k 1,1) and configured to output only the first line for
-# every key (-s -u), we end up with the smallest file for each tuple.
-
-echo "[*] Sorting candidate list (be patient)..."
-
-sort -k1,1 -s -u "$TRACE_DIR/.candidate_list" | \
-  sed 's/^/BEST_FILE[/;s/ /]="/;s/$/"/' >"$TRACE_DIR/.candidate_script"
-
-if [ ! -s "$TRACE_DIR/.candidate_script" ]; then
-  echo "[-] Error: no traces obtained from test cases, check syntax!" 1>&2
-  test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR"
-  exit 1
-fi
-
-# The sed command converted the sorted list to a shell script that populates
-# BEST_FILE[tuple]="fname". Let's load that!
-
-. "$TRACE_DIR/.candidate_script"
-
-##########################
-# STEP 5: WRITING OUTPUT #
-##########################
-
-# The final trick is to grab the top pick for each tuple, unless said tuple is
-# already set due to the inclusion of an earlier candidate; and then put all
-# tuples associated with the newly-added file to the "already have" list. The
-# loop works from least popular tuples and toward the most common ones.
-
-echo "[*] Processing candidates and writing output files..."
-
-CUR=0
-
-touch "$TRACE_DIR/.already_have"
-
-while read -r cnt tuple; do
-
-  CUR=$((CUR+1))
-  printf "\\r    Processing tuple $CUR/$TUPLE_COUNT... "
-
-  # If we already have this tuple, skip it.
-
-  grep -q "^$tuple\$" "$TRACE_DIR/.already_have" && continue
-
-  FN=${BEST_FILE[tuple]}
-
-  $CP_TOOL "$IN_DIR/$FN" "$OUT_DIR/$FN"
-
-  if [ "$((CUR % 5))" = "0" ]; then
-    sort -u "$TRACE_DIR/$FN" "$TRACE_DIR/.already_have" >"$TRACE_DIR/.tmp"
-    mv -f "$TRACE_DIR/.tmp" "$TRACE_DIR/.already_have"
-  else
-    cat "$TRACE_DIR/$FN" >>"$TRACE_DIR/.already_have"
-  fi
-
-done <"$TRACE_DIR/.all_uniq"
-
-echo
-
-OUT_COUNT=`ls -- "$OUT_DIR" | wc -l`
-
-if [ "$OUT_COUNT" = "1" ]; then
-  echo "[!] WARNING: All test cases had the same traces, check syntax!"
-fi
-
-echo "[+] Narrowed down to $OUT_COUNT files, saved in '$OUT_DIR'."
-echo
-
-test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR"
-
-exit 0
+#!/usr/bin/env sh
+THISPATH="$(dirname "$0")"          # directory part of the path we were invoked as
+export PATH="${THISPATH}:${PATH}"   # quoted: older dash word-splits export arguments
+awk -f "${0}.awk" -- ${@+"$@"}      # ${@+"$@"} passes the arguments through verbatim
diff --git a/afl-cmin.bash b/afl-cmin.bash
new file mode 100755
index 00000000..1dd782d8
--- /dev/null
+++ b/afl-cmin.bash
@@ -0,0 +1,470 @@
+#!/usr/bin/env bash
+#
+# american fuzzy lop++ - corpus minimization tool
+# ---------------------------------------------
+#
+# Originally written by Michal Zalewski
+#
+# Copyright 2014, 2015 Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# This tool tries to find the smallest subset of files in the input directory
+# that still trigger the full range of instrumentation data points seen in
+# the starting corpus. This has two uses:
+#
+#   - Screening large corpora of input files before using them as a seed for
+#     afl-fuzz. The tool will remove functionally redundant files and likely
+#     leave you with a much smaller set.
+#
+#     (In this case, you probably also want to consider running afl-tmin on
+#     the individual files later on to reduce their size.)
+#
+#   - Minimizing the corpus generated organically by afl-fuzz, perhaps when
+#     planning to feed it to more resource-intensive tools. The tool achieves
+#     this by removing all entries that used to trigger unique behaviors in the
+#     past, but have been made obsolete by later finds.
+#
+# Note that the tool doesn't modify the files themselves. For that, you want
+# afl-tmin.
+#
+# This script must use bash because other shells may have hardcoded limits on
+# array sizes.
+#
+
+echo "corpus minimization tool for afl-fuzz by Michal Zalewski"
+echo
+
+#########
+# SETUP #
+#########
+
+# Process command-line options...
+
+MEM_LIMIT=200
+TIMEOUT=none
+
+unset IN_DIR OUT_DIR STDIN_FILE EXTRA_PAR MEM_LIMIT_GIVEN \
+  AFL_CMIN_CRASHES_ONLY AFL_CMIN_ALLOW_ANY QEMU_MODE UNICORN_MODE
+
+while getopts "+i:o:f:m:t:eQUCh" opt; do
+
+  case "$opt" in 
+
+    "h")
+	;;
+
+    "i")
+         IN_DIR="$OPTARG"
+         ;;
+
+    "o")
+         OUT_DIR="$OPTARG"
+         ;;
+    "f")
+         STDIN_FILE="$OPTARG"
+         ;;
+    "m")
+         MEM_LIMIT="$OPTARG"
+         MEM_LIMIT_GIVEN=1
+         ;;
+    "t")
+         TIMEOUT="$OPTARG"
+         ;;
+    "e")
+         EXTRA_PAR="$EXTRA_PAR -e"
+         ;;
+    "C")
+         export AFL_CMIN_CRASHES_ONLY=1
+         ;;
+    "Q")
+         EXTRA_PAR="$EXTRA_PAR -Q"
+         test "$MEM_LIMIT_GIVEN" = "" && MEM_LIMIT=250
+         QEMU_MODE=1
+         ;;
+    "U")
+         EXTRA_PAR="$EXTRA_PAR -U"
+         test "$MEM_LIMIT_GIVEN" = "" && MEM_LIMIT=250
+         UNICORN_MODE=1
+         ;;    
+    "?")
+         exit 1
+         ;;
+
+   esac
+
+done
+
+shift $((OPTIND-1))
+
+TARGET_BIN="$1"
+
+if [ "$TARGET_BIN" = "" -o "$IN_DIR" = "" -o "$OUT_DIR" = "" ]; then
+
+  cat 1>&2 <<_EOF_
+Usage: $0 [ options ] -- /path/to/target_app [ ... ]
+
+Required parameters:
+
+  -i dir        - input directory with the starting corpus
+  -o dir        - output directory for minimized files
+
+Execution control settings:
+
+  -f file       - location read by the fuzzed program (stdin)
+  -m megs       - memory limit for child process ($MEM_LIMIT MB)
+  -t msec       - run time limit for child process (none)
+  -Q            - use binary-only instrumentation (QEMU mode)
+  -U            - use unicorn-based instrumentation (Unicorn mode)
+  
+Minimization settings:
+
+  -C            - keep crashing inputs, reject everything else
+  -e            - solve for edge coverage only, ignore hit counts
+
+For additional tips, please consult docs/README.
+
+_EOF_
+  exit 1
+fi
+
+# Do a sanity check to discourage the use of /tmp, since we can't really
+# handle this safely from a shell script.
+
+if [ "$AFL_ALLOW_TMP" = "" ]; then
+  # Match /tmp and /var/tmp themselves as well as anything beneath them.
+  echo "$IN_DIR" | grep -qE '^(/var)?/tmp(/|$)'
+  T1="$?"
+
+  echo "$TARGET_BIN" | grep -qE '^(/var)?/tmp(/|$)'
+  T2="$?"
+
+  echo "$OUT_DIR" | grep -qE '^(/var)?/tmp(/|$)'
+  T3="$?"
+
+  echo "$STDIN_FILE" | grep -qE '^(/var)?/tmp(/|$)'
+  T4="$?"
+
+  echo "$PWD" | grep -qE '^(/var)?/tmp(/|$)'
+  T5="$?"
+
+  if [ "$T1" = "0" -o "$T2" = "0" -o "$T3" = "0" -o "$T4" = "0" -o "$T5" = "0" ]; then
+    echo "[-] Error: do not use this script in /tmp or /var/tmp." 1>&2
+    exit 1
+  fi
+
+fi
+
+# If @@ is specified, but there's no -f, let's come up with a temporary input
+# file name.
+
+TRACE_DIR="$OUT_DIR/.traces"
+
+if [ "$STDIN_FILE" = "" ]; then
+
+  if echo "$*" | grep -qF '@@'; then
+    STDIN_FILE="$TRACE_DIR/.cur_input"
+  fi
+
+fi
+
+# Check for obvious errors.
+
+if [ ! "$MEM_LIMIT" = "none" ]; then
+
+  if [ "$MEM_LIMIT" -lt "5" ]; then
+    echo "[-] Error: dangerously low memory limit." 1>&2
+    exit 1
+  fi
+
+fi
+
+if [ ! "$TIMEOUT" = "none" ]; then
+
+  if [ "$TIMEOUT" -lt "10" ]; then
+    echo "[-] Error: dangerously low timeout." 1>&2
+    exit 1
+  fi
+
+fi
+
+if [ ! -f "$TARGET_BIN" -o ! -x "$TARGET_BIN" ]; then
+
+  TNEW="`which "$TARGET_BIN" 2>/dev/null`"
+
+  if [ ! -f "$TNEW" -o ! -x "$TNEW" ]; then
+    echo "[-] Error: binary '$TARGET_BIN' not found or not executable." 1>&2
+    exit 1
+  fi
+
+  TARGET_BIN="$TNEW"
+
+fi
+
+if [ "$AFL_SKIP_BIN_CHECK" = "" -a "$QEMU_MODE" = "" -a "$UNICORN_MODE" = "" ]; then
+
+  if ! grep -qF "__AFL_SHM_ID" "$TARGET_BIN"; then
+    echo "[-] Error: binary '$TARGET_BIN' doesn't appear to be instrumented." 1>&2
+    exit 1
+  fi
+
+fi
+
+if [ ! -d "$IN_DIR" ]; then
+  echo "[-] Error: directory '$IN_DIR' not found." 1>&2
+  exit 1
+fi
+
+test -d "$IN_DIR/queue" && IN_DIR="$IN_DIR/queue"
+
+find "$OUT_DIR" -name 'id[:_]*' -maxdepth 1 -exec rm -- {} \; 2>/dev/null
+rm -rf "$TRACE_DIR" 2>/dev/null
+
+rmdir "$OUT_DIR" 2>/dev/null
+
+if [ -d "$OUT_DIR" ]; then
+  echo "[-] Error: directory '$OUT_DIR' exists and is not empty - delete it first." 1>&2
+  exit 1
+fi
+
+mkdir -m 700 -p "$TRACE_DIR" || exit 1
+
+if [ ! "$STDIN_FILE" = "" ]; then
+  rm -f "$STDIN_FILE" || exit 1
+  touch "$STDIN_FILE" || exit 1
+fi
+
+# Default to the directory we were invoked from; dirname works under any script name.
+SHOWMAP="$(dirname "$0")/afl-showmap"
+if [ ! "$AFL_PATH" = "" ]; then
+  SHOWMAP="$AFL_PATH/afl-showmap"
+fi
+
+if [ ! -x "$SHOWMAP" ]; then
+  echo "[-] Error: can't find 'afl-showmap' - please set AFL_PATH." 1>&2
+  rm -rf "$TRACE_DIR"
+  exit 1
+fi
+
+IN_COUNT=$((`ls -- "$IN_DIR" 2>/dev/null | wc -l`))
+
+if [ "$IN_COUNT" = "0" ]; then
+  echo "[+] Hmm, no inputs in the target directory. Nothing to be done."
+  rm -rf "$TRACE_DIR"
+  exit 1
+fi
+
+FIRST_FILE=`ls "$IN_DIR" | head -1`
+
+# Make sure that we're not dealing with a directory.
+
+if [ -d "$IN_DIR/$FIRST_FILE" ]; then
+  echo "[-] Error: The target directory contains subdirectories - please fix." 1>&2
+  rm -rf "$TRACE_DIR"
+  exit 1
+fi
+
+# Check for the more efficient way to copy files...
+
+if ln "$IN_DIR/$FIRST_FILE" "$TRACE_DIR/.link_test" 2>/dev/null; then
+  CP_TOOL=ln
+else
+  CP_TOOL=cp
+fi
+
+# Make sure that we can actually get anything out of afl-showmap before we
+# waste too much time.
+
+echo "[*] Testing the target binary..."
+
+if [ "$STDIN_FILE" = "" ]; then
+
+  AFL_CMIN_ALLOW_ANY=1 "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/.run_test" -Z $EXTRA_PAR -- "$@" <"$IN_DIR/$FIRST_FILE"
+
+else
+
+  cp "$IN_DIR/$FIRST_FILE" "$STDIN_FILE"
+  AFL_CMIN_ALLOW_ANY=1 "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/.run_test" -Z $EXTRA_PAR -A "$STDIN_FILE" -- "$@" </dev/null
+
+fi
+
+FIRST_COUNT=$((`grep -c . "$TRACE_DIR/.run_test"`))
+
+if [ "$FIRST_COUNT" -gt "0" ]; then
+
+  echo "[+] OK, $FIRST_COUNT tuples recorded."
+
+else
+
+  echo "[-] Error: no instrumentation output detected (perhaps crash or timeout)." 1>&2
+  test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR"
+  exit 1
+
+fi
+
+# Let's roll!
+
+#############################
+# STEP 1: COLLECTING TRACES #
+#############################
+
+echo "[*] Obtaining traces for input files in '$IN_DIR'..."
+
+(
+
+  CUR=0
+
+  if [ "$STDIN_FILE" = "" ]; then
+
+    ls "$IN_DIR" | while read -r fn; do
+
+      CUR=$((CUR+1))
+      printf "\\r    Processing file $CUR/$IN_COUNT... "
+
+      "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/$fn" -Z $EXTRA_PAR -- "$@" <"$IN_DIR/$fn"
+
+    done
+
+  else
+
+    ls "$IN_DIR" | while read -r fn; do
+
+      CUR=$((CUR+1))
+      printf "\\r    Processing file $CUR/$IN_COUNT... "
+
+      cp "$IN_DIR/$fn" "$STDIN_FILE"
+
+      "$SHOWMAP" -m "$MEM_LIMIT" -t "$TIMEOUT" -o "$TRACE_DIR/$fn" -Z $EXTRA_PAR -A "$STDIN_FILE" -- "$@" </dev/null
+
+    done
+
+
+  fi
+
+)
+
+echo
+
+##########################
+# STEP 2: SORTING TUPLES #
+##########################
+
+# With this out of the way, we sort all tuples by popularity across all
+# datasets. The reasoning here is that we won't be able to avoid the files
+# that trigger unique tuples anyway, so we will want to start with them and
+# see what's left.
+
+echo "[*] Sorting trace sets (this may take a while)..."
+
+ls "$IN_DIR" | sed "s#^#$TRACE_DIR/#" | tr '\n' '\0' | xargs -0 -n 1 cat | \
+  sort | uniq -c | sort -k 1,1 -n >"$TRACE_DIR/.all_uniq"
+
+TUPLE_COUNT=$((`grep -c . "$TRACE_DIR/.all_uniq"`))
+
+echo "[+] Found $TUPLE_COUNT unique tuples across $IN_COUNT files."
+
+#####################################
+# STEP 3: SELECTING CANDIDATE FILES #
+#####################################
+
+# The next step is to find the best candidate for each tuple. The "best"
+# part is understood simply as the smallest input that includes a particular
+# tuple in its trace. Empirical evidence suggests that this produces smaller
+# datasets than more involved algorithms that could be still pulled off in
+# a shell script.
+
+echo "[*] Finding best candidates for each tuple..."
+
+CUR=0
+
+ls -rS "$IN_DIR" | while read -r fn; do
+
+  CUR=$((CUR+1))
+  printf "\\r    Processing file $CUR/$IN_COUNT... "
+
+  sed "s#\$# $fn#" "$TRACE_DIR/$fn" >>"$TRACE_DIR/.candidate_list"
+
+done
+
+echo
+
+##############################
+# STEP 4: LOADING CANDIDATES #
+##############################
+
+# At this point, we have a file of tuple-file pairs, sorted by file size
+# in ascending order (as a consequence of ls -rS). By doing sort keyed
+# only by tuple (-k 1,1) and configured to output only the first line for
+# every key (-s -u), we end up with the smallest file for each tuple.
+
+echo "[*] Sorting candidate list (be patient)..."
+
+sort -k1,1 -s -u "$TRACE_DIR/.candidate_list" | \
+  sed 's/^/BEST_FILE[/;s/ /]="/;s/$/"/' >"$TRACE_DIR/.candidate_script"
+
+if [ ! -s "$TRACE_DIR/.candidate_script" ]; then
+  echo "[-] Error: no traces obtained from test cases, check syntax!" 1>&2
+  test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR"
+  exit 1
+fi
+
+# The sed command converted the sorted list to a shell script that populates
+# BEST_FILE[tuple]="fname". Let's load that!
+
+. "$TRACE_DIR/.candidate_script"
+
+##########################
+# STEP 5: WRITING OUTPUT #
+##########################
+
+# The final trick is to grab the top pick for each tuple, unless said tuple is
+# already set due to the inclusion of an earlier candidate; and then put all
+# tuples associated with the newly-added file to the "already have" list. The
+# loop works from least popular tuples and toward the most common ones.
+
+echo "[*] Processing candidates and writing output files..."
+
+CUR=0
+
+touch "$TRACE_DIR/.already_have"
+
+while read -r cnt tuple; do
+
+  CUR=$((CUR+1))
+  printf "\\r    Processing tuple $CUR/$TUPLE_COUNT... "
+
+  # If we already have this tuple, skip it.
+
+  grep -q "^$tuple\$" "$TRACE_DIR/.already_have" && continue
+
+  FN=${BEST_FILE[tuple]}
+
+  $CP_TOOL "$IN_DIR/$FN" "$OUT_DIR/$FN"
+
+  if [ "$((CUR % 5))" = "0" ]; then
+    sort -u "$TRACE_DIR/$FN" "$TRACE_DIR/.already_have" >"$TRACE_DIR/.tmp"
+    mv -f "$TRACE_DIR/.tmp" "$TRACE_DIR/.already_have"
+  else
+    cat "$TRACE_DIR/$FN" >>"$TRACE_DIR/.already_have"
+  fi
+
+done <"$TRACE_DIR/.all_uniq"
+
+echo
+
+# $(( )) strips the blank padding some wc implementations emit, as done for IN_COUNT.
+OUT_COUNT=$(( $(ls -- "$OUT_DIR" | wc -l) ))
+if [ "$OUT_COUNT" = "1" ]; then
+  echo "[!] WARNING: All test cases had the same traces, check syntax!"
+fi
+
+echo "[+] Narrowed down to $OUT_COUNT files, saved in '$OUT_DIR'."
+echo
+
+test "$AFL_KEEP_TRACES" = "" && rm -rf "$TRACE_DIR"
+
+exit 0