about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- Makefile 2
-rw-r--r-- README.md 15
-rw-r--r-- collect-values (renamed from reach) 26
-rw-r--r-- fix.m4 81
-rw-r--r-- sort-inputs 51
5 files changed, 113 insertions, 62 deletions
diff --git a/Makefile b/Makefile
index 85f634e..809ace3 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ PREFIX ?= /usr/local
 BIN_PREFIX ::= $(DESTDIR)$(PREFIX)/bin/taosc-
 DATA_DIR ::= $(DESTDIR)$(PREFIX)/share/taosc
 
-BIN ::= fix measure-stack reach scout synth trace-call
+BIN ::= collect-values fix measure-stack scout sort-inputs synth trace-call
 DATA ::= collect cover jump patch
 
 all: $(BIN) $(DATA)
diff --git a/README.md b/README.md
index 682632d..e30b715 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@ Taosc is an automated makeshift patcher for binary programs.
 
 ## Installation
 
-Taosc depends on GDB, [Dyninst], [E9Patch], [FUZZOLIC], [GNU Parallel]
+Taosc depends on [AFL++], [Dyninst], [E9Patch], [GNU Findutils], [FUZZOLIC]
 and POSIX utilities.  To build taosc, you need [GNU M4] and a compiler
 for C++23 and [Zig] 0.15:
 
@@ -16,7 +16,15 @@ To install taosc to `$prefix`, you'll also need `install(1p)`:
 
 ## Usage
 
-    taosc-fix WORKDIR TIMEOUT EXECUTABLE PROOFS_OF_CONCEPT [OPTION]...
+    taosc-fix TIMEOUT WORKDIR PROOFS_OF_CONCEPT EXECUTABLE ARG...
+
+Fix `EXECUTABLE`, which crashes for PoC(s) in the `PROOFS_OF_CONCEPT` directory
+when it is run with `ARG`(s), where `@@` is the placeholder for input files.
+
+Processes taking more than `TIMEOUT` seconds to terminate
+are treated the same as crashes.
+
+Files in `WORKDIR` are overwritten without any warning.
 
 ## Copying
 
@@ -25,9 +33,10 @@ under the terms of the GNU Affero General Public License as published
 by the Free Software Foundation, either version 3 of the License,
 or (at your option) any later version.
 
+[AFL++]: https://aflplus.plus
 [Dyninst]: https://github.com/dyninst/dyninst
 [E9Patch]: https://github.com/gjduck/e9patch
+[GNU Findutils]: https://www.gnu.org/software/findutils
 [FUZZOLIC]: https://season-lab.github.io/fuzzolic
 [GNU M4]: https://www.gnu.org/software/m4
-[GNU Parallel]: https://www.gnu.org/software/parallel
 [Zig]: https://ziglang.org
diff --git a/reach b/collect-values
index 1827fc5..6397b0c 100644
--- a/reach
+++ b/collect-values
@@ -1,5 +1,5 @@
 #!/bin/sh
-# Patch location coverage checker
+# Variable value collector
 # Copyright (C) 2025  Nguyễn Gia Phong
 #
 # This file is part of taosc.
@@ -17,11 +17,21 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with taosc.  If not, see <https://www.gnu.org/licenses/>.
 
-set -u
-test $# -lt 2 && echo Usage: taosc-reach DURATION COMMAND [ARG]... && exit 1
-reached=$(mktemp -u)
-TAOSC_OUTPUT=$reached timeout -k 0 $1 ${@:2}
-test -f $reached || exit 2
-trap "rm $reached" EXIT
-test -s $reached && exit 3
+set -eux -o pipefail
+if test $# -lt 6  # DURATION STACK_SIZE DEST INPUT COMMAND and at least one ARG
+then
+  echo Usage: taosc-collect-values DURATION STACK_SIZE DEST INPUT COMMAND ARG...
+  exit 1
+fi
+timeout=$1
+stack_size=$2
+grandparent="$3"  # DEST: root directory of the output tree
+input="$4"
+parent="$(basename "$(dirname "$input")")"  # name of the directory holding INPUT
+output="$grandparent/$parent/$(basename "$input")"  # DEST/<parent>/<input name>
+template="${@:5}"  # COMMAND ARG... joined into one string; @@ marks the input
+cmd="$(printf %s "$template" | sed "s#@@#$(printf %s "$input" | sed 's/[#&\\]/\\&/g')#g")"  # escape #, & and backslash in INPUT so sed inserts the path literally
+set +e  # a failing COMMAND must not abort this script
+AFL_USE_QASAN=1 TAOSC_STACK_SIZE=$stack_size TAOSC_OUTPUT=$output \
+  timeout -k 0 $timeout afl-qemu-trace $cmd 1>/dev/null 2>&1
+exit 0
diff --git a/fix.m4 b/fix.m4
index fc7c2ce..d896a5c 100644
--- a/fix.m4
+++ b/fix.m4
@@ -18,50 +18,51 @@
 # along with taosc.  If not, see <https://www.gnu.org/licenses/>.
 
 save_exit_code() {
+  template="${@:3}"  # command template: every argument after TIMEOUT ($1) and INPUT ($2)
+  cmd="$(printf %s "$template" | sed "s#@@#$(printf %s "$2" | sed 's/[#&\\]/\\&/g')#g")"  # escape #, & and backslash in INPUT so sed inserts the path literally
   set +e
-  timeout -k 1 $1 ${@:2} 1>/dev/null 2>&1
+  AFL_USE_QASAN=1 timeout -k 0 $1 afl-qemu-trace $cmd
   exit_code=$?
   set -e
 }
 
 bad() {
-  save_exit_code $@
+  save_exit_code $@ 1>/dev/null 2>&1  # discard the run's output; the checks below only read $exit_code
   test $exit_code -gt 128 ||
     test $exit_code -ge 124 -a $exit_code -le 127 # timeout
 }
 
-if test $# -lt 4
+if test $# -lt 5
 then
-  echo Usage: taosc-fix WORKDIR TIMEOUT EXECUTABLE PROOFS_OF_CONCEPT [OPTION]...
+  echo Usage: taosc-fix TIMEOUT WORKDIR PROOFS_OF_CONCEPT EXECUTABLE ARG...
   exit 1
 fi
 
+timeout=$1
 set -eux -o pipefail
-wd="$(realpath $1)"
+wd="$(realpath "$2")"
 test -d "$wd"
-timeout=$2
-bin="$wd/$(basename $3)"
-binary="$(realpath $3)"
-test -x "$binary"
-poc="$(realpath $4)"
+poc="$(realpath "$3")"
 test -d "$poc"
 test "$(ls -A "$poc")"
-options="${@:5}" # TODO: interpolation
+binary="$(realpath "$4")"
+test -x "$binary"
+bin="$wd/$(basename "$4")"
+args="${@:5}"
 
 mkdir -p "$wd"
 rm -fr "$wd/poc"
 cp -r "$poc" "$wd/poc"
 for exploit in "$wd"/poc/*
 do
-  gdb --batch --ex run --ex backtrace --args\
-    "$binary" $options "$exploit" 2>/dev/null |
-    grep '^#[0-9]\+ \+0x[0-9a-f]\+' |
-    awk '!$7 || $7 == bin {print $1, $2}' "bin=$binary" |
-    sed 's/^#//'
+  save_exit_code $timeout "$exploit" "$binary" $args 2>&1 1>/dev/null |
+    grep '^    #' |
+    grep -F "$binary" |
+    sed 's/^    #\([0-9]\+ 0x[0-9a-f]\+\).*$/\1/'
 done | sort -n | uniq > "$wd/stack-trace"
 
-grep '^0 0x[0-9a-f]\+$' "$wd/stack-trace" |
-  sed 's/^0 0x0*//' > "$wd/call-trace"
+(grep '^0 0x[0-9a-f]\+$' "$wd/stack-trace" | sed 's/^0 0x0*//' ||
+  true) > "$wd/call-trace"
 # Stack trace contains return addresses, not call addresses:
 # https://devblogs.microsoft.com/oldnewthing?p=96116
 grep -v '^0 0x[0-9a-f]\+$' "$wd/stack-trace" |
@@ -83,7 +84,7 @@ taosc-scout "$binary" < "$wd/call-trace" |
       cp -r "$poc" "$wd/poc"
       for exploit in "$wd/poc"/*
       do
-        if bad $timeout env TAOSC_DEST=0x$dest "$bin.jump" $options "$exploit"
+        if TAOSC_DEST=0x$dest bad $timeout "$exploit" "$bin.jump" $args
         then
           continue 2 # next destination
         fi
@@ -91,7 +92,7 @@ taosc-scout "$binary" < "$wd/call-trace" |
       echo $loc > "$wd/patch-location"
       echo $dest >> "$wd/destinations"
     done
-  done 1>/dev/null 2>&1
+  done
 test -s "$wd/patch-location"
 test -s "$wd/destinations"
 
@@ -104,42 +105,22 @@ e9tool -100 -M addr=$patch_loc -P 'if dest(state)@patch goto'\
   -o "$bin.patched" "$binary"
 
 # TODO: FUZZOLIC's options
-fuzzolic -kmprst 90000 -i "$poc" -o "$wd/fuzzolic" -- "$binary" $options @@ ||
+fuzzolic -kmprst 90000 -i "$poc" -o "$wd/fuzzolic" -- "$binary" $args ||
   true # FIXME: failing with the same status as the target program
 rm -fr "$wd/input"
 mkdir -p "$wd/input/benign"
 cp -r "$poc" "$wd/input/malicious"
-# TODO: use parallel
-for dat in "$wd"/fuzzolic/fuzzolic-*/test_case_*.dat
-do
-  if taosc-reach $timeout "$bin.covered" $options "$dat" 1>/dev/null 2>&1
-  then
-    if bad $timeout "$binary" $options "$dat"
-    then
-      cp $dat "$wd/input/malicious"
-    else
-      cp $dat "$wd/input/benign"
-    fi
-  fi
-done
+find "$wd/fuzzolic" -name 'test_case_*.dat' -print0 |
+  xargs -I '{}' -0 -P$(nproc) -n1 \
+  taosc-sort-inputs $timeout "$wd"/input/{malicious,benign} '{}' \
+  "$bin.covered" $args
 
 rm -fr "$wd/values"
-for input_dir in "$wd"/input/*
-do
-  output_dir="$wd/values/$(basename "$input_dir")"
-  mkdir -p "$output_dir"
-  # TODO: use parallel
-  if test "$(ls -A "$input_dir")"
-  then
-    for input in "$input_dir"/*
-    do
-      output="$output_dir/$(basename "$input")"
-      save_exit_code $timeout\
-        env TAOSC_STACK_SIZE=$stack_size TAOSC_OUTPUT=$output\
-        "$bin.collect" $options "$input"
-    done
-  fi
-done
+mkdir -p "$wd"/values/{benign,malicious}
+find "$wd/input" ! -type d -print0 |  # skip input/, benign/ and malicious/ themselves: only their entries are inputs
+  xargs -I '{}' -0 -P$(nproc) -n1 \
+  taosc-collect-values $timeout $stack_size "$wd/values" '{}' \
+  "$bin.collect" $args
 # TODO: split if the patch location is reached multiple times with an input
 taosc-synth $stack_size "$wd"/values/{benign,malicious} > "$wd/predicates"
 # vim: filetype=sh.m4
diff --git a/sort-inputs b/sort-inputs
new file mode 100644
index 0000000..6d03e70
--- /dev/null
+++ b/sort-inputs
@@ -0,0 +1,51 @@
+#!/bin/sh
+# Input sorter
+# Copyright (C) 2025  Nguyễn Gia Phong
+#
+# This file is part of taosc.
+#
+# Taosc is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Taosc is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with taosc.  If not, see <https://www.gnu.org/licenses/>.
+
+set -eux -o pipefail
+if test $# -lt 6  # DURATION BAD GOOD INPUT COMMAND and at least one ARG
+then
+  echo Usage: taosc-sort-inputs DURATION BAD GOOD INPUT COMMAND ARG...
+  exit 1
+fi
+timeout=$1
+bad_dir="$2"  # receives INPUT when the run exits above 128 or within 124..127
+good_dir="$3"  # receives INPUT for any other exit status
+input="$4"
+template="${@:5}"  # COMMAND ARG... joined into one string; @@ marks the input
+cmd="$(printf %s "$template" | sed "s#@@#$(printf %s "$input" | sed 's/[#&\\]/\\&/g')#g")"  # escape #, & and backslash in INPUT so sed inserts the path literally
+reached=$(mktemp -u)  # name only, nothing is created here; presumably COMMAND writes it via TAOSC_OUTPUT
+set +e
+AFL_USE_QASAN=1 TAOSC_OUTPUT=$reached timeout -k 0 $timeout \
+  afl-qemu-trace $cmd 1>/dev/null 2>&1
+exit_code=$?
+set -e
+if test -f $reached  # the file exists only if something created it during the run
+then
+  trap "rm $reached" EXIT
+  if test ! -s $reached  # INPUT is sorted only when the file was left empty
+  then
+    if test $exit_code -gt 128 ||
+      test $exit_code -ge 124 -a $exit_code -le 127 # timeout
+    then
+      cp "$input" "$bad_dir"
+    else
+      cp "$input" "$good_dir"
+    fi
+  fi
+fi