about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--GNUmakefile6
-rw-r--r--GNUmakefile.gcc_plugin13
-rw-r--r--GNUmakefile.llvm6
-rw-r--r--README.md6
-rw-r--r--docs/Changelog.md12
-rw-r--r--docs/binaryonly_fuzzing.md3
-rw-r--r--docs/ideas.md6
-rw-r--r--dynamic_list.txt62
-rw-r--r--frida_mode/README.md8
-rw-r--r--frida_mode/include/ctx.h11
-rw-r--r--frida_mode/include/instrument.h10
-rw-r--r--frida_mode/src/asan/asan_arm32.c (renamed from frida_mode/src/asan/asan_arm.c)0
-rw-r--r--frida_mode/src/asan/asan_arm64.c76
-rw-r--r--frida_mode/src/cmplog/cmplog_arm32.c (renamed from frida_mode/src/cmplog/cmplog_arm.c)0
-rw-r--r--frida_mode/src/cmplog/cmplog_arm64.c295
-rw-r--r--frida_mode/src/ctx/ctx_arm32.c16
-rw-r--r--frida_mode/src/ctx/ctx_arm64.c303
-rw-r--r--frida_mode/src/instrument/instrument.c5
-rw-r--r--frida_mode/src/instrument/instrument_arm32.c12
-rw-r--r--frida_mode/src/instrument/instrument_arm64.c12
-rw-r--r--frida_mode/src/instrument/instrument_debug.c58
-rw-r--r--frida_mode/src/instrument/instrument_x64.c12
-rw-r--r--frida_mode/src/instrument/instrument_x86.c12
-rw-r--r--frida_mode/src/persistent/persistent_arm64.c354
-rw-r--r--frida_mode/src/persistent/persistent_x64.c2
-rw-r--r--frida_mode/src/persistent/persistent_x86.c2
-rw-r--r--frida_mode/src/stats/stats.c1
-rw-r--r--frida_mode/src/stats/stats_arm32.c (renamed from frida_mode/src/stats/stats_arm.c)0
-rw-r--r--frida_mode/test/cmplog/GNUmakefile11
-rw-r--r--frida_mode/test/cmplog/Makefile4
-rw-r--r--frida_mode/test/cmplog/cmplog.c2
-rw-r--r--frida_mode/test/fasan/GNUmakefile4
-rw-r--r--frida_mode/test/persistent_ret/GNUmakefile4
-rw-r--r--frida_mode/test/png/persistent/GNUmakefile14
-rw-r--r--frida_mode/test/png/persistent/Makefile3
-rw-r--r--frida_mode/test/png/persistent/hook/GNUmakefile4
-rw-r--r--frida_mode/test/png/persistent/hook/aflpp_qemu_driver_hook.c96
-rw-r--r--instrumentation/README.llvm.md2
-rw-r--r--instrumentation/afl-compiler-rt.o.c146
-rw-r--r--instrumentation/afl-llvm-pass.so.cc2
-rw-r--r--src/afl-analyze.c218
-rw-r--r--src/afl-cc.c12
-rw-r--r--src/afl-fuzz-init.c17
-rw-r--r--src/afl-fuzz-stats.c52
-rw-r--r--src/afl-fuzz.c3
-rwxr-xr-xtest/test-llvm.sh8
46 files changed, 1591 insertions, 314 deletions
diff --git a/GNUmakefile b/GNUmakefile
index a45f6d5c..bd206af0 100644
--- a/GNUmakefile
+++ b/GNUmakefile
@@ -436,8 +436,8 @@ afl-showmap: src/afl-showmap.c src/afl-common.o src/afl-sharedmem.o src/afl-fork
 afl-tmin: src/afl-tmin.c src/afl-common.o src/afl-sharedmem.o src/afl-forkserver.o src/afl-performance.o $(COMM_HDR) | test_x86
 	$(CC) $(CFLAGS) $(COMPILE_STATIC) $(CFLAGS_FLTO) src/$@.c src/afl-common.o src/afl-sharedmem.o src/afl-forkserver.o src/afl-performance.o -o $@ $(LDFLAGS)
 
-afl-analyze: src/afl-analyze.c src/afl-common.o src/afl-sharedmem.o src/afl-performance.o $(COMM_HDR) | test_x86
-	$(CC) $(CFLAGS) $(COMPILE_STATIC) $(CFLAGS_FLTO) src/$@.c src/afl-common.o src/afl-sharedmem.o src/afl-performance.o -o $@ $(LDFLAGS)
+afl-analyze: src/afl-analyze.c src/afl-common.o src/afl-sharedmem.o src/afl-performance.o src/afl-forkserver.o $(COMM_HDR) | test_x86
+	$(CC) $(CFLAGS) $(COMPILE_STATIC) $(CFLAGS_FLTO) src/$@.c src/afl-common.o src/afl-sharedmem.o src/afl-performance.o src/afl-forkserver.o -o $@ $(LDFLAGS)
 
 afl-gotcpu: src/afl-gotcpu.c src/afl-common.o $(COMM_HDR) | test_x86
 	$(CC) $(CFLAGS) $(COMPILE_STATIC) $(CFLAGS_FLTO) src/$@.c src/afl-common.o -o $@ $(LDFLAGS)
@@ -572,7 +572,7 @@ clean:
 	$(MAKE) -C qemu_mode/unsigaction clean
 	$(MAKE) -C qemu_mode/libcompcov clean
 	$(MAKE) -C qemu_mode/libqasan clean
-	$(MAKE) -C frida_mode clean
+	-$(MAKE) -C frida_mode clean
 ifeq "$(IN_REPO)" "1"
 	test -e qemu_mode/qemuafl/Makefile && $(MAKE) -C qemu_mode/qemuafl clean || true
 	test -e unicorn_mode/unicornafl/Makefile && $(MAKE) -C unicorn_mode/unicornafl clean || true
diff --git a/GNUmakefile.gcc_plugin b/GNUmakefile.gcc_plugin
index b0f90f1b..bce97b2f 100644
--- a/GNUmakefile.gcc_plugin
+++ b/GNUmakefile.gcc_plugin
@@ -100,7 +100,7 @@ ifeq "$(SYS)" "SunOS"
 endif
 
 
-PROGS        = ./afl-gcc-pass.so
+PROGS        = ./afl-gcc-pass.so ./afl-compiler-rt.o ./afl-compiler-rt-32.o ./afl-compiler-rt-64.o
 
 .PHONY: all
 all: test_shm test_deps $(PROGS) test_build all_done
@@ -130,6 +130,17 @@ test_deps:
 afl-common.o: ./src/afl-common.c
 	$(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@ $(LDFLAGS)
 
+./afl-compiler-rt.o: instrumentation/afl-compiler-rt.o.c
+	$(CC) $(CFLAGS_SAFE) $(CPPFLAGS) -O3 -Wno-unused-result -fPIC -c $< -o $@
+
+./afl-compiler-rt-32.o: instrumentation/afl-compiler-rt.o.c
+	@printf "[*] Building 32-bit variant of the runtime (-m32)... "
+	@$(CC) $(CFLAGS_SAFE) $(CPPFLAGS) -O3 -Wno-unused-result -m32 -fPIC -c $< -o $@ 2>/dev/null; if [ "$$?" = "0" ]; then echo "success!"; ln -sf afl-compiler-rt-32.o afl-llvm-rt-32.o; else echo "failed (that's fine)"; fi
+
+./afl-compiler-rt-64.o: instrumentation/afl-compiler-rt.o.c
+	@printf "[*] Building 64-bit variant of the runtime (-m64)... "
+	@$(CC) $(CFLAGS_SAFE) $(CPPFLAGS) -O3 -Wno-unused-result -m64 -fPIC -c $< -o $@ 2>/dev/null; if [ "$$?" = "0" ]; then echo "success!"; ln -sf afl-compiler-rt-64.o afl-llvm-rt-64.o; else echo "failed (that's fine)"; fi
+
 ./afl-gcc-pass.so: instrumentation/afl-gcc-pass.so.cc | test_deps
 	$(CXX) $(CXXEFLAGS) $(PLUGIN_FLAGS) -shared $< -o $@
 	ln -sf afl-cc afl-gcc-fast
diff --git a/GNUmakefile.llvm b/GNUmakefile.llvm
index 2d50badc..95140cb0 100644
--- a/GNUmakefile.llvm
+++ b/GNUmakefile.llvm
@@ -45,7 +45,7 @@ endif
 LLVMVER  = $(shell $(LLVM_CONFIG) --version 2>/dev/null | sed 's/git//' | sed 's/svn//' )
 LLVM_MAJOR = $(shell $(LLVM_CONFIG) --version 2>/dev/null | sed 's/\..*//' )
 LLVM_MINOR = $(shell $(LLVM_CONFIG) --version 2>/dev/null | sed 's/.*\.//' | sed 's/git//' | sed 's/svn//' | sed 's/ .*//' )
-LLVM_UNSUPPORTED = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^[0-5]\.' && echo 1 || echo 0 )
+LLVM_UNSUPPORTED = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^[0-2]\.|^3\.[0-7]\.' && echo 1 || echo 0 )
 LLVM_TOO_NEW = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^1[3-9]' && echo 1 || echo 0 )
 LLVM_NEW_API = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^1[0-9]' && echo 1 || echo 0 )
 LLVM_10_OK = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^1[1-9]|^10\.[1-9]|^10\.0.[1-9]' && echo 1 || echo 0 )
@@ -57,11 +57,11 @@ LLVM_APPLE_XCODE = $(shell clang -v 2>&1 | grep -q Apple && echo 1 || echo 0)
 LLVM_LTO   = 0
 
 ifeq "$(LLVMVER)" ""
-  $(warning [!] llvm_mode needs llvm-config, which was not found)
+  $(warning [!] llvm_mode needs llvm-config, which was not found. Set LLVM_CONFIG to its path and retry.)
 endif
 
 ifeq "$(LLVM_UNSUPPORTED)" "1"
-  $(error llvm_mode only supports llvm from version 6.0 onwards)
+  $(error llvm_mode only supports llvm from version 3.8 onwards)
 endif
 
 ifeq "$(LLVM_TOO_NEW)" "1"
diff --git a/README.md b/README.md
index ba612edb..bc547b3c 100644
--- a/README.md
+++ b/README.md
@@ -84,7 +84,7 @@ behaviours and defaults:
 
 ## Important features of afl++
 
-  afl++ supports llvm from 6.0 up to version 12, very fast binary fuzzing with QEMU 5.1
+  afl++ supports llvm from 3.8 up to version 12, very fast binary fuzzing with QEMU 5.1
   with laf-intel and redqueen, frida mode, unicorn mode, gcc plugin, full *BSD,
   Mac OS, Solaris and Android support and much, much, much more.
 
@@ -296,7 +296,7 @@ anything below 9 is not recommended.
     |
     v
 +---------------------------------+
-| clang/clang++ 6.0+ is available | --> use LLVM mode (afl-clang-fast/afl-clang-fast++)
+| clang/clang++ 3.8+ is available | --> use LLVM mode (afl-clang-fast/afl-clang-fast++)
 +---------------------------------+     see [instrumentation/README.llvm.md](instrumentation/README.llvm.md)
     |
     | if not, or if the target fails with LLVM afl-clang-fast/++
@@ -801,7 +801,7 @@ Alternatively you can use frida_mode, just switch `-Q` with `-O` and remove the
 LAF instance.
 
 Then run as many instances as you have cores left with either -Q mode or - better -
-use a binary rewriter like afl-dyninst, retrowrite, zaflr, etc.
+use a binary rewriter like afl-dyninst, retrowrite, zafl, etc.
 
 For Qemu and Frida mode, check out the persistent mode, it gives a huge speed
 improvement if it is possible to use.
diff --git a/docs/Changelog.md b/docs/Changelog.md
index a49c0672..6c851460 100644
--- a/docs/Changelog.md
+++ b/docs/Changelog.md
@@ -10,6 +10,18 @@ sending a mail to <afl-users+subscribe@googlegroups.com>.
 
 ### Version ++3.14a (release)
   - Fix for llvm 13
+  - afl-fuzz:
+    - fix -F when a '/' was part of the parameter
+    - removed implied -D deterministic from -M main
+    - if the target becomes unavailable check out out/default/error.txt for
+      an indicator why
+  - afl-cc
+    - support partial linking
+    - We do support llvm versions from 3.8 again
+  - afl-analyze:
+    - fix timeout handling and support forkserver
+  - ensure afl-compiler-rt is built for gcc_plugin
+  - afl-analyze now uses the forkserver for increased performance
 
 
 ### Version ++3.13c (release)
diff --git a/docs/binaryonly_fuzzing.md b/docs/binaryonly_fuzzing.md
index 11e1dbeb..3b32f5ed 100644
--- a/docs/binaryonly_fuzzing.md
+++ b/docs/binaryonly_fuzzing.md
@@ -122,7 +122,7 @@
   [https://github.com/vanhauser-thc/afl-dyninst](https://github.com/vanhauser-thc/afl-dyninst)
 
 
-## RETROWRITE
+## RETROWRITE, ZAFL, ... other binary rewriters
 
   If you have an x86/x86_64 binary that still has its symbols, is compiled
   with position independant code (PIC/PIE) and does not use most of the C++
@@ -131,6 +131,7 @@
 
   It is at about 80-85% performance.
 
+  [https://git.zephyr-software.com/opensrc/zafl](https://git.zephyr-software.com/opensrc/zafl)
   [https://github.com/HexHive/retrowrite](https://github.com/HexHive/retrowrite)
 
 
diff --git a/docs/ideas.md b/docs/ideas.md
index e25d3ba6..0ee69851 100644
--- a/docs/ideas.md
+++ b/docs/ideas.md
@@ -34,6 +34,12 @@ Mentor: any
 Other programming languages also use llvm hence they could (easily?) supported
 for fuzzing, e.g. mono, swift, go, kotlin native, fortran, ...
 
+GCC also supports: Objective-C, Fortran, Ada, Go, and D
+(according to [Gcc homepage](https://gcc.gnu.org/))
+
+LLVM is also used by: Rust, LLGo (Go), kaleidoscope (Haskell), flang (Fortran), emscripten (JavaScript, WASM), ilwasm (CIL (C#))
+(according to [LLVM frontends](https://gist.github.com/axic/62d66fb9d8bccca6cc48fa9841db9241))
+
 Mentor: vanhauser-thc
 
 ## Machine Learning
diff --git a/dynamic_list.txt b/dynamic_list.txt
index d1905d43..7293ae77 100644
--- a/dynamic_list.txt
+++ b/dynamic_list.txt
@@ -1,48 +1,56 @@
 {
+  "__afl_already_initialized_first";
+  "__afl_already_initialized_forkserver";
+  "__afl_already_initialized_second";
+  "__afl_already_initialized_shm";
   "__afl_area_ptr";
+  "__afl_auto_early";
+  "__afl_auto_first";
+  "__afl_auto_init";
+  "__afl_auto_second";
+  "__afl_coverage_discard";
+  "__afl_coverage_interesting";
+  "__afl_coverage_off";
+  "__afl_coverage_on";
+  "__afl_coverage_skip";
+  "__afl_dictionary";
+  "__afl_dictionary_len";
+  "__afl_final_loc";
+  "__afl_fuzz_len";
+  "__afl_fuzz_ptr";
   "__afl_manual_init";
+  "__afl_map_addr";
   "__afl_persistent_loop";
-  "__afl_auto_init";
-  "__afl_area_initial";
-  "__afl_prev_loc";
   "__afl_prev_caller";
   "__afl_prev_ctx";
-  "__afl_final_loc";
-  "__afl_map_addr";
-  "__afl_dictionary";
-  "__afl_dictionary_len";
+  "__afl_prev_loc";
   "__afl_selective_coverage";
   "__afl_selective_coverage_start_off";
   "__afl_selective_coverage_temp";
-  "__afl_coverage_discard";
-  "__afl_coverage_skip";
-  "__afl_coverage_on";
-  "__afl_coverage_off";
-  "__afl_coverage_interesting";
-  "__afl_fuzz_len";
-  "__afl_fuzz_ptr";
   "__afl_sharedmem_fuzzing";
-  "__sanitizer_cov_trace_pc_guard";
-  "__sanitizer_cov_trace_pc_guard_init";
+  "__afl_trace";
   "__cmplog_ins_hook1";
+  "__cmplog_ins_hook16";
   "__cmplog_ins_hook2";
   "__cmplog_ins_hook4";
+  "__cmplog_ins_hook8";
   "__cmplog_ins_hookN";
-  "__cmplog_ins_hook16";
+  "__cmplog_rtn_gcc_stdstring_cstring";
+  "__cmplog_rtn_gcc_stdstring_stdstring";
+  "__cmplog_rtn_hook";
+  "__cmplog_rtn_llvm_stdstring_cstring";
+  "__cmplog_rtn_llvm_stdstring_stdstring";
   "__sanitizer_cov_trace_cmp1";
-  "__sanitizer_cov_trace_const_cmp1";
+  "__sanitizer_cov_trace_cmp16";
   "__sanitizer_cov_trace_cmp2";
-  "__sanitizer_cov_trace_const_cmp2";
   "__sanitizer_cov_trace_cmp4";
-  "__sanitizer_cov_trace_const_cmp4";
   "__sanitizer_cov_trace_cmp8";
-  "__sanitizer_cov_trace_const_cmp8";
-  "__sanitizer_cov_trace_cmp16";
+  "__sanitizer_cov_trace_const_cmp1";
   "__sanitizer_cov_trace_const_cmp16";
+  "__sanitizer_cov_trace_const_cmp2";
+  "__sanitizer_cov_trace_const_cmp4";
+  "__sanitizer_cov_trace_const_cmp8";
+  "__sanitizer_cov_trace_pc_guard";
+  "__sanitizer_cov_trace_pc_guard_init";
   "__sanitizer_cov_trace_switch";
-  "__cmplog_rtn_hook";
-  "__cmplog_rtn_gcc_stdstring_cstring";
-  "__cmplog_rtn_gcc_stdstring_stdstring";
-  "__cmplog_rtn_llvm_stdstring_cstring";
-  "__cmplog_rtn_llvm_stdstring_stdstring";
 };
diff --git a/frida_mode/README.md b/frida_mode/README.md
index 0103a395..d7dd72a0 100644
--- a/frida_mode/README.md
+++ b/frida_mode/README.md
@@ -21,16 +21,16 @@ perhaps leverage some of its design and implementation.
   | Feature/Instrumentation  | frida-mode | Notes                                        |
   | -------------------------|:----------:|:--------------------------------------------:|
   | NeverZero                |     x      |                                              |
-  | Persistent Mode          |     x      | (x86/x64 only)(Only on function boundaries)  |
+  | Persistent Mode          |     x      | (x86/x64/aarch64 only)                       |
   | LAF-Intel / CompCov      |     -      | (CMPLOG is better 90% of the time)           |
-  | CMPLOG                   |     x      | (x86/x64 only)                               |
+  | CMPLOG                   |     x      | (x86/x64/aarch64 only)                       |
   | Selective Instrumentation|     x      |                                              |
   | Non-Colliding Coverage   |     -      | (Not possible in binary-only instrumentation |
   | Ngram prev_loc Coverage  |     -      |                                              |
   | Context Coverage         |     -      |                                              |
   | Auto Dictionary          |     -      |                                              |
   | Snapshot LKM Support     |     -      |                                              |
-  | In-Memory Test Cases     |     x      | (x86/x64 only)                               |
+  | In-Memory Test Cases     |     x      | (x86/x64/aarch64 only)                       |
 
 ## Compatibility
 Currently FRIDA mode supports Linux and macOS targets on both x86/x64
@@ -288,7 +288,7 @@ to validate memory accesses against the shadow memory.
 
 ## TODO
 
-The next features to be added are Aarch64 and Aarch32 support as well as looking at
+The next feature to be added is Aarch32 support, as well as looking at
 potential performance improvements. The intention is to achieve feature parity with
 QEMU mode in due course. Contributions are welcome, but please get in touch to
 ensure that efforts are deconflicted.
diff --git a/frida_mode/include/ctx.h b/frida_mode/include/ctx.h
index cbcc892a..67274aee 100644
--- a/frida_mode/include/ctx.h
+++ b/frida_mode/include/ctx.h
@@ -3,8 +3,15 @@
 
 #include "frida-gum.h"
 
-#if defined(__x86_64__) || defined(__i386__)
-gsize ctx_read_reg(GumCpuContext *ctx, x86_reg reg);
+#if defined(__x86_64__)
+gsize ctx_read_reg(GumX64CpuContext *ctx, x86_reg reg);
+#elif defined(__i386__)
+gsize ctx_read_reg(GumIA32CpuContext *ctx, x86_reg reg);
+#elif defined(__aarch64__)
+gsize  ctx_read_reg(GumArm64CpuContext *ctx, arm64_reg reg);
+size_t ctx_get_size(const cs_insn *instr, cs_arm64_op *operand);
+#elif defined(__arm__)
+gsize ctx_read_reg(GumArmCpuContext *ctx, arm_reg reg);
 #endif
 
 #endif
diff --git a/frida_mode/include/instrument.h b/frida_mode/include/instrument.h
index ed92c25a..577481d1 100644
--- a/frida_mode/include/instrument.h
+++ b/frida_mode/include/instrument.h
@@ -19,9 +19,11 @@ gboolean instrument_is_coverage_optimize_supported(void);
 void instrument_coverage_optimize(const cs_insn *   instr,
                                   GumStalkerOutput *output);
 
-void instrument_debug_init(void);
-void instrument_debug_start(uint64_t address, GumStalkerOutput *output);
-void instrument_debug_instruction(uint64_t address, uint16_t size);
-void instrument_debug_end(GumStalkerOutput *output);
+void     instrument_debug_init(void);
+void     instrument_debug_start(uint64_t address, GumStalkerOutput *output);
+void     instrument_debug_instruction(uint64_t address, uint16_t size);
+void     instrument_debug_end(GumStalkerOutput *output);
+void     instrument_flush(GumStalkerOutput *output);
+gpointer instrument_cur(GumStalkerOutput *output);
 #endif
 
diff --git a/frida_mode/src/asan/asan_arm.c b/frida_mode/src/asan/asan_arm32.c
index 79475ced..79475ced 100644
--- a/frida_mode/src/asan/asan_arm.c
+++ b/frida_mode/src/asan/asan_arm32.c
diff --git a/frida_mode/src/asan/asan_arm64.c b/frida_mode/src/asan/asan_arm64.c
index 6262ee18..66138e42 100644
--- a/frida_mode/src/asan/asan_arm64.c
+++ b/frida_mode/src/asan/asan_arm64.c
@@ -1,18 +1,80 @@
+#include <dlfcn.h>
 #include "frida-gum.h"
 
 #include "debug.h"
 
 #include "asan.h"
+#include "ctx.h"
 #include "util.h"
 
 #if defined(__aarch64__)
+
+typedef struct {
+
+  size_t      size;
+  cs_arm64_op operand;
+
+} asan_ctx_t;
+
+typedef void (*asan_loadN_t)(gsize address, uint8_t size);
+typedef void (*asan_storeN_t)(gsize address, uint8_t size);
+
+asan_loadN_t  asan_loadN = NULL;
+asan_storeN_t asan_storeN = NULL;
+
+static void asan_callout(GumCpuContext *ctx, gpointer user_data) {
+
+  asan_ctx_t *  asan_ctx = (asan_ctx_t *)user_data;
+  cs_arm64_op * operand = &asan_ctx->operand;
+  arm64_op_mem *mem = &operand->mem;
+  gsize         base = 0;
+  gsize         index = 0;
+  gsize         address;
+
+  if (mem->base != ARM64_REG_INVALID) { base = ctx_read_reg(ctx, mem->base); }
+
+  if (mem->index != ARM64_REG_INVALID) {
+
+    index = ctx_read_reg(ctx, mem->index);
+
+  }
+
+  address = base + index + mem->disp;
+
+  if ((operand->access & CS_AC_READ) == CS_AC_READ) {
+
+    asan_loadN(address, asan_ctx->size);
+
+  }
+
+  if ((operand->access & CS_AC_WRITE) == CS_AC_WRITE) {
+
+    asan_storeN(address, asan_ctx->size);
+
+  }
+
+}
+
 void asan_instrument(const cs_insn *instr, GumStalkerIterator *iterator) {
 
-  UNUSED_PARAMETER(instr);
   UNUSED_PARAMETER(iterator);
-  if (asan_initialized) {
 
-    FATAL("ASAN mode not supported on this architecture");
+  cs_arm64     arm64 = instr->detail->arm64;
+  cs_arm64_op *operand;
+  asan_ctx_t * ctx;
+
+  if (!asan_initialized) return;
+
+  for (uint8_t i = 0; i < arm64.op_count; i++) {
+
+    operand = &arm64.operands[i];
+
+    if (operand->type != ARM64_OP_MEM) { continue; }
+
+    ctx = g_malloc0(sizeof(asan_ctx_t));
+    ctx->size = ctx_get_size(instr, &arm64.operands[0]);
+    memcpy(&ctx->operand, operand, sizeof(cs_arm64_op));
+    gum_stalker_iterator_put_callout(iterator, asan_callout, ctx, g_free);
 
   }
 
@@ -20,7 +82,13 @@ void asan_instrument(const cs_insn *instr, GumStalkerIterator *iterator) {
 
 void asan_arch_init(void) {
 
-  FATAL("ASAN mode not supported on this architecture");
+  asan_loadN = (asan_loadN_t)dlsym(RTLD_DEFAULT, "__asan_loadN");
+  asan_storeN = (asan_storeN_t)dlsym(RTLD_DEFAULT, "__asan_storeN");
+  if (asan_loadN == NULL || asan_storeN == NULL) {
+
+    FATAL("Frida ASAN failed to find '__asan_loadN' or '__asan_storeN'");
+
+  }
 
 }
 
diff --git a/frida_mode/src/cmplog/cmplog_arm.c b/frida_mode/src/cmplog/cmplog_arm32.c
index 5af28f3f..5af28f3f 100644
--- a/frida_mode/src/cmplog/cmplog_arm.c
+++ b/frida_mode/src/cmplog/cmplog_arm32.c
diff --git a/frida_mode/src/cmplog/cmplog_arm64.c b/frida_mode/src/cmplog/cmplog_arm64.c
index 187d0162..04631ff8 100644
--- a/frida_mode/src/cmplog/cmplog_arm64.c
+++ b/frida_mode/src/cmplog/cmplog_arm64.c
@@ -1,17 +1,304 @@
 #include "frida-gum.h"
 
 #include "debug.h"
+#include "cmplog.h"
 
+#include "ctx.h"
 #include "frida_cmplog.h"
 #include "util.h"
 
 #if defined(__aarch64__)
+
+typedef struct {
+
+  arm64_op_type type;
+  uint8_t       size;
+
+  union {
+
+    arm64_op_mem mem;
+    arm64_reg    reg;
+    int64_t      imm;
+
+  };
+
+} cmplog_ctx_t;
+
+typedef struct {
+
+  cmplog_ctx_t operand1;
+  cmplog_ctx_t operand2;
+  size_t       size;
+
+} cmplog_pair_ctx_t;
+
+static gboolean cmplog_read_mem(GumCpuContext *ctx, uint8_t size,
+                                arm64_op_mem *mem, gsize *val) {
+
+  gsize base = 0;
+  gsize index = 0;
+  gsize address;
+
+  if (mem->base != ARM64_REG_INVALID) { base = ctx_read_reg(ctx, mem->base); }
+
+  if (mem->index != ARM64_REG_INVALID) {
+
+    index = ctx_read_reg(ctx, mem->index);
+
+  }
+
+  address = base + index + mem->disp;
+
+  if (!cmplog_is_readable(address, size)) { return FALSE; }
+
+  switch (size) {
+
+    case 1:
+      *val = *((guint8 *)GSIZE_TO_POINTER(address));
+      return TRUE;
+    case 2:
+      *val = *((guint16 *)GSIZE_TO_POINTER(address));
+      return TRUE;
+    case 4:
+      *val = *((guint32 *)GSIZE_TO_POINTER(address));
+      return TRUE;
+    case 8:
+      *val = *((guint64 *)GSIZE_TO_POINTER(address));
+      return TRUE;
+    default:
+      FATAL("Invalid operand size: %d\n", size);
+
+  }
+
+  return FALSE;
+
+}
+
+static gboolean cmplog_get_operand_value(GumCpuContext *context,
+                                         cmplog_ctx_t *ctx, gsize *val) {
+
+  switch (ctx->type) {
+
+    case ARM64_OP_REG:
+      *val = ctx_read_reg(context, ctx->reg);
+      return TRUE;
+    case ARM64_OP_IMM:
+      *val = ctx->imm;
+      return TRUE;
+    case ARM64_OP_MEM:
+      return cmplog_read_mem(context, ctx->size, &ctx->mem, val);
+    default:
+      FATAL("Invalid operand type: %d\n", ctx->type);
+
+  }
+
+  return FALSE;
+
+}
+
+static void cmplog_call_callout(GumCpuContext *context, gpointer user_data) {
+
+  UNUSED_PARAMETER(user_data);
+
+  gsize address = context->pc;
+  gsize x0 = ctx_read_reg(context, ARM64_REG_X0);
+  gsize x1 = ctx_read_reg(context, ARM64_REG_X1);
+
+  if (((G_MAXULONG - x0) < 32) || ((G_MAXULONG - x1) < 32)) return;
+
+  if (!cmplog_is_readable(x0, 32) || !cmplog_is_readable(x1, 32)) return;
+
+  void *ptr1 = GSIZE_TO_POINTER(x0);
+  void *ptr2 = GSIZE_TO_POINTER(x1);
+
+  uintptr_t k = address;
+
+  k = (k >> 4) ^ (k << 8);
+  k &= CMP_MAP_W - 1;
+
+  __afl_cmp_map->headers[k].type = CMP_TYPE_RTN;
+
+  u32 hits = __afl_cmp_map->headers[k].hits;
+  __afl_cmp_map->headers[k].hits = hits + 1;
+
+  __afl_cmp_map->headers[k].shape = 31;
+
+  hits &= CMP_MAP_RTN_H - 1;
+  gum_memcpy(((struct cmpfn_operands *)__afl_cmp_map->log[k])[hits].v0, ptr1,
+             32);
+  gum_memcpy(((struct cmpfn_operands *)__afl_cmp_map->log[k])[hits].v1, ptr2,
+             32);
+
+}
+
+static void cmplog_instrument_put_operand(cmplog_ctx_t *ctx,
+                                          cs_arm64_op * operand) {
+
+  ctx->type = operand->type;
+  switch (operand->type) {
+
+    case ARM64_OP_REG:
+      gum_memcpy(&ctx->reg, &operand->reg, sizeof(arm64_reg));
+      break;
+    case ARM64_OP_IMM:
+      gum_memcpy(&ctx->imm, &operand->imm, sizeof(int64_t));
+      break;
+    case ARM64_OP_MEM:
+      gum_memcpy(&ctx->mem, &operand->mem, sizeof(arm64_op_mem));
+      break;
+    default:
+      FATAL("Invalid operand type: %d\n", operand->type);
+
+  }
+
+}
+
+static void cmplog_instrument_call(const cs_insn *     instr,
+                                   GumStalkerIterator *iterator) {
+
+  cs_arm64     arm64 = instr->detail->arm64;
+  cs_arm64_op *operand;
+
+  switch (instr->id) {
+
+    case ARM64_INS_BL:
+    case ARM64_INS_BLR:
+    case ARM64_INS_BLRAA:
+    case ARM64_INS_BLRAAZ:
+    case ARM64_INS_BLRAB:
+    case ARM64_INS_BLRABZ:
+      break;
+    default:
+      return;
+
+  }
+
+  if (arm64.op_count != 1) return;
+
+  operand = &arm64.operands[0];
+
+  if (operand->type == ARM64_OP_INVALID) return;
+
+  gum_stalker_iterator_put_callout(iterator, cmplog_call_callout, NULL, NULL);
+
+}
+
+static void cmplog_handle_cmp_sub(GumCpuContext *context, gsize operand1,
+                                  gsize operand2, uint8_t size) {
+
+  gsize address = context->pc;
+
+  register uintptr_t k = (uintptr_t)address;
+
+  k = (k >> 4) ^ (k << 8);
+  k &= CMP_MAP_W - 1;
+
+  __afl_cmp_map->headers[k].type = CMP_TYPE_INS;
+
+  u32 hits = __afl_cmp_map->headers[k].hits;
+  __afl_cmp_map->headers[k].hits = hits + 1;
+
+  __afl_cmp_map->headers[k].shape = (size - 1);
+
+  hits &= CMP_MAP_H - 1;
+  __afl_cmp_map->log[k][hits].v0 = operand1;
+  __afl_cmp_map->log[k][hits].v1 = operand2;
+
+}
+
+static void cmplog_cmp_sub_callout(GumCpuContext *context, gpointer user_data) {
+
+  cmplog_pair_ctx_t *ctx = (cmplog_pair_ctx_t *)user_data;
+  gsize              operand1;
+  gsize              operand2;
+
+  if (!cmplog_get_operand_value(context, &ctx->operand1, &operand1)) { return; }
+  if (!cmplog_get_operand_value(context, &ctx->operand2, &operand2)) { return; }
+
+  cmplog_handle_cmp_sub(context, operand1, operand2, ctx->size);
+
+}
+
+static void cmplog_instrument_cmp_sub_put_callout(GumStalkerIterator *iterator,
+                                                  cs_arm64_op *       operand1,
+                                                  cs_arm64_op *       operand2,
+                                                  size_t              size) {
+
+  cmplog_pair_ctx_t *ctx = g_malloc(sizeof(cmplog_pair_ctx_t));
+  if (ctx == NULL) return;
+
+  cmplog_instrument_put_operand(&ctx->operand1, operand1);
+  cmplog_instrument_put_operand(&ctx->operand2, operand2);
+  ctx->size = size;
+
+  gum_stalker_iterator_put_callout(iterator, cmplog_cmp_sub_callout, ctx,
+                                   g_free);
+
+}
+
+static void cmplog_instrument_cmp_sub(const cs_insn *     instr,
+                                      GumStalkerIterator *iterator) {
+
+  cs_arm64     arm64 = instr->detail->arm64;
+  cs_arm64_op *operand1;
+  cs_arm64_op *operand2;
+  size_t       size;
+
+  switch (instr->id) {
+
+    case ARM64_INS_ADCS:
+    case ARM64_INS_ADDS:
+    case ARM64_INS_ANDS:
+    case ARM64_INS_BICS:
+    case ARM64_INS_CMN:
+    case ARM64_INS_CMP:
+    case ARM64_INS_CMPEQ:
+    case ARM64_INS_CMPGE:
+    case ARM64_INS_CMPGT:
+    case ARM64_INS_CMPHI:
+    case ARM64_INS_CMPHS:
+    case ARM64_INS_CMPLE:
+    case ARM64_INS_CMPLO:
+    case ARM64_INS_CMPLS:
+    case ARM64_INS_CMPLT:
+    case ARM64_INS_CMPNE:
+    case ARM64_INS_EORS:
+    case ARM64_INS_NANDS:
+    case ARM64_INS_NEGS:
+    case ARM64_INS_NGCS:
+    case ARM64_INS_NORS:
+    case ARM64_INS_NOTS:
+    case ARM64_INS_ORNS:
+    case ARM64_INS_ORRS:
+    case ARM64_INS_SBCS:
+    case ARM64_INS_SUBS:
+      break;
+
+    default:
+      return;
+
+  }
+
+  if (arm64.op_count != 2) return;
+
+  operand1 = &arm64.operands[0];
+  operand2 = &arm64.operands[1];
+
+  if (operand1->type == ARM64_OP_INVALID) return;
+  if (operand2->type == ARM64_OP_INVALID) return;
+
+  size = ctx_get_size(instr, &arm64.operands[0]);
+
+  cmplog_instrument_cmp_sub_put_callout(iterator, operand1, operand2, size);
+
+}
+
 void cmplog_instrument(const cs_insn *instr, GumStalkerIterator *iterator) {
 
-  UNUSED_PARAMETER(instr);
-  UNUSED_PARAMETER(iterator);
-  if (__afl_cmp_map == NULL) { return; }
-  FATAL("CMPLOG mode not supported on this architecture");
+  if (__afl_cmp_map == NULL) return;
+
+  cmplog_instrument_call(instr, iterator);
+  cmplog_instrument_cmp_sub(instr, iterator);
 
 }
 
diff --git a/frida_mode/src/ctx/ctx_arm32.c b/frida_mode/src/ctx/ctx_arm32.c
new file mode 100644
index 00000000..a5c6f6d4
--- /dev/null
+++ b/frida_mode/src/ctx/ctx_arm32.c
@@ -0,0 +1,16 @@
+#include "frida-gum.h"
+
+#include "debug.h"
+
+#include "ctx.h"
+
+#if defined(__arm__)
+
+gsize ctx_read_reg(GumArmCpuContext *ctx, arm_reg reg) {
+
+  FATAL("ctx_read_reg unimplemented for this architecture");
+
+}
+
+#endif
+
diff --git a/frida_mode/src/ctx/ctx_arm64.c b/frida_mode/src/ctx/ctx_arm64.c
new file mode 100644
index 00000000..d09896af
--- /dev/null
+++ b/frida_mode/src/ctx/ctx_arm64.c
@@ -0,0 +1,303 @@
+#include "frida-gum.h"
+
+#include "debug.h"
+
+#include "ctx.h"
+
+#if defined(__aarch64__)
+
+  #define ARM64_REG_8(LABEL, REG) \
+    case LABEL: {                 \
+      /* keep low 8 bits */       \
+      return REG & GUM_INT8_MASK; \
+                                  \
+    }
+
+  #define ARM64_REG_16(LABEL, REG)   \
+    case LABEL: {                    \
+      /* keep low 16 bits */         \
+      return (REG & GUM_INT16_MASK); \
+                                     \
+    }
+
+  #define ARM64_REG_32(LABEL, REG)   \
+    case LABEL: {                    \
+      /* keep low 32 bits */         \
+      return (REG & GUM_INT32_MASK); \
+                                     \
+    }
+
+  #define ARM64_REG_64(LABEL, REG) \
+    case LABEL: {                  \
+      /* full 64-bit value */      \
+      return (REG);                \
+                                   \
+    }
+
+gsize ctx_read_reg(GumArm64CpuContext *ctx, arm64_reg reg) {
+  /* Read `reg` from the saved context, masked to the width its name implies. */
+  switch (reg) {
+
+    case ARM64_REG_WZR:
+    case ARM64_REG_XZR:
+      return 0;
+      /* NOTE(review): Bn/Hn are SIMD names yet are read from x[n]; confirm. */
+      ARM64_REG_8(ARM64_REG_B0, ctx->x[0])
+      ARM64_REG_8(ARM64_REG_B1, ctx->x[1])
+      ARM64_REG_8(ARM64_REG_B2, ctx->x[2])
+      ARM64_REG_8(ARM64_REG_B3, ctx->x[3])
+      ARM64_REG_8(ARM64_REG_B4, ctx->x[4])
+      ARM64_REG_8(ARM64_REG_B5, ctx->x[5])
+      ARM64_REG_8(ARM64_REG_B6, ctx->x[6])
+      ARM64_REG_8(ARM64_REG_B7, ctx->x[7])
+      ARM64_REG_8(ARM64_REG_B8, ctx->x[8])
+      ARM64_REG_8(ARM64_REG_B9, ctx->x[9])
+      ARM64_REG_8(ARM64_REG_B10, ctx->x[10])
+      ARM64_REG_8(ARM64_REG_B11, ctx->x[11])
+      ARM64_REG_8(ARM64_REG_B12, ctx->x[12])
+      ARM64_REG_8(ARM64_REG_B13, ctx->x[13])
+      ARM64_REG_8(ARM64_REG_B14, ctx->x[14])
+      ARM64_REG_8(ARM64_REG_B15, ctx->x[15])
+      ARM64_REG_8(ARM64_REG_B16, ctx->x[16])
+      ARM64_REG_8(ARM64_REG_B17, ctx->x[17])
+      ARM64_REG_8(ARM64_REG_B18, ctx->x[18])
+      ARM64_REG_8(ARM64_REG_B19, ctx->x[19])
+      ARM64_REG_8(ARM64_REG_B20, ctx->x[20])
+      ARM64_REG_8(ARM64_REG_B21, ctx->x[21])
+      ARM64_REG_8(ARM64_REG_B22, ctx->x[22])
+      ARM64_REG_8(ARM64_REG_B23, ctx->x[23])
+      ARM64_REG_8(ARM64_REG_B24, ctx->x[24])
+      ARM64_REG_8(ARM64_REG_B25, ctx->x[25])
+      ARM64_REG_8(ARM64_REG_B26, ctx->x[26])
+      ARM64_REG_8(ARM64_REG_B27, ctx->x[27])
+      ARM64_REG_8(ARM64_REG_B28, ctx->x[28])
+      ARM64_REG_8(ARM64_REG_B29, ctx->fp)
+      ARM64_REG_8(ARM64_REG_B30, ctx->lr)
+      ARM64_REG_8(ARM64_REG_B31, ctx->sp)
+
+      ARM64_REG_16(ARM64_REG_H0, ctx->x[0])
+      ARM64_REG_16(ARM64_REG_H1, ctx->x[1])
+      ARM64_REG_16(ARM64_REG_H2, ctx->x[2])
+      ARM64_REG_16(ARM64_REG_H3, ctx->x[3])
+      ARM64_REG_16(ARM64_REG_H4, ctx->x[4])
+      ARM64_REG_16(ARM64_REG_H5, ctx->x[5])
+      ARM64_REG_16(ARM64_REG_H6, ctx->x[6])
+      ARM64_REG_16(ARM64_REG_H7, ctx->x[7])
+      ARM64_REG_16(ARM64_REG_H8, ctx->x[8])
+      ARM64_REG_16(ARM64_REG_H9, ctx->x[9])
+      ARM64_REG_16(ARM64_REG_H10, ctx->x[10])
+      ARM64_REG_16(ARM64_REG_H11, ctx->x[11])
+      ARM64_REG_16(ARM64_REG_H12, ctx->x[12])
+      ARM64_REG_16(ARM64_REG_H13, ctx->x[13])
+      ARM64_REG_16(ARM64_REG_H14, ctx->x[14])
+      ARM64_REG_16(ARM64_REG_H15, ctx->x[15])
+      ARM64_REG_16(ARM64_REG_H16, ctx->x[16])
+      ARM64_REG_16(ARM64_REG_H17, ctx->x[17])
+      ARM64_REG_16(ARM64_REG_H18, ctx->x[18])
+      ARM64_REG_16(ARM64_REG_H19, ctx->x[19])
+      ARM64_REG_16(ARM64_REG_H20, ctx->x[20])
+      ARM64_REG_16(ARM64_REG_H21, ctx->x[21])
+      ARM64_REG_16(ARM64_REG_H22, ctx->x[22])
+      ARM64_REG_16(ARM64_REG_H23, ctx->x[23])
+      ARM64_REG_16(ARM64_REG_H24, ctx->x[24])
+      ARM64_REG_16(ARM64_REG_H25, ctx->x[25])
+      ARM64_REG_16(ARM64_REG_H26, ctx->x[26])
+      ARM64_REG_16(ARM64_REG_H27, ctx->x[27])
+      ARM64_REG_16(ARM64_REG_H28, ctx->x[28])
+      ARM64_REG_16(ARM64_REG_H29, ctx->fp)
+      ARM64_REG_16(ARM64_REG_H30, ctx->lr)
+      ARM64_REG_16(ARM64_REG_H31, ctx->sp)
+
+      ARM64_REG_32(ARM64_REG_W0, ctx->x[0])
+      ARM64_REG_32(ARM64_REG_W1, ctx->x[1])
+      ARM64_REG_32(ARM64_REG_W2, ctx->x[2])
+      ARM64_REG_32(ARM64_REG_W3, ctx->x[3])
+      ARM64_REG_32(ARM64_REG_W4, ctx->x[4])
+      ARM64_REG_32(ARM64_REG_W5, ctx->x[5])
+      ARM64_REG_32(ARM64_REG_W6, ctx->x[6])
+      ARM64_REG_32(ARM64_REG_W7, ctx->x[7])
+      ARM64_REG_32(ARM64_REG_W8, ctx->x[8])
+      ARM64_REG_32(ARM64_REG_W9, ctx->x[9])
+      ARM64_REG_32(ARM64_REG_W10, ctx->x[10])
+      ARM64_REG_32(ARM64_REG_W11, ctx->x[11])
+      ARM64_REG_32(ARM64_REG_W12, ctx->x[12])
+      ARM64_REG_32(ARM64_REG_W13, ctx->x[13])
+      ARM64_REG_32(ARM64_REG_W14, ctx->x[14])
+      ARM64_REG_32(ARM64_REG_W15, ctx->x[15])
+      ARM64_REG_32(ARM64_REG_W16, ctx->x[16])
+      ARM64_REG_32(ARM64_REG_W17, ctx->x[17])
+      ARM64_REG_32(ARM64_REG_W18, ctx->x[18])
+      ARM64_REG_32(ARM64_REG_W19, ctx->x[19])
+      ARM64_REG_32(ARM64_REG_W20, ctx->x[20])
+      ARM64_REG_32(ARM64_REG_W21, ctx->x[21])
+      ARM64_REG_32(ARM64_REG_W22, ctx->x[22])
+      ARM64_REG_32(ARM64_REG_W23, ctx->x[23])
+      ARM64_REG_32(ARM64_REG_W24, ctx->x[24])
+      ARM64_REG_32(ARM64_REG_W25, ctx->x[25])
+      ARM64_REG_32(ARM64_REG_W26, ctx->x[26])
+      ARM64_REG_32(ARM64_REG_W27, ctx->x[27])
+      ARM64_REG_32(ARM64_REG_W28, ctx->x[28])
+      ARM64_REG_32(ARM64_REG_W29, ctx->fp)
+      ARM64_REG_32(ARM64_REG_W30, ctx->lr)
+
+      ARM64_REG_64(ARM64_REG_X0, ctx->x[0])
+      ARM64_REG_64(ARM64_REG_X1, ctx->x[1])
+      ARM64_REG_64(ARM64_REG_X2, ctx->x[2])
+      ARM64_REG_64(ARM64_REG_X3, ctx->x[3])
+      ARM64_REG_64(ARM64_REG_X4, ctx->x[4])
+      ARM64_REG_64(ARM64_REG_X5, ctx->x[5])
+      ARM64_REG_64(ARM64_REG_X6, ctx->x[6])
+      ARM64_REG_64(ARM64_REG_X7, ctx->x[7])
+      ARM64_REG_64(ARM64_REG_X8, ctx->x[8])
+      ARM64_REG_64(ARM64_REG_X9, ctx->x[9])
+      ARM64_REG_64(ARM64_REG_X10, ctx->x[10])
+      ARM64_REG_64(ARM64_REG_X11, ctx->x[11])
+      ARM64_REG_64(ARM64_REG_X12, ctx->x[12])
+      ARM64_REG_64(ARM64_REG_X13, ctx->x[13])
+      ARM64_REG_64(ARM64_REG_X14, ctx->x[14])
+      ARM64_REG_64(ARM64_REG_X15, ctx->x[15])
+      ARM64_REG_64(ARM64_REG_X16, ctx->x[16])
+      ARM64_REG_64(ARM64_REG_X17, ctx->x[17])
+      ARM64_REG_64(ARM64_REG_X18, ctx->x[18])
+      ARM64_REG_64(ARM64_REG_X19, ctx->x[19])
+      ARM64_REG_64(ARM64_REG_X20, ctx->x[20])
+      ARM64_REG_64(ARM64_REG_X21, ctx->x[21])
+      ARM64_REG_64(ARM64_REG_X22, ctx->x[22])
+      ARM64_REG_64(ARM64_REG_X23, ctx->x[23])
+      ARM64_REG_64(ARM64_REG_X24, ctx->x[24])
+      ARM64_REG_64(ARM64_REG_X25, ctx->x[25])
+      ARM64_REG_64(ARM64_REG_X26, ctx->x[26])
+      ARM64_REG_64(ARM64_REG_X27, ctx->x[27])
+      ARM64_REG_64(ARM64_REG_X28, ctx->x[28])
+      ARM64_REG_64(ARM64_REG_FP, ctx->fp)
+      ARM64_REG_64(ARM64_REG_LR, ctx->lr)
+      ARM64_REG_64(ARM64_REG_SP, ctx->sp)
+
+    default:
+      FATAL("Failed to read register: %d", reg);
+      return 0;
+
+  }
+
+}
+
+size_t ctx_get_size(const cs_insn *instr, cs_arm64_op *operand) {
+  /* Size in bytes of the data `instr` handles through `operand`. */
+  uint8_t num_registers;
+  uint8_t count_byte;
+  char    vas_digit;
+  size_t  mnemonic_len;
+  /* Pair loads/stores move two registers, doubling the scalar size. */
+  switch (instr->id) {
+
+    case ARM64_INS_STP:
+    case ARM64_INS_STXP:
+    case ARM64_INS_STNP:
+    case ARM64_INS_STLXP:
+    case ARM64_INS_LDP:
+    case ARM64_INS_LDXP:
+    case ARM64_INS_LDNP:
+      num_registers = 2;
+      break;
+    default:
+      num_registers = 1;
+      break;
+
+  }
+
+  mnemonic_len = strlen(instr->mnemonic);
+  if (mnemonic_len == 0) { FATAL("No mnemonic found"); };
+  /* A trailing b/h/w in the mnemonic fixes the width outright. */
+  char last = instr->mnemonic[mnemonic_len - 1];
+  switch (last) {
+
+    case 'b':
+      return 1;
+    case 'h':
+      return 2;
+    case 'w':
+      return 4 * num_registers;
+
+  }
+  /* No vector arrangement: the size follows the register class. */
+  if (operand->vas == ARM64_VAS_INVALID) {
+
+    if (operand->type == ARM64_OP_REG) {
+
+      switch (operand->reg) {
+
+        case ARM64_REG_WZR:
+        case ARM64_REG_WSP:
+        case ARM64_REG_W0 ... ARM64_REG_W30:
+        case ARM64_REG_S0 ... ARM64_REG_S31:
+          return 4 * num_registers;
+        case ARM64_REG_D0 ... ARM64_REG_D31:
+          return 8 * num_registers;
+        case ARM64_REG_Q0 ... ARM64_REG_Q31:
+          return 16;
+        default:
+          return 8 * num_registers;
+          ;
+
+      }
+
+    }
+
+    return 8 * num_registers;
+
+  }
+  /* Vector operand: ldN/stN mnemonics carry a multiplier digit N. */
+  if (g_str_has_prefix(instr->mnemonic, "st") ||
+      g_str_has_prefix(instr->mnemonic, "ld")) {
+
+    if (mnemonic_len < 3) {
+
+      FATAL("VAS Mnemonic too short: %s\n", instr->mnemonic);
+
+    }
+
+    vas_digit = instr->mnemonic[2];
+    if (vas_digit < '0' || vas_digit > '9') {
+
+      FATAL("VAS Mnemonic digit out of range: %s\n", instr->mnemonic);
+
+    }
+
+    count_byte = vas_digit - '0';
+
+  } else {
+
+    count_byte = 1;
+
+  }
+  /* Bytes covered by one arrangement (lanes * lane width), times N. */
+  switch (operand->vas) {
+
+    case ARM64_VAS_1B:
+      return 1 * count_byte;
+    case ARM64_VAS_1H:
+      return 2 * count_byte;
+    case ARM64_VAS_4B:
+    case ARM64_VAS_1S:
+    case ARM64_VAS_2H:
+      return 4 * count_byte;
+    case ARM64_VAS_8B:
+    case ARM64_VAS_4H:
+    case ARM64_VAS_2S:
+    case ARM64_VAS_1D:
+      return 8 * count_byte;
+    case ARM64_VAS_8H:
+    case ARM64_VAS_4S:
+    case ARM64_VAS_2D:
+    case ARM64_VAS_1Q:
+    case ARM64_VAS_16B:
+      return 16 * count_byte;
+    default:
+      FATAL("Unexpected VAS type: %s %d", instr->mnemonic, operand->vas);
+
+  }
+
+}
+
+#endif
+
diff --git a/frida_mode/src/instrument/instrument.c b/frida_mode/src/instrument/instrument.c
index cd1ac0be..f261e79a 100644
--- a/frida_mode/src/instrument/instrument.c
+++ b/frida_mode/src/instrument/instrument.c
@@ -84,6 +84,8 @@ static void instr_basic_block(GumStalkerIterator *iterator,
 
   while (gum_stalker_iterator_next(iterator, &instr)) {
 
+    if (unlikely(begin)) { instrument_debug_start(instr->address, output); }
+
     if (instr->address == entry_start) { entry_prologue(iterator, output); }
     if (instr->address == persistent_start) { persistent_prologue(output); }
     if (instr->address == persistent_ret) { persistent_epilogue(output); }
@@ -119,8 +121,6 @@ static void instr_basic_block(GumStalkerIterator *iterator,
 
     if (unlikely(begin)) {
 
-      instrument_debug_start(instr->address, output);
-
       prefetch_write(GSIZE_TO_POINTER(instr->address));
 
       if (likely(!excluded)) {
@@ -155,6 +155,7 @@ static void instr_basic_block(GumStalkerIterator *iterator,
 
   }
 
+  instrument_flush(output);
   instrument_debug_end(output);
 
 }
diff --git a/frida_mode/src/instrument/instrument_arm32.c b/frida_mode/src/instrument/instrument_arm32.c
index 1a3c40bb..450a69a3 100644
--- a/frida_mode/src/instrument/instrument_arm32.c
+++ b/frida_mode/src/instrument/instrument_arm32.c
@@ -22,5 +22,17 @@ void instrument_coverage_optimize(const cs_insn *   instr,
 
 }
 
+void instrument_flush(GumStalkerOutput *output) {
+  /* Flush the ARM writer carried by this stalker output. */
+  gum_arm_writer_flush(output->writer.arm);
+
+}
+
+gpointer instrument_cur(GumStalkerOutput *output) {
+  /* Pass through what gum_arm_writer_cur reports for this output. */
+  return gum_arm_writer_cur(output->writer.arm);
+
+}
+
 #endif
 
diff --git a/frida_mode/src/instrument/instrument_arm64.c b/frida_mode/src/instrument/instrument_arm64.c
index fa3afb48..49ee86a2 100644
--- a/frida_mode/src/instrument/instrument_arm64.c
+++ b/frida_mode/src/instrument/instrument_arm64.c
@@ -93,5 +93,17 @@ void instrument_coverage_optimize(const cs_insn *   instr,
 
 }
 
+void instrument_flush(GumStalkerOutput *output) {
+  /* Flush the ARM64 writer carried by this stalker output. */
+  gum_arm64_writer_flush(output->writer.arm64);
+
+}
+
+gpointer instrument_cur(GumStalkerOutput *output) {
+  /* Pass through what gum_arm64_writer_cur reports for this output. */
+  return gum_arm64_writer_cur(output->writer.arm64);
+
+}
+
 #endif
 
diff --git a/frida_mode/src/instrument/instrument_debug.c b/frida_mode/src/instrument/instrument_debug.c
index f8c1df77..0ce26a1c 100644
--- a/frida_mode/src/instrument/instrument_debug.c
+++ b/frida_mode/src/instrument/instrument_debug.c
@@ -7,6 +7,7 @@
 
 #include "debug.h"
 
+#include "instrument.h"
 #include "util.h"
 
 static int      debugging_fd = -1;
@@ -31,44 +32,50 @@ static void instrument_debug(char *format, ...) {
 
 }
 
-static void instrument_disasm(guint8 *code, guint size) {
+static void instrument_disasm(guint8 *start, guint8 *end) {
 
   csh      capstone;
   cs_err   err;
+  uint16_t size; /* bytes still to decode in [start, end) */
   cs_insn *insn;
-  size_t   count, i;
+  size_t   count = 0;
+  size_t   i;
+  uint16_t len = 0; /* bytes consumed by the current pass; must start at 0 */
 
   err = cs_open(GUM_DEFAULT_CS_ARCH,
                 GUM_DEFAULT_CS_MODE | GUM_DEFAULT_CS_ENDIAN, &capstone);
   g_assert(err == CS_ERR_OK);
 
-  count = cs_disasm(capstone, code, size, GPOINTER_TO_SIZE(code), 0, &insn);
-  g_assert(insn != NULL);
+  size = GPOINTER_TO_SIZE(end) - GPOINTER_TO_SIZE(start);
 
-  for (i = 0; i != count; i++) {
+  for (guint8 *curr = start; curr < end; curr += len, size -= len, len = 0) {
 
-    instrument_debug("\t0x%" G_GINT64_MODIFIER "x\t%s %s\n", insn[i].address,
-                     insn[i].mnemonic, insn[i].op_str);
+    count = cs_disasm(capstone, curr, size, GPOINTER_TO_SIZE(curr), 0, &insn);
+    if (insn == NULL) {
 
-  }
+      instrument_debug("\t0x%" G_GINT64_MODIFIER "x\t* 0x%016" G_GSIZE_MODIFIER
+                       "x\n",
+                       (guint64)GPOINTER_TO_SIZE(curr), *(size_t *)curr);
 
-  cs_free(insn, count);
+      len += sizeof(size_t); /* not decodable: dump one word and step over it */
+      continue;
 
-  cs_close(&capstone);
+    }
 
-}
+    for (i = 0; i != count; i++) {
+
+      instrument_debug("\t0x%" G_GINT64_MODIFIER "x\t%s %s\n", insn[i].address,
+                       insn[i].mnemonic, insn[i].op_str);
+
+      len += insn[i].size;
 
-static gpointer instrument_cur(GumStalkerOutput *output) {
+    }
 
-#if defined(__i386__) || defined(__x86_64__)
-  return gum_x86_writer_cur(output->writer.x86);
-#elif defined(__aarch64__)
-  return gum_arm64_writer_cur(output->writer.arm64);
-#elif defined(__arm__)
-  return gum_arm_writer_cur(output->writer.arm);
-#else
-  #error "Unsupported architecture"
-#endif
+    cs_free(insn, count); /* release this pass before the next cs_disasm */
+
+  }
+
+  cs_close(&capstone);
 
 }
 
@@ -111,7 +118,7 @@ void instrument_debug_instruction(uint64_t address, uint16_t size) {
 
   if (likely(debugging_fd < 0)) { return; }
   uint8_t *start = (uint8_t *)GSIZE_TO_POINTER(address);
-  instrument_disasm(start, size);
+  instrument_disasm(start, start + size);
 
 }
 
@@ -119,11 +126,10 @@ void instrument_debug_end(GumStalkerOutput *output) {
 
   if (likely(debugging_fd < 0)) { return; }
   gpointer instrument_gen_end = instrument_cur(output);
-  uint16_t size = GPOINTER_TO_SIZE(instrument_gen_end) -
-                  GPOINTER_TO_SIZE(instrument_gen_start);
 
-  instrument_debug("\nGenerated block %p\n", instrument_gen_start);
-  instrument_disasm(instrument_gen_start, size);
+  instrument_debug("\nGenerated block %p-%p\n", instrument_gen_start,
+                   instrument_gen_end);
+  instrument_disasm(instrument_gen_start, instrument_gen_end);
 
 }
 
diff --git a/frida_mode/src/instrument/instrument_x64.c b/frida_mode/src/instrument/instrument_x64.c
index 901f3bd0..7000e65d 100644
--- a/frida_mode/src/instrument/instrument_x64.c
+++ b/frida_mode/src/instrument/instrument_x64.c
@@ -89,5 +89,17 @@ void instrument_coverage_optimize(const cs_insn *   instr,
 
 }
 
+void instrument_flush(GumStalkerOutput *output) {
+  /* Flush the x86 writer carried by this stalker output. */
+  gum_x86_writer_flush(output->writer.x86);
+
+}
+
+gpointer instrument_cur(GumStalkerOutput *output) {
+  /* Pass through what gum_x86_writer_cur reports for this output. */
+  return gum_x86_writer_cur(output->writer.x86);
+
+}
+
 #endif
 
diff --git a/frida_mode/src/instrument/instrument_x86.c b/frida_mode/src/instrument/instrument_x86.c
index 585bb5b8..04a19e08 100644
--- a/frida_mode/src/instrument/instrument_x86.c
+++ b/frida_mode/src/instrument/instrument_x86.c
@@ -81,5 +81,17 @@ void instrument_coverage_optimize(const cs_insn *   instr,
 
 }
 
+void instrument_flush(GumStalkerOutput *output) {
+  /* Flush the x86 writer carried by this stalker output. */
+  gum_x86_writer_flush(output->writer.x86);
+
+}
+
+gpointer instrument_cur(GumStalkerOutput *output) {
+  /* Pass through what gum_x86_writer_cur reports for this output. */
+  return gum_x86_writer_cur(output->writer.x86);
+
+}
+
 #endif
 
diff --git a/frida_mode/src/persistent/persistent_arm64.c b/frida_mode/src/persistent/persistent_arm64.c
index 1215d8da..b23693fe 100644
--- a/frida_mode/src/persistent/persistent_arm64.c
+++ b/frida_mode/src/persistent/persistent_arm64.c
@@ -1,9 +1,11 @@
+#include <unistd.h>
 #include "frida-gum.h"
 
 #include "config.h"
 #include "debug.h"
 
 #include "instrument.h"
+#include "persistent.h"
 #include "util.h"
 
 #if defined(__aarch64__)
@@ -98,23 +100,365 @@ struct arm64_regs {
 
 typedef struct arm64_regs arch_api_regs;
 
+static arch_api_regs saved_regs = {0};
+static gpointer      saved_lr = NULL;
+
 gboolean persistent_is_supported(void) {
 
-  return false;
+  return true; /* the prologue and epilogue emitters are implemented below */
+
+}
+
+static void instrument_persitent_save_regs(GumArm64Writer *   cw,
+                                           struct arm64_regs *regs) {
+  /* Emit code that snapshots x0-x30, sp, pc, nzcv and q0-q7 into *regs. */
+  GumAddress    regs_address = GUM_ADDRESS(regs);
+  const guint32 mrs_x1_nzcv = 0xd53b4201;
+
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(
+      cw, ARM64_REG_X0, ARM64_REG_X1, ARM64_REG_SP, -(16 + GUM_RED_ZONE_SIZE),
+      GUM_INDEX_PRE_ADJUST);
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_X2, ARM64_REG_X3,
+                                              ARM64_REG_SP, -(16),
+                                              GUM_INDEX_PRE_ADJUST);
+
+  gum_arm64_writer_put_instruction(cw, mrs_x1_nzcv);
+
+  gum_arm64_writer_put_ldr_reg_address(cw, ARM64_REG_X0,
+                                       GUM_ADDRESS(regs_address));
+
+  /* Skip x0 & x1 we'll do that later */
+
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_X2, ARM64_REG_X3,
+                                              ARM64_REG_X0, (16 * 1),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_X4, ARM64_REG_X5,
+                                              ARM64_REG_X0, (16 * 2),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_X6, ARM64_REG_X7,
+                                              ARM64_REG_X0, (16 * 3),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_X8, ARM64_REG_X9,
+                                              ARM64_REG_X0, (16 * 4),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_X10, ARM64_REG_X11,
+                                              ARM64_REG_X0, (16 * 5),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_X12, ARM64_REG_X13,
+                                              ARM64_REG_X0, (16 * 6),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_X14, ARM64_REG_X15,
+                                              ARM64_REG_X0, (16 * 7),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_X16, ARM64_REG_X17,
+                                              ARM64_REG_X0, (16 * 8),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_X18, ARM64_REG_X19,
+                                              ARM64_REG_X0, (16 * 9),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_X20, ARM64_REG_X21,
+                                              ARM64_REG_X0, (16 * 10),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_X22, ARM64_REG_X23,
+                                              ARM64_REG_X0, (16 * 11),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_X24, ARM64_REG_X25,
+                                              ARM64_REG_X0, (16 * 12),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_X26, ARM64_REG_X27,
+                                              ARM64_REG_X0, (16 * 13),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_X28, ARM64_REG_X29,
+                                              ARM64_REG_X0, (16 * 14),
+                                              GUM_INDEX_SIGNED_OFFSET);
+
+  /* LR & Adjusted SP */
+  gum_arm64_writer_put_add_reg_reg_imm(cw, ARM64_REG_X2, ARM64_REG_SP,
+                                       (GUM_RED_ZONE_SIZE + 32));
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_X30, ARM64_REG_X2,
+                                              ARM64_REG_X0, (16 * 15),
+                                              GUM_INDEX_SIGNED_OFFSET);
+
+  /* PC & CPSR */
+  gum_arm64_writer_put_ldr_reg_address(cw, ARM64_REG_X2,
+                                       GUM_ADDRESS(persistent_start));
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_X2, ARM64_REG_X1,
+                                              ARM64_REG_X0, (16 * 16),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  /* A q pair is 32 bytes wide, so successive pair slots are 2 * 16 apart. */
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_Q0, ARM64_REG_Q1,
+                                              ARM64_REG_X0, (16 * 17),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_Q2, ARM64_REG_Q3,
+                                              ARM64_REG_X0, (16 * 19),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_Q4, ARM64_REG_Q5,
+                                              ARM64_REG_X0, (16 * 21),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_Q6, ARM64_REG_Q7,
+                                              ARM64_REG_X0, (16 * 23),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  /* NOTE(review): the q slots now end at byte 400; confirm *regs is larger. */
+  /* x0 & x1 */
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X2, ARM64_REG_X3,
+                                              ARM64_REG_SP, 16,
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(cw, ARM64_REG_X2, ARM64_REG_X3,
+                                              ARM64_REG_X0, (16 * 0),
+                                              GUM_INDEX_SIGNED_OFFSET);
+
+  /* Pop the saved values */
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(
+      cw, ARM64_REG_X2, ARM64_REG_X3, ARM64_REG_SP, 16, GUM_INDEX_POST_ADJUST);
+
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(
+      cw, ARM64_REG_X0, ARM64_REG_X1, ARM64_REG_SP, 16 + GUM_RED_ZONE_SIZE,
+      GUM_INDEX_POST_ADJUST);
+
+}
+
+static void instrument_persitent_restore_regs(GumArm64Writer *   cw,
+                                              struct arm64_regs *regs) {
+  /* Emit code that reloads x0-x30, nzcv and q0-q7 from *regs (not sp or pc). */
+  GumAddress    regs_address = GUM_ADDRESS(regs);
+  const guint32 msr_nzcv_x1 = 0xd51b4201;
+
+  gum_arm64_writer_put_ldr_reg_address(cw, ARM64_REG_X0,
+                                       GUM_ADDRESS(regs_address));
+
+  /* Skip x0 - x3 we'll do that last */
+
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X4, ARM64_REG_X5,
+                                              ARM64_REG_X0, (16 * 2),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X6, ARM64_REG_X7,
+                                              ARM64_REG_X0, (16 * 3),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X8, ARM64_REG_X9,
+                                              ARM64_REG_X0, (16 * 4),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X10, ARM64_REG_X11,
+                                              ARM64_REG_X0, (16 * 5),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X12, ARM64_REG_X13,
+                                              ARM64_REG_X0, (16 * 6),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X14, ARM64_REG_X15,
+                                              ARM64_REG_X0, (16 * 7),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X16, ARM64_REG_X17,
+                                              ARM64_REG_X0, (16 * 8),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X18, ARM64_REG_X19,
+                                              ARM64_REG_X0, (16 * 9),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X20, ARM64_REG_X21,
+                                              ARM64_REG_X0, (16 * 10),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X22, ARM64_REG_X23,
+                                              ARM64_REG_X0, (16 * 11),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X24, ARM64_REG_X25,
+                                              ARM64_REG_X0, (16 * 12),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X26, ARM64_REG_X27,
+                                              ARM64_REG_X0, (16 * 13),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X28, ARM64_REG_X29,
+                                              ARM64_REG_X0, (16 * 14),
+                                              GUM_INDEX_SIGNED_OFFSET);
+
+  /* Don't restore PC or SP, use x1-x3 as clobber */
+
+  /* LR & Adjusted SP (clobber x1) */
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X30, ARM64_REG_X1,
+                                              ARM64_REG_X0, (16 * 15),
+                                              GUM_INDEX_SIGNED_OFFSET);
+
+  /* PC (x2) & CPSR (x1) */
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X2, ARM64_REG_X1,
+                                              ARM64_REG_X0, (16 * 16),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_instruction(cw, msr_nzcv_x1);
+  /* A q pair is 32 bytes wide, so successive pair slots are 2 * 16 apart. */
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_Q0, ARM64_REG_Q1,
+                                              ARM64_REG_X0, (16 * 17),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_Q2, ARM64_REG_Q3,
+                                              ARM64_REG_X0, (16 * 19),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_Q4, ARM64_REG_Q5,
+                                              ARM64_REG_X0, (16 * 21),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_Q6, ARM64_REG_Q7,
+                                              ARM64_REG_X0, (16 * 23),
+                                              GUM_INDEX_SIGNED_OFFSET);
+
+  /* x2 & x3 */
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X2, ARM64_REG_X3,
+                                              ARM64_REG_X0, (16 * 1),
+                                              GUM_INDEX_SIGNED_OFFSET);
+  /* x0 & x1 */
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(cw, ARM64_REG_X0, ARM64_REG_X1,
+                                              ARM64_REG_X0, (16 * 0),
+                                              GUM_INDEX_SIGNED_OFFSET);
+
+}
+
+static void instrument_exit(GumArm64Writer *cw) {
+  /* Emit code that zeroes x0 and calls _exit with it as the only argument. */
+  gum_arm64_writer_put_mov_reg_reg(cw, ARM64_REG_X0, ARM64_REG_XZR);
+  gum_arm64_writer_put_call_address_with_arguments(
+      cw, GUM_ADDRESS(_exit), 1, GUM_ARG_REGISTER, ARM64_REG_X0);
+
+}
+
+static int instrument_afl_persistent_loop_func(void) {
+  /* Run one __afl_persistent_loop step, then reset previous_pc to 0. */
+  int ret = __afl_persistent_loop(persistent_count);
+  previous_pc = 0;
+  return ret;
+
+}
+
+static void instrument_afl_persistent_loop(GumArm64Writer *cw) {
+  /* Emit the helper call with sp lowered by the red zone for its duration. */
+  gum_arm64_writer_put_sub_reg_reg_imm(cw, ARM64_REG_SP, ARM64_REG_SP,
+                                       GUM_RED_ZONE_SIZE);
+  gum_arm64_writer_put_call_address_with_arguments(
+      cw, GUM_ADDRESS(instrument_afl_persistent_loop_func), 0);
+  gum_arm64_writer_put_add_reg_reg_imm(cw, ARM64_REG_SP, ARM64_REG_SP,
+                                       GUM_RED_ZONE_SIZE);
+
+}
+
+static void persistent_prologue_hook(GumArm64Writer *   cw,
+                                     struct arm64_regs *regs) {
+  /* Emit hook(regs, 0, *__afl_fuzz_ptr, **__afl_fuzz_len) if a hook is set. */
+  if (hook == NULL) return;
+
+  gum_arm64_writer_put_sub_reg_reg_imm(cw, ARM64_REG_SP, ARM64_REG_SP,
+                                       GUM_RED_ZONE_SIZE);
+  gum_arm64_writer_put_ldr_reg_address(cw, ARM64_REG_X3,
+                                       GUM_ADDRESS(&__afl_fuzz_len));
+  gum_arm64_writer_put_ldr_reg_reg_offset(cw, ARM64_REG_X3, ARM64_REG_X3, 0);
+  gum_arm64_writer_put_ldr_reg_reg_offset(cw, ARM64_REG_X3, ARM64_REG_X3, 0);
+  /* The load above is 64 bits wide; keep only the low 32 as the length. */
+  gum_arm64_writer_put_and_reg_reg_imm(cw, ARM64_REG_X3, ARM64_REG_X3,
+                                       G_MAXUINT32);
+
+  gum_arm64_writer_put_ldr_reg_address(cw, ARM64_REG_X2,
+                                       GUM_ADDRESS(&__afl_fuzz_ptr));
+  gum_arm64_writer_put_ldr_reg_reg_offset(cw, ARM64_REG_X2, ARM64_REG_X2, 0);
+
+  gum_arm64_writer_put_call_address_with_arguments(
+      cw, GUM_ADDRESS(hook), 4, GUM_ARG_ADDRESS, GUM_ADDRESS(regs),
+      GUM_ARG_ADDRESS, GUM_ADDRESS(0), GUM_ARG_REGISTER, ARM64_REG_X2,
+      GUM_ARG_REGISTER, ARM64_REG_X3);
+
+  gum_arm64_writer_put_add_reg_reg_imm(cw, ARM64_REG_SP, ARM64_REG_SP,
+                                       GUM_RED_ZONE_SIZE);
+
+}
+
+static void instrument_persitent_save_lr(GumArm64Writer *cw) {
+  /* Emit code storing lr into saved_lr; x0/x1 are spilled and reloaded. */
+  gum_arm64_writer_put_stp_reg_reg_reg_offset(
+      cw, ARM64_REG_X0, ARM64_REG_X1, ARM64_REG_SP, -(16 + GUM_RED_ZONE_SIZE),
+      GUM_INDEX_PRE_ADJUST);
+
+  gum_arm64_writer_put_ldr_reg_address(cw, ARM64_REG_X0,
+                                       GUM_ADDRESS(&saved_lr));
+
+  gum_arm64_writer_put_str_reg_reg_offset(cw, ARM64_REG_LR, ARM64_REG_X0, 0);
+
+  gum_arm64_writer_put_ldp_reg_reg_reg_offset(
+      cw, ARM64_REG_X0, ARM64_REG_X1, ARM64_REG_SP, 16 + GUM_RED_ZONE_SIZE,
+      GUM_INDEX_POST_ADJUST);
 
 }
 
 void persistent_prologue(GumStalkerOutput *output) {
 
-  UNUSED_PARAMETER(output);
-  FATAL("Persistent mode not supported on this architecture");
+  /*
+   * Layout of the code emitted below:
+   *
+   *  save registers into saved_regs
+   * loop:
+   *  call instrument_afl_persistent_loop_func
+   *  cmp x0, xzr
+   *  b.eq done
+   *  call hook (only if one is set)
+   *  restore registers from saved_regs
+   *  bl original
+   *  b loop
+   *
+   * done:
+   *  call _exit(0)
+   *
+   * original:
+   *  save lr into saved_lr, then the instrumented persistent function
+   */
+
+  GumArm64Writer *cw = output->writer.arm64;
+
+  gconstpointer loop = cw->code + 1;
+
+  /* Stack must be 16-byte aligned per ABI */
+  instrument_persitent_save_regs(cw, &saved_regs);
+
+  /* loop: */
+  gum_arm64_writer_put_label(cw, loop);
+
+  /* call instrument_afl_persistent_loop_func */
+  instrument_afl_persistent_loop(cw);
+
+  /* b.eq done */
+  gconstpointer done = cw->code + 1;
+  gum_arm64_writer_put_cmp_reg_reg(cw, ARM64_REG_X0, ARM64_REG_XZR);
+  gum_arm64_writer_put_b_cond_label(cw, ARM64_CC_EQ, done);
+
+  /* Optionally call the persistent hook */
+  persistent_prologue_hook(cw, &saved_regs);
+
+  instrument_persitent_restore_regs(cw, &saved_regs);
+  gconstpointer original = cw->code + 1;
+  /* bl original */
+
+  gum_arm64_writer_put_bl_label(cw, original);
+
+  /* b loop */
+  gum_arm64_writer_put_b_label(cw, loop);
+
+  /* done: */
+  gum_arm64_writer_put_label(cw, done);
+
+  instrument_exit(cw);
+
+  /* original: */
+  gum_arm64_writer_put_label(cw, original);
+
+  instrument_persitent_save_lr(cw);
+
+  if (persistent_debug) { gum_arm64_writer_put_brk_imm(cw, 0); }
 
 }
 
 void persistent_epilogue(GumStalkerOutput *output) {
 
-  UNUSED_PARAMETER(output);
-  FATAL("Persistent mode not supported on this architecture");
+  GumArm64Writer *cw = output->writer.arm64;
+  /* Emit: sp += persistent_ret_offset, then branch to saved_lr through x0. */
+  if (persistent_debug) { gum_arm64_writer_put_brk_imm(cw, 0); }
+
+  gum_arm64_writer_put_add_reg_reg_imm(cw, ARM64_REG_SP, ARM64_REG_SP,
+                                       persistent_ret_offset);
+
+  gum_arm64_writer_put_ldr_reg_address(cw, ARM64_REG_X0,
+                                       GUM_ADDRESS(&saved_lr));
+
+  gum_arm64_writer_put_ldr_reg_reg_offset(cw, ARM64_REG_X0, ARM64_REG_X0, 0);
+
+  gum_arm64_writer_put_br_reg(cw, ARM64_REG_X0);
 
 }
 
diff --git a/frida_mode/src/persistent/persistent_x64.c b/frida_mode/src/persistent/persistent_x64.c
index 4cb960fc..858ad38e 100644
--- a/frida_mode/src/persistent/persistent_x64.c
+++ b/frida_mode/src/persistent/persistent_x64.c
@@ -306,8 +306,6 @@ void persistent_prologue(GumStalkerOutput *output) {
 
   if (persistent_debug) { gum_x86_writer_put_breakpoint(cw); }
 
-  gum_x86_writer_flush(cw);
-
 }
 
 void persistent_epilogue(GumStalkerOutput *output) {
diff --git a/frida_mode/src/persistent/persistent_x86.c b/frida_mode/src/persistent/persistent_x86.c
index b30dfadf..0675edf4 100644
--- a/frida_mode/src/persistent/persistent_x86.c
+++ b/frida_mode/src/persistent/persistent_x86.c
@@ -246,8 +246,6 @@ void persistent_prologue(GumStalkerOutput *output) {
 
   if (persistent_debug) { gum_x86_writer_put_breakpoint(cw); }
 
-  gum_x86_writer_flush(cw);
-
 }
 
 void persistent_epilogue(GumStalkerOutput *output) {
diff --git a/frida_mode/src/stats/stats.c b/frida_mode/src/stats/stats.c
index 662fb6d5..0d7b9fb0 100644
--- a/frida_mode/src/stats/stats.c
+++ b/frida_mode/src/stats/stats.c
@@ -96,7 +96,6 @@ void stats_init(void) {
 void stats_vprint(int fd, char *format, va_list ap) {
 
   char buffer[4096] = {0};
-  int  ret;
   int  len;
 
   if (vsnprintf(buffer, sizeof(buffer) - 1, format, ap) < 0) { return; }
diff --git a/frida_mode/src/stats/stats_arm.c b/frida_mode/src/stats/stats_arm32.c
index 7eea7f91..7eea7f91 100644
--- a/frida_mode/src/stats/stats_arm.c
+++ b/frida_mode/src/stats/stats_arm32.c
diff --git a/frida_mode/test/cmplog/GNUmakefile b/frida_mode/test/cmplog/GNUmakefile
index 40de6a09..4c71bb33 100644
--- a/frida_mode/test/cmplog/GNUmakefile
+++ b/frida_mode/test/cmplog/GNUmakefile
@@ -13,7 +13,7 @@ CMP_LOG_INPUT:=$(TEST_DATA_DIR)in
 QEMU_OUT:=$(BUILD_DIR)qemu-out
 FRIDA_OUT:=$(BUILD_DIR)frida-out
 
-.PHONY: all 32 clean qemu frida format
+.PHONY: all 32 clean qemu frida frida-nocmplog format
 
 all: $(TEST_CMPLOG_OBJ)
 	make -C $(ROOT)frida_mode/
@@ -55,6 +55,15 @@ frida: $(TEST_CMPLOG_OBJ) $(CMP_LOG_INPUT)
 		-- \
 			$(TEST_CMPLOG_OBJ) @@
 
+frida-nocmplog: $(TEST_CMPLOG_OBJ) $(CMP_LOG_INPUT) # afl-fuzz with -O (FRIDA mode); no cmplog option is passed
+	$(ROOT)afl-fuzz \
+		-O \
+		-i $(TEST_DATA_DIR) \
+		-o $(FRIDA_OUT) \
+		-Z \
+		-- \
+			$(TEST_CMPLOG_OBJ) @@
+
 debug: $(TEST_CMPLOG_OBJ) $(CMP_LOG_INPUT)
 	gdb \
 		--ex 'set environment LD_PRELOAD=$(ROOT)afl-frida-trace.so' \
diff --git a/frida_mode/test/cmplog/Makefile b/frida_mode/test/cmplog/Makefile
index 606b43a5..7ca9a9a5 100644
--- a/frida_mode/test/cmplog/Makefile
+++ b/frida_mode/test/cmplog/Makefile
@@ -15,6 +15,10 @@ qemu:
 frida:
 	@gmake frida
 
+
+frida-nocmplog: # hands the target of the same name to gmake
+	@gmake frida-nocmplog
+
 format:
 	@gmake format
 
diff --git a/frida_mode/test/cmplog/cmplog.c b/frida_mode/test/cmplog/cmplog.c
index 99010645..ce5cf20e 100644
--- a/frida_mode/test/cmplog/cmplog.c
+++ b/frida_mode/test/cmplog/cmplog.c
@@ -53,7 +53,7 @@ int main(int argc, char **argv) {
 
   }
 
-#if defined(__x86_64__)
+#if defined(__x86_64__) || defined(__aarch64__)
   uint64_t x = 0;
   fread(&x, sizeof(x), 1, file);
   if (x != 0xCAFEBABECAFEBABE) {
diff --git a/frida_mode/test/fasan/GNUmakefile b/frida_mode/test/fasan/GNUmakefile
index 08b271de..c971c724 100644
--- a/frida_mode/test/fasan/GNUmakefile
+++ b/frida_mode/test/fasan/GNUmakefile
@@ -46,7 +46,7 @@ ifeq "$(ARCH)" "x86_64"
 LIBASAN_FILE:=libclang_rt.asan-x86_64.so
 endif
 
-ifeq "$(ARCH)" "aarch64"
+ifeq "$(ARCH)" "arm64"
 LIBASAN_FILE:=libclang_rt.asan-aarch64.so
 endif
 
@@ -110,7 +110,7 @@ $(TEST_DATA_DIR): | $(BUILD_DIR)
 	mkdir -p $@
 
 $(TEST_DATA_FILE): | $(TEST_DATA_DIR)
-	echo -n "TUODATM" > $@
+	echo -n "XUODATM" > $@
 
 frida-noasan: $(TEST_BIN) $(TEST_DATA_FILE)
 		$(ROOT)afl-fuzz \
diff --git a/frida_mode/test/persistent_ret/GNUmakefile b/frida_mode/test/persistent_ret/GNUmakefile
index df48d065..4c9d8a19 100644
--- a/frida_mode/test/persistent_ret/GNUmakefile
+++ b/frida_mode/test/persistent_ret/GNUmakefile
@@ -85,7 +85,7 @@ frida_ret: $(TESTINSTBIN) $(TESTINSTR_DATA_FILE)
 		-- \
 			$(TESTINSTBIN) @@
 
-debug: $(TESTINSTR_DATA_FILE)
+debug: $(TESTINSTBIN) $(TESTINSTR_DATA_FILE)
 	gdb \
 		--ex 'set environment AFL_FRIDA_PERSISTENT_ADDR=$(AFL_FRIDA_PERSISTENT_ADDR)' \
 		--ex 'set environment AFL_FRIDA_PERSISTENT_RET=$(AFL_FRIDA_PERSISTENT_RET)' \
@@ -96,7 +96,7 @@ debug: $(TESTINSTR_DATA_FILE)
 		--ex 'set disassembly-flavor intel' \
 		--args $(TESTINSTBIN) $(TESTINSTR_DATA_FILE)
 
-run: $(TESTINSTR_DATA_FILE)
+run: $(TESTINSTBIN) $(TESTINSTR_DATA_FILE)
 	AFL_FRIDA_PERSISTENT_ADDR=$(AFL_FRIDA_PERSISTENT_ADDR) \
 	AFL_FRIDA_PERSISTENT_RET=$(AFL_FRIDA_PERSISTENT_RET) \
 	AFL_FRIDA_PERSISTENT_RETADDR_OFFSET=$(AFL_FRIDA_PERSISTENT_RETADDR_OFFSET) \
diff --git a/frida_mode/test/png/persistent/GNUmakefile b/frida_mode/test/png/persistent/GNUmakefile
index ca6f0ff2..5af64822 100644
--- a/frida_mode/test/png/persistent/GNUmakefile
+++ b/frida_mode/test/png/persistent/GNUmakefile
@@ -5,6 +5,7 @@ BUILD_DIR:=$(PWD)build/
 TEST_BIN:=$(PWD)../build/test
 TEST_DATA_DIR:=../build/libpng/libpng-1.2.56/contrib/pngsuite/
 
+AFLPP_DRIVER_DUMMY_INPUT:=$(BUILD_DIR)in
 QEMU_OUT:=$(BUILD_DIR)qemu-out
 FRIDA_OUT:=$(BUILD_DIR)frida-out
 
@@ -22,8 +23,7 @@ endif
 
 AFL_QEMU_PERSISTENT_ADDR=$(shell $(PWD)get_symbol_addr.py -f $(TEST_BIN) -s main -b 0x4000000000)
 
-ARCH=$(shell uname -m)
-ifeq "$(ARCH)" "aarch64"
+ifeq "$(ARCH)" "arm64"
  AFL_FRIDA_PERSISTENT_ADDR=$(shell $(PWD)get_symbol_addr.py -f $(TEST_BIN) -s main -b 0x0000aaaaaaaaa000)
 endif
 
@@ -46,6 +46,9 @@ all:
 $(BUILD_DIR):
 	mkdir -p $@
 
+$(AFLPP_DRIVER_DUMMY_INPUT): | $(BUILD_DIR) # placeholder input file sized to 1M; build dir is order-only
+	truncate -s 1M $@
+
 qemu: | $(BUILD_DIR)
 	AFL_QEMU_PERSISTENT_ADDR=$(AFL_QEMU_PERSISTENT_ADDR) \
 	AFL_QEMU_PERSISTENT_GPR=1 \
@@ -94,5 +97,12 @@ frida_entry: | $(BUILD_DIR)
 		-- \
 			$(TEST_BIN) @@
 
+debug: $(AFLPP_DRIVER_DUMMY_INPUT) # run the test binary on the dummy input under gdb with afl-frida-trace.so preloaded
+	gdb \
+		--ex 'set environment LD_PRELOAD=$(ROOT)afl-frida-trace.so' \
+		--ex 'set environment AFL_FRIDA_PERSISTENT_ADDR=$(AFL_FRIDA_PERSISTENT_ADDR)' \
+		--ex 'set disassembly-flavor intel' \
+		--args $(TEST_BIN) $(AFLPP_DRIVER_DUMMY_INPUT)
+
 clean:
 	rm -rf $(BUILD_DIR)
diff --git a/frida_mode/test/png/persistent/Makefile b/frida_mode/test/png/persistent/Makefile
index cde0cf30..c2bd55f9 100644
--- a/frida_mode/test/png/persistent/Makefile
+++ b/frida_mode/test/png/persistent/Makefile
@@ -20,3 +20,6 @@ frida:
 
 frida_entry:
 	@gmake frida_entry
+
+debug: # hands the target of the same name to gmake
+	@gmake debug
diff --git a/frida_mode/test/png/persistent/hook/GNUmakefile b/frida_mode/test/png/persistent/hook/GNUmakefile
index 82f08fa4..b17f3775 100644
--- a/frida_mode/test/png/persistent/hook/GNUmakefile
+++ b/frida_mode/test/png/persistent/hook/GNUmakefile
@@ -34,7 +34,7 @@ endif
 
 AFL_QEMU_PERSISTENT_ADDR=$(shell $(PWD)../get_symbol_addr.py -f $(TEST_BIN) -s LLVMFuzzerTestOneInput -b 0x4000000000)
 
-ifeq "$(ARCH)" "aarch64"
+ifeq "$(ARCH)" "arm64"
  AFL_FRIDA_PERSISTENT_ADDR=$(shell $(PWD)../get_symbol_addr.py -f $(TEST_BIN) -s LLVMFuzzerTestOneInput -b 0x0000aaaaaaaaa000)
 endif
 
@@ -124,7 +124,7 @@ frida_entry: $(AFLPP_DRIVER_DUMMY_INPUT) $(AFLPP_DRIVER_HOOK_OBJ) | $(BUILD_DIR)
 		-- \
 			$(TEST_BIN) $(AFLPP_DRIVER_DUMMY_INPUT)
 
-debug:
+debug: $(AFLPP_DRIVER_DUMMY_INPUT)
 	echo $(AFL_FRIDA_PERSISTENT_ADDR)
 	gdb \
 		--ex 'set environment LD_PRELOAD=$(ROOT)afl-frida-trace.so' \
diff --git a/frida_mode/test/png/persistent/hook/aflpp_qemu_driver_hook.c b/frida_mode/test/png/persistent/hook/aflpp_qemu_driver_hook.c
index 059d438d..1542c0bf 100644
--- a/frida_mode/test/png/persistent/hook/aflpp_qemu_driver_hook.c
+++ b/frida_mode/test/png/persistent/hook/aflpp_qemu_driver_hook.c
@@ -82,6 +82,102 @@ void afl_persistent_hook(struct x86_regs *regs, uint64_t guest_base,
   *arg2 = (void *)input_buf_len;
 
 }
+#elif defined(__aarch64__)
+
+struct arm64_regs {
+  /* Register block that afl_persistent_hook() receives as `regs`. */
+  uint64_t x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10;
+  /* x11..x15 each overlay a 32-bit alias (presumably AArch32 names; TODO confirm). */
+  union {
+
+    uint64_t x11;
+    uint32_t fp_32;
+
+  };
+
+  union {
+
+    uint64_t x12;
+    uint32_t ip_32;
+
+  };
+
+  union {
+
+    uint64_t x13;
+    uint32_t sp_32;
+
+  };
+
+  union {
+
+    uint64_t x14;
+    uint32_t lr_32;
+
+  };
+
+  union {
+
+    uint64_t x15;
+    uint32_t pc_32;
+
+  };
+
+  union {
+
+    uint64_t x16;
+    uint64_t ip0;
+
+  };
+
+  union {
+
+    uint64_t x17;
+    uint64_t ip1;
+
+  };
+
+  uint64_t x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28;
+
+  union {
+
+    uint64_t x29;
+    uint64_t fp;
+
+  };
+
+  union {
+
+    uint64_t x30;
+    uint64_t lr;
+
+  };
+
+  union {
+
+    uint64_t x31;
+    uint64_t sp;
+
+  };
+
+  // the zero register is not stored in this struct
+
+  uint64_t pc;
+
+  uint32_t cpsr;
+  /* Byte/word storage for the vfp_* register groups; sizes as declared below. */
+  uint8_t  vfp_zregs[32][16 * 16];
+  uint8_t  vfp_pregs[17][32];
+  uint32_t vfp_xregs[16];
+
+};
+
+void afl_persistent_hook(struct arm64_regs *regs, uint64_t guest_base,
+                         uint8_t *input_buf, uint32_t input_buf_len) {
+  /* Copy the input to the address held in x0 and store its length in x1. */
+  memcpy((void *)regs->x0, input_buf, input_buf_len);
+  regs->x1 = input_buf_len;
+}
 
 #else
   #pragma error "Unsupported architecture"
diff --git a/instrumentation/README.llvm.md b/instrumentation/README.llvm.md
index 8ce5afb9..2d428e6d 100644
--- a/instrumentation/README.llvm.md
+++ b/instrumentation/README.llvm.md
@@ -6,7 +6,7 @@
 
 ## 1) Introduction
 
-! llvm_mode works with llvm versions 6.0 up to 12 !
+! llvm_mode works with llvm versions 3.8 up to 12 !
 
 The code in this directory allows you to instrument programs for AFL using
 true compiler-level instrumentation, instead of the more crude
diff --git a/instrumentation/afl-compiler-rt.o.c b/instrumentation/afl-compiler-rt.o.c
index 2089ce78..50117012 100644
--- a/instrumentation/afl-compiler-rt.o.c
+++ b/instrumentation/afl-compiler-rt.o.c
@@ -83,14 +83,15 @@ extern ssize_t _kern_write(int fd, off_t pos, const void *buffer,
                            size_t bufferSize);
 #endif  // HAIKU
 
-u8   __afl_area_initial[MAP_INITIAL_SIZE];
-u8 * __afl_area_ptr_dummy = __afl_area_initial;
-u8 * __afl_area_ptr = __afl_area_initial;
-u8 * __afl_area_ptr_backup = __afl_area_initial;
-u8 * __afl_dictionary;
-u8 * __afl_fuzz_ptr;
-u32  __afl_fuzz_len_dummy;
-u32 *__afl_fuzz_len = &__afl_fuzz_len_dummy;
+static u8  __afl_area_initial[MAP_INITIAL_SIZE];
+static u8 *__afl_area_ptr_dummy = __afl_area_initial;
+static u8 *__afl_area_ptr_backup = __afl_area_initial;
+
+u8 *       __afl_area_ptr = __afl_area_initial;
+u8 *       __afl_dictionary;
+u8 *       __afl_fuzz_ptr;
+static u32 __afl_fuzz_len_dummy;
+u32 *      __afl_fuzz_len = &__afl_fuzz_len_dummy;
 
 u32 __afl_final_loc;
 u32 __afl_map_size = MAP_SIZE;
@@ -98,9 +99,9 @@ u32 __afl_dictionary_len;
 u64 __afl_map_addr;
 
 // for the __AFL_COVERAGE_ON/__AFL_COVERAGE_OFF features to work:
-int __afl_selective_coverage __attribute__((weak));
-int __afl_selective_coverage_start_off __attribute__((weak));
-int __afl_selective_coverage_temp = 1;
+int        __afl_selective_coverage __attribute__((weak));
+int        __afl_selective_coverage_start_off __attribute__((weak));
+static int __afl_selective_coverage_temp = 1;
 
 #if defined(__ANDROID__) || defined(__HAIKU__)
 PREV_LOC_T __afl_prev_loc[NGRAM_SIZE_MAX];
@@ -147,7 +148,7 @@ static int __afl_dummy_fd[2] = {2, 2};
 
 /* ensure we kill the child on termination */
 
-void at_exit(int signal) {
+static void at_exit(int signal) {
 
   if (child_pid > 0) { kill(child_pid, SIGKILL); }
 
@@ -179,7 +180,7 @@ void __afl_trace(const u32 x) {
 
 /* Error reporting to forkserver controller */
 
-void send_forkserver_error(int error) {
+static void send_forkserver_error(int error) {
 
   u32 status;
   if (!error || error > 0xffff) return;
@@ -629,6 +630,32 @@ static void __afl_unmap_shm(void) {
 
 }
 
+#define write_error(text) write_error_with_location(text, __FILE__, __LINE__)
+/* Log `text` and strerror(errno) to $__AFL_OUT_DIR/error.txt and to stderr. */
+void write_error_with_location(char *text, char *filename, int linenumber) {
+
+  char *e = strerror(errno);     /* read errno before any other libc call */
+  char *o = getenv("__AFL_OUT_DIR");
+
+  if (o) {
+    /* Best effort: the file copy is skipped when error.txt cannot be opened. */
+    char buf[4096];
+    snprintf(buf, sizeof(buf), "%s/error.txt", o);
+    FILE *f = fopen(buf, "a");
+
+    if (f) {
+
+      fprintf(f, "File %s, line %d: Error(%s): %s\n", filename, linenumber, text, e);
+      fclose(f);
+
+    }
+
+  }
+
+  fprintf(stderr, "File %s, line %d: Error(%s): %s\n", filename, linenumber, text, e);
+
+}
+
 #ifdef __linux__
 static void __afl_start_snapshots(void) {
 
@@ -655,7 +682,12 @@ static void __afl_start_snapshots(void) {
 
   if (__afl_sharedmem_fuzzing || (__afl_dictionary_len && __afl_dictionary)) {
 
-    if (read(FORKSRV_FD, &was_killed, 4) != 4) { _exit(1); }
+    if (read(FORKSRV_FD, &was_killed, 4) != 4) {
+
+      write_error("read to afl-fuzz");
+      _exit(1);
+
+    }
 
     if (__afl_debug) {
 
@@ -724,7 +756,12 @@ static void __afl_start_snapshots(void) {
     } else {
 
       /* Wait for parent by reading from the pipe. Abort if read fails. */
-      if (read(FORKSRV_FD, &was_killed, 4) != 4) _exit(1);
+      if (read(FORKSRV_FD, &was_killed, 4) != 4) {
+
+        write_error("reading from afl-fuzz");
+        _exit(1);
+
+      }
 
     }
 
@@ -761,7 +798,12 @@ static void __afl_start_snapshots(void) {
     if (child_stopped && was_killed) {
 
       child_stopped = 0;
-      if (waitpid(child_pid, &status, 0) < 0) _exit(1);
+      if (waitpid(child_pid, &status, 0) < 0) {
+
+        write_error("child_stopped && was_killed");
+        _exit(1);  // TODO why exit?
+
+      }
 
     }
 
@@ -770,7 +812,12 @@ static void __afl_start_snapshots(void) {
       /* Once woken up, create a clone of our process. */
 
       child_pid = fork();
-      if (child_pid < 0) _exit(1);
+      if (child_pid < 0) {
+
+        write_error("fork");
+        _exit(1);
+
+      }
 
       /* In child process: close fds, resume execution. */
 
@@ -810,9 +857,19 @@ static void __afl_start_snapshots(void) {
 
     /* In parent process: write PID to pipe, then wait for child. */
 
-    if (write(FORKSRV_FD + 1, &child_pid, 4) != 4) _exit(1);
+    if (write(FORKSRV_FD + 1, &child_pid, 4) != 4) {
+
+      write_error("write to afl-fuzz");
+      _exit(1);
 
-    if (waitpid(child_pid, &status, WUNTRACED) < 0) _exit(1);
+    }
+
+    if (waitpid(child_pid, &status, WUNTRACED) < 0) {
+
+      write_error("waitpid");
+      _exit(1);
+
+    }
 
     /* In persistent mode, the child stops itself with SIGSTOP to indicate
        a successful run. In this case, we want to wake it up without forking
@@ -822,7 +879,12 @@ static void __afl_start_snapshots(void) {
 
     /* Relay wait status to pipe, then loop back. */
 
-    if (write(FORKSRV_FD + 1, &status, 4) != 4) _exit(1);
+    if (write(FORKSRV_FD + 1, &status, 4) != 4) {
+
+      write_error("writing to afl-fuzz");
+      _exit(1);
+
+    }
 
   }
 
@@ -955,7 +1017,12 @@ static void __afl_start_forkserver(void) {
 
     } else {
 
-      if (read(FORKSRV_FD, &was_killed, 4) != 4) _exit(1);
+      if (read(FORKSRV_FD, &was_killed, 4) != 4) {
+
+        write_error("read from afl-fuzz");
+        _exit(1);
+
+      }
 
     }
 
@@ -992,7 +1059,12 @@ static void __afl_start_forkserver(void) {
     if (child_stopped && was_killed) {
 
       child_stopped = 0;
-      if (waitpid(child_pid, &status, 0) < 0) _exit(1);
+      if (waitpid(child_pid, &status, 0) < 0) {
+
+        write_error("child_stopped && was_killed");
+        _exit(1);
+
+      }
 
     }
 
@@ -1001,7 +1073,12 @@ static void __afl_start_forkserver(void) {
       /* Once woken up, create a clone of our process. */
 
       child_pid = fork();
-      if (child_pid < 0) _exit(1);
+      if (child_pid < 0) {
+
+        write_error("fork");
+        _exit(1);
+
+      }
 
       /* In child process: close fds, resume execution. */
 
@@ -1030,11 +1107,20 @@ static void __afl_start_forkserver(void) {
 
     /* In parent process: write PID to pipe, then wait for child. */
 
-    if (write(FORKSRV_FD + 1, &child_pid, 4) != 4) _exit(1);
+    if (write(FORKSRV_FD + 1, &child_pid, 4) != 4) {
+
+      write_error("write to afl-fuzz");
+      _exit(1);
+
+    }
+
+    if (waitpid(child_pid, &status, is_persistent ? WUNTRACED : 0) < 0) {
 
-    if (waitpid(child_pid, &status, is_persistent ? WUNTRACED : 0) < 0)
+      write_error("waitpid");
       _exit(1);
 
+    }
+
     /* In persistent mode, the child stops itself with SIGSTOP to indicate
        a successful run. In this case, we want to wake it up without forking
        again. */
@@ -1043,7 +1129,12 @@ static void __afl_start_forkserver(void) {
 
     /* Relay wait status to pipe, then loop back. */
 
-    if (write(FORKSRV_FD + 1, &status, 4) != 4) _exit(1);
+    if (write(FORKSRV_FD + 1, &status, 4) != 4) {
+
+      write_error("writing to afl-fuzz");
+      _exit(1);
+
+    }
 
   }
 
@@ -1668,7 +1759,7 @@ void __sanitizer_cov_trace_cmp4(uint32_t arg1, uint32_t arg2) {
 
 }
 
-void __sanitizer_cov_trace_cost_cmp4(uint32_t arg1, uint32_t arg2) {
+void __sanitizer_cov_trace_const_cmp4(uint32_t arg1, uint32_t arg2) {
 
   __cmplog_ins_hook4(arg1, arg2, 0);
 
@@ -1990,3 +2081,4 @@ void __afl_coverage_interesting(u8 val, u32 id) {
 
 }
 
+#undef write_error
diff --git a/instrumentation/afl-llvm-pass.so.cc b/instrumentation/afl-llvm-pass.so.cc
index 6fe34ccd..94b77f7d 100644
--- a/instrumentation/afl-llvm-pass.so.cc
+++ b/instrumentation/afl-llvm-pass.so.cc
@@ -676,7 +676,7 @@ bool AFLCoverage::runOnModule(Module &M) {
                                                todo.push_back(MapPtrIdx);
                              
                                              } else {
-
+                             
                                      */
         IRB.CreateAtomicRMW(llvm::AtomicRMWInst::BinOp::Add, MapPtrIdx, One,
 #if LLVM_VERSION_MAJOR >= 13
diff --git a/src/afl-analyze.c b/src/afl-analyze.c
index aabdbf1a..606254d9 100644
--- a/src/afl-analyze.c
+++ b/src/afl-analyze.c
@@ -55,12 +55,7 @@
 #include <sys/types.h>
 #include <sys/resource.h>
 
-static s32 child_pid;                  /* PID of the tested program         */
-
-static u8 *trace_bits;                 /* SHM with instrumentation bitmap   */
-
-static u8 *in_file,                    /* Analyzer input test case          */
-    *prog_in;                          /* Targeted program input file       */
+static u8 *in_file;                    /* Analyzer input test case          */
 
 static u8 *in_data;                    /* Input data for analysis           */
 
@@ -73,20 +68,19 @@ static u64 orig_cksum;                 /* Original checksum                 */
 
 static u64 mem_limit = MEM_LIMIT;      /* Memory limit (MB)                 */
 
-static s32 dev_null_fd = -1;           /* FD to /dev/null                   */
-
 static bool edges_only,                  /* Ignore hit counts?              */
     use_hex_offsets,                   /* Show hex offsets?                 */
     use_stdin = true;                     /* Use stdin for program input?   */
 
-static volatile u8 stop_soon,          /* Ctrl-C pressed?                   */
-    child_timed_out;                   /* Child timed out?                  */
+static volatile u8 stop_soon;          /* Ctrl-C pressed?                   */
 
 static u8 *target_path;
 static u8  frida_mode;
 static u8  qemu_mode;
 static u32 map_size = MAP_SIZE;
 
+static afl_forkserver_t fsrv = {0};   /* The forkserver                     */
+
 /* Constants used for describing byte behavior. */
 
 #define RESP_NONE 0x00                 /* Changing byte is a no-op.         */
@@ -156,7 +150,7 @@ static void classify_counts(u8 *mem) {
 
 static inline u8 anything_set(void) {
 
-  u32 *ptr = (u32 *)trace_bits;
+  u32 *ptr = (u32 *)fsrv.trace_bits;
   u32  i = (map_size >> 2);
 
   while (i--) {
@@ -173,7 +167,7 @@ static inline u8 anything_set(void) {
 
 static void at_exit_handler(void) {
 
-  unlink(prog_in);                                         /* Ignore errors */
+  unlink(fsrv.out_file);                                         /* Ignore errors */
 
 }
 
@@ -205,116 +199,29 @@ static void read_initial_file(void) {
 
 }
 
-/* Write output file. */
-
-static s32 write_to_file(u8 *path, u8 *mem, u32 len) {
-
-  s32 ret;
-
-  unlink(path);                                            /* Ignore errors */
-
-  ret = open(path, O_RDWR | O_CREAT | O_EXCL, DEFAULT_PERMISSION);
-
-  if (ret < 0) { PFATAL("Unable to create '%s'", path); }
-
-  ck_write(ret, mem, len, path);
-
-  lseek(ret, 0, SEEK_SET);
-
-  return ret;
-
-}
-
 /* Execute target application. Returns exec checksum, or 0 if program
    times out. */
 
-static u32 analyze_run_target(char **argv, u8 *mem, u32 len, u8 first_run) {
-
-  static struct itimerval it;
-  int                     status = 0;
-
-  s32 prog_in_fd;
-  u64 cksum;
-
-  memset(trace_bits, 0, map_size);
-  MEM_BARRIER();
-
-  prog_in_fd = write_to_file(prog_in, mem, len);
-
-  child_pid = fork();
-
-  if (child_pid < 0) { PFATAL("fork() failed"); }
-
-  if (!child_pid) {
-
-    struct rlimit r;
-
-    if (dup2(use_stdin ? prog_in_fd : dev_null_fd, 0) < 0 ||
-        dup2(dev_null_fd, 1) < 0 || dup2(dev_null_fd, 2) < 0) {
-
-      *(u32 *)trace_bits = EXEC_FAIL_SIG;
-      PFATAL("dup2() failed");
-
-    }
-
-    close(dev_null_fd);
-    close(prog_in_fd);
-
-    if (mem_limit) {
-
-      r.rlim_max = r.rlim_cur = ((rlim_t)mem_limit) << 20;
-
-#ifdef RLIMIT_AS
-
-      setrlimit(RLIMIT_AS, &r);                            /* Ignore errors */
+static u32 analyze_run_target(u8 *mem, u32 len, u8 first_run) {
 
-#else
-
-      setrlimit(RLIMIT_DATA, &r);                          /* Ignore errors */
-
-#endif                                                        /* ^RLIMIT_AS */
-
-    }
-
-    r.rlim_max = r.rlim_cur = 0;
-    setrlimit(RLIMIT_CORE, &r);                            /* Ignore errors */
-
-    execv(target_path, argv);
-
-    *(u32 *)trace_bits = EXEC_FAIL_SIG;
-    exit(0);
+  afl_fsrv_write_to_testcase(&fsrv, mem, len);
+  fsrv_run_result_t ret = afl_fsrv_run_target(&fsrv, exec_tmout, &stop_soon);
 
-  }
-
-  close(prog_in_fd);
-
-  /* Configure timeout, wait for child, cancel timeout. */
-
-  child_timed_out = 0;
-  it.it_value.tv_sec = (exec_tmout / 1000);
-  it.it_value.tv_usec = (exec_tmout % 1000) * 1000;
+  if (ret == FSRV_RUN_ERROR) {
 
-  setitimer(ITIMER_REAL, &it, NULL);
+    FATAL("Error in forkserver");
 
-  if (waitpid(child_pid, &status, 0) <= 0) { FATAL("waitpid() failed"); }
+  } else if (ret == FSRV_RUN_NOINST) {
 
-  child_pid = 0;
-  it.it_value.tv_sec = 0;
-  it.it_value.tv_usec = 0;
+    FATAL("Target not instrumented");
 
-  setitimer(ITIMER_REAL, &it, NULL);
+  } else if (ret == FSRV_RUN_NOBITS) {
 
-  MEM_BARRIER();
-
-  /* Clean up bitmap, analyze exit condition, etc. */
-
-  if (*(u32 *)trace_bits == EXEC_FAIL_SIG) {
-
-    FATAL("Unable to execute '%s'", argv[0]);
+    FATAL("Failed to run target");
 
   }
 
-  classify_counts(trace_bits);
+  classify_counts(fsrv.trace_bits);
   total_execs++;
 
   if (stop_soon) {
@@ -326,21 +233,19 @@ static u32 analyze_run_target(char **argv, u8 *mem, u32 len, u8 first_run) {
 
   /* Always discard inputs that time out. */
 
-  if (child_timed_out) {
+  if (fsrv.last_run_timed_out) {
 
     exec_hangs++;
     return 0;
 
   }
 
-  cksum = hash64(trace_bits, map_size, HASH_CONST);
+  u64 cksum = hash64(fsrv.trace_bits, fsrv.map_size, HASH_CONST);
 
-  /* We don't actually care if the target is crashing or not,
-     except that when it does, the checksum should be different. */
+  if (ret == FSRV_RUN_CRASH) {
 
-  if (WIFSIGNALED(status) ||
-      (WIFEXITED(status) && WEXITSTATUS(status) == MSAN_ERROR) ||
-      (WIFEXITED(status) && WEXITSTATUS(status))) {
+    /* We don't actually care if the target is crashing or not,
+       except that when it does, the checksum should be different. */
 
     cksum ^= 0xffffffff;
 
@@ -604,7 +509,7 @@ static void dump_hex(u32 len, u8 *b_data) {
 
 /* Actually analyze! */
 
-static void analyze(char **argv) {
+static void analyze() {
 
   u32 i;
   u32 boring_len = 0, prev_xff = 0, prev_x01 = 0, prev_s10 = 0, prev_a10 = 0;
@@ -630,16 +535,16 @@ static void analyze(char **argv) {
        code. */
 
     in_data[i] ^= 0xff;
-    xor_ff = analyze_run_target(argv, in_data, in_len, 0);
+    xor_ff = analyze_run_target(in_data, in_len, 0);
 
     in_data[i] ^= 0xfe;
-    xor_01 = analyze_run_target(argv, in_data, in_len, 0);
+    xor_01 = analyze_run_target(in_data, in_len, 0);
 
     in_data[i] = (in_data[i] ^ 0x01) - 0x10;
-    sub_10 = analyze_run_target(argv, in_data, in_len, 0);
+    sub_10 = analyze_run_target(in_data, in_len, 0);
 
     in_data[i] += 0x20;
-    add_10 = analyze_run_target(argv, in_data, in_len, 0);
+    add_10 = analyze_run_target(in_data, in_len, 0);
     in_data[i] -= 0x10;
 
     /* Classify current behavior. */
@@ -712,7 +617,7 @@ static void handle_stop_sig(int sig) {
   (void)sig;
   stop_soon = 1;
 
-  if (child_pid > 0) { kill(child_pid, SIGKILL); }
+  afl_fsrv_killall();
 
 }
 
@@ -724,10 +629,10 @@ static void set_up_environment(char **argv) {
   char *afl_preload;
   char *frida_afl_preload = NULL;
 
-  dev_null_fd = open("/dev/null", O_RDWR);
-  if (dev_null_fd < 0) { PFATAL("Unable to open /dev/null"); }
+  fsrv.dev_null_fd = open("/dev/null", O_RDWR);
+  if (fsrv.dev_null_fd < 0) { PFATAL("Unable to open /dev/null"); }
 
-  if (!prog_in) {
+  if (!fsrv.out_file) {
 
     u8 *use_dir = ".";
 
@@ -738,10 +643,15 @@ static void set_up_environment(char **argv) {
 
     }
 
-    prog_in = alloc_printf("%s/.afl-analyze-temp-%u", use_dir, (u32)getpid());
+    fsrv.out_file = alloc_printf("%s/.afl-analyze-temp-%u", use_dir, (u32)getpid());
 
   }
 
+  unlink(fsrv.out_file);
+  fsrv.out_fd = open(fsrv.out_file, O_RDWR | O_CREAT | O_EXCL, DEFAULT_PERMISSION);
+
+  if (fsrv.out_fd < 0) { PFATAL("Unable to create '%s'", fsrv.out_file); }
+
   /* Set sane defaults... */
 
   x = get_afl_env("ASAN_OPTIONS");
@@ -965,6 +875,8 @@ int main(int argc, char **argv_orig, char **envp) {
 
   SAYF(cCYA "afl-analyze" VERSION cRST " by Michal Zalewski\n");
 
+  afl_fsrv_init(&fsrv);
+
   while ((opt = getopt(argc, argv, "+i:f:m:t:eOQUWh")) > 0) {
 
     switch (opt) {
@@ -977,9 +889,9 @@ int main(int argc, char **argv_orig, char **envp) {
 
       case 'f':
 
-        if (prog_in) { FATAL("Multiple -f options not supported"); }
-        use_stdin = 0;
-        prog_in = optarg;
+        if (fsrv.out_file) { FATAL("Multiple -f options not supported"); }
+        fsrv.use_stdin = 0;
+        fsrv.out_file = ck_strdup(optarg);
         break;
 
       case 'e':
@@ -1000,6 +912,7 @@ int main(int argc, char **argv_orig, char **envp) {
         if (!strcmp(optarg, "none")) {
 
           mem_limit = 0;
+          fsrv.mem_limit = 0;
           break;
 
         }
@@ -1038,6 +951,8 @@ int main(int argc, char **argv_orig, char **envp) {
 
         }
 
+        fsrv.mem_limit = mem_limit;
+
       }
 
       break;
@@ -1057,6 +972,8 @@ int main(int argc, char **argv_orig, char **envp) {
 
         }
 
+        fsrv.exec_tmout = exec_tmout;
+
         break;
 
       case 'O':                                               /* FRIDA mode */
@@ -1064,6 +981,7 @@ int main(int argc, char **argv_orig, char **envp) {
         if (frida_mode) { FATAL("Multiple -O options not supported"); }
 
         frida_mode = 1;
+        fsrv.frida_mode = frida_mode;
 
         break;
 
@@ -1073,6 +991,8 @@ int main(int argc, char **argv_orig, char **envp) {
         if (!mem_limit_given) { mem_limit = MEM_LIMIT_QEMU; }
 
         qemu_mode = 1;
+        fsrv.mem_limit = mem_limit;
+        fsrv.qemu_mode = qemu_mode;
         break;
 
       case 'U':
@@ -1081,6 +1001,7 @@ int main(int argc, char **argv_orig, char **envp) {
         if (!mem_limit_given) { mem_limit = MEM_LIMIT_UNICORN; }
 
         unicorn_mode = 1;
+        fsrv.mem_limit = mem_limit;
         break;
 
       case 'W':                                           /* Wine+QEMU mode */
@@ -1090,6 +1011,8 @@ int main(int argc, char **argv_orig, char **envp) {
         use_wine = 1;
 
         if (!mem_limit_given) { mem_limit = 0; }
+        fsrv.qemu_mode = qemu_mode;
+        fsrv.mem_limit = mem_limit;
 
         break;
 
@@ -1108,6 +1031,7 @@ int main(int argc, char **argv_orig, char **envp) {
   if (optind == argc || !in_file) { usage(argv[0]); }
 
   map_size = get_map_size();
+  fsrv.map_size = map_size;
 
   use_hex_offsets = !!get_afl_env("AFL_ANALYZE_HEX");
 
@@ -1117,14 +1041,15 @@ int main(int argc, char **argv_orig, char **envp) {
 
   /* initialize cmplog_mode */
   shm.cmplog_mode = 0;
-  trace_bits = afl_shm_init(&shm, map_size, 0);
+
   atexit(at_exit_handler);
   setup_signal_handlers();
 
   set_up_environment(argv);
 
-  target_path = find_binary(argv[optind]);
-  detect_file_args(argv + optind, prog_in, &use_stdin);
+  fsrv.target_path = find_binary(argv[optind]);
+  fsrv.trace_bits = afl_shm_init(&shm, map_size, 0);
+  detect_file_args(argv + optind, fsrv.out_file, &use_stdin);
 
   if (qemu_mode) {
 
@@ -1148,14 +1073,31 @@ int main(int argc, char **argv_orig, char **envp) {
 
   SAYF("\n");
 
+  if (getenv("AFL_FORKSRV_INIT_TMOUT")) {
+
+    s32 forksrv_init_tmout = atoi(getenv("AFL_FORKSRV_INIT_TMOUT"));
+    if (forksrv_init_tmout < 1) {
+
+      FATAL("Bad value specified for AFL_FORKSRV_INIT_TMOUT");
+
+    }
+
+    fsrv.init_tmout = (u32)forksrv_init_tmout;
+
+  }
+
+  fsrv.kill_signal =
+      parse_afl_kill_signal_env(getenv("AFL_KILL_SIGNAL"), SIGKILL);
+
   read_initial_file();
 
   ACTF("Performing dry run (mem limit = %llu MB, timeout = %u ms%s)...",
        mem_limit, exec_tmout, edges_only ? ", edges only" : "");
 
-  analyze_run_target(use_argv, in_data, in_len, 1);
+  afl_fsrv_start(&fsrv, use_argv, &stop_soon, false);
+  analyze_run_target(in_data, in_len, 1);
 
-  if (child_timed_out) {
+  if (fsrv.last_run_timed_out) {
 
     FATAL("Target binary times out (adjusting -t may help).");
 
@@ -1167,13 +1109,15 @@ int main(int argc, char **argv_orig, char **envp) {
 
   }
 
-  analyze(use_argv);
+  analyze();
 
   OKF("We're done here. Have a nice day!\n");
 
-  if (target_path) { ck_free(target_path); }
-
   afl_shm_deinit(&shm);
+  afl_fsrv_deinit(&fsrv);
+  if (fsrv.target_path) { ck_free(fsrv.target_path); }
+  if (in_data) { ck_free(in_data); }
+
 
   exit(0);
 
diff --git a/src/afl-cc.c b/src/afl-cc.c
index 486f7468..980e5d86 100644
--- a/src/afl-cc.c
+++ b/src/afl-cc.c
@@ -315,7 +315,7 @@ static void edit_params(u32 argc, char **argv, char **envp) {
 
   u8 fortify_set = 0, asan_set = 0, x_set = 0, bit_mode = 0, shared_linking = 0,
      preprocessor_only = 0, have_unroll = 0, have_o = 0, have_pic = 0,
-     have_c = 0;
+     have_c = 0, partial_linking = 0;
 
   cc_params = ck_alloc((argc + 128) * sizeof(u8 *));
 
@@ -767,6 +767,8 @@ static void edit_params(u32 argc, char **argv, char **envp) {
     if (!strcmp(cur, "-x")) x_set = 1;
     if (!strcmp(cur, "-E")) preprocessor_only = 1;
     if (!strcmp(cur, "-shared")) shared_linking = 1;
+    if (!strcmp(cur, "-Wl,-r")) partial_linking = 1;
+    if (!strcmp(cur, "-Wl,-i")) partial_linking = 1;
     if (!strcmp(cur, "-c")) have_c = 1;
 
     if (!strncmp(cur, "-O", 2)) have_o = 1;
@@ -996,7 +998,7 @@ static void edit_params(u32 argc, char **argv, char **envp) {
     switch (bit_mode) {
 
       case 0:
-        if (!shared_linking)
+        if (!shared_linking && !partial_linking)
           cc_params[cc_par_cnt++] =
               alloc_printf("%s/afl-compiler-rt.o", obj_path);
         if (lto_mode)
@@ -1005,7 +1007,7 @@ static void edit_params(u32 argc, char **argv, char **envp) {
         break;
 
       case 32:
-        if (!shared_linking) {
+        if (!shared_linking && !partial_linking) {
 
           cc_params[cc_par_cnt++] =
               alloc_printf("%s/afl-compiler-rt-32.o", obj_path);
@@ -1026,7 +1028,7 @@ static void edit_params(u32 argc, char **argv, char **envp) {
         break;
 
       case 64:
-        if (!shared_linking) {
+        if (!shared_linking && !partial_linking) {
 
           cc_params[cc_par_cnt++] =
               alloc_printf("%s/afl-compiler-rt-64.o", obj_path);
@@ -1049,7 +1051,7 @@ static void edit_params(u32 argc, char **argv, char **envp) {
     }
 
   #if !defined(__APPLE__) && !defined(__sun)
-    if (!shared_linking)
+    if (!shared_linking && !partial_linking)
       cc_params[cc_par_cnt++] =
           alloc_printf("-Wl,--dynamic-list=%s/dynamic_list.txt", obj_path);
   #endif
diff --git a/src/afl-fuzz-init.c b/src/afl-fuzz-init.c
index 88b5bc02..872e3a32 100644
--- a/src/afl-fuzz-init.c
+++ b/src/afl-fuzz-init.c
@@ -480,13 +480,22 @@ void read_foreign_testcases(afl_state_t *afl, int first) {
 
   for (iter = 0; iter < afl->foreign_sync_cnt; iter++) {
 
-    if (afl->foreign_syncs[iter].dir != NULL &&
-        afl->foreign_syncs[iter].dir[0] != 0) {
+    if (afl->foreign_syncs[iter].dir && afl->foreign_syncs[iter].dir[0]) {
 
       if (first) ACTF("Scanning '%s'...", afl->foreign_syncs[iter].dir);
       time_t mtime_max = 0;
-      u8 *   name = strrchr(afl->foreign_syncs[iter].dir, '/');
-      if (!name) { name = afl->foreign_syncs[iter].dir; }
+
+      u8 *name = strrchr(afl->foreign_syncs[iter].dir, '/');
+      if (!name) {
+
+        name = afl->foreign_syncs[iter].dir;
+
+      } else {
+
+        ++name;
+
+      }
+
       if (!strcmp(name, "queue") || !strcmp(name, "out") ||
           !strcmp(name, "default")) {
 
diff --git a/src/afl-fuzz-stats.c b/src/afl-fuzz-stats.c
index 4884b942..9648d795 100644
--- a/src/afl-fuzz-stats.c
+++ b/src/afl-fuzz-stats.c
@@ -766,9 +766,9 @@ void show_stats(afl_state_t *afl) {
                 "   uniq hangs : " cRST "%-6s" bSTG         bV "\n",
        time_tmp, tmp);
 
-  SAYF(bVR bH bSTOP                                          cCYA
-       " cycle progress " bSTG bH10 bH5 bH2 bH2 bHB bH bSTOP cCYA
-       " map coverage " bSTG bH bHT bH20 bH2                 bVL "\n");
+  SAYF(bVR bH bSTOP                                              cCYA
+       " cycle progress " bSTG bH10 bH5 bH2 bH2 bH2 bHB bH bSTOP cCYA
+       " map coverage" bSTG bHT bH20 bH2                         bVL "\n");
 
   /* This gets funny because we want to print several variable-length variables
      together, but then cram them into a fixed-width field - so we need to
@@ -778,13 +778,13 @@ void show_stats(afl_state_t *afl) {
           afl->queue_cur->favored ? "." : "*", afl->queue_cur->fuzz_level,
           ((double)afl->current_entry * 100) / afl->queued_paths);
 
-  SAYF(bV bSTOP "  now processing : " cRST "%-16s " bSTG bV bSTOP, tmp);
+  SAYF(bV bSTOP "  now processing : " cRST "%-18s " bSTG bV bSTOP, tmp);
 
   sprintf(tmp, "%0.02f%% / %0.02f%%",
           ((double)afl->queue_cur->bitmap_size) * 100 / afl->fsrv.map_size,
           t_byte_ratio);
 
-  SAYF("    map density : %s%-21s" bSTG bV "\n",
+  SAYF("    map density : %s%-19s" bSTG bV "\n",
        t_byte_ratio > 70
            ? cLRD
            : ((t_bytes < 200 && !afl->non_instrumented_mode) ? cPIN : cRST),
@@ -793,23 +793,23 @@ void show_stats(afl_state_t *afl) {
   sprintf(tmp, "%s (%0.02f%%)", u_stringify_int(IB(0), afl->cur_skipped_paths),
           ((double)afl->cur_skipped_paths * 100) / afl->queued_paths);
 
-  SAYF(bV bSTOP " paths timed out : " cRST "%-16s " bSTG bV, tmp);
+  SAYF(bV bSTOP " paths timed out : " cRST "%-18s " bSTG bV, tmp);
 
   sprintf(tmp, "%0.02f bits/tuple", t_bytes ? (((double)t_bits) / t_bytes) : 0);
 
-  SAYF(bSTOP " count coverage : " cRST "%-21s" bSTG bV "\n", tmp);
+  SAYF(bSTOP " count coverage : " cRST "%-19s" bSTG bV "\n", tmp);
 
-  SAYF(bVR bH bSTOP                                         cCYA
-       " stage progress " bSTG bH10 bH5 bH2 bH2 bX bH bSTOP cCYA
-       " findings in depth " bSTG bH10 bH5 bH2 bH2          bVL "\n");
+  SAYF(bVR bH bSTOP                                             cCYA
+       " stage progress " bSTG bH10 bH5 bH2 bH2 bH2 bX bH bSTOP cCYA
+       " findings in depth " bSTG bH10 bH5 bH2                  bVL "\n");
 
   sprintf(tmp, "%s (%0.02f%%)", u_stringify_int(IB(0), afl->queued_favored),
           ((double)afl->queued_favored) * 100 / afl->queued_paths);
 
   /* Yeah... it's still going on... halp? */
 
-  SAYF(bV bSTOP "  now trying : " cRST "%-20s " bSTG bV bSTOP
-                " favored paths : " cRST "%-22s" bSTG   bV "\n",
+  SAYF(bV bSTOP "  now trying : " cRST "%-22s " bSTG bV bSTOP
+                " favored paths : " cRST "%-20s" bSTG   bV "\n",
        afl->stage_name, tmp);
 
   if (!afl->stage_max) {
@@ -824,12 +824,12 @@ void show_stats(afl_state_t *afl) {
 
   }
 
-  SAYF(bV bSTOP " stage execs : " cRST "%-21s" bSTG bV bSTOP, tmp);
+  SAYF(bV bSTOP " stage execs : " cRST "%-23s" bSTG bV bSTOP, tmp);
 
   sprintf(tmp, "%s (%0.02f%%)", u_stringify_int(IB(0), afl->queued_with_cov),
           ((double)afl->queued_with_cov) * 100 / afl->queued_paths);
 
-  SAYF("  new edges on : " cRST "%-22s" bSTG bV "\n", tmp);
+  SAYF("  new edges on : " cRST "%-20s" bSTG bV "\n", tmp);
 
   sprintf(tmp, "%s (%s%s unique)", u_stringify_int(IB(0), afl->total_crashes),
           u_stringify_int(IB(1), afl->unique_crashes),
@@ -837,14 +837,14 @@ void show_stats(afl_state_t *afl) {
 
   if (afl->crash_mode) {
 
-    SAYF(bV bSTOP " total execs : " cRST "%-20s " bSTG bV bSTOP
-                  "   new crashes : %s%-22s" bSTG         bV "\n",
+    SAYF(bV bSTOP " total execs : " cRST "%-22s " bSTG bV bSTOP
+                  "   new crashes : %s%-20s" bSTG         bV "\n",
          u_stringify_int(IB(0), afl->fsrv.total_execs), crash_color, tmp);
 
   } else {
 
-    SAYF(bV bSTOP " total execs : " cRST "%-20s " bSTG bV bSTOP
-                  " total crashes : %s%-22s" bSTG         bV "\n",
+    SAYF(bV bSTOP " total execs : " cRST "%-22s " bSTG bV bSTOP
+                  " total crashes : %s%-20s" bSTG         bV "\n",
          u_stringify_int(IB(0), afl->fsrv.total_execs), crash_color, tmp);
 
   }
@@ -856,12 +856,12 @@ void show_stats(afl_state_t *afl) {
     sprintf(tmp, "%s/sec (%s)", u_stringify_float(IB(0), afl->stats_avg_exec),
             afl->stats_avg_exec < 20 ? "zzzz..." : "slow!");
 
-    SAYF(bV bSTOP "  exec speed : " cLRD "%-20s ", tmp);
+    SAYF(bV bSTOP "  exec speed : " cLRD "%-22s ", tmp);
 
   } else {
 
     sprintf(tmp, "%s/sec", u_stringify_float(IB(0), afl->stats_avg_exec));
-    SAYF(bV bSTOP "  exec speed : " cRST "%-20s ", tmp);
+    SAYF(bV bSTOP "  exec speed : " cRST "%-22s ", tmp);
 
   }
 
@@ -869,13 +869,12 @@ void show_stats(afl_state_t *afl) {
           u_stringify_int(IB(1), afl->unique_tmouts),
           (afl->unique_hangs >= KEEP_UNIQUE_HANG) ? "+" : "");
 
-  SAYF(bSTG bV bSTOP "  total tmouts : " cRST "%-22s" bSTG bV "\n", tmp);
+  SAYF(bSTG bV bSTOP "  total tmouts : " cRST "%-20s" bSTG bV "\n", tmp);
 
   /* Aaaalmost there... hold on! */
 
-  SAYF(bVR bH cCYA                                                     bSTOP
-       " fuzzing strategy yields " bSTG bH10 bHT bH10 bH5 bHB bH bSTOP cCYA
-       " path geometry " bSTG bH5 bH2 bVL "\n");
+  SAYF(bVR bH cCYA bSTOP " fuzzing strategy yields " bSTG bH10 bH2 bHT bH10 bH2
+           bH bHB bH bSTOP cCYA " path geometry " bSTG bH5 bH2 bVL "\n");
 
   if (unlikely(afl->custom_only)) {
 
@@ -1017,9 +1016,10 @@ void show_stats(afl_state_t *afl) {
   if (unlikely(afl->afl_env.afl_custom_mutator_library)) {
 
     strcat(tmp, " ");
-    strcat(tmp, u_stringify_int(IB(2), afl->stage_finds[STAGE_PYTHON]));
+    strcat(tmp, u_stringify_int(IB(2), afl->stage_finds[STAGE_CUSTOM_MUTATOR]));
     strcat(tmp, "/");
-    strcat(tmp, u_stringify_int(IB(3), afl->stage_cycles[STAGE_PYTHON]));
+    strcat(tmp,
+           u_stringify_int(IB(3), afl->stage_cycles[STAGE_CUSTOM_MUTATOR]));
     strcat(tmp, ",");
 
   } else {
diff --git a/src/afl-fuzz.c b/src/afl-fuzz.c
index 196547f4..9a3780fb 100644
--- a/src/afl-fuzz.c
+++ b/src/afl-fuzz.c
@@ -575,7 +575,6 @@ int main(int argc, char **argv_orig, char **envp) {
         }
 
         afl->sync_id = ck_strdup(optarg);
-        afl->skip_deterministic = 0;  // force deterministic fuzzing
         afl->old_seed_selection = 1;  // force old queue walking seed selection
         afl->disable_trim = 1;        // disable trimming
 
@@ -1206,6 +1205,8 @@ int main(int argc, char **argv_orig, char **envp) {
 
   }
 
+  setenv("__AFL_OUT_DIR", afl->out_dir, 1);
+
   if (get_afl_env("AFL_DISABLE_TRIM")) { afl->disable_trim = 1; }
 
   if (getenv("AFL_NO_UI") && getenv("AFL_FORCE_UI")) {
diff --git a/test/test-llvm.sh b/test/test-llvm.sh
index 1152cc4e..7cdc83cb 100755
--- a/test/test-llvm.sh
+++ b/test/test-llvm.sh
@@ -4,14 +4,6 @@
 
 $ECHO "$BLUE[*] Testing: llvm_mode, afl-showmap, afl-fuzz, afl-cmin and afl-tmin"
 test -e ../afl-clang-fast -a -e ../split-switches-pass.so && {
-  # on FreeBSD need to set AFL_CC
-  test `uname -s` = 'FreeBSD' && {
-    if type clang >/dev/null; then
-      export AFL_CC=`command -v clang`
-    else
-      export AFL_CC=`$LLVM_CONFIG --bindir`/clang
-    fi
-  }
   ../afl-clang-fast -o test-instr.plain ../test-instr.c > /dev/null 2>&1
   AFL_HARDEN=1 ../afl-clang-fast -o test-compcov.harden test-compcov.c > /dev/null 2>&1
   test -e test-instr.plain && {