113 files changed, 7038 insertions, 5261 deletions
diff --git a/.gitignore b/.gitignore
index 6bb2092a..c8a92b7d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,6 +16,7 @@ afl-gcc
 afl-gcc-fast
 afl-g++-fast
 afl-gotcpu
+afl-ld
 afl-qemu-trace
 afl-showmap
 afl-tmin
@@ -39,8 +40,12 @@ afl-tmin.8
 afl-whatsup.8
 qemu_mode/libcompcov/compcovtest
 as
+ld
 qemu_mode/qemu-*
 unicorn_mode/unicornafl/
 unicorn_mode/samples/*/\.test-*
 unicorn_mode/samples/*/output/
 core\.*
+test/unittests/unit_maybe_alloc
+test/unittests/unit_preallocable
+test/unittests/unit_list
diff --git a/.travis.yml b/.travis.yml
index 278f312c..e6c6affa 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -5,6 +5,7 @@ sudo: required
 branches:
   only:
     - master
+    - dev
 
 matrix:
   include:
@@ -28,6 +29,7 @@ matrix:
 jobs:
   allow_failures:
     - os: osx
+    - arch: arm64
 
 env:
   - AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1 AFL_NO_UI=1 AFL_STOP_MANUALLY=1
@@ -39,8 +41,8 @@ before_install:
   # export LLVM_DIR=${TRAVIS_BUILD_DIR}/${LLVM_PACKAGE}
   - echo Testing on $NAME
   - if [ "$TRAVIS_OS_NAME" = "osx" ]; then wget "$LINK""$NAME".tar.xz ; export LLVM_CONFIG=`pwd`/"$NAME" ; tar xJf "$NAME".tar.xz ; fi
-  - if [ "$MODERN" = "yes" ]; then sudo apt update ; sudo apt upgrade ; sudo apt install -y libtool libtool-bin automake bison libglib2.0 build-essential clang gcc-"$GCC" gcc-"$GCC"-plugin-dev libc++-"$GCC"-dev findutils ; fi
-  - if [ "$MODERN" = "no" ]; then sudo apt update ; sudo apt install -y libtool $EXTRA libpixman-1-dev automake bison libglib2.0 build-essential gcc-"$GCC" gcc-"$GCC"-plugin-dev libc++-dev findutils ; fi
+  - if [ "$MODERN" = "yes" ]; then sudo apt update ; sudo apt upgrade ; sudo apt install -y libtool libtool-bin automake bison libglib2.0 build-essential clang gcc-"$GCC" gcc-"$GCC"-plugin-dev libc++-"$GCC"-dev findutils libcmocka-dev ; fi
+  - if [ "$MODERN" = "no" ]; then sudo apt update ; sudo apt install -y libtool $EXTRA libpixman-1-dev automake bison libglib2.0 build-essential gcc-"$GCC" gcc-"$GCC"-plugin-dev libc++-dev findutils libcmocka-dev ; fi
 
 script:
   - gcc -v
diff --git a/GNUmakefile b/GNUmakefile
new file mode 100644
index 00000000..a3ac2e06
--- /dev/null
+++ b/GNUmakefile
@@ -0,0 +1,484 @@
+#
+# american fuzzy lop++ - makefile
+# -----------------------------
+#
+# Originally written by Michal Zalewski
+#
+# Copyright 2013, 2014, 2015, 2016, 2017 Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+
+# For Heiko:
+#TEST_MMAP=1
+# the hash character is treated differently in different make versions
+# so use a variable for '#'
+HASH=\#
+
+PREFIX     ?= /usr/local
+BIN_PATH    = $(PREFIX)/bin
+HELPER_PATH = $(PREFIX)/lib/afl
+DOC_PATH    = $(PREFIX)/share/doc/afl
+MISC_PATH   = $(PREFIX)/share/afl
+MAN_PATH    = $(PREFIX)/man/man8
+# VERSION is parsed from the config header (include/config.h, see COMM_HDR).
+PROGNAME    = afl
+VERSION     = $(shell grep '^$(HASH)define VERSION ' include/config.h | cut -d '"' -f2)
+
+# PROGS intentionally omit afl-as, which gets installed elsewhere.
+
+PROGS       = afl-gcc afl-fuzz afl-showmap afl-tmin afl-gotcpu afl-analyze
+SH_PROGS    = afl-plot afl-cmin afl-cmin.bash afl-whatsup afl-system-config
+MANPAGES=$(foreach p, $(PROGS) $(SH_PROGS), $(p).8) afl-as.8
+ASAN_OPTIONS=detect_leaks=0
+
+ifeq "$(findstring android, $(shell $(CC) --version 2>/dev/null))" ""
+ifeq "$(shell echo 'int main() {return 0; }' | $(CC) $(CFLAGS) -Werror -x c - -flto=full -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
+	CFLAGS_FLTO ?= -flto=full
+else
+ ifeq "$(shell echo 'int main() {return 0; }' | $(CC) $(CFLAGS) -Werror -x c - -flto=thin -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
+	CFLAGS_FLTO ?= -flto=thin
+ else
+  ifeq "$(shell echo 'int main() {return 0; }' | $(CC) $(CFLAGS) -Werror -x c - -flto -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
+	CFLAGS_FLTO ?= -flto
+  endif
+ endif
+endif
+endif
+
+ifneq "$(shell uname)" "Darwin"
+ ifeq "$(shell echo 'int main() {return 0; }' | $(CC) $(CFLAGS) -Werror -x c - -march=native -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
+	CFLAGS_OPT = -march=native
+ endif
+ # OS X does not like _FORTIFY_SOURCE=2
+ CFLAGS_OPT += -D_FORTIFY_SOURCE=2
+endif
+
+ifneq "$(shell uname -m)" "x86_64"
+ ifneq "$(shell uname -m)" "i386"
+  ifneq "$(shell uname -m)" "amd64"
+   ifneq "$(shell uname -m)" "i86pc"
+	AFL_NO_X86=1
+   endif
+  endif
+ endif
+endif
+
+CFLAGS     ?= -O3 -funroll-loops $(CFLAGS_OPT)
+override CFLAGS += -Wall -g -Wno-pointer-sign \
+			  -I include/ -Werror -DAFL_PATH=\"$(HELPER_PATH)\" \
+			  -DBIN_PATH=\"$(BIN_PATH)\" -DDOC_PATH=\"$(DOC_PATH)\"
+
+AFL_FUZZ_FILES = $(wildcard src/afl-fuzz*.c)
+
+ifneq "$(shell command -v python3m 2>/dev/null)" ""
+  ifneq "$(shell command -v python3m-config 2>/dev/null)" ""
+    PYTHON_INCLUDE  ?= $(shell python3m-config --includes)
+    PYTHON_VERSION  ?= $(strip $(shell python3m --version 2>&1))
+    # Starting with python3.8, we need to pass the `embed` flag. Earlier versions didn't know this flag.
+    ifeq "$(shell python3m-config --embed --libs 2>/dev/null | grep -q lpython && echo 1 )" "1"
+      PYTHON_LIB      ?= $(shell python3m-config --libs --embed --ldflags)
+    else
+      PYTHON_LIB      ?= $(shell python3m-config --ldflags)
+    endif
+  endif
+endif
+
+ifneq "$(shell command -v python3 2>/dev/null)" ""
+  ifneq "$(shell command -v python3-config 2>/dev/null)" ""
+    PYTHON_INCLUDE  ?= $(shell python3-config --includes)
+    PYTHON_VERSION  ?= $(strip $(shell python3 --version 2>&1))
+    # Starting with python3.8, we need to pass the `embed` flag. Earlier versions didn't know this flag.
+    ifeq "$(shell python3-config --embed --libs 2>/dev/null | grep -q lpython && echo 1 )" "1"
+      PYTHON_LIB      ?= $(shell python3-config --libs --embed --ldflags)
+    else
+      PYTHON_LIB      ?= $(shell python3-config --ldflags)
+    endif
+  endif
+endif
+
+ifneq "$(shell command -v python 2>/dev/null)" ""
+  ifneq "$(shell command -v python-config 2>/dev/null)" ""
+    PYTHON_INCLUDE  ?= $(shell python-config --includes)
+    PYTHON_LIB      ?= $(shell python-config --ldflags)
+    PYTHON_VERSION  ?= $(strip $(shell python --version 2>&1))
+  endif
+endif
+
+ifdef SOURCE_DATE_EPOCH
+    BUILD_DATE ?= $(shell date -u -d "@$(SOURCE_DATE_EPOCH)" "+%Y-%m-%d" 2>/dev/null || date -u -r "$(SOURCE_DATE_EPOCH)" "+%Y-%m-%d" 2>/dev/null || date -u "+%Y-%m-%d")
+else
+    BUILD_DATE ?= $(shell date "+%Y-%m-%d")
+endif
+
+ifneq "$(filter Linux GNU%,$(shell uname))" ""
+  LDFLAGS  += -ldl
+endif
+
+ifneq "$(findstring FreeBSD, $(shell uname))" ""
+  CFLAGS += -pthread
+  LDFLAGS  += -lpthread
+endif
+
+ifneq "$(findstring NetBSD, $(shell uname))" ""
+  CFLAGS += -pthread
+  LDFLAGS  += -lpthread
+endif
+
+ifeq "$(findstring clang, $(shell $(CC) --version 2>/dev/null))" ""
+  TEST_CC   = afl-gcc
+else
+  TEST_CC   = afl-clang
+endif
+
+COMM_HDR    = include/alloc-inl.h include/config.h include/debug.h include/types.h
+
+ifeq "$(shell echo '$(HASH)include <Python.h>@int main() {return 0; }' | tr @ '\n' | $(CC) $(CFLAGS) -x c - -o .test $(PYTHON_INCLUDE) $(LDFLAGS) $(PYTHON_LIB) 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
+	PYTHON_OK=1
+	PYFLAGS=-DUSE_PYTHON $(PYTHON_INCLUDE) $(LDFLAGS) $(PYTHON_LIB) -DPYTHON_VERSION="\"$(PYTHON_VERSION)\""
+else
+	PYTHON_OK=0
+	PYFLAGS=
+endif
+
+ifdef STATIC
+  $(info Compiling static version of binaries)
+  # Disable python for static compilation to simplify things
+  PYTHON_OK=0
+  PYFLAGS=
+
+  CFLAGS += -static
+  LDFLAGS += -lm -lpthread -lz -lutil
+endif
+
+ASAN_CFLAGS=-fsanitize=address -fstack-protector-all -fno-omit-frame-pointer
+ASAN_LDFLAGS+=-fsanitize=address -fstack-protector-all -fno-omit-frame-pointer
+
+ifdef ASAN_BUILD
+  $(info Compiling ASAN version of binaries)
+  CFLAGS+=$(ASAN_CFLAGS)
+  LDFLAGS+=$(ASAN_LDFLAGS)
+endif
+
+ifdef PROFILING
+  $(info Compiling profiling version of binaries)
+  CFLAGS+=-pg
+  LDFLAGS+=-pg
+endif
+
+ifeq "$(shell echo '$(HASH)include <sys/ipc.h>@$(HASH)include <sys/shm.h>@int main() { int _id = shmget(IPC_PRIVATE, 65536, IPC_CREAT | IPC_EXCL | 0600); shmctl(_id, IPC_RMID, 0); return 0;}' | tr @ '\n' | $(CC) $(CFLAGS) -x c - -o .test2 2>/dev/null && echo 1 || echo 0 ; rm -f .test2 )" "1"
+	SHMAT_OK=1
+else
+	SHMAT_OK=0
+	CFLAGS+=-DUSEMMAP=1
+	LDFLAGS+=-Wno-deprecated-declarations
+endif
+
+ifeq "$(TEST_MMAP)" "1"
+	SHMAT_OK=0
+	CFLAGS+=-DUSEMMAP=1
+	LDFLAGS+=-Wno-deprecated-declarations
+endif
+
+all:	test_x86 test_shm test_python ready $(PROGS) afl-as test_build all_done
+
+man:    $(MANPAGES)
+
+tests:	source-only
+	@cd test ; ./test.sh
+	@rm -f test/errors
+
+performance-tests:	performance-test
+test-performance:	performance-test
+
+performance-test:	source-only
+	@cd test ; ./test-performance.sh
+
+.PHONY: all man tests performance-tests test-performance performance-test help test_x86 test_shm test_python ready radamsa document unit_maybe_alloc unit_list unit_preallocable unit_clean unit code-format test_build all_done clean distrib binary-only source-only install
+# hint: make targets are also listed in the top level README.md
+help:
+	@echo "HELP --- the following make targets exist:"
+	@echo "=========================================="
+	@echo "all: just the main afl++ binaries"
+	@echo "binary-only: everything for binary-only fuzzing: qemu_mode, unicorn_mode, libdislocator, libtokencap, radamsa"
+	@echo "source-only: everything for source code fuzzing: llvm_mode, gcc_plugin, libdislocator, libtokencap, radamsa"
+	@echo "distrib: everything (for both binary-only and source code fuzzing)"
+	@echo "man: creates simple man pages from the help option of the programs"
+	@echo "install: installs everything you have compiled with the build option above"
+	@echo "clean: cleans everything. for qemu_mode it means it deletes all downloads as well"
+	@echo "code-format: format the code, do this before you commit and send a PR please!"
+	@echo "tests: this runs the test framework. It is more catered for the developers, but if you run into problems this helps pinpointing the problem"
+	@echo "unit: perform unit tests (based on cmocka)"
+	@echo "document: creates afl-fuzz-document which will only do one run and save all manipulated inputs into out/queue/mutations"
+	@echo "help: shows these build options :-)"
+	@echo "=========================================="
+	@echo "Recommended: \"distrib\" or \"source-only\", then \"install\""
+	@echo
+	@echo Known build environment options:
+	@echo "=========================================="
+	@echo STATIC - compile AFL++ static
+	@echo ASAN_BUILD - compiles with address sanitizer for debug purposes
+	@echo PROFILING - compile afl-fuzz with profiling information
+	@echo AFL_NO_X86 - if compiling on non-intel/amd platforms
+	@echo "=========================================="
+	@echo e.g.: make ASAN_BUILD=1
+
+ifndef AFL_NO_X86
+
+test_x86:
+	@echo "[*] Checking for the default compiler cc..."
+	@type $(CC) >/dev/null || ( echo; echo "Oops, looks like there is no compiler '"$(CC)"' in your path."; echo; echo "Don't panic! You can restart with '"$(_)" CC=<yourCcompiler>'."; echo; exit 1 )
+	@echo "[*] Checking for the ability to compile x86 code..."
+	@echo 'main() { __asm__("xorb %al, %al"); }' | $(CC) $(CFLAGS) -w -x c - -o .test1 || ( echo; echo "Oops, looks like your compiler can't generate x86 code."; echo; echo "Don't panic! You can use the LLVM or QEMU mode, but see docs/INSTALL first."; echo "(To ignore this error, set AFL_NO_X86=1 and try again.)"; echo; exit 1 )
+	@rm -f .test1
+
+else
+
+test_x86:
+	@echo "[!] Note: skipping x86 compilation checks (AFL_NO_X86 set)."
+
+endif
+
+
+ifeq "$(SHMAT_OK)" "1"
+
+test_shm:
+	@echo "[+] shmat seems to be working."
+	@rm -f .test2
+
+else
+
+test_shm:
+	@echo "[-] shmat seems not to be working, switching to mmap implementation"
+
+endif
+
+
+ifeq "$(PYTHON_OK)" "1"
+
+test_python:
+	@rm -f .test 2> /dev/null
+	@echo "[+] $(PYTHON_VERSION) support seems to be working."
+
+else
+
+test_python:
+	@echo "[-] You seem to need to install the package python3-dev or python2-dev (and perhaps python[23]-apt), but it is optional so we continue"
+
+endif
+
+
+ready:
+	@echo "[+] Everything seems to be working, ready to compile."
+
+afl-gcc: src/afl-gcc.c $(COMM_HDR) | test_x86
+	$(CC) $(CFLAGS) src/$@.c -o $@ $(LDFLAGS)
+	set -e; for i in afl-g++ afl-clang afl-clang++; do ln -sf afl-gcc $$i; done
+
+afl-as: src/afl-as.c include/afl-as.h $(COMM_HDR) | test_x86
+	$(CC) $(CFLAGS) src/$@.c -o $@ $(LDFLAGS)
+	ln -sf afl-as as
+
+src/afl-common.o : $(COMM_HDR) src/afl-common.c include/common.h
+	$(CC) $(CFLAGS) $(CFLAGS_FLTO) -c $(@:.o=.c) -o $@
+
+src/afl-forkserver.o : $(COMM_HDR) src/afl-forkserver.c include/forkserver.h
+	$(CC) $(CFLAGS) $(CFLAGS_FLTO) -c $(@:.o=.c) -o $@
+
+src/afl-sharedmem.o : $(COMM_HDR) src/afl-sharedmem.c include/sharedmem.h
+	$(CC) $(CFLAGS) $(CFLAGS_FLTO) -c $(@:.o=.c) -o $@
+
+radamsa: src/third_party/libradamsa/libradamsa.so
+	cp $< .
+
+src/third_party/libradamsa/libradamsa.so: src/third_party/libradamsa/libradamsa.c src/third_party/libradamsa/radamsa.h
+	$(MAKE) -C src/third_party/libradamsa/ CFLAGS="$(CFLAGS)"
+
+afl-fuzz: $(COMM_HDR) include/afl-fuzz.h $(AFL_FUZZ_FILES) src/afl-common.o src/afl-sharedmem.o src/afl-forkserver.o | test_x86
+	$(CC) $(CFLAGS) $(CFLAGS_FLTO) $(AFL_FUZZ_FILES) src/afl-common.o src/afl-sharedmem.o src/afl-forkserver.o -o $@ $(PYFLAGS) $(LDFLAGS)
+# afl-showmap links src/afl-forkserver.o, so that object must be a prerequisite as well.
+afl-showmap: src/afl-showmap.c src/afl-common.o src/afl-sharedmem.o src/afl-forkserver.o $(COMM_HDR) | test_x86
+	$(CC) $(CFLAGS) $(CFLAGS_FLTO) src/$@.c src/afl-common.o src/afl-sharedmem.o src/afl-forkserver.o -o $@ $(LDFLAGS)
+
+afl-tmin: src/afl-tmin.c src/afl-common.o src/afl-sharedmem.o src/afl-forkserver.o $(COMM_HDR) | test_x86
+	$(CC) $(CFLAGS) $(CFLAGS_FLTO) src/$@.c src/afl-common.o src/afl-sharedmem.o src/afl-forkserver.o -o $@ $(LDFLAGS)
+
+afl-analyze: src/afl-analyze.c src/afl-common.o src/afl-sharedmem.o $(COMM_HDR) | test_x86
+	$(CC) $(CFLAGS) $(CFLAGS_FLTO) src/$@.c src/afl-common.o src/afl-sharedmem.o -o $@ $(LDFLAGS)
+
+afl-gotcpu: src/afl-gotcpu.c src/afl-common.o $(COMM_HDR) | test_x86
+	$(CC) $(CFLAGS) src/$@.c src/afl-common.o -o $@ $(LDFLAGS)
+
+
+# document all mutations and only do one run (use with only one input file!)
+document: $(COMM_HDR) include/afl-fuzz.h $(AFL_FUZZ_FILES) src/afl-common.o src/afl-sharedmem.o src/afl-forkserver.o | test_x86
+	$(CC) -D_AFL_DOCUMENT_MUTATIONS $(CFLAGS) $(CFLAGS_FLTO) $(AFL_FUZZ_FILES) src/afl-common.o src/afl-sharedmem.o src/afl-forkserver.o -o afl-fuzz-document $(PYFLAGS) $(LDFLAGS)
+
+test/unittests/unit_maybe_alloc.o : $(COMM_HDR) include/alloc-inl.h test/unittests/unit_maybe_alloc.c $(AFL_FUZZ_FILES)
+	$(CC) $(CFLAGS) $(ASAN_CFLAGS) -c test/unittests/unit_maybe_alloc.c -o test/unittests/unit_maybe_alloc.o
+
+test/unittests/unit_preallocable.o : $(COMM_HDR) include/alloc-inl.h test/unittests/unit_preallocable.c $(AFL_FUZZ_FILES)
+	$(CC) $(CFLAGS) $(ASAN_CFLAGS) -c test/unittests/unit_preallocable.c -o test/unittests/unit_preallocable.o
+
+unit_maybe_alloc: test/unittests/unit_maybe_alloc.o
+	$(CC) $(CFLAGS) -Wl,--wrap=exit -Wl,--wrap=printf test/unittests/unit_maybe_alloc.o -o test/unittests/unit_maybe_alloc $(LDFLAGS) $(ASAN_LDFLAGS) -lcmocka
+	./test/unittests/unit_maybe_alloc
+
+test/unittests/unit_list.o : $(COMM_HDR) include/list.h test/unittests/unit_list.c $(AFL_FUZZ_FILES)
+	$(CC) $(CFLAGS) $(ASAN_CFLAGS) -c test/unittests/unit_list.c -o test/unittests/unit_list.o
+
+unit_list: test/unittests/unit_list.o
+	$(CC) $(CFLAGS) $(ASAN_CFLAGS) -Wl,--wrap=exit -Wl,--wrap=printf test/unittests/unit_list.o -o test/unittests/unit_list  $(LDFLAGS) $(ASAN_LDFLAGS) -lcmocka
+	./test/unittests/unit_list
+
+test/unittests/preallocable.o : $(COMM_HDR) include/afl-prealloc.h test/unittests/preallocable.c $(AFL_FUZZ_FILES)
+	$(CC) $(CFLAGS) $(ASAN_CFLAGS) $(CFLAGS_FLTO) -c test/unittests/preallocable.c -o test/unittests/preallocable.o
+
+unit_preallocable: test/unittests/unit_preallocable.o
+	$(CC) $(CFLAGS) $(ASAN_CFLAGS) -Wl,--wrap=exit -Wl,--wrap=printf test/unittests/unit_preallocable.o -o test/unittests/unit_preallocable $(LDFLAGS) $(ASAN_LDFLAGS) -lcmocka
+	./test/unittests/unit_preallocable
+
+unit_clean:
+	@rm -f ./test/unittests/unit_preallocable ./test/unittests/unit_list ./test/unittests/unit_maybe_alloc test/unittests/*.o
+
+unit: unit_maybe_alloc unit_preallocable unit_list unit_clean
+
+code-format:
+	./.custom-format.py -i src/*.c
+	./.custom-format.py -i include/*.h
+	./.custom-format.py -i libdislocator/*.c
+	./.custom-format.py -i libtokencap/*.c
+	./.custom-format.py -i llvm_mode/*.c
+	./.custom-format.py -i llvm_mode/*.h
+	./.custom-format.py -i llvm_mode/*.cc
+	./.custom-format.py -i gcc_plugin/*.c
+	#./.custom-format.py -i gcc_plugin/*.h
+	./.custom-format.py -i gcc_plugin/*.cc
+	./.custom-format.py -i examples/*/*.c
+	./.custom-format.py -i examples/*/*.h
+	./.custom-format.py -i test/*.c
+	./.custom-format.py -i qemu_mode/patches/*.h
+	./.custom-format.py -i qemu_mode/libcompcov/*.c
+	./.custom-format.py -i qemu_mode/libcompcov/*.cc
+	./.custom-format.py -i qemu_mode/libcompcov/*.h
+	./.custom-format.py -i qbdi_mode/*.c
+	./.custom-format.py -i qbdi_mode/*.cpp
+	./.custom-format.py -i *.h
+	./.custom-format.py -i *.c
+
+
+ifndef AFL_NO_X86
+
+test_build: afl-gcc afl-as afl-showmap
+	@echo "[*] Testing the CC wrapper and instrumentation output..."
+	@unset AFL_USE_ASAN AFL_USE_MSAN AFL_CC; AFL_DEBUG=1 AFL_INST_RATIO=100 AFL_PATH=. ./$(TEST_CC) $(CFLAGS) test-instr.c -o test-instr $(LDFLAGS) 2>&1 | grep 'afl-as' >/dev/null || (echo "Oops, afl-as did not get called from "$(TEST_CC)". This is normally achieved by "$(CC)" honoring the -B option."; exit 1 )
+	ASAN_OPTIONS=detect_leaks=0 ./afl-showmap -m none -q -o .test-instr0 ./test-instr < /dev/null
+	echo 1 | ASAN_OPTIONS=detect_leaks=0 ./afl-showmap -m none -q -o .test-instr1 ./test-instr
+	@rm -f test-instr
+	@cmp -s .test-instr0 .test-instr1; DR="$$?"; rm -f .test-instr0 .test-instr1; if [ "$$DR" = "0" ]; then echo; echo "Oops, the instrumentation does not seem to be behaving correctly!"; echo; echo "Please post to https://github.com/AFLplusplus/AFLplusplus/issues to troubleshoot the issue."; echo; exit 1; fi
+	@echo "[+] All right, the instrumentation seems to be working!"
+
+else
+
+test_build: afl-gcc afl-as afl-showmap
+	@echo "[!] Note: skipping build tests (you may need to use LLVM or QEMU mode)."
+
+endif
+
+
+all_done: test_build
+	@if [ ! "`type clang 2>/dev/null`" = "" ]; then echo "[+] LLVM users: see llvm_mode/README.md for a faster alternative to afl-gcc."; fi
+	@echo "[+] All done! Be sure to review the README.md - it's pretty short and useful."
+	@if [ "`uname`" = "Darwin" ]; then printf "\nWARNING: Fuzzing on MacOS X is slow because of the unusually high overhead of\nfork() on this OS. Consider using Linux or *BSD. You can also use VirtualBox\n(virtualbox.org) to put AFL inside a Linux or *BSD VM.\n\n"; fi
+	@! tty <&1 >/dev/null || printf "\033[0;30mNOTE: If you can read this, your terminal probably uses white background.\nThis will make the UI hard to read. See docs/status_screen.md for advice.\033[0m\n" 2>/dev/null
+
+.NOTPARALLEL: clean
+# Remove all build products, including unit-test binaries left behind when "make unit" aborts before unit_clean.
+clean:
+	rm -f $(PROGS) libradamsa.so afl-fuzz-document afl-as as afl-g++ afl-clang afl-clang++ *.o src/*.o *~ a.out core core.[1-9][0-9]* *.stackdump .test .test1 .test2 test-instr .test-instr0 .test-instr1 qemu_mode/qemu-3.1.1.tar.xz afl-qemu-trace afl-gcc-fast afl-gcc-pass.so afl-gcc-rt.o afl-g++-fast ld *.so *.8 test/unittests/*.o test/unittests/unit_maybe_alloc test/unittests/unit_preallocable test/unittests/unit_list test/unittests/preallocable
+	rm -rf out_dir qemu_mode/qemu-3.1.1 *.dSYM */*.dSYM
+	-$(MAKE) -C llvm_mode clean
+	-$(MAKE) -C gcc_plugin clean
+	$(MAKE) -C libdislocator clean
+	$(MAKE) -C libtokencap clean
+	$(MAKE) -C examples/socket_fuzzing clean
+	$(MAKE) -C examples/argv_fuzzing clean
+	$(MAKE) -C qemu_mode/unsigaction clean
+	$(MAKE) -C qemu_mode/libcompcov clean
+	$(MAKE) -C src/third_party/libradamsa/ clean
+	-rm -rf unicorn_mode/unicornafl
+
+distrib: all radamsa
+	-$(MAKE) -C llvm_mode
+	-$(MAKE) -C gcc_plugin
+	$(MAKE) -C libdislocator
+	$(MAKE) -C libtokencap
+	$(MAKE) -C examples/socket_fuzzing
+	$(MAKE) -C examples/argv_fuzzing
+	cd qemu_mode && sh ./build_qemu_support.sh
+	cd unicorn_mode && sh ./build_unicorn_support.sh
+
+binary-only: all radamsa
+	$(MAKE) -C libdislocator
+	$(MAKE) -C libtokencap
+	$(MAKE) -C examples/socket_fuzzing
+	$(MAKE) -C examples/argv_fuzzing
+	cd qemu_mode && sh ./build_qemu_support.sh
+	cd unicorn_mode && sh ./build_unicorn_support.sh
+
+source-only: all radamsa
+	-$(MAKE) -C llvm_mode
+	-$(MAKE) -C gcc_plugin
+	$(MAKE) -C libdislocator
+	$(MAKE) -C libtokencap
+
+%.8:	%
+	@echo .TH $* 8 $(BUILD_DATE) "afl++" > $@
+	@echo .SH NAME >> $@
+	@echo .B $* >> $@
+	@echo >> $@
+	@echo .SH SYNOPSIS >> $@
+	@./$* -h 2>&1 | head -n 3 | tail -n 1 | sed 's/^\.\///' >> $@
+	@echo >> $@
+	@echo .SH OPTIONS >> $@
+	@echo .nf >> $@
+	@./$* -hh 2>&1 | tail -n +4 >> $@
+	@echo >> $@
+	@echo .SH AUTHOR >> $@
+	@echo "afl++ was written by Michal \"lcamtuf\" Zalewski and is maintained by Marc \"van Hauser\" Heuse <mh@mh-sec.de>, Heiko \"hexcoder-\" Eissfeldt <heiko.eissfeldt@hexco.de>, Andrea Fioraldi <andreafioraldi@gmail.com> and Dominik Maier <domenukk@gmail.com>" >> $@
+	@echo  The homepage of afl++ is: https://github.com/AFLplusplus/AFLplusplus >> $@
+	@echo >> $@
+	@echo .SH LICENSE >> $@
+	@echo Apache License Version 2.0, January 2004 >> $@
+
+install: all $(MANPAGES)
+	install -d -m 755 $${DESTDIR}$(BIN_PATH) $${DESTDIR}$(HELPER_PATH) $${DESTDIR}$(DOC_PATH) $${DESTDIR}$(MISC_PATH)
+	rm -f $${DESTDIR}$(BIN_PATH)/afl-plot.sh
+	install -m 755 $(PROGS) $(SH_PROGS) $${DESTDIR}$(BIN_PATH)
+	rm -f $${DESTDIR}$(BIN_PATH)/afl-as
+	if [ -f afl-qemu-trace ]; then install -m 755 afl-qemu-trace $${DESTDIR}$(BIN_PATH); fi
+	if [ -f afl-gcc-fast ]; then set -e; install -m 755 afl-gcc-fast $${DESTDIR}$(BIN_PATH); ln -sf afl-gcc-fast $${DESTDIR}$(BIN_PATH)/afl-g++-fast; install -m 755 afl-gcc-pass.so afl-gcc-rt.o $${DESTDIR}$(HELPER_PATH); fi
+	if [ -f afl-clang-fast ]; then $(MAKE) -C llvm_mode install; fi
+	if [ -f libdislocator.so ]; then set -e; install -m 755 libdislocator.so $${DESTDIR}$(HELPER_PATH); fi
+	if [ -f libtokencap.so ]; then set -e; install -m 755 libtokencap.so $${DESTDIR}$(HELPER_PATH); fi
+	if [ -f libcompcov.so ]; then set -e; install -m 755 libcompcov.so $${DESTDIR}$(HELPER_PATH); fi
+	if [ -f libradamsa.so ]; then set -e; install -m 755 libradamsa.so $${DESTDIR}$(HELPER_PATH); fi
+	if [ -f afl-fuzz-document ]; then set -e; install -m 755 afl-fuzz-document $${DESTDIR}$(BIN_PATH); fi
+	if [ -f socketfuzz32.so -o -f socketfuzz64.so ]; then $(MAKE) -C examples/socket_fuzzing install; fi
+	if [ -f argvfuzz32.so -o -f argvfuzz64.so ]; then $(MAKE) -C examples/argv_fuzzing install; fi
+# Compiler-wrapper symlinks; afl-clang/afl-clang++ point at afl-clang-fast when it was built.
+	set -e; ln -sf afl-gcc $${DESTDIR}$(BIN_PATH)/afl-g++
+	set -e; if [ -f afl-clang-fast ] ; then ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang ; ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang++ ; else ln -sf afl-gcc $${DESTDIR}$(BIN_PATH)/afl-clang ; ln -sf afl-gcc $${DESTDIR}$(BIN_PATH)/afl-clang++; fi
+# Man pages.
+	mkdir -m 0755 -p $${DESTDIR}$(MAN_PATH)
+	install -m0644 *.8 $${DESTDIR}$(MAN_PATH)
+# afl-as helper (also reachable as "as"), documentation, test cases and dictionaries.
+	install -m 755 afl-as $${DESTDIR}$(HELPER_PATH)
+	ln -sf afl-as $${DESTDIR}$(HELPER_PATH)/as
+	install -m 644 docs/*.md $${DESTDIR}$(DOC_PATH)
+	cp -r testcases/ $${DESTDIR}$(MISC_PATH)
+	cp -r dictionaries/ $${DESTDIR}$(MISC_PATH)
diff --git a/Makefile b/Makefile
index c8095b05..0b306dde 100644
--- a/Makefile
+++ b/Makefile
@@ -1,453 +1,2 @@
-#
-# american fuzzy lop++ - makefile
-# -----------------------------
-#
-# Originally written by Michal Zalewski
-# 
-# Copyright 2013, 2014, 2015, 2016, 2017 Google Inc. All rights reserved.
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at:
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-
-# For Heiko:
-#TEST_MMAP=1
-# the hash character is treated differently in different make versions
-# so use a variable for '#'
-HASH=\#
-
-PREFIX     ?= /usr/local
-BIN_PATH    = $(PREFIX)/bin
-HELPER_PATH = $(PREFIX)/lib/afl
-DOC_PATH    = $(PREFIX)/share/doc/afl
-MISC_PATH   = $(PREFIX)/share/afl
-MAN_PATH    = $(PREFIX)/man/man8
-
-PROGNAME    = afl
-VERSION     = $(shell grep '^$(HASH)define VERSION ' ../config.h | cut -d '"' -f2)
-
-# PROGS intentionally omit afl-as, which gets installed elsewhere.
-
-PROGS       = afl-gcc afl-fuzz afl-showmap afl-tmin afl-gotcpu afl-analyze
-SH_PROGS    = afl-plot afl-cmin afl-cmin.bash afl-whatsup afl-system-config
-MANPAGES=$(foreach p, $(PROGS) $(SH_PROGS), $(p).8) afl-as.8
-ASAN_OPTIONS=detect_leaks=0
-
-ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -flto=full -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
-	CFLAGS_FLTO ?= -flto=full
-else
- ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -flto=thin -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
-	CFLAGS_FLTO ?= -flto=thin
- else
-  ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -flto -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
-	CFLAGS_FLTO ?= -flto
-  endif
- endif
-endif
-
-ifneq "$(shell uname)" "Darwin"
- ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -march=native -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
-	CFLAGS_OPT = -march=native
- endif
-endif
-
-ifneq "$(shell uname -m)" "x86_64"
- ifneq "$(shell uname -m)" "i386"
-  ifneq "$(shell uname -m)" "amd64"
-   ifneq "$(shell uname -m)" "i86pc"
-	AFL_NO_X86=1
-   endif
-  endif
- endif
-endif
-
-CFLAGS     ?= -O3 -funroll-loops $(CFLAGS_OPT)
-override CFLAGS     += -Wall -g -Wno-pointer-sign -I include/ \
-              -DAFL_PATH=\"$(HELPER_PATH)\" -DBIN_PATH=\"$(BIN_PATH)\" \
-              -DDOC_PATH=\"$(DOC_PATH)\" -Wno-unused-function -fcommon
-
-AFL_FUZZ_FILES = $(wildcard src/afl-fuzz*.c)
-
-ifneq "$(shell type python3m 2>/dev/null)" ""
-  ifneq "$(shell type python3m-config 2>/dev/null)" ""
-    PYTHON_INCLUDE  ?= $(shell python3m-config --includes)
-    PYTHON_VERSION  ?= $(strip $(shell python3m --version 2>&1))
-    # Starting with python3.8, we need to pass the `embed` flag. Earier versions didn't know this flag.
-    ifeq "$(shell python3m-config --embed --libs 2>/dev/null | grep -q lpython && echo 1 )" "1"
-      PYTHON_LIB      ?= $(shell python3m-config --libs --embed)
-    else
-      PYTHON_LIB      ?= $(shell python3m-config --ldflags)
-    endif
-  endif
-endif
-
-ifneq "$(shell type python3 2>/dev/null)" ""
-  ifneq "$(shell type python3-config 2>/dev/null)" ""
-    PYTHON_INCLUDE  ?= $(shell python3-config --includes)
-    PYTHON_VERSION  ?= $(strip $(shell python3 --version 2>&1))
-    # Starting with python3.8, we need to pass the `embed` flag. Earier versions didn't know this flag.
-    ifeq "$(shell python3-config --embed --libs 2>/dev/null | grep -q lpython && echo 1 )" "1"
-      PYTHON_LIB      ?= $(shell python3-config --libs --embed)
-    else
-      PYTHON_LIB      ?= $(shell python3-config --ldflags)
-    endif
-  endif
-endif
-
-ifneq "$(shell type python 2>/dev/null)" ""
-  ifneq "$(shell type python-config 2>/dev/null)" ""
-    PYTHON_INCLUDE  ?= $(shell python-config --includes)
-    PYTHON_LIB      ?= $(shell python-config --ldflags)
-    PYTHON_VERSION  ?= $(strip $(shell python --version 2>&1))
-  endif
-endif
-
-ifdef SOURCE_DATE_EPOCH
-    BUILD_DATE ?= $(shell date -u -d "@$(SOURCE_DATE_EPOCH)" "+%Y-%m-%d" 2>/dev/null || date -u -r "$(SOURCE_DATE_EPOCH)" "+%Y-%m-%d" 2>/dev/null || date -u "+%Y-%m-%d")
-else
-    BUILD_DATE ?= $(shell date "+%Y-%m-%d")
-endif
-
-ifneq "$(filter Linux GNU%,$(shell uname))" ""
-  LDFLAGS  += -ldl
-endif
-
-ifneq "$(findstring FreeBSD, $(shell uname))" ""
-  CFLAGS += -pthread
-  LDFLAGS  += -lpthread
-endif
-
-ifneq "$(findstring NetBSD, $(shell uname))" ""
-  CFLAGS += -pthread
-  LDFLAGS  += -lpthread
-endif
-
-ifeq "$(findstring clang, $(shell $(CC) --version 2>/dev/null))" ""
-  TEST_CC   = afl-gcc
-else
-  TEST_CC   = afl-clang
-endif
-
-COMM_HDR    = include/alloc-inl.h include/config.h include/debug.h include/types.h
-
-ifeq "$(shell echo '$(HASH)include <Python.h>@int main() {return 0; }' | tr @ '\n' | $(CC) -x c - -o .test $(PYTHON_INCLUDE) $(LDFLAGS) $(PYTHON_LIB) 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
-	PYTHON_OK=1
-	PYFLAGS=-DUSE_PYTHON $(PYTHON_INCLUDE) $(LDFLAGS) $(PYTHON_LIB) -DPYTHON_VERSION="\"$(PYTHON_VERSION)\""
-else
-	PYTHON_OK=0
-	PYFLAGS=
-endif
-
-ifdef STATIC
-  $(info Compiling static version of binaries)
-  # Disable python for static compilation to simplify things
-  PYTHON_OK=0
-  PYFLAGS=
-
-  CFLAGS += -static
-  LDFLAGS += -lm -lpthread -lz -lutil
-endif
-
-ifeq "$(shell echo '$(HASH)include <sys/ipc.h>@$(HASH)include <sys/shm.h>@int main() { int _id = shmget(IPC_PRIVATE, 65536, IPC_CREAT | IPC_EXCL | 0600); shmctl(_id, IPC_RMID, 0); return 0;}' | tr @ '\n' | $(CC) -x c - -o .test2 2>/dev/null && echo 1 || echo 0 ; rm -f .test2 )" "1"
-	SHMAT_OK=1
-else
-	SHMAT_OK=0
-	CFLAGS+=-DUSEMMAP=1
-	LDFLAGS+=-Wno-deprecated-declarations
-endif
-
-ifeq "$(TEST_MMAP)" "1"
-	SHMAT_OK=0
-	CFLAGS+=-DUSEMMAP=1
-	LDFLAGS+=-Wno-deprecated-declarations
-endif
-
-ifdef ASAN_BUILD
-  CFLAGS+=-fsanitize=address
-	LDFLAGS+=-fsanitize=address
-endif
-
-all:	test_x86 test_shm test_python ready $(PROGS) afl-as test_build all_done
-
-man:    $(MANPAGES) 
-	-$(MAKE) -C llvm_mode
-	-$(MAKE) -C gcc_plugin
-
-tests:	source-only
-	@cd test ; ./test.sh
-	@rm -f test/errors
-
-performance-tests:	performance-test
-test-performance:	performance-test
-
-performance-test:	source-only
-	@cd test ; ./test-performance.sh
-
-
-help:
-	@echo "HELP --- the following make targets exist:"
-	@echo "=========================================="
-	@echo "all: just the main afl++ binaries"
-	@echo "binary-only: everything for binary-only fuzzing: qemu_mode, unicorn_mode, libdislocator, libtokencap, radamsa"
-	@echo "source-only: everything for source code fuzzing: llvm_mode, gcc_plugin, libdislocator, libtokencap, radamsa"
-	@echo "distrib: everything (for both binary-only and source code fuzzing)"
-	@echo "man: creates simple man pages from the help option of the programs"
-	@echo "install: installs everything you have compiled with the build option above"
-	@echo "clean: cleans everything. for qemu_mode it means it deletes all downloads as well"
-	@echo "code-format: format the code, do this before you commit and send a PR please!"
-	@echo "tests: this runs the test framework. It is more catered for the developers, but if you run into problems this helps pinpointing the problem"
-	@echo "document: creates afl-fuzz-document which will only do one run and save all manipulated inputs into out/queue/mutations"
-	@echo "help: shows these build options :-)"
-	@echo "=========================================="
-	@echo "Recommended: \"distrib\" or \"source-only\", then \"install\""
-	@echo
-	@echo Known build environment options:
-	@echo "=========================================="
-	@echo STATIC - compile AFL++ static
-	@echo ASAN_BUILD - compiles with memory sanitizer for debug purposes
-	@echo AFL_NO_X86 - if compiling on non-intel/amd platforms
-	@echo "=========================================="
-	@echo e.g.: make ASAN_BUILD=1
-
-ifndef AFL_NO_X86
-
-test_x86:
-	@echo "[*] Checking for the default compiler cc..."
-	@type $(CC) >/dev/null || ( echo; echo "Oops, looks like there is no compiler '"$(CC)"' in your path."; echo; echo "Don't panic! You can restart with '"$(_)" CC=<yourCcompiler>'."; echo; exit 1 )
-	@echo "[*] Checking for the ability to compile x86 code..."
-	@echo 'main() { __asm__("xorb %al, %al"); }' | $(CC) -w -x c - -o .test1 || ( echo; echo "Oops, looks like your compiler can't generate x86 code."; echo; echo "Don't panic! You can use the LLVM or QEMU mode, but see docs/INSTALL first."; echo "(To ignore this error, set AFL_NO_X86=1 and try again.)"; echo; exit 1 )
-	@rm -f .test1
-
-else
-
-test_x86:
-	@echo "[!] Note: skipping x86 compilation checks (AFL_NO_X86 set)."
-
-endif
-
-
-ifeq "$(SHMAT_OK)" "1"
-
-test_shm:
-	@echo "[+] shmat seems to be working."
-	@rm -f .test2
-
-else
-
-test_shm:
-	@echo "[-] shmat seems not to be working, switching to mmap implementation"
-
-endif
-
-
-ifeq "$(PYTHON_OK)" "1"
-
-test_python:
-	@rm -f .test 2> /dev/null
-	@echo "[+] $(PYTHON_VERSION) support seems to be working."
-
-else
-
-test_python:
-	@echo "[-] You seem to need to install the package python3-dev or python2-dev (and perhaps python[23]-apt), but it is optional so we continue"
-
-endif
-
-
-ready:
-	@echo "[+] Everything seems to be working, ready to compile."
-
-afl-gcc: src/afl-gcc.c $(COMM_HDR) | test_x86
-	$(CC) $(CFLAGS) src/$@.c -o $@ $(LDFLAGS)
-	set -e; for i in afl-g++ afl-clang afl-clang++; do ln -sf afl-gcc $$i; done
-
-afl-as: src/afl-as.c include/afl-as.h $(COMM_HDR) | test_x86
-	$(CC) $(CFLAGS) src/$@.c -o $@ $(LDFLAGS)
-	ln -sf afl-as as
-
-src/afl-common.o : $(COMM_HDR) src/afl-common.c include/common.h
-	$(CC) $(CFLAGS) $(CFLAGS_FLTO) -c src/afl-common.c -o src/afl-common.o
-
-src/afl-forkserver.o : $(COMM_HDR) src/afl-forkserver.c include/forkserver.h
-	$(CC) $(CFLAGS) $(CFLAGS_FLTO) -c src/afl-forkserver.c -o src/afl-forkserver.o
-
-src/afl-sharedmem.o : $(COMM_HDR) src/afl-sharedmem.c include/sharedmem.h
-	$(CC) $(CFLAGS) $(CFLAGS_FLTO) -c src/afl-sharedmem.c -o src/afl-sharedmem.o
-
-radamsa: src/third_party/libradamsa/libradamsa.so
-	cp src/third_party/libradamsa/libradamsa.so .
-
-src/third_party/libradamsa/libradamsa.so: src/third_party/libradamsa/libradamsa.c src/third_party/libradamsa/radamsa.h
-	$(MAKE) -C src/third_party/libradamsa/ CFLAGS="$(CFLAGS)"
-
-afl-fuzz: $(COMM_HDR) include/afl-fuzz.h $(AFL_FUZZ_FILES) src/afl-common.o src/afl-sharedmem.o src/afl-forkserver.o | test_x86
-	$(CC) $(CFLAGS) $(CFLAGS_FLTO) $(AFL_FUZZ_FILES) src/afl-common.o src/afl-sharedmem.o src/afl-forkserver.o -o $@ $(PYFLAGS) $(LDFLAGS)
-
-afl-showmap: src/afl-showmap.c src/afl-common.o src/afl-sharedmem.o $(COMM_HDR) | test_x86
-	$(CC) $(CFLAGS) $(CFLAGS_FLTO) src/$@.c src/afl-common.o src/afl-sharedmem.o src/afl-forkserver.o -o $@ $(LDFLAGS)
-
-afl-tmin: src/afl-tmin.c src/afl-common.o src/afl-sharedmem.o src/afl-forkserver.o $(COMM_HDR) | test_x86
-	$(CC) $(CFLAGS) $(CFLAGS_FLTO) src/$@.c src/afl-common.o src/afl-sharedmem.o src/afl-forkserver.o -o $@ $(LDFLAGS)
-
-afl-analyze: src/afl-analyze.c src/afl-common.o src/afl-sharedmem.o $(COMM_HDR) | test_x86
-	$(CC) $(CFLAGS) $(CFLAGS_FLTO) src/$@.c src/afl-common.o src/afl-sharedmem.o -o $@ $(LDFLAGS)
-
-afl-gotcpu: src/afl-gotcpu.c $(COMM_HDR) | test_x86
-	$(CC) $(CFLAGS) src/$@.c -o $@ $(LDFLAGS)
-
-
-# document all mutations and only do one run (use with only one input file!)
-document: $(COMM_HDR) include/afl-fuzz.h $(AFL_FUZZ_FILES) src/afl-common.o src/afl-sharedmem.o src/afl-forkserver.o | test_x86
-	$(CC) -D_AFL_DOCUMENT_MUTATIONS $(CFLAGS) $(CFLAGS_FLTO) $(AFL_FUZZ_FILES) src/afl-common.o src/afl-sharedmem.o src/afl-forkserver.o -o afl-fuzz-document $(PYFLAGS) $(LDFLAGS)
-
-
-
-code-format:
-	./.custom-format.py -i src/*.c
-	./.custom-format.py -i include/*.h
-	./.custom-format.py -i libdislocator/*.c 
-	./.custom-format.py -i libtokencap/*.c 
-	./.custom-format.py -i llvm_mode/*.c
-	./.custom-format.py -i llvm_mode/*.h
-	./.custom-format.py -i llvm_mode/*.cc
-	./.custom-format.py -i gcc_plugin/*.c
-	#./.custom-format.py -i gcc_plugin/*.h
-	./.custom-format.py -i gcc_plugin/*.cc
-	./.custom-format.py -i examples/*/*.c
-	./.custom-format.py -i examples/*/*.h
-	./.custom-format.py -i qemu_mode/patches/*.h
-	./.custom-format.py -i qemu_mode/libcompcov/*.c
-	./.custom-format.py -i qemu_mode/libcompcov/*.cc
-	./.custom-format.py -i qemu_mode/libcompcov/*.h
-	./.custom-format.py -i qbdi_mode/*.c
-	./.custom-format.py -i qbdi_mode/*.cpp
-	./.custom-format.py -i *.h
-	./.custom-format.py -i *.c
-
-
-ifndef AFL_NO_X86
-
-test_build: afl-gcc afl-as afl-showmap
-	@echo "[*] Testing the CC wrapper and instrumentation output..."
-	@unset AFL_USE_ASAN AFL_USE_MSAN AFL_CC; AFL_DEBUG=1 AFL_INST_RATIO=100 AFL_PATH=. ./$(TEST_CC) $(CFLAGS) test-instr.c -o test-instr $(LDFLAGS) 2>&1 | grep 'afl-as' >/dev/null || (echo "Oops, afl-as did not get called from "$(TEST_CC)". This is normally achieved by "$(CC)" honoring the -B option."; exit 1 )
-	ASAN_OPTIONS=detect_leaks=0 ./afl-showmap -m none -q -o .test-instr0 ./test-instr < /dev/null
-	echo 1 | ASAN_OPTIONS=detect_leaks=0 ./afl-showmap -m none -q -o .test-instr1 ./test-instr
-	@rm -f test-instr
-	@cmp -s .test-instr0 .test-instr1; DR="$$?"; rm -f .test-instr0 .test-instr1; if [ "$$DR" = "0" ]; then echo; echo "Oops, the instrumentation does not seem to be behaving correctly!"; echo; echo "Please post to https://github.com/AFLplusplus/AFLplusplus/issues to troubleshoot the issue."; echo; exit 1; fi
-	@echo "[+] All right, the instrumentation seems to be working!"
-
-else
-
-test_build: afl-gcc afl-as afl-showmap
-	@echo "[!] Note: skipping build tests (you may need to use LLVM or QEMU mode)."
-
-endif
-
-
-all_done: test_build
-	@if [ ! "`type clang 2>/dev/null`" = "" ]; then echo "[+] LLVM users: see llvm_mode/README.md for a faster alternative to afl-gcc."; fi
-	@echo "[+] All done! Be sure to review the README.md - it's pretty short and useful."
-	@if [ "`uname`" = "Darwin" ]; then printf "\nWARNING: Fuzzing on MacOS X is slow because of the unusually high overhead of\nfork() on this OS. Consider using Linux or *BSD. You can also use VirtualBox\n(virtualbox.org) to put AFL inside a Linux or *BSD VM.\n\n"; fi
-	@! tty <&1 >/dev/null || printf "\033[0;30mNOTE: If you can read this, your terminal probably uses white background.\nThis will make the UI hard to read. See docs/status_screen.md for advice.\033[0m\n" 2>/dev/null
-
-.NOTPARALLEL: clean
-
-clean:
-	rm -f $(PROGS) libradamsa.so afl-fuzz-document afl-as as afl-g++ afl-clang afl-clang++ *.o src/*.o *~ a.out core core.[1-9][0-9]* *.stackdump .test .test1 .test2 test-instr .test-instr0 .test-instr1 qemu_mode/qemu-3.1.1.tar.xz afl-qemu-trace afl-gcc-fast afl-gcc-pass.so afl-gcc-rt.o afl-g++-fast ld *.so *.8
-	rm -rf out_dir qemu_mode/qemu-3.1.1 *.dSYM */*.dSYM
-	-$(MAKE) -C llvm_mode clean
-	-$(MAKE) -C gcc_plugin clean
-	$(MAKE) -C libdislocator clean
-	$(MAKE) -C libtokencap clean
-	$(MAKE) -C examples/socket_fuzzing clean
-	$(MAKE) -C examples/argv_fuzzing clean
-	$(MAKE) -C qemu_mode/unsigaction clean
-	$(MAKE) -C qemu_mode/libcompcov clean
-	$(MAKE) -C src/third_party/libradamsa/ clean
-	-rm -rf unicorn_mode/unicornafl
-
-distrib: all radamsa
-	-$(MAKE) -C llvm_mode
-	-$(MAKE) -C gcc_plugin
-	$(MAKE) -C libdislocator
-	$(MAKE) -C libtokencap
-	$(MAKE) -C examples/socket_fuzzing
-	$(MAKE) -C examples/argv_fuzzing
-	cd qemu_mode && sh ./build_qemu_support.sh
-	cd unicorn_mode && sh ./build_unicorn_support.sh
-
-binary-only: all radamsa
-	$(MAKE) -C libdislocator
-	$(MAKE) -C libtokencap
-	$(MAKE) -C examples/socket_fuzzing
-	$(MAKE) -C examples/argv_fuzzing
-	cd qemu_mode && sh ./build_qemu_support.sh
-	cd unicorn_mode && sh ./build_unicorn_support.sh
-
-source-only: all radamsa
-	-$(MAKE) -C llvm_mode
-	-$(MAKE) -C gcc_plugin
-	$(MAKE) -C libdislocator
-	$(MAKE) -C libtokencap
-
-%.8:	%
-	@echo .TH $* 8 $(BUILD_DATE) "afl++" > $@
-	@echo .SH NAME >> $@
-	@echo .B $* >> $@
-	@echo >> $@
-	@echo .SH SYNOPSIS >> $@
-	@./$* -h 2>&1 | head -n 3 | tail -n 1 | sed 's/^\.\///' >> $@
-	@echo >> $@
-	@echo .SH OPTIONS >> $@
-	@echo .nf >> $@
-	@./$* -hh 2>&1 | tail -n +4 >> $@
-	@echo >> $@
-	@echo .SH AUTHOR >> $@
-	@echo "afl++ was written by Michal \"lcamtuf\" Zalewski and is maintained by Marc \"van Hauser\" Heuse <mh@mh-sec.de>, Heiko \"hexcoder-\" Eissfeldt <heiko.eissfeldt@hexco.de> and Andrea Fioraldi <andreafioraldi@gmail.com>" >> $@
-	@echo  The homepage of afl++ is: https://github.com/AFLplusplus/AFLplusplus >> $@
-	@echo >> $@
-	@echo .SH LICENSE >> $@
-	@echo Apache License Version 2.0, January 2004 >> $@
-
-install: all $(MANPAGES)
-	install -d -m 755 $${DESTDIR}$(BIN_PATH) $${DESTDIR}$(HELPER_PATH) $${DESTDIR}$(DOC_PATH) $${DESTDIR}$(MISC_PATH)
-	rm -f $${DESTDIR}$(BIN_PATH)/afl-plot.sh
-	install -m 755 $(PROGS) $(SH_PROGS) $${DESTDIR}$(BIN_PATH)
-	rm -f $${DESTDIR}$(BIN_PATH)/afl-as
-	if [ -f afl-qemu-trace ]; then install -m 755 afl-qemu-trace $${DESTDIR}$(BIN_PATH); fi
-	if [ -f afl-gcc-fast ]; then set e; install -m 755 afl-gcc-fast $${DESTDIR}$(BIN_PATH); ln -sf afl-gcc-fast $${DESTDIR}$(BIN_PATH)/afl-g++-fast; install -m 755 afl-gcc-pass.so afl-gcc-rt.o $${DESTDIR}$(HELPER_PATH); fi
-ifndef AFL_TRACE_PC
-	if [ -f afl-clang-fast -a -f libLLVMInsTrim.so -a -f afl-llvm-rt.o ]; then set -e; install -m 755 afl-clang-fast $${DESTDIR}$(BIN_PATH); ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang-fast++; install -m 755 libLLVMInsTrim.so afl-llvm-pass.so afl-llvm-rt.o $${DESTDIR}$(HELPER_PATH); fi
-	if [ -f afl-clang-lto -a -f afl-ld ]; then set -e; install -m 755 afl-clang-lto $${DESTDIR}$(BIN_PATH); ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang-lto++; install -m 755 afl-ld $${DESTDIR}$(HELPER_PATH); ln -sf afl-ld $${DESTDIR}$(HELPER_PATH)/ld; install -m 755 afl-llvm-lto-instrumentation.so $${DESTDIR}$(HELPER_PATH); install -m 755 afl-llvm-lto-whitelist.so $${DESTDIR}$(HELPER_PATH); fi
-else
-	if [ -f afl-clang-fast -a -f afl-llvm-rt.o ]; then set -e; install -m 755 afl-clang-fast $${DESTDIR}$(BIN_PATH); ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang-fast++; install -m 755 afl-llvm-rt.o $${DESTDIR}$(HELPER_PATH); fi
-endif
-	if [ -f afl-llvm-rt-32.o ]; then set -e; install -m 755 afl-llvm-rt-32.o $${DESTDIR}$(HELPER_PATH); fi
-	if [ -f afl-llvm-rt-64.o ]; then set -e; install -m 755 afl-llvm-rt-64.o $${DESTDIR}$(HELPER_PATH); fi
-	if [ -f compare-transform-pass.so ]; then set -e; install -m 755 compare-transform-pass.so $${DESTDIR}$(HELPER_PATH); fi
-	if [ -f split-compares-pass.so ]; then set -e; install -m 755 split-compares-pass.so $${DESTDIR}$(HELPER_PATH); fi
-	if [ -f split-switches-pass.so ]; then set -e; install -m 755 split-switches-pass.so $${DESTDIR}$(HELPER_PATH); fi
-	if [ -f cmplog-instructions-pass.so ]; then set -e; install -m 755 cmplog-*-pass.so $${DESTDIR}$(HELPER_PATH); fi
-	if [ -f libdislocator.so ]; then set -e; install -m 755 libdislocator.so $${DESTDIR}$(HELPER_PATH); fi
-	if [ -f libtokencap.so ]; then set -e; install -m 755 libtokencap.so $${DESTDIR}$(HELPER_PATH); fi
-	if [ -f libcompcov.so ]; then set -e; install -m 755 libcompcov.so $${DESTDIR}$(HELPER_PATH); fi
-	if [ -f libradamsa.so ]; then set -e; install -m 755 libradamsa.so $${DESTDIR}$(HELPER_PATH); fi
-	if [ -f afl-fuzz-document ]; then set -e; install -m 755 afl-fuzz-document $${DESTDIR}$(BIN_PATH); fi
-	$(MAKE) -C examples/socket_fuzzing install
-	$(MAKE) -C examples/argv_fuzzing install
-
-	set -e; ln -sf afl-gcc $${DESTDIR}$(BIN_PATH)/afl-g++
-	set -e; if [ -f afl-clang-fast ] ; then ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang ; ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang++ ; else ln -sf afl-gcc $${DESTDIR}$(BIN_PATH)/afl-clang ; ln -sf afl-gcc $${DESTDIR}$(BIN_PATH)/afl-clang++; fi
-
-	mkdir -m 0755 -p ${DESTDIR}$(MAN_PATH)
-	install -m0644 *.8 ${DESTDIR}$(MAN_PATH)
-
-	install -m 755 afl-as $${DESTDIR}$(HELPER_PATH)
-	ln -sf afl-as $${DESTDIR}$(HELPER_PATH)/as
-	install -m 644 docs/*.md $${DESTDIR}$(DOC_PATH)
-	cp -r testcases/ $${DESTDIR}$(MISC_PATH)
-	cp -r dictionaries/ $${DESTDIR}$(MISC_PATH)
-
+all:
+	@echo please use GNU make, thanks!
diff --git a/README.md b/README.md
index ca321f31..302a8acf 100644
--- a/README.md
+++ b/README.md
@@ -4,9 +4,9 @@
 
   ![Travis State](https://api.travis-ci.com/AFLplusplus/AFLplusplus.svg?branch=master)
 
-  Release Version: [2.62c](https://github.com/AFLplusplus/AFLplusplus/releases)
+  Release Version: [2.63c](https://github.com/AFLplusplus/AFLplusplus/releases)
 
-  Github Version: 2.62d
+  Github Version: 2.63d
 
   includes all necessary/interesting changes from Google's afl 2.56b
 
@@ -21,20 +21,7 @@
     * Dominik Maier <mail@dmnk.co>.
 
   Note that although afl now has a Google afl repository [https://github.com/Google/afl](https://github.com/Google/afl),
-  it is unlikely to receive any noteable enhancements: [https://twitter.com/Dor3s/status/1154737061787660288](https://twitter.com/Dor3s/status/1154737061787660288)
-
-## Branches
-
-  The following branches exist:
-
-  * [master/trunk](https://github.com/AFLplusplus/AFLplusplus/) : stable state of afl++ - it is synced from dev from time to
-    time when we are satisfied with it's stability
-  * [dev](https://github.com/AFLplusplus/AFLplusplus/tree/dev) : development state of afl++ - bleeding edge and you might catch a
-    checkout which does not compile or has a bug. *We only accept PRs in dev!!*
-  * (any other) : experimental branches to work on specific features or testing
-    new functionality or changes
-
-  For releases, please see the [Releases](https://github.com/AFLplusplus/AFLplusplus/releases) tab.
+  it is unlikely to receive any notable enhancements: [https://twitter.com/Dor3s/status/1154737061787660288](https://twitter.com/Dor3s/status/1154737061787660288)
 
 ## The enhancements compared to the original stock afl
 
@@ -57,9 +44,9 @@
 
   * Custom mutator by a library (instead of Python) by kyakdan
 
-  * unicorn_mode which allows fuzzing of binaries from completely different platforms (integration provided by domenukk)
+  * Unicorn mode which allows fuzzing of binaries from completely different platforms (integration provided by domenukk)
 
-  * laf-intel or CompCov support for llvm_mode, qemu_mode and unicorn_mode
+  * LAF-Intel or CompCov support for llvm_mode, qemu_mode and unicorn_mode
 
   * NeverZero patch for afl-gcc, llvm_mode, qemu_mode and unicorn_mode which prevents a wrapping map value to zero, increases coverage
   
@@ -67,35 +54,42 @@
   
   * Win32 PE binary-only fuzzing with QEMU and Wine
 
-  * Radamsa mutator (enable with `-R` to add or `-RR` to run it exclusivly).
+  * Radamsa mutator (enable with `-R` to add or `-RR` to run it exclusively).
 
-  * qbdi_mode: fuzz android native libraries via QBDI framework
+  * QBDI mode to fuzz android native libraries via QBDI framework
 
   * The new CmpLog instrumentation for LLVM and QEMU inspired by [Redqueen](https://www.syssec.ruhr-uni-bochum.de/media/emma/veroeffentlichungen/2018/12/17/NDSS19-Redqueen.pdf)
 
+  * LLVM mode Ngram coverage by Adrian Herrera [https://github.com/adrianherrera/afl-ngram-pass](https://github.com/adrianherrera/afl-ngram-pass)
+
   A more thorough list is available in the PATCHES file.
 
   | Feature/Instrumentation | afl-gcc | llvm_mode | gcc_plugin | qemu_mode        | unicorn_mode |
   | ----------------------- |:-------:|:---------:|:----------:|:----------------:|:------------:|
   | NeverZero               |    x    |     x(1)  |      (2)   |         x        |       x      |
   | Persistent mode         |         |     x     |     x      | x86[_64]/arm[64] |       x      |
-  | laf-intel / CompCov     |         |     x     |            | x86[_64]/arm[64] | x86[_64]/arm |
+  | LAF-Intel / CompCov     |         |     x     |            | x86[_64]/arm[64] | x86[_64]/arm |
   | CmpLog                  |         |     x     |            | x86[_64]/arm[64] |              |
   | Whitelist               |         |     x     |     x      |        (x)(3)    |              |
-  | non-colliding coverage  |         |     x(4)  |            |        (x)(5)    |              |
+  | Non-colliding coverage  |         |     x(4)  |            |        (x)(5)    |              |
   | InsTrim                 |         |     x     |            |                  |              |
+  | Ngram prev_loc coverage |         |     x(6)  |            |                  |              |
+  | Context coverage        |         |     x     |            |                  |              |
+  | Snapshot LKM support    |         |     x     |            |        (x)(5)    |              |
 
   neverZero:
 
-  (1) only in LLVM >= 9.0 due to a bug in llvm in previous versions
+  (1) default for LLVM >= 9.0, env var for older versions due to an efficiency bug in llvm <= 8
 
-  (2) gcc creates non-performant code, hence it is disabled in gcc_plugin
+  (2) GCC creates non-performant code, hence it is disabled in gcc_plugin
 
   (3) partially via AFL_CODE_START/AFL_CODE_END
 
   (4) Only for LLVM >= 9 and not all targets compile
 
-  (5) upcoming, development in branch
+  (5) upcoming, development in the branch
+
+  (6) not compatible with LTO and InsTrim and needs LLVM >= 4.1
 
   So all in all this is the best-of afl that is currently out there :-)
 
@@ -108,24 +102,36 @@
   See [docs/QuickStartGuide.md](docs/QuickStartGuide.md) if you don't have time to
   read this file.
 
+## Branches
+
+  The following branches exist:
+
+  * [master/trunk](https://github.com/AFLplusplus/AFLplusplus/) : stable state of afl++ - it is synced from dev from time to
+    time when we are satisfied with its stability
+  * [dev](https://github.com/AFLplusplus/AFLplusplus/tree/dev) : development state of afl++ - bleeding edge and you might catch a
+    checkout which does not compile or has a bug. *We only accept PRs in dev!!*
+  * (any other) : experimental branches to work on specific features or testing
+    new functionality or changes.
+
+  For releases, please see the [Releases](https://github.com/AFLplusplus/AFLplusplus/releases) tab.
 
 ## Google Summer of Code 2020 (and any other students and enthusiast developers)
 
 We are happy to be part of [Google Summer of Code 2020](https://summerofcode.withgoogle.com/organizations/5100744400699392/)! :-)
 
 We have several ideas we would like to see in AFL++ to make it even better.
-However we already work on so many things that we do not have the time for
+However, we already work on so many things that we do not have the time for
 all the big ideas.
 
 This can be your way to support and contribute to AFL++ - extend it to
-something cool
+something cool.
 
-We have an idea list in [docs/ideas.md](docs/ideas.md)
+We have an idea list in [docs/ideas.md](docs/ideas.md).
 
 For everyone who wants to contribute (and send pull requests) please read
 [CONTRIBUTING.md](CONTRIBUTING.md) before your submit.
 
-## 0) Building and installing afl++
+## Building and installing afl++
 
 afl++ has many build options.
 The easiest is to build and install everything:
@@ -152,10 +158,12 @@ These build targets exist:
 * binary-only: everything for binary-only fuzzing: qemu_mode, unicorn_mode, libdislocator, libtokencap, radamsa
 * source-only: everything for source code fuzzing: llvm_mode, libdislocator, libtokencap, radamsa
 * distrib: everything (for both binary-only and source code fuzzing)
+* man: creates simple man pages from the help option of the programs
 * install: installs everything you have compiled with the build options above
 * clean: cleans everything. for qemu_mode and unicorn_mode it means it deletes all downloads as well
 * code-format: format the code, do this before you commit and send a PR please!
 * tests: runs test cases to ensure that all features are still working as they should
+* unit: perform unit tests (based on cmocka)
 * help: shows these build options
 
 [Unless you are on Mac OS X](https://developer.apple.com/library/archive/qa/qa1118/_index.html) you can also build statically linked versions of the 
@@ -169,6 +177,7 @@ These build options exist:
 
 * STATIC - compile AFL++ static
 * ASAN_BUILD - compiles with memory sanitizer for debug purposes
+* PROFILING - compile with profiling information (gprof)
 * AFL_NO_X86 - if compiling on non-intel/amd platforms
 * LLVM_CONFIG - if your distro doesn't use the standard name for llvm-config (e.g. Debian)
 
@@ -176,7 +185,7 @@ e.g.: make ASAN_BUILD=1
 
 
 Note that afl++ is faster and better the newer the compilers used are.
-Hence gcc-9 and especially llvm-9 should be the compilers of choice.
+Hence at least gcc-9 and especially llvm-9 should be the compilers of choice.
 If your distribution does not have them, you can use the Dockerfile:
 
 ```shell
@@ -185,7 +194,7 @@ $ sudo docker build -t aflplusplus .
 ```
 
 
-## 1) Challenges of guided fuzzing
+## Challenges of guided fuzzing
 
 Fuzzing is one of the most powerful and proven strategies for identifying
 security issues in real-world software; it is responsible for the vast
@@ -200,9 +209,9 @@ There have been numerous attempts to solve this problem. One of the early
 approaches - pioneered by Tavis Ormandy - is corpus distillation. The method
 relies on coverage signals to select a subset of interesting seeds from a
 massive, high-quality corpus of candidate files, and then fuzz them by
-traditional means. The approach works exceptionally well, but requires such
+traditional means. The approach works exceptionally well but requires such
 a corpus to be readily available. In addition, block coverage measurements
-provide only a very simplistic understanding of program state, and are less
+provide only a very simplistic understanding of the program state and are less
 useful for guiding the fuzzing effort in the long haul.
 
 Other, more sophisticated research has focused on techniques such as program
@@ -212,7 +221,7 @@ to suffer from reliability and performance problems in practical uses - and
 currently do not offer a viable alternative to "dumb" fuzzing techniques.
 
 
-## 2) The afl-fuzz approach
+## The afl-fuzz approach
 
 American Fuzzy Lop is a brute-force fuzzer coupled with an exceedingly simple
 but rock-solid instrumentation-guided genetic algorithm. It uses a modified
@@ -223,7 +232,7 @@ Simplifying a bit, the overall algorithm can be summed up as:
 
   1) Load user-supplied initial test cases into the queue,
 
-  2) Take next input file from the queue,
+  2) Take the next input file from the queue,
 
   3) Attempt to trim the test case to the smallest size that doesn't alter
      the measured behavior of the program,
@@ -251,10 +260,10 @@ The fuzzer is thoroughly tested to deliver out-of-the-box performance far
 superior to blind fuzzing or coverage-only tools.
 
 
-## 3) Instrumenting programs for use with AFL
+## Instrumenting programs for use with AFL
 
 PLEASE NOTE: llvm_mode compilation with afl-clang-fast/afl-clang-fast++
-instead of afl-gcc/afl-g++ is much faster and has a few cool features.
+instead of afl-gcc/afl-g++ is much faster and has many cool features.
 See llvm_mode/ - however few code does not compile with llvm.
 We support llvm versions 3.8.0 to 11.
 
@@ -290,7 +299,7 @@ Using partial instrumentation is also recommended, see
 When testing libraries, you need to find or write a simple program that reads
 data from stdin or from a file and passes it to the tested library. In such a
 case, it is essential to link this executable against a static version of the
-instrumented library, or to make sure that the correct .so file is loaded at
+instrumented library or to make sure that the correct .so file is loaded at
 runtime (usually by setting `LD_LIBRARY_PATH`). The simplest option is a static
 build, usually possible via:
 
@@ -307,7 +316,7 @@ PS. ASAN users are advised to review [docs/notes_for_asan.md](docs/notes_for_asa
 file for important caveats.
 
 
-## 4) Instrumenting binary-only apps
+## Instrumenting binary-only apps
 
 When source code is *NOT* available, the fuzzer offers experimental support for
 fast, on-the-fly instrumentation of black-box binaries. This is accomplished
@@ -335,10 +344,9 @@ the speed compared to qemu_mode.
 A more comprehensive description of these and other options can be found in
 [docs/binaryonly_fuzzing.md](docs/binaryonly_fuzzing.md)
 
+## Good examples and writeups
 
-## 5) Good examples and writeups
-
-Here are some good writeups to show how to effectibly use AFL++:
+Here are some good writeups to show how to effectively use AFL++:
 
  * [https://aflplus.plus/docs/tutorials/libxml2_tutorial/](https://aflplus.plus/docs/tutorials/libxml2_tutorial/)
  * [https://bananamafia.dev/post/gb-fuzz/](https://bananamafia.dev/post/gb-fuzz/)
@@ -351,7 +359,7 @@ structure is), these two links have you covered:
 
 If you find other good ones, please send them to us :-)
 
-## 6) Power schedules
+## Power schedules
 
 The power schedules were copied from Marcel Böhme's excellent AFLfast
 implementation and expand on the ability to discover new paths and
@@ -365,9 +373,10 @@ The available schedules are:
  - quad
  - lin
  - exploit
- - mmopt
+ - mmopt (experimental)
+ - rare (experimental)
 
-In parallel mode (-M/-S, several instances with shared queue), we suggest to
+In parallel mode (-M/-S, several instances with a shared queue), we suggest to
 run the master using the explore or fast schedule (-p explore) and the slaves
 with a combination of cut-off-exponential (-p coe), exponential (-p fast),
 explore (-p explore) and mmopt (-p mmopt) schedules. If a schedule does
@@ -375,13 +384,13 @@ not perform well for a target, restart the slave with a different schedule.
 
 In single mode, using -p fast is usually slightly more beneficial than the
 default explore mode.
-(We don't want to change the default behaviour of afl, so "fast" has not been
+(We don't want to change the default behavior of afl, so "fast" has not been
 made the default mode).
 
 More details can be found in the paper published at the 23rd ACM Conference on
 Computer and Communications Security [CCS'16](https://www.sigsac.org/ccs/CCS2016/accepted-papers/)
 
-## 7) Choosing initial test cases
+## Choosing initial test cases
 
 To operate correctly, the fuzzer requires one or more starting file that
 contains a good example of the input data normally expected by the targeted
@@ -402,7 +411,7 @@ the afl-cmin utility to identify a subset of functionally distinct files that
 exercise different code paths in the target binary.
 
 
-## 8) Fuzzing binaries
+## Fuzzing binaries
 
 The fuzzing process itself is carried out by the afl-fuzz utility. This program
 requires a read-only directory with initial test cases, a separate place to
@@ -439,18 +448,18 @@ steps, which can take several days, but tend to produce neat test cases. If you
 want quick & dirty results right away - akin to zzuf and other traditional
 fuzzers - add the -d option to the command line.
 
-## 9) Interpreting output
+## Interpreting output
 
 See the [docs/status_screen.md](docs/status_screen.md) file for information on
 how to interpret the displayed stats and monitor the health of the process. Be
 sure to consult this file especially if any UI elements are highlighted in red.
 
-The fuzzing process will continue until you press Ctrl-C. At minimum, you want
+The fuzzing process will continue until you press Ctrl-C. At a minimum, you want
 to allow the fuzzer to complete one queue cycle, which may take anywhere from a
 couple of hours to a week or so.
 
 There are three subdirectories created within the output directory and updated
-in real time:
+in real-time:
 
   - queue/   - test cases for every distinctive execution path, plus all the
                starting files given by the user. This is the synthesized corpus
@@ -475,7 +484,7 @@ involve any state transitions not seen in previously-recorded faults. If a
 single bug can be reached in multiple ways, there will be some count inflation
 early in the process, but this should quickly taper off.
 
-The file names for crashes and hangs are correlated with parent, non-faulting
+The file names for crashes and hangs are correlated with the parent, non-faulting
 queue entries. This should help with debugging.
 
 When you can't reproduce a crash found by afl-fuzz, the most likely cause is
@@ -499,7 +508,7 @@ If you have gnuplot installed, you can also generate some pretty graphs for any
 active fuzzing task using afl-plot. For an example of how this looks like,
 see [http://lcamtuf.coredump.cx/afl/plot/](http://lcamtuf.coredump.cx/afl/plot/).
 
-## 10) Parallelized fuzzing
+## Parallelized fuzzing
 
 Every instance of afl-fuzz takes up roughly one core. This means that on
 multi-core systems, parallelization is necessary to fully utilize the hardware.
@@ -510,7 +519,7 @@ The parallel fuzzing mode also offers a simple way for interfacing AFL to other
 fuzzers, to symbolic or concolic execution engines, and so forth; again, see the
 last section of [docs/parallel_fuzzing.md](docs/parallel_fuzzing.md) for tips.
 
-## 12) Fuzzer dictionaries
+## Fuzzer dictionaries
 
 By default, afl-fuzz mutation engine is optimized for compact data formats -
 say, images, multimedia, compressed data, regular expression syntax, or shell
@@ -539,13 +548,13 @@ instrumentation feedback alone. This actually works in practice, say:
 PS. Even when no explicit dictionary is given, afl-fuzz will try to extract
 existing syntax tokens in the input corpus by watching the instrumentation
 very closely during deterministic byte flips. This works for some types of
-parsers and grammars, but isn't nearly as good as the -x mode.
+parsers and grammars but isn't nearly as good as the -x mode.
 
 If a dictionary is really hard to come by, another option is to let AFL run
-for a while, and then use the token capture library that comes as a companion
+for a while and then use the token capture library that comes as a companion
 utility with AFL. For that, see [libtokencap/README.md](libtokencap/README.tokencap.md).
 
-## 13) Crash triage
+## Crash triage
 
 The coverage-based grouping of crashes usually produces a small data set that
 can be quickly triaged manually or with a very simple GDB or Valgrind script.
@@ -557,7 +566,7 @@ difficult to quickly evaluate for exploitability without a lot of debugging and
 code analysis work. To assist with this task, afl-fuzz supports a very unique
 "crash exploration" mode enabled with the -C flag.
 
-In this mode, the fuzzer takes one or more crashing test cases as the input,
+In this mode, the fuzzer takes one or more crashing test cases as the input
 and uses its feedback-driven fuzzing strategies to very quickly enumerate all
 code paths that can be reached in the program while keeping it in the
 crashing state.
@@ -592,12 +601,11 @@ be critical, and which are not; while not bulletproof, it can often offer quick
 insights into complex file formats. More info about its operation can be found
 near the end of [docs/technical_details.md](docs/technical_details.md).
 
-
-## 14) Going beyond crashes
+## Going beyond crashes
 
 Fuzzing is a wonderful and underutilized technique for discovering non-crashing
 design and implementation errors, too. Quite a few interesting bugs have been
-found by modifying the target programs to call abort() when, say:
+found by modifying the target programs to call abort() when, say:
 
   - Two bignum libraries produce different outputs when given the same
     fuzzer-generated input,
@@ -616,10 +624,10 @@ if you are the maintainer of a particular package, you can make this code
 conditional with `#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION` (a flag also
 shared with libfuzzer) or `#ifdef __AFL_COMPILER` (this one is just for AFL).
 
-## 15) Common-sense risks
+## Common-sense risks
 
 Please keep in mind that, similarly to many other computationally-intensive
-tasks, fuzzing may put strain on your hardware and on the OS. In particular:
+tasks, fuzzing may put a strain on your hardware and on the OS. In particular:
 
   - Your CPU will run hot and will need adequate cooling. In most cases, if
     cooling is insufficient or stops working properly, CPU speeds will be
@@ -645,14 +653,14 @@ tasks, fuzzing may put strain on your hardware and on the OS. In particular:
     $ iostat -d 3 -x -k [...optional disk ID...]
 ```
 
-## 16) Known limitations & areas for improvement
+## Known limitations & areas for improvement
 
 Here are some of the most important caveats for AFL:
 
   - AFL detects faults by checking for the first spawned process dying due to
     a signal (SIGSEGV, SIGABRT, etc). Programs that install custom handlers for
     these signals may need to have the relevant code commented out. In the same
-    vein, faults in child processed spawned by the fuzzed target may evade
+    vein, faults in child processes spawned by the fuzzed target may evade
     detection unless you manually add some code to catch that.
 
   - As with any other brute-force tool, the fuzzer offers limited coverage if
@@ -685,7 +693,7 @@ Here are some of the most important caveats for AFL:
 
 Beyond this, see INSTALL for platform-specific tips.
 
-## 17) Special thanks
+## Special thanks
 
 Many of the improvements to the original afl and afl++ wouldn't be possible
 without feedback, bug reports, or patches from:
@@ -739,7 +747,7 @@ without feedback, bug reports, or patches from:
 Thank you!
 (For people sending pull requests - please add yourself to this list :-)
 
-## 18) Contact
+## Contact
 
 Questions? Concerns? Bug reports? The contributors can be reached via
 [https://github.com/AFLplusplus/AFLplusplus](https://github.com/AFLplusplus/AFLplusplus)
diff --git a/TODO.md b/TODO.md
index ffd6b5ad..703ba4ed 100644
--- a/TODO.md
+++ b/TODO.md
@@ -1,9 +1,11 @@
 # TODO list for AFL++
 
-## Roadmap 2.63
-
- - get "no global vars" working
+## Roadmap 2.64
 
+ - random crc32 HASH_CONST per run? because with 65536 paths we have collisions
+ - namespace for targets? e.g. network
+ - libradamsa as a custom module?
+ - fix stability calculation bug
 
 ## Further down the road
 
@@ -13,6 +15,11 @@ afl-fuzz:
  - ascii_only mode for mutation output
  - setting min_len/max_len/start_offset/end_offset limits for mutation output
 
+llvm_mode:
+ - added context sensitive branch coverage
+ - add CT cov and ngram cov to LTO and InsTrim
+ - better whitelist solution for LTO
+
 gcc_plugin:
  - laf-intel
  - better instrumentation (seems to be better with gcc-9+)
diff --git a/afl-cmin b/afl-cmin
index d96a103f..d38e7a97 100755
--- a/afl-cmin
+++ b/afl-cmin
@@ -264,7 +264,7 @@ BEGIN {
 
   if (target_bin && !exists_and_is_executable(target_bin)) {
 
-    "type "target_bin" | awk '{print $NF}' 2>/dev/null" | getline tnew
+    "command -v "target_bin" 2>/dev/null" | getline tnew
     if (!tnew || !exists_and_is_executable(tnew)) {
       print "[-] Error: binary '"target_bin"' not found or not executable." > "/dev/stderr"
       exit 1
@@ -313,7 +313,7 @@ BEGIN {
     if (0 == system("test -f afl-cmin")) {
       showmap = "./afl-showmap"
     } else {
-      "type afl-showmap | awk '{print $NF}' 2>/dev/null" | getline showmap
+      "command -v afl-showmap 2>/dev/null" | getline showmap
     }
   } else {
     showmap = ENVIRON["AFL_PATH"] "/afl-showmap"
diff --git a/afl-plot b/afl-plot
index 0f0154a6..6ad3f790 100755
--- a/afl-plot
+++ b/afl-plot
@@ -68,7 +68,7 @@ BANNER="`cat "$1/fuzzer_stats" | grep '^afl_banner ' | cut -d: -f2- | cut -b2-`"
 
 test "$BANNER" = "" && BANNER="(none)"
 
-GNUPLOT=`which gnuplot 2>/dev/null`
+GNUPLOT=`command -v gnuplot 2>/dev/null`
 
 if [ "$GNUPLOT" = "" ]; then
 
diff --git a/afl-whatsup b/afl-whatsup
index 5983ca82..1a276964 100755
--- a/afl-whatsup
+++ b/afl-whatsup
@@ -61,8 +61,12 @@ if [ -d queue ]; then
 
 fi
 
-RED=`tput setaf 1 1 1`
+RED=`tput setaf 9 1 1`
+GREEN=`tput setaf 2 1 1`
+BLUE=`tput setaf 4 1 1`
+YELLOW=`tput setaf 11 1 1`
 NC=`tput sgr0`
+RESET="$NC"
 
 CUR_TIME=`date +%s`
 
@@ -116,6 +120,10 @@ fmt_duration()
   fi
 }
 
+FIRST=true
+TOTAL_WCOP=
+TOTAL_LAST_PATH=0
+
 for i in `find . -maxdepth 2 -iname fuzzer_stats | sort`; do
 
   sed 's/^command_line.*$/_skip:1/;s/[ ]*:[ ]*/="/;s/$/"/' "$i" >"$TMP"
@@ -125,6 +133,12 @@ for i in `find . -maxdepth 2 -iname fuzzer_stats | sort`; do
   RUN_DAYS=$((RUN_UNIX / 60 / 60 / 24))
   RUN_HRS=$(((RUN_UNIX / 60 / 60) % 24))
 
+  test -n "$cycles_wo_finds" && {
+    test -z "$FIRST" && TOTAL_WCOP="${TOTAL_WCOP}/"
+    TOTAL_WCOP="${TOTAL_WCOP}${cycles_wo_finds}"
+    FIRST=
+  }
+
   if [ "$SUMMARY_ONLY" = "" ]; then
 
     echo ">>> $afl_banner ($RUN_DAYS days, $RUN_HRS hrs) fuzzer PID: $fuzzer_pid <<<"
@@ -158,6 +172,10 @@ for i in `find . -maxdepth 2 -iname fuzzer_stats | sort`; do
   TOTAL_PENDING=$((TOTAL_PENDING + pending_total))
   TOTAL_PFAV=$((TOTAL_PFAV + pending_favs))
 
+  if [ "$last_path" -gt "$TOTAL_LAST_PATH" ]; then
+    TOTAL_LAST_PATH=$last_path
+  fi
+
   if [ "$SUMMARY_ONLY" = "" ]; then
 
     # Warnings in red
@@ -173,10 +191,17 @@ for i in `find . -maxdepth 2 -iname fuzzer_stats | sort`; do
     fmt_duration $last_path && FMT_PATH=$DUR_STRING
     fmt_duration $last_crash && FMT_CRASH=$DUR_STRING
     fmt_duration $last_hang && FMT_HANG=$DUR_STRING
-
-    echo "  last_path  : $FMT_PATH"
-    echo "  last_crash : $FMT_CRASH"
-    echo "  last_hang  : $FMT_HANG"
+    FMT_CWOP="not available"  # fallback when cycles_wo_finds is unset or empty
+    test -n "$cycles_wo_finds" && {
+      FMT_CWOP="$cycles_wo_finds"  # plain value by default, so 1..10 is no longer reported as "not available"
+      test "$cycles_wo_finds" -gt 10 && FMT_CWOP="${YELLOW}$cycles_wo_finds${NC}"  # more than 10: yellow
+      test "$cycles_wo_finds" -gt 50 && FMT_CWOP="${RED}$cycles_wo_finds${NC}"  # more than 50: red (overrides yellow)
+    }
+
+    echo "  last_path       : $FMT_PATH"
+    echo "  last_crash      : $FMT_CRASH"
+    echo "  last_hang       : $FMT_HANG"
+    echo "  cycles_wo_finds : $FMT_CWOP"
 
     CPU_USAGE=$(ps aux | grep $fuzzer_pid | grep -v grep | awk '{print $3}')
     MEM_USAGE=$(ps aux | grep $fuzzer_pid | grep -v grep | awk '{print $4}')
@@ -215,6 +240,9 @@ rm -f "$TMP"
 TOTAL_DAYS=$((TOTAL_TIME / 60 / 60 / 24))
 TOTAL_HRS=$(((TOTAL_TIME / 60 / 60) % 24))
 
+test -z "$TOTAL_WCOP" && TOTAL_WCOP="not available"
+fmt_duration $TOTAL_LAST_PATH && TOTAL_LAST_PATH=$DUR_STRING
+
 test "$TOTAL_TIME" = "0" && TOTAL_TIME=1
 
 echo "Summary stats"
@@ -239,6 +267,8 @@ if [ "$ALIVE_CNT" -gt "1" ]; then
 fi
 
 echo "       Crashes found : $TOTAL_CRASHES locally unique"
+echo "Cycles without finds : $TOTAL_WCOP"
+echo "  Time without finds : $TOTAL_LAST_PATH"
 echo
 
 exit 0
diff --git a/dictionaries/regexp.dict b/dictionaries/regexp.dict
index 2f29357e..4e2d7a0f 100644
--- a/dictionaries/regexp.dict
+++ b/dictionaries/regexp.dict
@@ -238,362 +238,6 @@
 "\\p{Nd}"
 "\\P{Any}"
 "\\p{Changes_When_NFKC_Casefolded}"
-"L~"
-"P{scx=Greek}??"
-"Q~"
-"R??"
-"R!??oo(E=?ar)baz-"
-"Sc?Sc{?{?"
-"U~"
-"V~"
-"W~"
-"Xdtc"
-"X~"
-"X?"
-"[-123],}"
-"[-????]+,}"
-"[00011],}"
-"[011],}"
-"[0],}"
-"[1111],}"
-"[111],}"
-"[118],}"
-"[11],}"
-"[11a],}"
-"[[]{}()%^# ],}"
-"[]"
-"[],}"
-"[]{}()%^# ,}"
-"[^123],}"
-"[a-b-c],}"
-"[a-zA-Z0-9],}"
-"[b"
-"[bfoo(?!bar)baz"
-"[c!],}"
-"[c1],}"
-"[cA],}"
-"[cZ],}"
-"[c_],}"
-"[ca],}"
-"[cz],}"
-"[c~],}"
-"[c~]w"
-"[d-d],}"
-"[d-z],}"
-"[u???[11<([c?]?:u??<a>)dccc]"
-"[ud808udf45-ud809udccc],}"
-"[x"
-"[x],}"
-"[xdz],}"
-"[xyz],}"
-"[x?"
-"[x?n4n4"
-"[x??19?"
-"[z-d],}"
-"[~?"
-"[?????"
-"[?"
-"[???],}"
-"[????-????],}"
-"[????"
-"]"
-"],}"
-"]QrC[w~]Qr"
-"]}"
-"]~"
-"^?000???????????????????????????x60?"
-"^12(a(?:1(b12))2)1dyb?9"
-"^xi!q"
-"^xxx$,}"
-"abc"
-"abc60,0}?{?"
-"aic"
-"b~"
-"c"
-"c!,}"
-"c,}"
-"cA,}"
-"c_,}"
-"cjcJcicIckcK,}"
-"c~"
-"c~,}"
-"d"
-"d?"
-"d??"
-"d(?:ab[]?9}"
-"dpN?(?<a>.)?"
-"duu{123a?"
-"d{1,9"
-"d~"
-"e"
-"e~"
-"e?}"
-"f~"
-"g~"
-"h~"
-"i~"
-"j~"
-"k?@a??=bbC?:!k?x!k0}??@???@a(P=b@??s@xx?~"
-"k?@a??=bbC?:!k?x!k0}??@???@a(P=b@??s@xxb~"
-"k?@a??=bbC?:!k?x!k0}??@???@a(P=b@??s@xxc~"
-"k?@a??=bbC?:!k?x!k0}??@???@a(P=b@??s@xxd~"
-"k?@a??=bbC?:!k?x!k0}??@???@a(P=b@??s@xxe~"
-"k?@a??=bbC?:!k?x!k0}??@???@a(P=b@??s@xxf~"
-"k?@a??=bbC?:!k?x!k0}??@???@a(P=b@??s@xxg~"
-"k?@a??=bbC?:!k?x!k0}??@???@a(P=b@??s@xxh~"
-"k?@a??=bbC?:!k?x!k0}??@???@a(P=b@??s@xxi~"
-"k?@a??=bbC?:!k?x!k0}??@???@a(P=b@??s@xxj~"
-"k?@a??=bbC?:!k?x!k0}??@???@a(P=b@??s@xxk~"
-"k?@a??=bbC?:!k?x!k0}??@???@a(P=b@??s@xxl~"
-"k?@a??=bbC?:!k?x!k0}??@???@a(P=b@??s@xxm~"
-"k?@a??=bbC?:!k?x!k0}??@???@a(P=b@??s@xxn~"
-"k?@a??=bbC?:!k?x!k0}??@???@a(P=b@??s@xxo~"
-"k?@a??=bbC?:!k?x!k0}??@???@a(P=b@??s@xxp~"
-"k?@a??=bbC?:!k?x!k0}??@???@a(P=b@??s@xxq~"
-"k?@a??=bbC?:!k?x!k0}??@???@a(P=b@??s@xxr~"
-"k?@a??=bbC?:!k?x!k0}??@???@a(P=b@??s@xxs~"
-"k?@a??=bbC?:!k?x!k0}??@???@a(P=b@??s@xxt~"
-"k?@a??=bbC?:!k?x!k0}??@???@a(P=b@??s@xxu~"
-"k?@a??=bbC?:!k?x!k0}??@???@a(P=b@??s@xxv~"
-"k?@a??=bbC?:!k?x!k0}??@???@a(P=b@??s@xxw~"
-"k?@a??=bbC?:!k?x!k0}??@???@a(P=b@??s@xxx~"
-"k?@a??=bbC?:!k?x!k0}??@???@a(P=b@??s@xxy~"
-"k?@a??=bbC?:!k?x!k0}??@???@a(P=b@??s@xxz~"
-"k?@a??=bbC?:!k?x!k0}??@??@a(P=b@??s@xx?~"
-"k?@a??=bbC?:!k?x!k0}??@??@a(P=b@??s@xxn~"
-"k?@a??=bbC?:!k?x!k0}??@??@a(P=b@??s@xxo~"
-"k?@a??=bbC?:!k?x!k0}??@??@a(P=b@??s@xxp~"
-"k?@a??=bbC?:!k?x!k0}??@??@a(P=b@??s@xxq~"
-"k?@a??=bbC?:!k?x!k0}??@??@a(P=b@??s@xxr~"
-"k?@a??=bbC?:!k?x!k0}??@??@a(P=b@??s@xxs~"
-"k?@a??=bbC?:!k?x!k0}??@??@a(P=b@??s@xxt~"
-"k?@a??=bbC?:!k?x!k0}??@??@a(P=b@??s@xxu~"
-"k?@a??=bbC?:!k?x!k0}??@??@a(P=b@??s@xxv~"
-"k?@a??=bbC?:!k?x!k0}??@??@a(P=b@??s@xxw~"
-"k?@a??=bbC?:!k?x!k0}??@??@a(P=b@??s@xxx~"
-"k?@a??=bbC?:!k?x!k0}??@??@a(P=b@??s@xxy~"
-"k?@a??=bbC?:!k?x!k0}??@??@a(P=b@??s@xxz~"
-"k?@a(?=bbb.~"
-"k?@a(?=bbbb~"
-"k?@a(?=bbbc~"
-"k?@a(?=bbbd~"
-"k?@a(?=bbbe~"
-"k?@a(?=bbbf~"
-"k?@a(?=bbbg~"
-"k?@a(?=bbbh~"
-"k?@a(?=bbbi~"
-"k?@a(?=bbbj~"
-"k?@a(?=bbbk~"
-"k?@a(?=bbbl~"
-"k?@a(?=bbbm~"
-"k?@a(?=bbbn~"
-"k?@a(?=bbbo~"
-"k?@a(?=bbbp~"
-"k?@a(?=bbbq~"
-"k?@a(?=bbbr~"
-"k?@a(?=bbbs~"
-"k?@a(?=bbbt~"
-"k?@a(?=bbbu~"
-"k?@a(?=bbbv~"
-"k?@a(?=bbbw~"
-"k?@a(?=bbbx~"
-"k?@a(?=bbby~"
-"k?@a(?=bbbz~"
-"k?@a(?=by?bC?:!k??????????????b~"
-"k?@a(?=by?bC?:!k??????????????c~"
-"k?@a(?=by?bC?:!k??????????????d~"
-"k?@a(?=by?bC?:!k??????????????e~"
-"k?@a(?=by?bC?:!k??????????????f~"
-"k?@a(?=by?bC?:!k??????????????g~"
-"k?@a(?=by?bC?:!k??????????????h~"
-"k?@a(?=by?bC?:!k??????????????i~"
-"k?@a(?=by?bC?:!k??????????????j~"
-"k?@a(?=by?bC?:!k??????????????k~"
-"k?@a(?=by?bC?:!k??????????????l~"
-"k?@a(?=by?bC?:!k??????????????m~"
-"k?@a(?=by?bC?:!k??????????????n~"
-"k?@a(?=by?bC?:!k??????????????o~"
-"k?@a(?=by?bC?:!k??????????????p~"
-"k?@a(?=by?bC?:!k??????????????q~"
-"k?@a(?=by?bC?:!k??????????????r~"
-"k?@a(?=by?bC?:!k??????????????s~"
-"k?@a(?=by?bC?:!k??????????????t~"
-"k?@a(?=by?bC?:!k??????????????u~"
-"k?@a(?=by?bC?:!k??????????????v~"
-"k?@a(?=by?bC?:!k??????????????w~"
-"k?@a(?=by?bC?:!k??????????????x~"
-"k?@a(?=by?bC?:!k??????????????y~"
-"k?@a(?=by?bC?:!k??????????????z~"
-"k?@a(?=by?bC?:!k???????????????~"
-"k?@a(?~"
-"k?@a(b~"
-"k?@a(c~"
-"k?@a(d~"
-"k?@a(e~"
-"k?@a(f~"
-"k?@a(g~"
-"k?@a(h~"
-"k?@a(i~"
-"k?@a(j~"
-"k?@a(k~"
-"k?@a(l~"
-"k?@a(m~"
-"k?@a(n~"
-"k?@a(o~"
-"k?@a(p~"
-"k?@a(q~"
-"k?@a(r~"
-"k?@a(s~"
-"k?@a(t~"
-"k?@a(u~"
-"k?@a(v~"
-"k?@a(w~"
-"k?@a(x~"
-"k?@a(y~"
-"k?@a(z~"
-"k0X@ab~"
-"k0X@ac~"
-"k0X@ad~"
-"k0X@ae~"
-"k0X@af~"
-"k0X@ag~"
-"k0X@ah~"
-"k0X@ai~"
-"k0X@aj~"
-"k0X@ak~"
-"k0X@al~"
-"k0X@am~"
-"k0X@an~"
-"k0X@ao~"
-"k0X@ap~"
-"k0X@aq~"
-"k0X@ar~"
-"k0X@as~"
-"k0X@at~"
-"k0X@au~"
-"k0X@av~"
-"k0X@aw~"
-"k0X@ax~"
-"k0X@ay~"
-"k0X@az~"
-"k0X@a?~"
-"k~"
-"l~"
-"m~"
-"n~"
-"o~"
-"p~"
-"q,}"
-"q~"
-"r~"
-"r?[c~]"
-"s~"
-"t~"
-"u0034,}"
-"u003z,}"
-"u0060,}"
-"ud808udf45*,}"
-"u~"
-"v~"
-"w"
-"w~"
-"x3z,}"
-"x60,}"
-"xyz?9"
-"x~"
-"y~"
-"z~"
-"{"
-"{??"
-"{ ,,?"
-"{-"
-"{0,d?????!"
-"{12345}pu{234:P}?"
-"{1?5"
-"{@"
-"{M,??"
-"{M,P{scx=Greek}???sn"
-"{M,??"
-"{M,??"
-"{M,?M,??"
-"{O"
-"{r~"
-"{s~"
-"{t~"
-"{u~"
-"{v~"
-"{w~"
-"{x~"
-"{y~"
-"{z~"
-"{}"
-"{}~"
-"{??@"
-"{?~"
-"},}"
-"}}"
-"}}}}}?}!}}}}}}}}}}}}}}}}}?},}"
-"}~"
-"}?w~???"
-"~~"
-"?!~"
-"?$"
-"?*?9?nnRnnn?"
-"?.~"
-"?123222222??"
-"?:??"
-"?R"
-"?b~"
-"?c~"
-"?d~"
-"?d???"
-"?e~"
-"?f~"
-"?g~"
-"?h~"
-"?i~"
-"?j~"
-"?k~"
-"?l~"
-"?m~"
-"?n~"
-"?o~"
-"?p~"
-"?q~"
-"?r~"
-"?s~"
-"?t~"
-"?u~"
-"?v~"
-"?v~?v"
-"?w~"
-"?x~"
-"?y~"
-"?z~"
-"?}"
-"??~"
-"?????????dadi(?!bbb"
-"??~"
-"k?@a??=bbC?:!k?x!k0}??@???@a(P=b@??s@xxq~>>>>>>>>>>>>>>>>>>"
-"?f??123222222??"
-"?fP{gc=Decimal_Number}"
-"?f2jq?oo@ooooh??"
-"?[???],}f?"
-"?[???],}nbbc2jocom"
-"?[]"
-"?[],}?"
-"?[],}f?"
-"?[]f?"
-"?[]{}()%^#"
-"?[^123],}f?"
-"?[^123]nbbc2jocom"
-"?[a-b-c],}f?"
-"?[a-b-c]nbbc2jocom"
-"?[a-zA-Z0-9],}f?"
-"?[a-zA-Z0-9],}jocom"
-"?[a-zA-Z0-9]c2jocom"
-"?[bfoo(?!bar)bazcom"
-"?[bfoo(?!bar)bazf?"
 "(?:a?)??"
 "a?)"xyz{93}"
 "{93}"
@@ -601,3 +245,12 @@
 "[\x8f]"
 "[\xf0\x9f\x92\xa9-\xf4\x8f\xbf\x92\xa9-\xf4\x8f\xbf\xbf]"
 "[\x92\xa9-\xf4\x8f\xbf\xbf]"
+"\\1\\2(b\\1\\2))\\2)\\1"
+"\\1\\2(a(?:\\1\\2))\\2)\\1"
+"?:\\1"
+"\\1(b\\1\\2))\\2)\\1"
+"\\1\\2(a(?:\\1(b\\1\\2))\\2)\\1"
+"foo(?=bar)bar)baz"
+"fo(?o(?o(?o(?=bar)baz"
+"foo(?=bar)baz"
+"foo(?=bar)bar)az"
diff --git a/docs/Changelog.md b/docs/Changelog.md
index e1f3cd7e..108ebd08 100644
--- a/docs/Changelog.md
+++ b/docs/Changelog.md
@@ -9,27 +9,65 @@ Want to stay in the loop on major new features? Join our mailing list by
 sending a mail to <afl-users+subscribe@googlegroups.com>.
 
 
-### Version ++2.62d (develop):
-
+### Version ++2.63d (development):
+  - llvm_mode LTO mode:
+    - now requires llvm11 - but compiles all targets! :)
+    - autodictionary feature added, enable with AFL_LLVM_LTO_AUTODICTIONARY
+    - variable map size usage
+  - afl-fuzz:
+    - variable map size support added (only LTO mode can use this)
+    - snapshot feature usage now visible in UI
+  - compare-transform/AFL_LLVM_LAF_TRANSFORM_COMPARES now transforms also
+    static global and local variable comparisons (cannot find all though)
+  - extended forkserver: map_size and more information is communicated to
+    afl-fuzz (and afl-fuzz acts accordingly)
+  - more refactoring
+  - if AFL_CC/AFL_CXX is set but empty afl compilers did fail, fixed
+    (this bug is in vanilla afl too)
+
+
+### Version ++2.63c (release):
+
+  ! the repository was moved from vanhauser-thc to AFLplusplus. It is now
+    its own organisation :)
+  ! development and acceptance of PRs now happen only in the dev branch
+    and only occasionally when everything is fine we PR to master
   - all:
-    - big code changes to make afl-fuzz thread-safe so afl-fuzz can spawn 
+    - big code changes to make afl-fuzz thread-safe so afl-fuzz can spawn
       multiple fuzzing threads in the future or even become a library
     - afl basic tools now report on the environment variables picked up
     - more tools get environment variable usage info in the help output
+    - force all output to stdout (some OK/SAY/WARN messages were sent to
+      stdout, some to stderr)
+    - uninstrumented mode uses an internal forkserver ("fauxserver")
+    - now builds with `-D_FORTIFY_SOURCE=2`
+    - drastically reduced number of (de)allocations during fuzzing
   - afl-fuzz:
     - python mutator modules and custom mutator modules now use the same
       interface and hence the API changed
     - AFL_AUTORESUME will resume execution without the need to specify `-i -`
-    - added experimental power schedule -p mmopt that ignores the runtime of
-      queue entries and gives higher weighting to the last 5 queue entries
-      it is currently experimental and subject to change but preliminary
-      results are good
+    - added experimental power schedules (-p):
+      - mmopt: ignores runtime of queue entries, gives higher weighting to
+               the last 5 queue entries
+      - rare: puts focus on queue entries that hit rare branches, also ignores
+              runtime
+  - llvm_mode: 
+    - added SNAPSHOT feature (using https://github.com/AFLplusplus/AFL-Snapshot-LKM)
+    - added Control Flow Integrity sanitizer (AFL_USE_CFISAN)
+    - added AFL_LLVM_INSTRUMENT option to control the instrumentation type
+      more easily: DEFAULT, CFG (INSTRIM), LTO, CTX, NGRAM-x (x=2-16)
+    - made USE_TRACE_PC compile obsolete
   - LTO collision free instrumented added in llvm_mode with afl-clang-lto -
-    note that this mode is amazing, but quite some targets won't compile
+    this mode is amazing but requires you to build llvm 11 yourself
+  - Added llvm_mode NGRAM prev_loc coverage by Adrian Herrera
+    (https://github.com/adrianherrera/afl-ngram-pass/), activate by setting
+    AFL_LLVM_INSTRUMENT=NGRAM-<value> or AFL_LLVM_NGRAM_SIZE=<value>
+  - Added llvm_mode context sensitive branch coverage, activated by setting
+    AFL_LLVM_INSTRUMENT=CTX or AFL_LLVM_CTX=1
   - llvm_mode InsTrim mode:
-    - removed workaround for bug where paths were not instrumented and 
+    - removed workaround for bug where paths were not instrumented and
       imported fix by author
-    - made skipping 1 block functions an option and is disable by default,
+    - made skipping 1 block functions an option and is disabled by default,
       set AFL_LLVM_INSTRIM_SKIPSINGLEBLOCK=1 to re-enable this
   - qemu_mode:
     - qemu_mode now uses solely the internal capstone version to fix builds
@@ -39,6 +77,8 @@ sending a mail to <afl-users+subscribe@googlegroups.com>.
     - now supports hang mode `-H` to minimize hangs
     - fixed potential afl-tmin missbehavior for targets with multiple hangs
   - Pressing Control-c in afl-cmin did not terminate it for some OS
+  - the custom API was rewritten and is now the same for Python and shared
+    libraries.
 
 
 ### Version ++2.62c (release):
@@ -192,7 +232,7 @@ sending a mail to <afl-users+subscribe@googlegroups.com>.
 
   - big code refactoring:
     * all includes are now in include/
-    * all afl sources are now in src/ - see src/README.src
+    * all afl sources are now in src/ - see src/README.md
     * afl-fuzz was splitted up in various individual files for including
       functionality in other programs (e.g. forkserver, memory map, etc.)
       for better readability.
diff --git a/docs/PATCHES.md b/docs/PATCHES.md
index 1dfb6622..a6783523 100644
--- a/docs/PATCHES.md
+++ b/docs/PATCHES.md
@@ -20,6 +20,7 @@ afl-qemu-speed.diff			by abiondo on github
 afl-qemu-optimize-map.diff		by mh(at)mh-sec(dot)de
 ```
 
++ llvm_mode ngram prev_loc coverage (github.com/adrianherrera/afl-ngram-pass)
 + Custom mutator (native library) (by kyakdan)
 + unicorn_mode (modernized and updated by domenukk)
 + instrim (https://github.com/csienslab/instrim) was integrated
diff --git a/docs/custom_mutators.md b/docs/custom_mutators.md
index 14d8f518..49ce761e 100644
--- a/docs/custom_mutators.md
+++ b/docs/custom_mutators.md
@@ -27,18 +27,16 @@ performed with the custom mutator.
 
 C/C++:
 ```c
-void afl_custom_init(unsigned int seed);
-size_t afl_custom_fuzz(uint8_t** buf, size_t buf_size, uint8_t* add_buf,
-                       size_t add_buf_size, size_t max_size);
-size_t afl_custom_pre_save(uint8_t* buf, size_t buf_size, uint8_t** out_buf);
-uint32_t afl_custom_init_trim(uint8_t* buf, size_t buf_size);
-void afl_custom_trim(uint8_t** out_buf, size_t* out_buf_size);
-uint32_t afl_custom_post_trim(uint8_t success);
-size_t afl_custom_havoc_mutation(uint8_t** buf, size_t buf_size, size_t max_size);
-uint8_t afl_custom_havoc_mutation_probability(void);
-uint8_t afl_custom_queue_get(const uint8_t* filename);
-void afl_custom_queue_new_entry(const uint8_t* filename_new_queue,
-                                const uint8_t* filename_orig_queue);
+void *afl_custom_init(afl_t *afl, unsigned int seed);
+size_t afl_custom_fuzz(void *data, uint8_t *buf, size_t buf_size, u8 **out_buf, uint8_t *add_buf, size_t add_buf_size, size_t max_size);
+size_t afl_custom_pre_save(void *data, uint8_t *buf, size_t buf_size, uint8_t **out_buf);
+int32_t afl_custom_init_trim(void *data, uint8_t *buf, size_t buf_size);
+size_t afl_custom_trim(void *data, uint8_t **out_buf);
+int32_t afl_custom_post_trim(void *data, int success);
+size_t afl_custom_havoc_mutation(void *data, u8 *buf, size_t buf_size, u8 **out_buf, size_t max_size);
+uint8_t afl_custom_havoc_mutation_probability(void *data);
+uint8_t afl_custom_queue_get(void *data, const uint8_t *filename); void afl_custom_queue_new_entry(void *data, const uint8_t *filename_new_queue, const uint8_t *filename_orig_queue);
+void afl_custom_deinit(void *data);
 ```
 
 Python:
@@ -76,9 +74,9 @@ def queue_new_entry(filename_new_queue, filename_orig_queue):
 
 ### Custom Mutation
 
-- `init` (optional):
+- `init`:
 
-    This method is called when AFL++ starts up and is used to seed RNG.
+    This method is called when AFL++ starts up and is used to seed RNG and set up buffers and state.
 
 - `queue_get` (optional):
 
@@ -110,7 +108,7 @@ def queue_new_entry(filename_new_queue, filename_orig_queue):
 
 - `queue_new_entry` (optional):
 
-    This methods is called after adding a new test case to the queue. 
+    This method is called after adding a new test case to the queue.
 
 ### Trimming Support
 
@@ -144,7 +142,7 @@ trimmed input. Here's a quick API description:
 
     This method is called for each trimming operation. It doesn't have any
     arguments because we already have the initial buffer from `init_trim` and we
-    can memorize the current state in global variables. This can also save
+    can memorize the current state in the data variables. This can also save
     reparsing steps for each iteration. It should return the trimmed input
     buffer, where the returned data must not exceed the initial input data in
     length. Returning anything that is larger than the original data (passed to
@@ -158,6 +156,8 @@ trimmed input. Here's a quick API description:
     In any case, this method must return the next trim iteration index (from 0
     to the maximum amount of steps you returned in `init_trim`).
 
+`deinit` is the last method to be called; it deinitializes the state.
+
 Omitting any of three methods will cause the trimming to be disabled and trigger
 a fallback to the builtin default trimming routine.
 
@@ -166,7 +166,7 @@ a fallback to the builtin default trimming routine.
 Optionally, the following environment variables are supported:
 
 - `AFL_CUSTOM_MUTATOR_ONLY`
- 
+
     Disable all other mutation stages. This can prevent broken testcases
     (those that your Python module can't work with anymore) to fill up your
     queue. Best combined with a custom trimming routine (see below) because
diff --git a/docs/env_variables.md b/docs/env_variables.md
index 8c7510cd..7890da35 100644
--- a/docs/env_variables.md
+++ b/docs/env_variables.md
@@ -31,7 +31,9 @@ tools make fairly broad use of environmental variables:
 
     (You can also enable MSAN via AFL_USE_MSAN; ASAN and MSAN come with the
     same gotchas; the modes are mutually exclusive. UBSAN can be enabled
-    similarly by setting the environment variable AFL_USE_UBSAN=1)
+    similarly by setting the environment variable AFL_USE_UBSAN=1. Finally
+    there is the Control Flow Integrity sanitizer that can be activated by
+    AFL_USE_CFISAN=1)
 
   - Setting AFL_CC, AFL_CXX, and AFL_AS lets you use alternate downstream
     compilation tools, rather than the default 'clang', 'gcc', or 'as' binaries
@@ -91,25 +93,81 @@ of the settings discussed in section #1, with the exception of:
 
 Then there are a few specific features that are only available in llvm_mode:
 
+### Select the instrumentation mode
+
+    - AFL_LLVM_INSTRUMENT - this configures the instrumentation mode. 
+      Available options:
+        DEFAULT - classic AFL (map[cur_loc ^ prev_loc >> 1]++)
+        CFG - InsTrim instrumentation (see below)
+        LTO - LTO instrumentation (see below)
+        CTX - context sensitive instrumentation (see below)
+        NGRAM-x - deeper previous location coverage (from NGRAM-2 up to NGRAM-16)
+      Only one can be used.
+
 ### LTO
 
-This is a different kind way of instrumentation: first it compiles all
-code in LTO (link time optimization) and then performs an edge inserting
-instrumentation which is 100% collision free (collisions are a big issue
-in afl and afl-like instrumentations). This is performed by using
-afl-clang-lto/afl-clang-lto++ instead of afl-clang-fast, but is only
-built if LLVM 9 or newer is used.
+    This is a different kind of instrumentation: first it compiles all
+    code in LTO (link time optimization) and then performs an edge inserting
+    instrumentation which is 100% collision free (collisions are a big issue
+    in afl and afl-like instrumentations). This is performed by using
+    afl-clang-lto/afl-clang-lto++ instead of afl-clang-fast, but is only
+    built if LLVM 11 or newer is used.
+
+   - AFL_LLVM_LTO_AUTODICTIONARY will generate a dictionary in the target
+     binary based on string compare and memory compare functions.
+     afl-fuzz will automatically get these transmitted when starting to
+     fuzz.
 
-None of these options are necessary to be used and are rather for manual
-use (which only ever the author of this LTO implementation will use ;-)
-These are used if several seperated instrumentation are performed which
-are then later combined.
+    None of the following options are necessary to be used and are rather for
+    manual use (which only ever the author of this LTO implementation will use).
+    These are used if several separate instrumentations are performed which
+    are then later combined.
 
    - AFL_LLVM_LTO_STARTID sets the starting location ID for the instrumentation.
      This defaults to 1
    - AFL_LLVM_LTO_DONTWRITEID prevents that the highest location ID written
      into the instrumentation is set in a global variable
 
+    See llvm_mode/README.LTO.md for more information.
+
+### INSTRIM
+
+    This feature increases the speed by ~15% without any disadvantages.
+
+    - Setting AFL_LLVM_INSTRIM or AFL_LLVM_INSTRUMENT=CFG activates this mode
+
+    - Setting AFL_LLVM_INSTRIM_LOOPHEAD=1 expands on INSTRIM to optimize loops.
+      afl-fuzz will only be able to see the path the loop took, but not how
+      many times it was called (unless it is a complex loop).
+
+    - Setting AFL_LLVM_INSTRIM_SKIPSINGLEBLOCK=1 will skip instrumenting
+      functions with a single basic block. This is useful for most C and
+      some C++ targets.
+
+    See llvm_mode/README.instrim.md
+
+### NGRAM
+
+    - Setting AFL_LLVM_NGRAM_SIZE or AFL_LLVM_INSTRUMENT=NGRAM-{value}
+      activates ngram prev_loc coverage, good values are 2, 4 or 8
+      (any value between 2 and 16 is valid).
+      It is highly recommended to increase the MAP_SIZE_POW2 definition in
+      config.h to at least 18 and maybe up to 20 for this as otherwise too
+      many map collisions occur.
+
+    See llvm_mode/README.ngram.md
+
+### CTX
+
+    - Setting AFL_LLVM_CTX or AFL_LLVM_INSTRUMENT=CTX
+      activates context sensitive branch coverage - meaning that each edge
+      is additionally combined with its caller.
+      It is highly recommended to increase the MAP_SIZE_POW2 definition in
+      config.h to at least 18 and maybe up to 20 for this as otherwise too
+      many map collisions occur.
+
+    See llvm_mode/README.ctx.md
+
 ### LAF-INTEL
 
     This great feature will split compares to series of single byte comparisons
@@ -134,23 +192,6 @@ are then later combined.
 
     See llvm_mode/README.whitelist.md for more information.
 
-### INSTRIM
-
-    This feature increases the speed by whopping 20% but at the cost of a
-    lower path discovery and therefore coverage.
-
-    - Setting AFL_LLVM_INSTRIM activates this mode
-
-    - Setting AFL_LLVM_INSTRIM_LOOPHEAD=1 expands on INSTRIM to optimize loops.
-      afl-fuzz will only be able to see the path the loop took, but not how
-      many times it was called (unless it is a complex loop).
-
-    - Setting AFL_LLVM_INSTRIM_SKIPSINGLEBLOCK=1 will skip instrumenting
-      functions with a single basic block. This is useful for most C and
-      some C++ targets.
-
-    See llvm_mode/README.instrim.md
-
 ### NOT_ZERO
 
     - Setting AFL_LLVM_NOT_ZERO=1 during compilation will use counters
@@ -221,6 +262,9 @@ checks or alter some of the more exotic semantics of the tool:
   - AFL_NO_ARITH causes AFL to skip most of the deterministic arithmetics.
     This can be useful to speed up the fuzzing of text-based file formats.
 
+  - AFL_NO_SNAPSHOT will advise afl-fuzz not to use the snapshot feature
+    if the snapshot lkm is loaded
+
   - AFL_SHUFFLE_QUEUE randomly reorders the input queue on startup. Requested
     by some users for unorthodox parallelized fuzzing setups, but not
     advisable otherwise.
diff --git a/docs/ideas.md b/docs/ideas.md
index 44dcccb2..686c262d 100644
--- a/docs/ideas.md
+++ b/docs/ideas.md
@@ -33,7 +33,7 @@ This is an excellent mutations scheduler based on Particle Swarm
 Optimization but the current implementation schedule only the mutations
 that were present on AFL.
 
-AFL++ added a lost of optional mutators like the Input-2-State one based
+AFL++ added a lot of optional mutators like the Input-2-State one based
 on Redqueen, the Radamsa mutator, the Custom mutator (the user can define
 its own mutator) and the work is to generalize MOpt for all the current
 and future mutators.
diff --git a/docs/notes_for_asan.md b/docs/notes_for_asan.md
index feac49f9..6a4806c0 100644
--- a/docs/notes_for_asan.md
+++ b/docs/notes_for_asan.md
@@ -28,6 +28,10 @@ Note that ASAN is incompatible with -static, so be mindful of that.
 
 (You can also use AFL_USE_MSAN=1 to enable MSAN instead.)
 
+NOTE: if you run several slaves only one should run the target compiled with
+ASAN (and UBSAN, CFISAN), the others should run the target with no sanitizers
+compiled in.
+
 There is also the option of generating a corpus using a non-ASAN binary, and
 then feeding it to an ASAN-instrumented one to check for bugs. This is faster,
 and can give you somewhat comparable results. You can also try using
diff --git a/docs/power_schedules.md b/docs/power_schedules.md
index cdada0f6..c69c64d2 100644
--- a/docs/power_schedules.md
+++ b/docs/power_schedules.md
@@ -20,6 +20,7 @@ We find that AFL's exploitation-based constant schedule assigns **too much energ
 | `-p lin` | ![LIN](http://latex.codecogs.com/gif.latex?p%28i%29%20%3D%20%5Cmin%5Cleft%28%5Cfrac%7B%5Calpha%28i%29%7D%7B%5Cbeta%7D%5Ccdot%5Cfrac%7Bs%28i%29%7D%7Bf%28i%29%7D%2CM%5Cright%29) |
 | `-p exploit` (AFL) | ![LIN](http://latex.codecogs.com/gif.latex?p%28i%29%20%3D%20%5Calpha%28i%29) |
 | `-p mmopt` | Experimental: `explore` with no weighting to runtime and increased weighting on the last 5 queue entries |
+| `-p rare` | Experimental: `rare` puts focus on queue entries that hit rare edges |
 where *α(i)* is the performance score that AFL uses to compute for the seed input *i*, *β(i)>1* is a constant, *s(i)* is the number of times that seed *i* has been chosen from the queue, *f(i)* is the number of generated inputs that exercise the same path as seed *i*, and *μ* is the average number of generated inputs exercising a path.
   
 More details can be found in the paper that was accepted at the [23rd ACM Conference on Computer and Communications Security (CCS'16)](https://www.sigsac.org/ccs/CCS2016/accepted-papers/).
diff --git a/docs/status_screen.md b/docs/status_screen.md
index 0bc636c4..a66558b9 100644
--- a/docs/status_screen.md
+++ b/docs/status_screen.md
@@ -372,26 +372,40 @@ For unattended operation, some of the key status screen information can be also
 found in a machine-readable format in the fuzzer_stats file in the output
 directory. This includes:
 
-  - `start_time`     - unix time indicating the start time of afl-fuzz
-  - `last_update`    - unix time corresponding to the last update of this file
-  - `fuzzer_pid`     - PID of the fuzzer process
-  - `cycles_done`    - queue cycles completed so far
-  - `execs_done`     - number of execve() calls attempted
-  - `execs_per_sec`  - overall number of execs per second
-  - `paths_total`    - total number of entries in the queue
-  - `paths_found`    - number of entries discovered through local fuzzing
-  - `paths_imported` - number of entries imported from other instances
-  - `max_depth`      - number of levels in the generated data set
-  - `cur_path`       - currently processed entry number
-  - `pending_favs`   - number of favored entries still waiting to be fuzzed
-  - `pending_total`  - number of all entries waiting to be fuzzed
-  - `stability      - percentage of bitmap bytes that behave consistently
-  - `variable_paths` - number of test cases showing variable behavior
-  - `unique_crashes` - number of unique crashes recorded
-  - `unique_hangs`   - number of unique hangs encountered
-  - `command_line`   - full command line used for the fuzzing session
-  - `slowest_exec_ms`- real time of the slowest execution in seconds
-  - `peak_rss_mb`    - max rss usage reached during fuzzing in MB
+  - `start_time`        - unix time indicating the start time of afl-fuzz
+  - `last_update`       - unix time corresponding to the last update of this file
+  - `run_time`          - run time in seconds to the last update of this file
+  - `fuzzer_pid`        - PID of the fuzzer process
+  - `cycles_done`       - queue cycles completed so far
+  - `cycles_wo_finds`   - number of cycles without any new paths found
+  - `execs_done`        - number of execve() calls attempted
+  - `execs_per_sec`     - overall number of execs per second
+  - `paths_total`       - total number of entries in the queue
+  - `paths_favored`     - number of queue entries that are favored
+  - `paths_found`       - number of entries discovered through local fuzzing
+  - `paths_imported`    - number of entries imported from other instances
+  - `max_depth`         - number of levels in the generated data set
+  - `cur_path`          - currently processed entry number
+  - `pending_favs`      - number of favored entries still waiting to be fuzzed
+  - `pending_total`     - number of all entries waiting to be fuzzed
+  - `variable_paths`    - number of test cases showing variable behavior
+  - `stability`         - percentage of bitmap bytes that behave consistently
+  - `bitmap_cvg`        - percentage of edge coverage found in the map so far
+  - `unique_crashes`    - number of unique crashes recorded
+  - `unique_hangs`      - number of unique hangs encountered
+  - `last_path`         - seconds since the last path was found
+  - `last_crash`        - seconds since the last crash was found
+  - `last_hang`         - seconds since the last hang was found
+  - `execs_since_crash` - execs since the last crash was found
+  - `exec_timeout`      - the -t command line value
+  - `slowest_exec_ms`   - real time of the slowest execution in ms
+  - `peak_rss_mb`       - max rss usage reached during fuzzing in MB
+  - `edges_found`       - how many edges have been found
+  - `var_byte_count`    - how many edges are non-deterministic
+  - `afl_banner`        - banner text (e.g. the target name)
+  - `afl_version`       - the version of afl used
+  - `target_mode`       - default, persistent, qemu, unicorn, dumb
+  - `command_line`      - full command line used for the fuzzing session
 
 Most of these map directly to the UI elements discussed earlier on.
 
diff --git a/examples/README.md b/examples/README.md
index 37fae1a0..3c5aa9f2 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -2,9 +2,7 @@
 
 Here's a quick overview of the stuff you can find in this directory:
 
-  - custom_mutstors      - An example custom mutator
-  
-  - python_mutators      - Python mutators examples
+  - custom_mutators      - example custom mutators in Python and C
 
   - argv_fuzzing         - a simple wrapper to allow cmdline to be fuzzed
                            (e.g., to test setuid programs).
@@ -15,8 +13,8 @@ Here's a quick overview of the stuff you can find in this directory:
   - bash_shellshock      - a simple hack used to find a bunch of
                            post-Shellshock bugs in bash.
 
-  - canvas_harness       - a test harness used to find browser bugs with a 
-                           corpus generated using simple image parsing 
+  - canvas_harness       - a test harness used to find browser bugs with a
+                           corpus generated using simple image parsing
                            binaries & afl-fuzz.
 
   - clang_asm_normalize  - a script that makes it easy to instrument
@@ -34,7 +32,7 @@ Here's a quick overview of the stuff you can find in this directory:
                            mode to speed up certain fuzzing jobs.
 
   - post_library         - an example of how to build postprocessors for AFL.
-  
+
   - socket_fuzzing       - a LD_PRELOAD library 'redirects' a socket to stdin
                            for fuzzing access with afl++
 
diff --git a/examples/argv_fuzzing/Makefile b/examples/argv_fuzzing/Makefile
index 34192e39..104d0f55 100644
--- a/examples/argv_fuzzing/Makefile
+++ b/examples/argv_fuzzing/Makefile
@@ -20,19 +20,24 @@ HELPER_PATH = $(PREFIX)/lib/afl
 CFLAGS = -fPIC -Wall -Wextra
 LDFLAGS = -shared
 
-ifneq "$(filter Linux GNU%,$(shell uname))" ""
-  LDFLAGS  += -ldl
-endif
+UNAME_SAYS_LINUX=$(shell uname | grep -E '^Linux|^GNU' >/dev/null; echo $$?)
+UNAME_SAYS_LINUX:sh=uname | grep -E '^Linux|^GNU' >/dev/null; echo $$?
+# The value is grep's exit status: 1 (no match) -> empty, 0 (match) -> -ldl.
+_LDFLAGS_ADD=$(UNAME_SAYS_LINUX:1=)
+LDFLAGS_ADD=$(_LDFLAGS_ADD:0=-ldl)
+LDFLAGS  += $(LDFLAGS_ADD)
 
 # on gcc for arm there is no -m32, but -mbe32
 M32FLAG = -m32
 M64FLAG = -m64
-ifeq "$(findstring clang, $(shell $(CC) --version 2>/dev/null))" ""
- ifneq (,$(findstring arm, "$(shell $(CC) -v 2>&1 >/dev/null)"))
-  M32FLAG = -mbe32
- endif
-endif
 
+CC_IS_GCC=$(shell $(CC) --version 2>/dev/null | grep gcc >/dev/null; echo $$?)
+CC_IS_ARMCOMPILER=$(shell $(CC) -v 2>&1 >/dev/null | grep arm >/dev/null; echo $$?)
+# Each holds only grep's exit status (0 = match); "00" = gcc targeting arm.
+_M32FLAG=$(CC_IS_GCC)$(CC_IS_ARMCOMPILER)
+__M32FLAG=$(_M32FLAG:00=-mbe32)
+___M32FLAG=$(__M32FLAG:$(CC_IS_GCC)$(CC_IS_ARMCOMPILER)=-m32)
+M32FLAG=$(___M32FLAG)
 
 all: argvfuzz32.so argvfuzz64.so
 
diff --git a/examples/crash_triage/triage_crashes.sh b/examples/crash_triage/triage_crashes.sh
index 6d026d61..bf763cba 100755
--- a/examples/crash_triage/triage_crashes.sh
+++ b/examples/crash_triage/triage_crashes.sh
@@ -91,10 +91,10 @@ for crash in $DIR/crashes/id:*; do
   for a in $@; do
 
     if [ "$a" = "@@" ] ; then
-      args="$use_args $crash"
+      use_args="$use_args $crash"
       unset use_stdio
     else
-      args="$use_args $a"
+      use_args="$use_args $a"
     fi
 
   done
diff --git a/examples/custom_mutators/Makefile b/examples/custom_mutators/Makefile
new file mode 100644
index 00000000..9849f3f4
--- /dev/null
+++ b/examples/custom_mutators/Makefile
@@ -0,0 +1,7 @@
+all: libexamplemutator.so
+.PHONY: all clean
+libexamplemutator.so: example.c custom_mutator_helpers.h
+	$(CC) $(CFLAGS) -D_FORTIFY_SOURCE=2 -O3 -fPIC -shared -g -I ../../include example.c -o libexamplemutator.so
+
+clean:
+	rm -f libexamplemutator.so
diff --git a/examples/custom_mutators/README.md b/examples/custom_mutators/README.md
index ce49436e..6fc7be6c 100644
--- a/examples/custom_mutators/README.md
+++ b/examples/custom_mutators/README.md
@@ -1,13 +1,13 @@
 # Examples for the custom mutator
 
 These are example and helper files for the custom mutator feature.
-See [docs/python_mutators.md](../docs/custom_mutators.md) for more information
+See [docs/custom_mutators.md](../../docs/custom_mutators.md) for more information
 
 Note that if you compile with python3.7 you must use python3 scripts, and if
-you use pyton2.7 to compile python2 scripts!
+you use python2.7 to compile python2 scripts!
 
 example.c - this is a simple example written in C and should be compiled to a
-          shared library
+          shared library. Use make to compile it and produce libexamplemutator.so
 
 example.py - this is the template you can use, the functions are there but they
            are empty
@@ -20,3 +20,9 @@ common.py - this can be used for common functions and helpers.
 wrapper_afl_min.py - mutation of XML documents, loads XmlMutatorMin.py
 
 XmlMutatorMin.py - module for XML mutation
+
+custom_mutator_helpers.h is a header that defines some helper routines
+like surgical_havoc_mutate() that allow you to perform a randomly chosen
+mutation from a subset of the havoc mutations.
+If you use it, you have to specify -I /path/to/AFLplusplus/include when
+compiling.
diff --git a/examples/custom_mutators/custom_mutator_helpers.h b/examples/custom_mutators/custom_mutator_helpers.h
new file mode 100644
index 00000000..0848321f
--- /dev/null
+++ b/examples/custom_mutators/custom_mutator_helpers.h
@@ -0,0 +1,342 @@
+#ifndef CUSTOM_MUTATOR_HELPERS
+#define CUSTOM_MUTATOR_HELPERS
+
+#include "config.h"
+#include "types.h"
+#include <stdlib.h>
+
+#define INITIAL_GROWTH_SIZE (64)
+
+#define RAND_BELOW(limit) (rand() % (limit))
+
+/* Use in a struct: creates a name_buf and a name_size variable. */
+#define BUF_VAR(type, name) \
+  type * name##_buf;        \
+  size_t name##_size;
+/* this fills in `&structptr->something_buf, &structptr->something_size`. */
+#define BUF_PARAMS(struct, name) \
+  (void **)&struct->name##_buf, &struct->name##_size
+
+typedef struct {
+
+} afl_t;
+
+static void surgical_havoc_mutate(u8 *out_buf, s32 begin, s32 end) {
+  /* Apply one random, length-preserving havoc mutation to out_buf[begin,end) */
+  static s8  interesting_8[] = {INTERESTING_8};
+  static s16 interesting_16[] = {INTERESTING_8, INTERESTING_16};
+  static s32 interesting_32[] = {INTERESTING_8, INTERESTING_16, INTERESTING_32};
+  if (end <= begin) return;  /* empty range: RAND_BELOW(0) would divide by 0 */
+  switch (RAND_BELOW(12)) {
+
+    case 0: {
+
+      /* Flip a single bit somewhere. Spooky! */
+
+      s32 bit_idx = ((RAND_BELOW(end - begin) + begin) << 3) + RAND_BELOW(8);
+
+      out_buf[bit_idx >> 3] ^= 128 >> (bit_idx & 7);
+
+      break;
+
+    }
+
+    case 1: {
+
+      /* Set byte to interesting value. */
+
+      u8 val = interesting_8[RAND_BELOW(sizeof(interesting_8))];
+      out_buf[(RAND_BELOW(end - begin) + begin)] = val;
+
+      break;
+
+    }
+
+    case 2: {
+
+      /* Set word to interesting value, randomly choosing endian. */
+
+      if (end - begin < 2) break;
+
+      s32 byte_idx = (RAND_BELOW(end - begin) + begin);
+
+      if (byte_idx >= end - 1) break;
+
+      switch (RAND_BELOW(2)) {
+
+        case 0:
+          *(u16 *)(out_buf + byte_idx) =
+              interesting_16[RAND_BELOW(sizeof(interesting_16) >> 1)];
+          break;
+        case 1:
+          *(u16 *)(out_buf + byte_idx) =
+              SWAP16(interesting_16[RAND_BELOW(sizeof(interesting_16) >> 1)]);
+          break;
+
+      }
+
+      break;
+
+    }
+
+    case 3: {
+
+      /* Set dword to interesting value, randomly choosing endian. */
+
+      if (end - begin < 4) break;
+
+      s32 byte_idx = (RAND_BELOW(end - begin) + begin);
+
+      if (byte_idx >= end - 3) break;
+
+      switch (RAND_BELOW(2)) {
+
+        case 0:
+          *(u32 *)(out_buf + byte_idx) =
+              interesting_32[RAND_BELOW(sizeof(interesting_32) >> 2)];
+          break;
+        case 1:
+          *(u32 *)(out_buf + byte_idx) =
+              SWAP32(interesting_32[RAND_BELOW(sizeof(interesting_32) >> 2)]);
+          break;
+
+      }
+
+      break;
+
+    }
+
+    case 4: {
+
+      /* Set qword to interesting value, randomly choosing endian. */
+
+      if (end - begin < 8) break;
+
+      s32 byte_idx = (RAND_BELOW(end - begin) + begin);
+
+      if (byte_idx >= end - 7) break;
+
+      switch (RAND_BELOW(2)) {
+
+        case 0:
+          *(u64 *)(out_buf + byte_idx) =
+              (s64)interesting_32[RAND_BELOW(sizeof(interesting_32) >> 2)];
+          break;
+        case 1:
+          *(u64 *)(out_buf + byte_idx) = SWAP64(
+              (s64)interesting_32[RAND_BELOW(sizeof(interesting_32) >> 2)]);
+          break;
+
+      }
+
+      break;
+
+    }
+
+    case 5: {
+
+      /* Randomly subtract from byte. */
+
+      out_buf[(RAND_BELOW(end - begin) + begin)] -= 1 + RAND_BELOW(ARITH_MAX);
+
+      break;
+
+    }
+
+    case 6: {
+
+      /* Randomly add to byte. */
+
+      out_buf[(RAND_BELOW(end - begin) + begin)] += 1 + RAND_BELOW(ARITH_MAX);
+
+      break;
+
+    }
+
+    case 7: {
+
+      /* Randomly subtract from word, random endian. */
+
+      if (end - begin < 2) break;
+
+      s32 byte_idx = (RAND_BELOW(end - begin) + begin);
+
+      if (byte_idx >= end - 1) break;
+
+      if (RAND_BELOW(2)) {
+
+        *(u16 *)(out_buf + byte_idx) -= 1 + RAND_BELOW(ARITH_MAX);
+
+      } else {
+
+        u16 num = 1 + RAND_BELOW(ARITH_MAX);
+
+        *(u16 *)(out_buf + byte_idx) =
+            SWAP16(SWAP16(*(u16 *)(out_buf + byte_idx)) - num);
+
+      }
+
+      break;
+
+    }
+
+    case 8: {
+
+      /* Randomly add to word, random endian. */
+
+      if (end - begin < 2) break;
+
+      s32 byte_idx = (RAND_BELOW(end - begin) + begin);
+
+      if (byte_idx >= end - 1) break;
+
+      if (RAND_BELOW(2)) {
+
+        *(u16 *)(out_buf + byte_idx) += 1 + RAND_BELOW(ARITH_MAX);
+
+      } else {
+
+        u16 num = 1 + RAND_BELOW(ARITH_MAX);
+
+        *(u16 *)(out_buf + byte_idx) =
+            SWAP16(SWAP16(*(u16 *)(out_buf + byte_idx)) + num);
+
+      }
+
+      break;
+
+    }
+
+    case 9: {
+
+      /* Randomly subtract from dword, random endian. */
+
+      if (end - begin < 4) break;
+
+      s32 byte_idx = (RAND_BELOW(end - begin) + begin);
+
+      if (byte_idx >= end - 3) break;
+
+      if (RAND_BELOW(2)) {
+
+        *(u32 *)(out_buf + byte_idx) -= 1 + RAND_BELOW(ARITH_MAX);
+
+      } else {
+
+        u32 num = 1 + RAND_BELOW(ARITH_MAX);
+
+        *(u32 *)(out_buf + byte_idx) =
+            SWAP32(SWAP32(*(u32 *)(out_buf + byte_idx)) - num);
+
+      }
+
+      break;
+
+    }
+
+    case 10: {
+
+      /* Randomly add to dword, random endian. */
+
+      if (end - begin < 4) break;
+
+      s32 byte_idx = (RAND_BELOW(end - begin) + begin);
+
+      if (byte_idx >= end - 3) break;
+
+      if (RAND_BELOW(2)) {
+
+        *(u32 *)(out_buf + byte_idx) += 1 + RAND_BELOW(ARITH_MAX);
+
+      } else {
+
+        u32 num = 1 + RAND_BELOW(ARITH_MAX);
+
+        *(u32 *)(out_buf + byte_idx) =
+            SWAP32(SWAP32(*(u32 *)(out_buf + byte_idx)) + num);
+
+      }
+
+      break;
+
+    }
+
+    case 11: {
+
+      /* Just set a random byte to a random value. Because,
+         why not. We use XOR with 1-255 to eliminate the
+         possibility of a no-op. */
+
+      out_buf[(RAND_BELOW(end - begin) + begin)] ^= 1 + RAND_BELOW(255);
+
+      break;
+
+    }
+
+  }
+
+}
+
+/* This function calculates the next power of 2 greater or equal its argument.
+ @return The rounded up power of 2, or 0 if in is 0 or the result overflows.
+*/
+static inline size_t next_pow2(size_t in) {
+
+  /* 0 has no sensible answer, and anything above the highest power of 2
+     that fits into a size_t would wrap around: report both as 0. */
+  if (in == 0 || in > ((size_t)-1 >> 1) + 1) return 0;
+  size_t out = in - 1, shift;
+  /* Smear the highest set bit into every lower bit. Looping up to the bit
+     width of size_t also covers 64 bit (fixed shifts 1..16 only did 32). */
+  for (shift = 1; shift < sizeof(out) * 8; shift <<= 1)
+    out |= out >> shift;
+  return out + 1;
+
+}
+
+/* This function makes sure *size is >= size_needed after the call.
+ It will realloc *buf otherwise (always if size_needed is 0).
+ *size will grow exponentially (at least INITIAL_GROWTH_SIZE) as per:
+ https://blog.mozilla.org/nnethercote/2014/11/04/please-grow-your-buffers-exponentially/
+ If realloc fails, the old *buf is freed, *buf becomes NULL and *size 0.
+ @return For convenience, this function returns *buf (NULL on failure).
+ */
+static inline void *maybe_grow(void **buf, size_t *size, size_t size_needed) {
+
+  /* No need to realloc */
+  if (likely(size_needed && *size >= size_needed)) return *buf;
+
+  /* No initial size was set */
+  if (size_needed < INITIAL_GROWTH_SIZE) size_needed = INITIAL_GROWTH_SIZE;
+
+  /* grow exponentially; next_pow2() returns 0 on overflow -> use exact size */
+  size_t next_size = next_pow2(size_needed);
+  if (!next_size) { next_size = size_needed; }
+
+  /* realloc() keeps the old block alive when it fails. Free it ourselves,
+     the caller only sees *buf == NULL and could never release it. */
+  void *new_buf = realloc(*buf, next_size);
+  if (!new_buf) free(*buf);
+  *buf = new_buf;
+  *size = new_buf ? next_size : 0;
+  return *buf;
+
+}
+
+/* Exchanges the two buffer pointers together with their recorded sizes */
+static inline void swap_bufs(void **buf1, size_t *size1, void **buf2,
+                             size_t *size2) {
+
+  void * held_buf = *buf2;
+  size_t held_size = *size2;
+  *buf2 = *buf1;
+  *size2 = *size1;
+  *buf1 = held_buf;
+  *size1 = held_size;
+
+}
+
+#undef INITIAL_GROWTH_SIZE
+
+#endif
+
diff --git a/examples/custom_mutators/example.c b/examples/custom_mutators/example.c
index 127f971e..c8200b26 100644
--- a/examples/custom_mutators/example.c
+++ b/examples/custom_mutators/example.c
@@ -3,11 +3,18 @@
   Written by Khaled Yakdan <yakdan@code-intelligence.de>
              Andrea Fioraldi <andreafioraldi@gmail.com>
              Shengtuo Hu <h1994st@gmail.com>
+             Dominik Maier <mail@dmnk.co>
 */
 
+// You need to use -I /path/to/AFLplusplus/include
+#include "custom_mutator_helpers.h"
+
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
+#include <stdio.h>
+
+#define DATA_SIZE (100)
 
 static const char *commands[] = {
 
@@ -17,11 +24,50 @@ static const char *commands[] = {
 
 };
 
-static size_t data_size = 100;
+typedef struct my_mutator {
+
+  afl_t *afl;  // handle given to afl_custom_init(); stored, not used here
+
+  // any additional data here!
+  size_t trim_size_current;  // size of the test case copied into trim_buf
+  int    trimmming_steps;    // step count announced by afl_custom_init_trim
+  int    cur_step;           // successful trim steps so far
+
+  // Reused buffers (BUF_VAR declares a <name>_buf and a <name>_size member):
+  BUF_VAR(u8, fuzz);
+  BUF_VAR(u8, data);
+  BUF_VAR(u8, havoc);
+  BUF_VAR(u8, trim);
+  BUF_VAR(u8, pre_save);
+
+} my_mutator_t;
+
+/**
+ * Initialize this custom mutator
+ *
+ * @param[in] afl a pointer to the internal state object. Can be ignored for
+ * now.
+ * @param[in] seed A seed for this mutator - the same seed should always mutate
+ * in the same way.
+ * @return Pointer to the data object this custom mutator instance should use.
+ *         There may be multiple instances of this mutator in one afl-fuzz run!
+ *         Return NULL on error.
+ */
+my_mutator_t *afl_custom_init(afl_t *afl, unsigned int seed) {
 
-void afl_custom_init(unsigned int seed) {
+  srand(seed);  // needed also by surgical_havoc_mutate()
 
-  srand(seed);
+  my_mutator_t *data = calloc(1, sizeof(my_mutator_t));
+  if (!data) {
+
+    perror("afl_custom_init alloc");
+    return NULL;
+
+  }
+
+  data->afl = afl;
+
+  return data;
 
 }
 
@@ -30,36 +76,49 @@ void afl_custom_init(unsigned int seed) {
  *
  * (Optional for now. Required in the future)
  *
+ * @param[in] data pointer returned in afl_custom_init for this fuzz case
  * @param[in] buf Pointer to input data to be mutated
  * @param[in] buf_size Size of input data
+ * @param[out] out_buf the buffer we will work on. we can reuse *buf. NULL on
+ * error.
  * @param[in] add_buf Buffer containing the additional test case
  * @param[in] add_buf_size Size of the additional test case
  * @param[in] max_size Maximum size of the mutated output. The mutation must not
  *     produce data larger than max_size.
  * @return Size of the mutated output.
  */
-size_t afl_custom_fuzz(uint8_t **buf, size_t buf_size, uint8_t *add_buf,
+size_t afl_custom_fuzz(my_mutator_t *data, uint8_t *buf, size_t buf_size,
+                       u8 **out_buf, uint8_t *add_buf,
                        size_t add_buf_size,  // add_buf can be NULL
                        size_t max_size) {
 
   // Make sure that the packet size does not exceed the maximum size expected by
   // the fuzzer
-  size_t mutated_size = data_size <= max_size ? data_size : max_size;
+  size_t mutated_size = DATA_SIZE <= max_size ? DATA_SIZE : max_size;
 
-  if (mutated_size > buf_size) *buf = realloc(*buf, mutated_size);
+  // maybe_grow is optimized to be quick for reused buffers.
+  u8 *mutated_out = maybe_grow(BUF_PARAMS(data, fuzz), mutated_size);
+  if (!mutated_out) {
 
-  uint8_t *mutated_out = *buf;
+    *out_buf = NULL;
+    perror("custom mutator allocation (maybe_grow)");
+    return 0;            /* afl-fuzz will very likely error out after this. */
+
+  }
 
   // Randomly select a command string to add as a header to the packet
   memcpy(mutated_out, commands[rand() % 3], 3);
 
   // Mutate the payload of the packet
-  for (int i = 3; i < mutated_size; i++) {
+  int i;
+  for (i = 0; i < 8; ++i) {
 
-    mutated_out[i] = (mutated_out[i] + rand() % 10) & 0xff;
+    // Randomly perform one of the (no len modification) havoc mutations
+    surgical_havoc_mutate(mutated_out, 3, mutated_size);
 
   }
 
+  *out_buf = mutated_out;
   return mutated_size;
 
 }
@@ -71,31 +130,39 @@ size_t afl_custom_fuzz(uint8_t **buf, size_t buf_size, uint8_t *add_buf,
  * (Optional) If this functionality is not needed, simply don't define this
  * function.
  *
+ * @param[in] data pointer returned in afl_custom_init for this fuzz case
  * @param[in] buf Buffer containing the test case to be executed
  * @param[in] buf_size Size of the test case
  * @param[out] out_buf Pointer to the buffer containing the test case after
- *     processing. External library should allocate memory for out_buf. AFL++
- *     will release the memory after saving the test case.
- * @return Size of the output buffer after processing
+ *     processing. External library should allocate memory for out_buf.
+ *     The buf pointer may be reused (up to the given buf_size).
+ * @return Size of the output buffer after processing or the needed amount.
+ *     A return of 0 indicates an error.
  */
-size_t afl_custom_pre_save(uint8_t *buf, size_t buf_size, uint8_t **out_buf) {
+size_t afl_custom_pre_save(my_mutator_t *data, uint8_t *buf, size_t buf_size,
+                           uint8_t **out_buf) {
 
-  size_t out_buf_size;
+  uint8_t *pre_save_buf = maybe_grow(BUF_PARAMS(data, pre_save), buf_size + 5);
+  if (!pre_save_buf) {
 
-  out_buf_size = buf_size;
+    perror("custom mutator realloc failed.");
+    *out_buf = NULL;
+    return 0;
 
-  // External mutator should allocate memory for `out_buf`
-  *out_buf = malloc(out_buf_size);
-  memcpy(*out_buf, buf, out_buf_size);
+  }
 
-  return out_buf_size;
+  memcpy(pre_save_buf + 5, buf, buf_size);
+  pre_save_buf[0] = 'A';
+  pre_save_buf[1] = 'F';
+  pre_save_buf[2] = 'L';
+  pre_save_buf[3] = '+';
+  pre_save_buf[4] = '+';
 
-}
+  *out_buf = pre_save_buf;
 
-static uint8_t *trim_buf;
-static size_t   trim_buf_size;
-static int      trimmming_steps;
-static int      cur_step;
+  return buf_size + 5;
+
+}
 
 /**
  * This method is called at the start of each trimming operation and receives
@@ -113,27 +180,39 @@ static int      cur_step;
  *
  * (Optional)
  *
+ * @param data pointer returned in afl_custom_init for this fuzz case
  * @param buf Buffer containing the test case
  * @param buf_size Size of the test case
- * @return The amount of possible iteration steps to trim the input
+ * @return The amount of possible iteration steps to trim the input.
+ *        negative on error.
  */
-int afl_custom_init_trim(uint8_t *buf, size_t buf_size) {
+int32_t afl_custom_init_trim(my_mutator_t *data, uint8_t *buf,
+                             size_t buf_size) {
 
   // We simply trim once
-  trimmming_steps = 1;
+  data->trimmming_steps = 1;
+
+  data->cur_step = 0;
+
+  if (!maybe_grow(BUF_PARAMS(data, trim), buf_size)) {
+
+    perror("init_trim grow");
+    return -1;
 
-  cur_step = 0;
-  trim_buf = buf;
-  trim_buf_size = buf_size;
+  }
+
+  memcpy(data->trim_buf, buf, buf_size);
+
+  data->trim_size_current = buf_size;
 
-  return trimmming_steps;
+  return data->trimmming_steps;
 
 }
 
 /**
  * This method is called for each trimming operation. It doesn't have any
  * arguments because we already have the initial buffer from init_trim and we
- * can memorize the current state in global variables. This can also save
+ * can memorize the current state in *data. This can also save
  * reparsing steps for each iteration. It should return the trimmed input
  * buffer, where the returned data must not exceed the initial input data in
  * length. Returning anything that is larger than the original data (passed
@@ -141,19 +220,20 @@ int afl_custom_init_trim(uint8_t *buf, size_t buf_size) {
  *
  * (Optional)
  *
+ * @param[in] data pointer returned in afl_custom_init for this fuzz case
  * @param[out] out_buf Pointer to the buffer containing the trimmed test case.
- *     External library should allocate memory for out_buf. AFL++ will release
- *     the memory after saving the test case.
- * @param[out] out_buf_size Pointer to the size of the trimmed test case
+ *     External library should allocate memory for out_buf.
+ *     AFL++ will not release the memory after saving the test case.
+ *     Keep a ref in *data.
+ *     *out_buf = NULL is treated as error.
+ * @return The size of the trimmed test case
  */
-void afl_custom_trim(uint8_t **out_buf, size_t *out_buf_size) {
+size_t afl_custom_trim(my_mutator_t *data, uint8_t **out_buf) {
 
-  *out_buf_size = trim_buf_size - 1;
+  *out_buf = data->trim_buf;
 
-  // External mutator should allocate memory for `out_buf`
-  *out_buf = malloc(*out_buf_size);
   // Remove the last byte of the trimming input
-  memcpy(*out_buf, trim_buf, *out_buf_size);
+  return data->trim_size_current - 1;
 
 }
 
@@ -164,20 +244,21 @@ void afl_custom_trim(uint8_t **out_buf, size_t *out_buf_size) {
  *
  * (Optional)
  *
+ * @param[in] data pointer returned in afl_custom_init for this fuzz case
  * @param success Indicates if the last trim operation was successful.
  * @return The next trim iteration index (from 0 to the maximum amount of
- *     steps returned in init_trim)
+ *     steps returned in init_trim). negative ret on failure.
  */
-int afl_custom_post_trim(int success) {
+int32_t afl_custom_post_trim(my_mutator_t *data, int success) {
 
   if (success) {
 
-    ++cur_step;
-    return cur_step;
+    ++data->cur_step;
+    return data->cur_step;
 
   }
 
-  return trimmming_steps;
+  return data->trimmming_steps;
 
 }
 
@@ -187,26 +268,41 @@ int afl_custom_post_trim(int success) {
  *
  * (Optional)
  *
- * @param[inout] buf Pointer to the input data to be mutated and the mutated
+ * @param[in] data pointer returned in afl_custom_init for this fuzz case
+ * @param[in] buf Pointer to the input data to be mutated and the mutated
  *     output
  * @param[in] buf_size Size of input data
+ * @param[out] out_buf The output buffer. buf can be reused, if the content
+ * fits. *out_buf = NULL is treated as error.
  * @param[in] max_size Maximum size of the mutated output. The mutation must
  *     not produce data larger than max_size.
  * @return Size of the mutated output.
  */
-size_t afl_custom_havoc_mutation(uint8_t **buf, size_t buf_size,
-                                 size_t max_size) {
+size_t afl_custom_havoc_mutation(my_mutator_t *data, u8 *buf, size_t buf_size,
+                                 u8 **out_buf, size_t max_size) {
 
   if (buf_size == 0) {
 
-    *buf = realloc(*buf, 1);
-    **buf = rand() % 256;
+    *out_buf = maybe_grow(BUF_PARAMS(data, havoc), 1);
+    if (!*out_buf) {
+
+      perror("custom havoc: maybe_grow");
+      return 0;
+
+    }
+
+    **out_buf = rand() % 256;
     buf_size = 1;
 
+  } else {
+
+    // We reuse buf here. It's legal and faster.
+    *out_buf = buf;
+
   }
 
   size_t victim = rand() % buf_size;
-  (*buf)[victim] += rand() % 10;
+  (*out_buf)[victim] += rand() % 10;
 
   return buf_size;
 
@@ -218,9 +314,10 @@ size_t afl_custom_havoc_mutation(uint8_t **buf, size_t buf_size,
  *
  * (Optional)
  *
+ * @param[in] data pointer returned in afl_custom_init for this fuzz case
  * @return The probability (0-100).
  */
-uint8_t afl_custom_havoc_mutation_probability(void) {
+uint8_t afl_custom_havoc_mutation_probability(my_mutator_t *data) {
 
   return 5;  // 5 %
 
@@ -231,11 +328,12 @@ uint8_t afl_custom_havoc_mutation_probability(void) {
  *
  * (Optional)
  *
+ * @param[in] data pointer returned in afl_custom_init for this fuzz case
  * @param filename File name of the test case in the queue entry
  * @return Return True(1) if the fuzzer will fuzz the queue entry, and
  *     False(0) otherwise.
  */
-uint8_t afl_custom_queue_get(const uint8_t *filename) {
+uint8_t afl_custom_queue_get(my_mutator_t *data, const uint8_t *filename) {
 
   return 1;
 
@@ -247,13 +345,31 @@ uint8_t afl_custom_queue_get(const uint8_t *filename) {
  *
  * (Optional)
  *
+ * @param data pointer returned in afl_custom_init for this fuzz case
  * @param filename_new_queue File name of the new queue entry
  * @param filename_orig_queue File name of the original queue entry
  */
-void afl_custom_queue_new_entry(const uint8_t *filename_new_queue,
+void afl_custom_queue_new_entry(my_mutator_t * data,
+                                const uint8_t *filename_new_queue,
                                 const uint8_t *filename_orig_queue) {
 
   /* Additional analysis on the original or new test case */
 
 }
 
+/**
+ * Tear down a mutator instance: release its reused buffers and the state
+ *
+ * @param data The state object that afl_custom_init returned
+ */
+void afl_custom_deinit(my_mutator_t *data) {
+
+  free(data->fuzz_buf);
+  free(data->data_buf);
+  free(data->havoc_buf);
+  free(data->trim_buf);
+  free(data->pre_save_buf);
+  free(data);
+
+}
+
diff --git a/examples/custom_mutators/example.py b/examples/custom_mutators/example.py
index 6bacfa05..9e95eed6 100644
--- a/examples/custom_mutators/example.py
+++ b/examples/custom_mutators/example.py
@@ -17,6 +17,13 @@ file, You can obtain one at http://mozilla.org/MPL/2.0/.
 import random
 
 
+COMMANDS = [
+    b"GET",
+    b"PUT",
+    b"DEL",
+]
+
+
 def init(seed):
     '''
     Called once when AFLFuzz starts up. Used to seed our RNG.
@@ -27,6 +34,10 @@ def init(seed):
     random.seed(seed)
 
 
+def deinit():
+    pass
+
+
 def fuzz(buf, add_buf, max_size):
     '''
     Called per fuzzing iteration.
@@ -44,8 +55,9 @@ def fuzz(buf, add_buf, max_size):
     @rtype: bytearray
     @return: A new bytearray containing the mutated data
     '''
-    ret = bytearray(buf)
-    # Do something interesting with ret
+    ret = bytearray(100)
+
+    ret[:3] = random.choice(COMMANDS)
 
     return ret
 
@@ -164,11 +176,10 @@ def fuzz(buf, add_buf, max_size):
 #     '''
 #     Called after adding a new test case to the queue
 #
-#     @type filename_new_queue: str 
+#     @type filename_new_queue: str
 #     @param filename_new_queue: File name of the new queue entry
 #
 #     @type filename_orig_queue: str
 #     @param filename_orig_queue: File name of the original queue entry
 #     '''
 #     pass
-
diff --git a/examples/post_library/post_library.so.c b/examples/post_library/post_library.so.c
index 5d2685cd..0aa780cb 100644
--- a/examples/post_library/post_library.so.c
+++ b/examples/post_library/post_library.so.c
@@ -3,6 +3,7 @@
    --------------------------------------------------
 
    Originally written by Michal Zalewski
+   Edited by Dominik Maier, 2020
 
    Copyright 2015 Google Inc. All rights reserved.
 
@@ -41,22 +42,23 @@
    AFL will call the afl_postprocess() function for every mutated output buffer.
    From there, you have three choices:
 
-   1) If you don't want to modify the test case, simply return the original
-      buffer pointer ('in_buf').
+   1) If you don't want to modify the test case, simply set `*out_buf = in_buf`
+      and return the original `len`.
 
    2) If you want to skip this test case altogether and have AFL generate a
-      new one, return NULL. Use this sparingly - it's faster than running
-      the target program with patently useless inputs, but still wastes CPU
-      time.
+      new one, return 0 or set `*out_buf = NULL`.
+      Use this sparingly - it's faster than running the target program
+      with patently useless inputs, but still wastes CPU time.
 
    3) If you want to modify the test case, allocate an appropriately-sized
       buffer, move the data into that buffer, make the necessary changes, and
-      then return the new pointer. You can update *len if necessary, too.
+      then store the new pointer in `*out_buf`. Return the appropriate `len`
+      afterwards.
 
       Note that the buffer will *not* be freed for you. To avoid memory leaks,
       you need to free it or reuse it on subsequent calls (as shown below).
 
-      *** DO NOT MODIFY THE ORIGINAL 'in_buf' BUFFER. ***
+      *** Feel free to reuse the original 'in_buf' BUFFER and return it. ***
 
     Aight. The example below shows a simple postprocessor that tries to make
     sure that all input files start with "GIF89a".
@@ -74,47 +76,84 @@
 
 #define HEADER "GIF89a"
 
-/* The actual postprocessor routine called by afl-fuzz: */
+typedef struct post_state {
+
+  unsigned char *buf;
+  size_t         size;
+
+} post_state_t;
+
+void *afl_postprocess_init(void *afl) {
+
+  post_state_t *state = malloc(sizeof(post_state_t));
+  if (!state) {
+
+    perror("malloc");
+    return NULL;
+
+  }
+  /* Scratch buffer reused across calls; release state if it can't be had. */
+  state->size = 4096;                /* bytes initially allocated for buf */
+  state->buf = calloc(sizeof(unsigned char), state->size);
+  if (!state->buf) { free(state); return NULL; }   /* don't leak state */
+  return state;
+
+}
 
-const unsigned char *afl_postprocess(const unsigned char *in_buf,
-                                     unsigned int *       len) {
+/* The actual postprocessor routine called by afl-fuzz: */
 
-  static unsigned char *saved_buf;
-  unsigned char *       new_buf;
+size_t afl_postprocess(post_state_t *data, unsigned char *in_buf,
+                       unsigned int len, unsigned char **out_buf) {
 
   /* Skip execution altogether for buffers shorter than 6 bytes (just to
-     show how it's done). We can trust *len to be sane. */
+     show how it's done). We can trust len to be sane. */
 
-  if (*len < strlen(HEADER)) return NULL;
+  if (len < strlen(HEADER)) return 0;
 
   /* Do nothing for buffers that already start with the expected header. */
 
-  if (!memcmp(in_buf, HEADER, strlen(HEADER))) return in_buf;
+  if (!memcmp(in_buf, HEADER, strlen(HEADER))) {
+
+    *out_buf = in_buf;
+    return len;
+
+  }
 
   /* Allocate memory for new buffer, reusing previous allocation if
      possible. */
 
-  new_buf = realloc(saved_buf, *len);
+  *out_buf = realloc(data->buf, len);
 
   /* If we're out of memory, the most graceful thing to do is to return the
      original buffer and give up on modifying it. Let AFL handle OOM on its
      own later on. */
 
-  if (!new_buf) return in_buf;
-  saved_buf = new_buf;
+  if (!*out_buf) {
+    *out_buf = in_buf;
+    return len;
+  }
+  data->buf = *out_buf; /* realloc may have moved the block; keep the live ptr */
+  data->size = len;
 
   /* Copy the original data to the new location. */
 
-  memcpy(new_buf, in_buf, *len);
+  memcpy(*out_buf, in_buf, len);
 
   /* Insert the new header. */
 
-  memcpy(new_buf, HEADER, strlen(HEADER));
+  memcpy(*out_buf, HEADER, strlen(HEADER));
+
+  /* Return the new len. It hasn't changed, so it's just len. */
+
+  return len;
+
+}
 
-  /* Return modified buffer. No need to update *len in this particular case,
-     as we're not changing it. */
+/* Gets called afterwards */
+void afl_postprocess_deinit(post_state_t *data) {
 
-  return new_buf;
+  free(data->buf);
+  free(data);
 
 }
 
diff --git a/examples/post_library/post_library_png.so.c b/examples/post_library/post_library_png.so.c
index 60ab318f..41ba4f5e 100644
--- a/examples/post_library/post_library_png.so.c
+++ b/examples/post_library/post_library_png.so.c
@@ -5,6 +5,7 @@
    Originally written by Michal Zalewski
 
    Copyright 2015 Google Inc. All rights reserved.
+   Adapted to the new API, 2020 by Dominik Maier
 
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
@@ -35,11 +36,32 @@
 
 #define UP4K(_i) ((((_i) >> 12) + 1) << 12)
 
-const unsigned char *afl_postprocess(const unsigned char *in_buf,
-                                     unsigned int *       len) {
+typedef struct post_state {
 
-  static unsigned char *saved_buf;
-  static unsigned int   saved_len;
+  unsigned char *buf;
+  size_t         size;
+
+} post_state_t;
+
+void *afl_postprocess_init(void *afl) {
+
+  post_state_t *state = malloc(sizeof(post_state_t));
+  if (!state) {
+
+    perror("malloc");
+    return NULL;
+
+  }
+  /* Scratch buffer reused across calls; release state if it can't be had. */
+  state->size = 4096;       /* must be set: afl_postprocess compares len to it */
+  state->buf = calloc(sizeof(unsigned char), state->size);
+  if (!state->buf) { free(state); return NULL; }   /* don't leak state */
+  return state;
+
+}
+
+size_t afl_postprocess(post_state_t *data, const unsigned char *in_buf,
+                       unsigned int len, const unsigned char **out_buf) {
 
   unsigned char *new_buf = (unsigned char *)in_buf;
   unsigned int   pos = 8;
@@ -47,12 +69,17 @@ const unsigned char *afl_postprocess(const unsigned char *in_buf,
   /* Don't do anything if there's not enough room for the PNG header
      (8 bytes). */
 
-  if (*len < 8) return in_buf;
+  if (len < 8) {
+
+    *out_buf = in_buf;
+    return len;
+
+  }
 
   /* Minimum size of a zero-length PNG chunk is 12 bytes; if we
      don't have that, we can bail out. */
 
-  while (pos + 12 <= *len) {
+  while (pos + 12 <= len) {
 
     unsigned int chunk_len, real_cksum, file_cksum;
 
@@ -62,7 +89,7 @@ const unsigned char *afl_postprocess(const unsigned char *in_buf,
 
     /* Bail out if chunk size is too big or goes past EOF. */
 
-    if (chunk_len > 1024 * 1024 || pos + 12 + chunk_len > *len) break;
+    if (chunk_len > 1024 * 1024 || pos + 12 + chunk_len > len) break;
 
     /* Chunk checksum is calculated for chunk ID (dword) and the actual
        payload. */
@@ -82,17 +109,23 @@ const unsigned char *afl_postprocess(const unsigned char *in_buf,
 
       if (new_buf == in_buf) {
 
-        if (*len <= saved_len) {
+        if (len <= data->size) {
 
-          new_buf = saved_buf;
+          new_buf = data->buf;
 
         } else {
 
-          new_buf = realloc(saved_buf, UP4K(*len));
-          if (!new_buf) return in_buf;
-          saved_buf = new_buf;
-          saved_len = UP4K(*len);
-          memcpy(new_buf, in_buf, *len);
+          new_buf = realloc(data->buf, UP4K(len));
+          if (!new_buf) {
+
+            *out_buf = in_buf;
+            return len;
+
+          }
+
+          data->buf = new_buf;
+          data->size = UP4K(len);
+          memcpy(new_buf, in_buf, len);
 
         }
 
@@ -108,7 +141,16 @@ const unsigned char *afl_postprocess(const unsigned char *in_buf,
 
   }
 
-  return new_buf;
+  *out_buf = new_buf;
+  return len;
+
+}
+
+/* Gets called afterwards */
+void afl_postprocess_deinit(post_state_t *data) {
+
+  free(data->buf);
+  free(data);
 
 }
 
diff --git a/examples/socket_fuzzing/Makefile b/examples/socket_fuzzing/Makefile
index ad921664..2fdc58ee 100644
--- a/examples/socket_fuzzing/Makefile
+++ b/examples/socket_fuzzing/Makefile
@@ -18,18 +18,29 @@ HELPER_PATH = $(PREFIX)/lib/afl
 CFLAGS = -fPIC -Wall -Wextra
 LDFLAGS = -shared
 
-ifneq "$(filter Linux GNU%,$(shell uname))" ""
-  LDFLAGS  += -ldl
-endif
+UNAME_SAYS_LINUX=$(shell uname | grep -E '^Linux|^GNU' >/dev/null; echo $$?)
+UNAME_SAYS_LINUX:sh=uname | grep -E '^Linux|^GNU' >/dev/null; echo $$?
+
+_LDFLAGS_ADD=$(UNAME_SAYS_LINUX:1=)
+LDFLAGS_ADD=$(_LDFLAGS_ADD:0=-ldl)
+LDFLAGS  += $(LDFLAGS_ADD)
 
 # on gcc for arm there is no -m32, but -mbe32
 M32FLAG = -m32
 M64FLAG = -m64
-ifeq "$(findstring clang, $(shell $(CC) --version 2>/dev/null))" ""
- ifneq (,$(findstring arm, "$(shell $(CC) -v 2>&1 >/dev/null)"))
-  M32FLAG = -mbe32
- endif
-endif
+# Each probe must expand to grep's exit status only (0 = match): discard its output.
+CC_IS_GCC=$(shell $(CC) --version 2>/dev/null | grep gcc >/dev/null; echo $$?)
+CC_IS_ARMCOMPILER=$(shell $(CC) -v 2>&1 >/dev/null | grep arm >/dev/null; echo $$?)
+
+_M32FLAG=$(CC_IS_GCC)$(CC_IS_ARMCOMPILER)
+__M32FLAG=$(_M32FLAG:00=-mbe32)
+___M32FLAG=$(__M32FLAG:$(CC_IS_GCC)$(CC_IS_ARMCOMPILER)=-m32)
+M32FLAG=$(___M32FLAG)
+#ifeq "$(findstring clang, $(shell $(CC) --version 2>/dev/null))" ""
+# ifneq (,$(findstring arm, "$(shell $(CC) -v 2>&1 >/dev/null)"))
+#  M32FLAG = -mbe32
+# endif
+#endif
 
 all: socketfuzz32.so socketfuzz64.so
 
diff --git a/gcc_plugin/GNUmakefile b/gcc_plugin/GNUmakefile
new file mode 100644
index 00000000..9a404966
--- /dev/null
+++ b/gcc_plugin/GNUmakefile
@@ -0,0 +1,160 @@
+#
+# american fuzzy lop++ - GCC plugin instrumentation
+# -----------------------------------------------
+#
+# Written by Austin Seipp <aseipp@pobox.com> and
+#            Laszlo Szekeres <lszekeres@google.com> and
+#            Michal Zalewski and
+#            Heiko Eißfeldt  <heiko@hexco.de>
+#
+# GCC integration design is based on the LLVM design, which comes
+# from Laszlo Szekeres.
+#
+# Copyright 2015 Google Inc. All rights reserved.
+# Copyright 2019-2020 AFLplusplus Project. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+
+PREFIX      ?= /usr/local
+HELPER_PATH ?= $(PREFIX)/lib/afl
+BIN_PATH    ?= $(PREFIX)/bin
+DOC_PATH    ?= $(PREFIX)/share/doc/afl
+MAN_PATH    ?= $(PREFIX)/man/man8
+
+VERSION     = $(shell grep '^$(HASH)define VERSION ' ../config.h | cut -d '"' -f2)
+
+# Optimisation defaults stay overridable; the mandatory flags are appended to them.
+CFLAGS      ?= -O3 -g -funroll-loops -D_FORTIFY_SOURCE=2
+CFLAGS      += -Wall -I../include -Wno-pointer-sign -Wno-unused-function \
+               -DAFL_PATH=\"$(HELPER_PATH)\" -DBIN_PATH=\"$(BIN_PATH)\" \
+               -DGCC_VERSION=\"$(GCCVER)\" -DGCC_BINDIR=\"$(GCCBINDIR)\"
+
+CXXFLAGS    ?= -O3 -g -funroll-loops -D_FORTIFY_SOURCE=2
+CXXEFLAGS   := $(CXXFLAGS) -Wall
+
+CC          ?= gcc
+CXX         ?= g++
+
+ifeq "clang" "$(CC)"
+        CC  = gcc
+        CXX = g++
+endif
+
+ifeq "clang++" "$(CXX)"
+        CC  = gcc
+        CXX = g++
+endif
+
+PLUGIN_FLAGS = -fPIC -fno-rtti -I"$(shell $(CC) -print-file-name=plugin)/include"
+HASH=\#
+
+GCCVER    = $(shell $(CC) --version 2>/dev/null | awk 'NR == 1 {print $$NF}')
+GCCBINDIR = $(shell dirname `command -v $(CC)` 2>/dev/null )
+
+ifeq "$(shell echo '$(HASH)include <sys/ipc.h>@$(HASH)include <sys/shm.h>@int main() { int _id = shmget(IPC_PRIVATE, 65536, IPC_CREAT | IPC_EXCL | 0600); shmctl(_id, IPC_RMID, 0); return 0;}' | tr @ '\n' | $(CC) -x c - -o .test2 2>/dev/null && echo 1 || echo 0 ; rm -f .test2 )" "1"
+	SHMAT_OK=1
+else
+	SHMAT_OK=0
+	CFLAGS+=-DUSEMMAP=1
+	LDFLAGS += -lrt
+endif
+
+ifeq "$(TEST_MMAP)" "1"
+	SHMAT_OK=0
+	CFLAGS+=-DUSEMMAP=1
+	LDFLAGS += -lrt
+endif
+
+PROGS        = ../afl-gcc-fast ../afl-gcc-pass.so ../afl-gcc-rt.o
+
+
+all: test_shm test_deps $(PROGS) afl-gcc-fast.8 test_build all_done
+
+ifeq "$(SHMAT_OK)" "1"
+
+test_shm:
+	@echo "[+] shmat seems to be working."
+	@rm -f .test2
+
+else
+
+test_shm:
+	@echo "[-] shmat seems not to be working, switching to mmap implementation"
+
+endif
+
+
+test_deps:
+	@echo "[*] Checking for working '$(CC)'..."
+	@type $(CC) >/dev/null 2>&1 || ( echo "[-] Oops, can't find '$(CC)'. Make sure that it's in your \$$PATH (or set \$$CC and \$$CXX)."; exit 1 )
+#	@echo "[*] Checking for gcc for plugin support..."
+#	@$(CC) -v 2>&1 | grep -q -- --enable-plugin || ( echo "[-] Oops, this gcc has not been configured with plugin support."; exit 1 )
+	@echo "[*] Checking for gcc plugin development header files..."
+	@test -d `$(CC) -print-file-name=plugin`/include || ( echo "[-] Oops, can't find gcc header files. Be sure to install 'gcc-X-plugin-dev'."; exit 1 )
+	@echo "[*] Checking for '../afl-showmap'..."
+	@test -f ../afl-showmap || ( echo "[-] Oops, can't find '../afl-showmap'. Be sure to compile AFL first."; exit 1 )
+	@echo "[+] All set and ready to build."
+
+afl-common.o: ../src/afl-common.c
+	$(CC) $(CFLAGS) -c $< -o $@ $(LDFLAGS)
+
+../afl-gcc-fast: afl-gcc-fast.c afl-common.o | test_deps
+	$(CC) -DAFL_GCC_CC=\"$(CC)\" -DAFL_GCC_CXX=\"$(CXX)\" $(CFLAGS) $< afl-common.o -o $@ $(LDFLAGS)
+	ln -sf afl-gcc-fast ../afl-g++-fast
+
+../afl-gcc-pass.so: afl-gcc-pass.so.cc | test_deps
+	$(CXX) $(CXXEFLAGS) $(PLUGIN_FLAGS) -shared $< -o $@
+
+../afl-gcc-rt.o: afl-gcc-rt.o.c | test_deps
+	$(CC) $(CFLAGS) -fPIC -c $< -o $@
+
+test_build: $(PROGS)
+	@echo "[*] Testing the CC wrapper and instrumentation output..."
+	unset AFL_USE_ASAN AFL_USE_MSAN; AFL_QUIET=1 AFL_INST_RATIO=100 AFL_PATH=. AFL_CC=$(CC) ../afl-gcc-fast $(CFLAGS) ../test-instr.c -o test-instr $(LDFLAGS)
+#	unset AFL_USE_ASAN AFL_USE_MSAN;             AFL_INST_RATIO=100 AFL_PATH=. AFL_CC=$(CC) ../afl-gcc-fast $(CFLAGS) ../test-instr.c -o test-instr $(LDFLAGS)
+	ASAN_OPTIONS=detect_leaks=0 ../afl-showmap -m none -q -o .test-instr0 ./test-instr </dev/null
+	echo 1 | ASAN_OPTIONS=detect_leaks=0 ../afl-showmap -m none -q -o .test-instr1 ./test-instr
+	@rm -f test-instr
+	@cmp -s .test-instr0 .test-instr1; DR="$$?"; rm -f .test-instr0 .test-instr1; if [ "$$DR" = "0" ]; then echo; echo "Oops, the instrumentation does not seem to be behaving correctly!"; echo; echo "Please post to https://github.com/AFLplusplus/AFLplusplus/issues to troubleshoot the issue."; echo; exit 1; fi
+	@echo "[+] All right, the instrumentation seems to be working!"
+
+all_done: test_build
+	@echo "[+] All done! You can now use '../afl-gcc-fast' to compile programs."
+
+.NOTPARALLEL: clean
+
+vpath  % ..
+%.8: %
+	@echo .TH $* 8 `date "+%Y-%m-%d"` "afl++" > ../$@
+	@echo .SH NAME >> ../$@
+	@echo .B $* >> ../$@
+	@echo >> ../$@
+	@echo .SH SYNOPSIS >> ../$@
+	@../$* -h 2>&1 | head -n 3 | tail -n 1 | sed 's/^\.\///' >> ../$@
+	@echo >> ../$@
+	@echo .SH OPTIONS >> ../$@
+	@echo .nf >> ../$@
+	@../$* -h 2>&1 | tail -n +4 >> ../$@
+	@echo >> ../$@
+	@echo .SH AUTHOR >> ../$@
+	@echo "afl++ was written by Michal \"lcamtuf\" Zalewski and is maintained by Marc \"van Hauser\" Heuse <mh@mh-sec.de>, Heiko \"hexcoder-\" Eissfeldt <heiko.eissfeldt@hexco.de>, Andrea Fioraldi <andreafioraldi@gmail.com> and Dominik Maier <domenukk@gmail.com>" >> ../$@
+	@echo  The homepage of afl++ is: https://github.com/AFLplusplus/AFLplusplus >> ../$@
+	@echo >> ../$@
+	@echo .SH LICENSE >> ../$@
+	@echo Apache License Version 2.0, January 2004 >> ../$@
+	ln -sf afl-gcc-fast.8 ../afl-g++-fast.8
+
+install: all
+	install -m 755 ../afl-gcc-fast $${DESTDIR}$(BIN_PATH)
+	install -m 755 ../afl-gcc-pass.so ../afl-gcc-rt.o $${DESTDIR}$(HELPER_PATH)
+	install -m 644 -T README.md $${DESTDIR}$(DOC_PATH)/README.gcc_plugin.md
+	install -m 644 -T README.whitelist.md $${DESTDIR}$(DOC_PATH)/README.gcc_plugin.whitelist.md
+
+clean:
+	rm -f *.o *.so *~ a.out core core.[1-9][0-9]* test-instr .test-instr0 .test-instr1 .test2
+	rm -f $(PROGS) afl-common.o ../afl-g++-fast ../afl-g*-fast.8
diff --git a/gcc_plugin/Makefile b/gcc_plugin/Makefile
index 17962401..3da2e4f6 100644
--- a/gcc_plugin/Makefile
+++ b/gcc_plugin/Makefile
@@ -27,81 +27,90 @@ DOC_PATH    ?= $(PREFIX)/share/doc/afl
 MAN_PATH    ?= $(PREFIX)/man/man8
 
 VERSION     = $(shell grep '^$(HASH)define VERSION ' ../config.h | cut -d '"' -f2)
+VERSION:sh= grep '^$(HASH)define VERSION ' ../config.h | cut -d '"' -f2
 
-CFLAGS      ?= -O3 -g -funroll-loops
-CFLAGS      += -Wall -I../include -D_FORTIFY_SOURCE=2 -Wno-pointer-sign \
+CFLAGS      ?= -O3 -g -funroll-loops -D_FORTIFY_SOURCE=2
+CFLAGS      += -Wall -I../include -Wno-pointer-sign \
                -DAFL_PATH=\"$(HELPER_PATH)\" -DBIN_PATH=\"$(BIN_PATH)\" \
                -DGCC_VERSION=\"$(GCCVER)\" -DGCC_BINDIR=\"$(GCCBINDIR)\" \
                -Wno-unused-function
 
-CXXFLAGS    ?= -O3 -g -funroll-loops
-CXXEFLAGS   := $(CXXFLAGS) -Wall -D_FORTIFY_SOURCE=2
+CXXFLAGS    ?= -O3 -g -funroll-loops -D_FORTIFY_SOURCE=2
+CXXEFLAGS   = $(CXXFLAGS) -Wall
 
 CC          ?= gcc
 CXX         ?= g++
 
-PLUGIN_FLAGS = -fPIC -fno-rtti -I"$(shell $(CC) -print-file-name=plugin)/include"
+MYCC=$(CC:clang=gcc)
+MYCXX=$(CXX:clang++=g++)
+
+PLUGIN_PATH = $(shell $(MYCC) -print-file-name=plugin)
+PLUGIN_PATH:sh= $(MYCC) -print-file-name=plugin
+PLUGIN_FLAGS = -fPIC -fno-rtti -I"$(PLUGIN_PATH)/include"
 HASH=\#
 
-GCCVER    = $(shell $(CC) --version 2>/dev/null | awk 'NR == 1 {print $$NF}')
-GCCBINDIR = $(shell dirname `type $(CC) | awk '{print $$NF}'` 2>/dev/null )
+GCCVER    = $(shell $(MYCC) --version 2>/dev/null | awk 'NR == 1 {print $$NF}')
+GCCBINDIR = $(shell dirname `command -v $(MYCC)` 2>/dev/null )
 
-ifeq "$(shell echo '$(HASH)include <sys/ipc.h>@$(HASH)include <sys/shm.h>@int main() { int _id = shmget(IPC_PRIVATE, 65536, IPC_CREAT | IPC_EXCL | 0600); shmctl(_id, IPC_RMID, 0); return 0;}' | tr @ '\n' | $(CC) -x c - -o .test2 2>/dev/null && echo 1 || echo 0 ; rm -f .test2 )" "1"
-	SHMAT_OK=1
-else
-	SHMAT_OK=0
-	CFLAGS+=-DUSEMMAP=1
-	LDFLAGS += -lrt
-endif
+_SHMAT_OK= $(shell echo '$(HASH)include <sys/ipc.h>@$(HASH)include <sys/shm.h>@int main() { int _id = shmget(IPC_PRIVATE, 65536, IPC_CREAT | IPC_EXCL | 0600); shmctl(_id, IPC_RMID, 0); return 0;}' | tr @ '\n' | $(MYCC) -x c - -o .test2 2>/dev/null && echo 1 || echo 0 ; rm -f .test2 )
+_SHMAT_OK:sh= echo '$(HASH)include <sys/ipc.h>@$(HASH)include <sys/shm.h>@int main() { int _id = shmget(IPC_PRIVATE, 65536, IPC_CREAT | IPC_EXCL | 0600); shmctl(_id, IPC_RMID, 0); return 0;}' | tr @ '\n' | $(MYCC) -x c - -o .test2 2>/dev/null && echo 1 || echo 0 ; rm -f .test2
 
-ifeq "$(TEST_MMAP)" "1"
-	SHMAT_OK=0
-	CFLAGS+=-DUSEMMAP=1
-	LDFLAGS += -lrt
-endif
+IGNORE_MMAP=$(TEST_MMAP:1=0)
+__SHMAT_OK=$(_SHMAT_OK)$(IGNORE_MMAP)
+___SHMAT_OK=$(__SHMAT_OK:10=0)
+SHMAT_OK=$(___SHMAT_OK:00=0)
+_CFLAGS_ADD=$(SHMAT_OK:1=)
+CFLAGS_ADD=$(_CFLAGS_ADD:0=-DUSEMMAP=1)
 
-PROGS        = ../afl-gcc-fast ../afl-gcc-pass.so ../afl-gcc-rt.o
+_LDFLAGS_ADD=$(SHMAT_OK:1=)
+LDFLAGS_ADD=$(_LDFLAGS_ADD:0=-lrt)
 
+CFLAGS += $(CFLAGS_ADD)
+LDFLAGS += $(LDFLAGS_ADD)
 
-all: test_shm test_deps $(PROGS) afl-gcc-fast.8 test_build all_done
+PROGS        = ../afl-gcc-fast ../afl-gcc-pass.so ../afl-gcc-rt.o
 
-ifeq "$(SHMAT_OK)" "1"
 
-test_shm:
-	@echo "[+] shmat seems to be working."
-	@rm -f .test2
+all: test_shm test_deps $(PROGS) afl-gcc-fast.8 test_build all_done
 
-else
+debug:
+	@echo _SHMAT_OK = $(_SHMAT_OK)
+	@echo IGNORE_MMAP = $(IGNORE_MMAP)
+	@echo __SHMAT_OK = $(__SHMAT_OK)
+	@echo ___SHMAT_OK = $(___SHMAT_OK)
+	@echo SHMAT_OK = $(SHMAT_OK)
 
 test_shm:
-	@echo "[-] shmat seems not to be working, switching to mmap implementation"
-
-endif
-
+	@if [ "$(SHMAT_OK)" = "1" ]; then \
+	  echo "[+] shmat seems to be working."; \
+	  rm -f .test2; \
+	else \
+	  echo "[-] shmat seems not to be working, switching to mmap implementation"; \
+	fi
 
 test_deps:
-	@echo "[*] Checking for working '$(CC)'..."
-	@type $(CC) >/dev/null 2>&1 || ( echo "[-] Oops, can't find '$(CC)'. Make sure that it's in your \$$PATH (or set \$$CC and \$$CXX)."; exit 1 )
+	@echo "[*] Checking for working '$(MYCC)'..."
+	@type $(MYCC) >/dev/null 2>&1 || ( echo "[-] Oops, can't find '$(MYCC)'. Make sure that it's in your \$$PATH (or set \$$CC and \$$CXX)."; exit 1 )
 #	@echo "[*] Checking for gcc for plugin support..."
-#	@$(CC) -v 2>&1 | grep -q -- --enable-plugin || ( echo "[-] Oops, this gcc has not been configured with plugin support."; exit 1 )
+#	@$(MYCC) -v 2>&1 | grep -q -- --enable-plugin || ( echo "[-] Oops, this gcc has not been configured with plugin support."; exit 1 )
 	@echo "[*] Checking for gcc plugin development header files..."
-	@test -d `$(CC) -print-file-name=plugin`/include || ( echo "[-] Oops, can't find gcc header files. Be sure to install 'gcc-X-plugin-dev'."; exit 1 )
+	@test -d `$(MYCC) -print-file-name=plugin`/include || ( echo "[-] Oops, can't find gcc header files. Be sure to install 'gcc-X-plugin-dev'."; exit 1 )
 	@echo "[*] Checking for '../afl-showmap'..."
 	@test -f ../afl-showmap || ( echo "[-] Oops, can't find '../afl-showmap'. Be sure to compile AFL first."; exit 1 )
 	@echo "[+] All set and ready to build."
 
 afl-common.o: ../src/afl-common.c
-	$(CC) $(CFLAGS) -c $< -o $@ $(LDFLAGS)
+	$(MYCC) $(CFLAGS) -c $< -o $@ $(LDFLAGS)
 
 ../afl-gcc-fast: afl-gcc-fast.c afl-common.o | test_deps
-	$(CC) -DAFL_GCC_CC=\"$(CC)\" -DAFL_GCC_CXX=\"$(CXX)\" $(CFLAGS) $< afl-common.o -o $@ $(LDFLAGS)
+	$(MYCC) -DAFL_GCC_CC=\"$(MYCC)\" -DAFL_GCC_CXX=\"$(MYCXX)\" $(CFLAGS) $< afl-common.o -o $@ $(LDFLAGS)
 	ln -sf afl-gcc-fast ../afl-g++-fast
 
 ../afl-gcc-pass.so: afl-gcc-pass.so.cc | test_deps
-	$(CXX) $(CXXEFLAGS) $(PLUGIN_FLAGS) -shared $< -o $@
+	$(MYCXX) $(CXXEFLAGS) $(PLUGIN_FLAGS) -shared $< -o $@
 
 ../afl-gcc-rt.o: afl-gcc-rt.o.c | test_deps
-	$(CC) $(CFLAGS) -fPIC -c $< -o $@
+	$(MYCC) $(CFLAGS) -fPIC -c $< -o $@
 
 test_build: $(PROGS)
 	@echo "[*] Testing the CC wrapper and instrumentation output..."
@@ -118,7 +127,7 @@ all_done: test_build
 
 .NOTPARALLEL: clean
 
-vpath  % ..
+VPATH = ..
 %.8: %
 	@echo .TH $* 8 `date "+%Y-%m-%d"` "afl++" > ../$@
 	@echo .SH NAME >> ../$@
@@ -132,7 +141,7 @@ vpath  % ..
 	@../$* -h 2>&1 | tail -n +4 >> ../$@
 	@echo >> ../$@
 	@echo .SH AUTHOR >> ../$@
-	@echo "afl++ was written by Michal \"lcamtuf\" Zalewski and is maintained by Marc \"van Hauser\" Heuse <mh@mh-sec.de>, Heiko \"hexcoder-\" Eissfeldt <heiko.eissfeldt@hexco.de> and Andrea Fioraldi <andreafioraldi@gmail.com>" >> ../$@
+	@echo "afl++ was written by Michal \"lcamtuf\" Zalewski and is maintained by Marc \"van Hauser\" Heuse <mh@mh-sec.de>, Heiko \"hexcoder-\" Eissfeldt <heiko.eissfeldt@hexco.de>, Andrea Fioraldi <andreafioraldi@gmail.com> and Dominik Maier <domenukk@gmail.com>" >> ../$@
 	@echo  The homepage of afl++ is: https://github.com/AFLplusplus/AFLplusplus >> ../$@
 	@echo >> ../$@
 	@echo .SH LICENSE >> ../$@
@@ -143,7 +152,7 @@ install: all
 	install -m 755 ../afl-gcc-fast $${DESTDIR}$(BIN_PATH)
 	install -m 755 ../afl-gcc-pass.so ../afl-gcc-rt.o $${DESTDIR}$(HELPER_PATH)
 	install -m 644 -T README.md $${DESTDIR}$(DOC_PATH)/README.gcc_plugin.md
-	install -m 644 -T README.whitelist.md $${DESTDIR}$(DOC_PATH)/README.gcc_plugin_whitelist.md
+	install -m 644 -T README.whitelist.md $${DESTDIR}$(DOC_PATH)/README.gcc_plugin.whitelist.md
 
 clean:
 	rm -f *.o *.so *~ a.out core core.[1-9][0-9]* test-instr .test-instr0 .test-instr1 .test2
diff --git a/gcc_plugin/afl-gcc-fast.c b/gcc_plugin/afl-gcc-fast.c
index 3583345e..8953c523 100644
--- a/gcc_plugin/afl-gcc-fast.c
+++ b/gcc_plugin/afl-gcc-fast.c
@@ -41,7 +41,6 @@ static u8 * obj_path;                  /* Path to runtime libraries         */
 static u8 **cc_params;                 /* Parameters passed to the real CC  */
 static u32  cc_par_cnt = 1;            /* Param count, including argv0      */
 u8          use_stdin = 0;                                         /* dummy */
-u8          be_quiet;
 
 /* Try to find the runtime libraries. If that fails, abort. */
 
@@ -122,12 +121,12 @@ static void edit_params(u32 argc, char **argv) {
   if (!strcmp(name, "afl-g++-fast")) {
 
     u8 *alt_cxx = getenv("AFL_CXX");
-    cc_params[0] = alt_cxx ? alt_cxx : (u8 *)AFL_GCC_CXX;
+    cc_params[0] = alt_cxx && *alt_cxx ? alt_cxx : (u8 *)AFL_GCC_CXX;
 
   } else {
 
     u8 *alt_cc = getenv("AFL_CC");
-    cc_params[0] = alt_cc ? alt_cc : (u8 *)AFL_GCC_CC;
+    cc_params[0] = alt_cc && *alt_cc ? alt_cc : (u8 *)AFL_GCC_CC;
 
   }
 
diff --git a/gcc_plugin/afl-gcc-rt.o.c b/gcc_plugin/afl-gcc-rt.o.c
index 77bb5325..0a2246e7 100644
--- a/gcc_plugin/afl-gcc-rt.o.c
+++ b/gcc_plugin/afl-gcc-rt.o.c
@@ -25,6 +25,9 @@
 #include "../config.h"
 #include "../types.h"
 
+#ifdef USEMMAP
+#include <stdio.h>
+#endif
 #include <stdlib.h>
 #include <signal.h>
 #include <unistd.h>
@@ -135,8 +138,9 @@ static void __afl_map_shm(void) {
 
 static void __afl_start_forkserver(void) {
 
-  static u8 tmp[4];
-  s32       child_pid;
+  u8  tmp[4] = {0, 0, 0, 0};
+  u32 map_size = MAP_SIZE;
+  s32 child_pid;
 
   u8 child_stopped = 0;
 
@@ -145,6 +149,13 @@ static void __afl_start_forkserver(void) {
   /* Phone home and tell the parent that we're OK. If parent isn't there,
      assume we're not running in forkserver mode and just execute program. */
 
+  if (MAP_SIZE <= 0x800000) {
+
+    map_size = (FS_OPT_ENABLED | FS_OPT_MAPSIZE | FS_OPT_SET_MAPSIZE(MAP_SIZE));
+    memcpy(tmp, &map_size, 4);
+
+  }
+
   if (write(FORKSRV_FD + 1, tmp, 4) != 4) return;
 
   while (1) {
diff --git a/include/afl-as.h b/include/afl-as.h
index 7fc00ffe..a2bf1f9c 100644
--- a/include/afl-as.h
+++ b/include/afl-as.h
@@ -152,7 +152,7 @@ static const u8 *trampoline_fmt_64 =
     "/* --- END --- */\n"
     "\n";
 
-static const u8*main_payload_32 = 
+static const u8 *main_payload_32 = 
 
   "\n"
   "/* --- AFL MAIN PAYLOAD (32-BIT) --- */\n"
@@ -409,7 +409,7 @@ static const u8*main_payload_32 =
 #define CALL_L64(str) "call " str "@PLT\n"
 #endif                                                        /* ^__APPLE__ */
 
-static const u8* main_payload_64 = 
+static const u8 *main_payload_64 = 
 
   "\n"
   "/* --- AFL MAIN PAYLOAD (64-BIT) --- */\n"
diff --git a/include/afl-fuzz.h b/include/afl-fuzz.h
index 4e2deaa3..6cfb34ca 100644
--- a/include/afl-fuzz.h
+++ b/include/afl-fuzz.h
@@ -109,6 +109,8 @@
 #define CASE_PREFIX "id_"
 #endif                                                    /* ^!SIMPLE_FILES */
 
+#define STAGE_BUF_SIZE (64)  /* usable size for stage name buf in afl_state */
+
 extern s8  interesting_8[INTERESTING_8_LEN];
 extern s16 interesting_16[INTERESTING_8_LEN + INTERESTING_16_LEN];
 extern s32
@@ -231,13 +233,12 @@ enum {
   /* 04 */ QUAD,    /* Quadratic schedule               */
   /* 05 */ EXPLOIT, /* AFL's exploitation-based const.  */
   /* 06 */ MMOPT,   /* Modified MOPT schedule           */
+  /* 07 */ RARE,    /* Rare edges                       */
 
   POWER_SCHEDULES_NUM
 
 };
 
-extern u8 *doc_path;                    /* gath to documentation dir        */
-
 /* Python stuff */
 #ifdef USE_PYTHON
 
@@ -278,10 +279,32 @@ enum {
   /* 07 */ PY_FUNC_HAVOC_MUTATION_PROBABILITY,
   /* 08 */ PY_FUNC_QUEUE_GET,
   /* 09 */ PY_FUNC_QUEUE_NEW_ENTRY,
+  /* 10 */ PY_FUNC_DEINIT,
   PY_FUNC_COUNT
 
 };
 
+typedef struct py_mutator {
+
+  PyObject *py_module;
+  PyObject *py_functions[PY_FUNC_COUNT];
+  void *    afl_state;
+  void *    py_data;
+
+  u8 *   fuzz_buf;
+  size_t fuzz_size;
+
+  u8 *   pre_save_buf;
+  size_t pre_save_size;
+
+  u8 *   trim_buf;
+  size_t trim_size;
+
+  u8 *   havoc_buf;
+  size_t havoc_size;
+
+} py_mutator_t;
+
 #endif
 
 typedef struct MOpt_globals {
@@ -308,7 +331,8 @@ typedef struct afl_env_vars {
   u8 afl_skip_cpufreq, afl_exit_when_done, afl_no_affinity, afl_skip_bin_check,
       afl_dumb_forksrv, afl_import_first, afl_custom_mutator_only, afl_no_ui,
       afl_force_ui, afl_i_dont_care_about_missing_crashes, afl_bench_just_one,
-      afl_bench_until_crash, afl_debug_child_output, afl_autoresume;
+      afl_bench_until_crash, afl_debug_child_output, afl_autoresume,
+      afl_cal_fast;
 
   u8 *afl_tmpdir, *afl_post_library, *afl_custom_mutator_library,
       *afl_python_module, *afl_path, *afl_hang_tmout, *afl_skip_crashes,
@@ -420,7 +444,6 @@ typedef struct afl_state {
       no_arith,                         /* Skip most arithmetic ops         */
       shuffle_queue,                    /* Shuffle input queue?             */
       bitmap_changed,                   /* Time to update bitmap?           */
-      qemu_mode,                        /* Running in QEMU mode?            */
       unicorn_mode,                     /* Running in Unicorn mode?         */
       use_wine,                         /* Use WINE with QEMU mode          */
       skip_requested,                   /* Skip request, via SIGUSR1        */
@@ -463,11 +486,6 @@ typedef struct afl_state {
       unique_tmouts,                    /* Timeouts with unique signatures  */
       unique_hangs,                     /* Hangs with unique signatures     */
       total_execs,                      /* Total execve() calls             */
-      slowest_exec_ms,                  /* Slowest testcase non hang in ms  */
-      start_time,                       /* Unix start time (ms)             */
-      last_path_time,                   /* Time for most recent path (ms)   */
-      last_crash_time,                  /* Time for most recent crash (ms)  */
-      last_hang_time,                   /* Time for most recent hang (ms)   */
       last_crash_execs,                 /* Exec counter at last crash       */
       queue_cycle,                      /* Queue round counter              */
       cycles_wo_finds,                  /* Cycles without any new paths     */
@@ -475,15 +493,20 @@ typedef struct afl_state {
       bytes_trim_in,                    /* Bytes coming into the trimmer    */
       bytes_trim_out,                   /* Bytes coming outa the trimmer    */
       blocks_eff_total,                 /* Blocks subject to effector maps  */
-      blocks_eff_select;                /* Blocks selected as fuzzable      */
+      blocks_eff_select,                /* Blocks selected as fuzzable      */
+      start_time,                       /* Unix start time (ms)             */
+      last_path_time,                   /* Time for most recent path (ms)   */
+      last_crash_time,                  /* Time for most recent crash (ms)  */
+      last_hang_time;                   /* Time for most recent hang (ms)   */
 
-  u32 subseq_tmouts;                    /* Number of timeouts in a row      */
+  u32 slowest_exec_ms,                  /* Slowest testcase non hang in ms  */
+      subseq_tmouts;                    /* Number of timeouts in a row      */
 
   u8 *stage_name,                       /* Name of the current fuzz stage   */
       *stage_short,                     /* Short stage name                 */
       *syncing_party;                   /* Currently syncing with...        */
 
-  u8 stage_name_buf64[64];              /* A name buf with len 64 if needed */
+  u8 stage_name_buf[STAGE_BUF_SIZE];    /* reused stagename buf with len 64 */
 
   s32 stage_cur, stage_max;             /* Stage progression                */
   s32 splicing_with;                    /* Splicing with which test case?   */
@@ -532,35 +555,70 @@ typedef struct afl_state {
   struct extra_data *a_extras;          /* Automatically selected extras    */
   u32                a_extras_cnt;      /* Total number of tokens available */
 
-  u8 *(*post_handler)(u8 *buf, u32 *len);
+  /* afl_postprocess API */
+  void *(*post_init)(struct afl_state *afl);
+  size_t (*post_handler)(void *data, u8 *buf, u32 len, u8 **out_buf);
+  void *(*post_deinit)(void *data);
+  void *post_data;
 
   /* CmpLog */
 
-  char *cmplog_binary;
-  s32   cmplog_child_pid, cmplog_fsrv_pid;
+  char *           cmplog_binary;
+  afl_forkserver_t cmplog_fsrv;     /* cmplog has its own little forkserver */
 
   /* Custom mutators */
   struct custom_mutator *mutator;
 
   /* cmplog forkserver ids */
   s32 cmplog_fsrv_ctl_fd, cmplog_fsrv_st_fd;
-  
+  u32 cmplog_prev_timed_out;
+
   struct afl_pass_stat* pass_stats;
 
   u8 describe_op_buf_256[256]; /* describe_op will use this to return a string
                                   up to 256 */
 
-#ifdef USE_PYTHON
-  /* Python Mutators */
-  PyObject *py_module;
-  PyObject *py_functions[PY_FUNC_COUNT];
-#endif
-
 #ifdef _AFL_DOCUMENT_MUTATIONS
   u8  do_document;
   u32 document_counter;
 #endif
 
+  void *maybe_add_auto;
+
+  /* statistics file */
+  double last_bitmap_cvg, last_stability, last_eps;
+
+  /* plot file saves from last run */
+  u32 plot_prev_qp, plot_prev_pf, plot_prev_pnf, plot_prev_ce, plot_prev_md;
+  u64 plot_prev_qc, plot_prev_uc, plot_prev_uh;
+
+  u64 stats_last_stats_ms, stats_last_plot_ms, stats_last_ms, stats_last_execs;
+  double stats_avg_exec;
+
+  u8 clean_trace[MAP_SIZE];
+  u8 clean_trace_custom[MAP_SIZE];
+  u8 first_trace[MAP_SIZE];
+
+  /*needed for afl_fuzz_one */
+  // TODO: see which we can reuse
+  u8 *   out_buf;
+  size_t out_size;
+
+  u8 *   out_scratch_buf;
+  size_t out_scratch_size;
+
+  u8 *   eff_buf;
+  size_t eff_size;
+
+  u8 *   in_buf;
+  size_t in_size;
+
+  u8 *   in_scratch_buf;
+  size_t in_scratch_size;
+
+  u8 *   ex_buf;
+  size_t ex_size;
+
 } afl_state_t;
 
 /* A global pointer to all instances is needed (for now) for signals to arrive
@@ -572,33 +630,40 @@ struct custom_mutator {
 
   const char *name;
   void *      dh;
+  u8 *        pre_save_buf;
+  size_t      pre_save_size;
+
+  void *data;                                    /* custom mutator data ptr */
 
   /* hooks for the custom mutator function */
 
   /**
    * Initialize the custom mutator.
    *
-   * (Optional)
-   *
+   * @param afl AFL instance.
    * @param seed Seed used for the mutation.
+   * @return pointer to internal data or NULL on error
    */
-  void (*afl_custom_init)(afl_state_t *afl, unsigned int seed);
+  void *(*afl_custom_init)(afl_state_t *afl, unsigned int seed);
 
   /**
    * Perform custom mutations on a given input
    *
    * (Optional for now. Required in the future)
    *
-   * @param[inout] buf Pointer to the input data to be mutated and the mutated
+   * @param data pointer returned in afl_custom_init for this fuzz case
+   * @param[in] buf Pointer to the input data to be mutated and the mutated
    *     output
    * @param[in] buf_size Size of the input/output data
+   * @param[out] out_buf the new buffer. We may reuse *buf if large enough.
+   *             *out_buf = NULL is treated as FATAL.
    * @param[in] add_buf Buffer containing the additional test case
    * @param[in] add_buf_size Size of the additional test case
    * @param[in] max_size Maximum size of the mutated output. The mutation must
    * not produce data larger than max_size.
    * @return Size of the mutated output.
    */
-  size_t (*afl_custom_fuzz)(afl_state_t *afl, u8 **buf, size_t buf_size,
+  size_t (*afl_custom_fuzz)(void *data, u8 *buf, size_t buf_size, u8 **out_buf,
                             u8 *add_buf, size_t add_buf_size, size_t max_size);
 
   /**
@@ -608,14 +673,15 @@ struct custom_mutator {
    * (Optional) If this functionality is not needed, simply don't define this
    * function.
    *
+   * @param[in] data pointer returned in afl_custom_init for this fuzz case
    * @param[in] buf Buffer containing the test case to be executed
    * @param[in] buf_size Size of the test case
-   * @param[out] out_buf Pointer to the buffer of storing the test case after
-   *     processing. External library should allocate memory for out_buf. AFL++
-   *     will release the memory after saving the test case.
-   * @return Size of the output buffer after processing
+   * @param[out] out_buf Pointer to the buffer storing the test case after
+   *     processing. External library should allocate memory for out_buf.
+   *     It can choose to alter buf in-place, if the space is large enough.
+   * @return Size of the output buffer.
    */
-  size_t (*afl_custom_pre_save)(afl_state_t *afl, u8 *buf, size_t buf_size,
+  size_t (*afl_custom_pre_save)(void *data, u8 *buf, size_t buf_size,
                                 u8 **out_buf);
 
   /**
@@ -634,11 +700,13 @@ struct custom_mutator {
    *
    * (Optional)
    *
+   * @param data pointer returned in afl_custom_init for this fuzz case
    * @param buf Buffer containing the test case
    * @param buf_size Size of the test case
-   * @return The amount of possible iteration steps to trim the input
+   * @return The amount of possible iteration steps to trim the input.
+   *        Negative on error.
    */
-  u32 (*afl_custom_init_trim)(afl_state_t *afl, u8 *buf, size_t buf_size);
+  s32 (*afl_custom_init_trim)(void *data, u8 *buf, size_t buf_size);
 
   /**
    * This method is called for each trimming operation. It doesn't have any
@@ -651,12 +719,13 @@ struct custom_mutator {
    *
    * (Optional)
    *
+   * @param data pointer returned in afl_custom_init for this fuzz case
    * @param[out] out_buf Pointer to the buffer containing the trimmed test case.
-   *     External library should allocate memory for out_buf. AFL++ will release
-   *     the memory after saving the test case.
-   * @param[out] out_buf_size Pointer to the size of the trimmed test case
+   *             The library can reuse a buffer for each call
+   *             and will have to free the buf (for example in deinit)
+   * @return the size of the trimmed test case
    */
-  void (*afl_custom_trim)(afl_state_t *afl, u8 **out_buf, size_t *out_buf_size);
+  size_t (*afl_custom_trim)(void *data, u8 **out_buf);
 
   /**
    * This method is called after each trim operation to inform you if your
@@ -665,11 +734,12 @@ struct custom_mutator {
    *
    * (Optional)
    *
+   * @param data pointer returned in afl_custom_init for this fuzz case
    * @param success Indicates if the last trim operation was successful.
    * @return The next trim iteration index (from 0 to the maximum amount of
-   *     steps returned in init_trim)
+   *     steps returned in init_trim). Negative on error.
    */
-  u32 (*afl_custom_post_trim)(afl_state_t *afl, u8 success);
+  s32 (*afl_custom_post_trim)(void *data, u8 success);
 
   /**
    * Perform a single custom mutation on a given input.
@@ -677,15 +747,18 @@ struct custom_mutator {
    *
    * (Optional)
    *
-   * @param[inout] buf Pointer to the input data to be mutated and the mutated
+   * @param[in] data pointer returned in afl_custom_init for this fuzz case
+   * @param[in] buf Pointer to the input data to be mutated and the mutated
    *     output
    * @param[in] buf_size Size of input data
+   * @param[out] out_buf The new buffer. It's legal to reuse *buf if it's <
+   * buf_size.
    * @param[in] max_size Maximum size of the mutated output. The mutation must
    *     not produce data larger than max_size.
-   * @return Size of the mutated output.
+   * @return Size of the mutated output (out_size).
    */
-  size_t (*afl_custom_havoc_mutation)(afl_state_t *afl, u8 **buf,
-                                      size_t buf_size, size_t max_size);
+  size_t (*afl_custom_havoc_mutation)(void *data, u8 *buf, size_t buf_size,
+                                      u8 **out_buf, size_t max_size);
 
   /**
    * Return the probability (in percentage) that afl_custom_havoc_mutation
@@ -693,20 +766,22 @@ struct custom_mutator {
    *
    * (Optional)
    *
+   * @param data pointer returned in afl_custom_init for this fuzz case
    * @return The probability (0-100).
    */
-  u8 (*afl_custom_havoc_mutation_probability)(afl_state_t *afl);
+  u8 (*afl_custom_havoc_mutation_probability)(void *data);
 
   /**
    * Determine whether the fuzzer should fuzz the current queue entry or not.
    *
    * (Optional)
    *
+   * @param data pointer returned in afl_custom_init for this fuzz case
    * @param filename File name of the test case in the queue entry
    * @return Return True(1) if the fuzzer will fuzz the queue entry, and
    *     False(0) otherwise.
    */
-  u8 (*afl_custom_queue_get)(afl_state_t *afl, const u8 *filename);
+  u8 (*afl_custom_queue_get)(void *data, const u8 *filename);
 
   /**
    * Allow for additional analysis (e.g. calling a different tool that does a
@@ -714,13 +789,19 @@ struct custom_mutator {
    *
    * (Optional)
    *
+   * @param data pointer returned in afl_custom_init for this fuzz case
    * @param filename_new_queue File name of the new queue entry
    * @param filename_orig_queue File name of the original queue entry. This
    *     argument can be NULL while initializing the fuzzer
    */
-  void (*afl_custom_queue_new_entry)(afl_state_t *afl,
-                                     const u8 *   filename_new_queue,
-                                     const u8 *   filename_orig_queue);
+  void (*afl_custom_queue_new_entry)(void *data, const u8 *filename_new_queue,
+                                     const u8 *filename_orig_queue);
+  /**
+   * Deinitialize the custom mutator.
+   *
+   * @param data pointer returned in afl_custom_init for this fuzz case
+   */
+  void (*afl_custom_deinit)(void *data);
 
 };
 
@@ -738,19 +819,17 @@ u8   trim_case_custom(afl_state_t *, struct queue_entry *q, u8 *in_buf);
 /* Python */
 #ifdef USE_PYTHON
 
-int  init_py_module(afl_state_t *, u8 *);
-void finalize_py_module(afl_state_t *);
+void finalize_py_module(void *);
 
-void   init_py(afl_state_t *, unsigned int);
-size_t fuzz_py(afl_state_t *, u8 **, size_t, u8 *, size_t, size_t);
-size_t pre_save_py(afl_state_t *, u8 *, size_t, u8 **);
-u32    init_trim_py(afl_state_t *, u8 *, size_t);
-u32    post_trim_py(afl_state_t *, u8);
-void   trim_py(afl_state_t *, u8 **, size_t *);
-size_t havoc_mutation_py(afl_state_t *, u8 **, size_t, size_t);
-u8     havoc_mutation_probability_py(afl_state_t *);
-u8     queue_get_py(afl_state_t *, const u8 *);
-void   queue_new_entry_py(afl_state_t *, const u8 *, const u8 *);
+size_t pre_save_py(void *, u8 *, size_t, u8 **);
+s32    init_trim_py(void *, u8 *, size_t);
+s32    post_trim_py(void *, u8);
+size_t trim_py(void *, u8 **);
+size_t havoc_mutation_py(void *, u8 *, size_t, u8 **, size_t);
+u8     havoc_mutation_probability_py(void *);
+u8     queue_get_py(void *, const u8 *);
+void   queue_new_entry_py(void *, const u8 *, const u8 *);
+void   deinit_py(void *);
 
 #endif
 
@@ -769,36 +848,29 @@ u32  calculate_score(afl_state_t *, struct queue_entry *);
 
 void read_bitmap(afl_state_t *, u8 *);
 void write_bitmap(afl_state_t *);
-u32  count_bits(u8 *);
-u32  count_bytes(u8 *);
-u32  count_non_255_bytes(u8 *);
+u32  count_bits(afl_state_t *, u8 *);
+u32  count_bytes(afl_state_t *, u8 *);
+u32  count_non_255_bytes(afl_state_t *, u8 *);
 #ifdef WORD_SIZE_64
-void simplify_trace(u64 *);
-void classify_counts(u64 *);
+void simplify_trace(afl_state_t *, u64 *);
+void classify_counts(afl_state_t *, u64 *);
 #else
-void simplify_trace(u32 *);
-void classify_counts(u32 *);
+void simplify_trace(afl_state_t *, u32 *);
+void classify_counts(afl_state_t *, u32 *);
 #endif
 void init_count_class16(void);
-void minimize_bits(u8 *, u8 *);
+void minimize_bits(afl_state_t *, u8 *, u8 *);
 #ifndef SIMPLE_FILES
 u8 *describe_op(afl_state_t *, u8);
 #endif
 u8 save_if_interesting(afl_state_t *, void *, u32, u8);
 u8 has_new_bits(afl_state_t *, u8 *);
 
-/* Misc */
-
-u8 *DI(u64);
-u8 *DF(double);
-u8 *DMS(u64);
-u8 *DTD(u64, u64);
-
 /* Extras */
 
 void load_extras_file(afl_state_t *, u8 *, u32 *, u32 *, u32);
 void load_extras(afl_state_t *, u8 *);
-void maybe_add_auto(afl_state_t *, u8 *, u32);
+void maybe_add_auto(void *, u8 *, u32);
 void save_auto(afl_state_t *);
 void load_auto(afl_state_t *);
 void destroy_extras(afl_state_t *);
@@ -812,7 +884,7 @@ void show_init_stats(afl_state_t *);
 
 /* Run */
 
-u8   run_target(afl_state_t *, u32);
+u8   run_target(afl_state_t *, afl_forkserver_t *fsrv, u32);
 void write_to_testcase(afl_state_t *, void *, u32);
 u8   calibrate_case(afl_state_t *, struct queue_entry *, u8 *, u32, u8);
 void sync_fuzzers(afl_state_t *);
@@ -856,8 +928,7 @@ void   save_cmdline(afl_state_t *, u32, char **);
 
 /* CmpLog */
 
-void init_cmplog_forkserver(afl_state_t *afl);
-u8   common_fuzz_cmplog_stuff(afl_state_t *afl, u8 *out_buf, u32 len);
+u8 common_fuzz_cmplog_stuff(afl_state_t *afl, u8 *out_buf, u32 len);
 
 /* RedQueen */
 u8 input_to_state_stage(afl_state_t *afl, u8 *orig_buf, u8 *buf, u32 len,
@@ -868,16 +939,16 @@ u8 input_to_state_stage(afl_state_t *afl, u8 *orig_buf, u8 *buf, u32 len,
 /* Generate a random number (from 0 to limit - 1). This may
    have slight bias. */
 
-static inline u32 UR(afl_state_t *afl, u32 limit) {
+static inline u32 rand_below(afl_state_t *afl, u32 limit) {
 
 #ifdef HAVE_ARC4RANDOM
-  if (afl->fixed_seed) { return random() % limit; }
+  if (unlikely(afl->fixed_seed)) { return random() % limit; }
 
   /* The boundary not being necessarily a power of 2,
      we need to ensure the result uniformity. */
   return arc4random_uniform(limit);
 #else
-  if (!afl->fixed_seed && unlikely(!afl->rand_cnt--)) {
+  if (unlikely(!afl->rand_cnt--) && likely(!afl->fixed_seed)) {
 
     ck_read(afl->fsrv.dev_urandom_fd, &afl->rand_seed, sizeof(afl->rand_seed),
             "/dev/urandom");
@@ -893,7 +964,7 @@ static inline u32 UR(afl_state_t *afl, u32 limit) {
 
 static inline u32 get_rand_seed(afl_state_t *afl) {
 
-  if (afl->fixed_seed) return (u32)afl->init_seed;
+  if (unlikely(afl->fixed_seed)) return (u32)afl->init_seed;
   return afl->rand_seed[0];
 
 }
@@ -901,7 +972,7 @@ static inline u32 get_rand_seed(afl_state_t *afl) {
 /* Find first power of two greater or equal to val (assuming val under
    2^63). */
 
-static u64 next_p2(u64 val) {
+static inline u64 next_p2(u64 val) {
 
   u64 ret = 1;
   while (val > ret)
diff --git a/include/afl-prealloc.h b/include/afl-prealloc.h
index ab86f0d7..66e6aadf 100644
--- a/include/afl-prealloc.h
+++ b/include/afl-prealloc.h
@@ -44,9 +44,8 @@ typedef enum prealloc_status {
 
 /* Adds the entry used for prealloc bookkeeping to this struct */
 
-#define PREALLOCABLE \
-  ;                  \
-  pre_status_t pre_status;              /* prealloc status of this instance */
+/* prealloc status of this instance */
+#define PREALLOCABLE pre_status_t pre_status
 
 /* allocate an element of type *el_ptr, to this variable.
     Uses (and reuses) the given prealloc_buf before hitting libc's malloc.
diff --git a/include/alloc-inl.h b/include/alloc-inl.h
index c8783d96..d16e84bb 100644
--- a/include/alloc-inl.h
+++ b/include/alloc-inl.h
@@ -35,6 +35,9 @@
 #include "types.h"
 #include "debug.h"
 
+/* Initial size used for ck_maybe_grow */
+#define INITIAL_GROWTH_SIZE (64)
+
 // Be careful! _WANT_ORIGINAL_AFL_ALLOC is not compatible with custom mutators
 
 #ifndef _WANT_ORIGINAL_AFL_ALLOC
@@ -82,7 +85,7 @@
 
 static inline void *DFL_ck_alloc_nozero(u32 size) {
 
-  u8 *ret;
+  void *ret;
 
   if (!size) return NULL;
 
@@ -124,7 +127,7 @@ static inline void DFL_ck_free(void *mem) {
 
 static inline void *DFL_ck_realloc(void *orig, u32 size) {
 
-  u8 *ret;
+  void *ret;
 
   if (!size) {
 
@@ -179,7 +182,7 @@ static inline u8 *DFL_ck_strdup(u8 *str) {
 
 static inline void *DFL_ck_memdup(void *mem, u32 size) {
 
-  u8 *ret;
+  void *ret;
 
   if (!mem || !size) return NULL;
 
@@ -764,5 +767,99 @@ static inline void TRK_ck_free(void *ptr, const char *file, const char *func,
 
 #endif                                          /* _WANT_ORIGINAL_AFL_ALLOC */
 
+/* This function calculates the next power of 2 greater or equal to its
+ argument. @return The rounded up power of 2, or 0 if in is 0 or too large
+ to be rounded up without overflowing size_t. */
+static inline size_t next_pow2(size_t in) {
+
+  if (in == 0 || in > ((size_t)-1 >> 1) + 1) return 0;
+  size_t out = in - 1;
+  out |= out >> 1;
+  out |= out >> 2;
+  out |= out >> 4;
+  out |= out >> 8;
+  out |= out >> 16;
+  if (sizeof(size_t) > 4) out |= (out >> 16) >> 16;      /* 64 bit size_t */
+  return out + 1;
+
+}
+
+/* This function makes sure *size is >= size_needed after call.
+ It will realloc *buf otherwise; size_needed 0 reallocs to the minimum size.
+ *size will grow exponentially as per:
+ https://blog.mozilla.org/nnethercote/2014/11/04/please-grow-your-buffers-exponentially/
+ Will return NULL, free *buf and set *size to 0 if realloc failed.
+ @return For convenience, this function returns *buf.
+ */
+static inline void *maybe_grow(void **buf, size_t *size, size_t size_needed) {
+
+  /* No need to realloc */
+  if (likely(size_needed && *size >= size_needed)) return *buf;
+
+  /* No initial size was set */
+  if (size_needed < INITIAL_GROWTH_SIZE) size_needed = INITIAL_GROWTH_SIZE;
+
+  /* grow exponentially, fall back to the exact size if that overflows */
+  size_t next_size = next_pow2(size_needed);
+  if (!next_size) { next_size = size_needed; }
+
+  /* alloc via a temporary: a failed realloc leaves the old block allocated */
+  void *new_buf = realloc(*buf, next_size);
+  if (unlikely(!new_buf)) free(*buf);
+  *buf = new_buf;
+  *size = new_buf ? next_size : 0;
+
+  return *buf;
+
+}
+
+/* This function makes sure *size is >= size_needed after call.
+ It will realloc *buf otherwise.
+ *size will grow exponentially as per:
+ https://blog.mozilla.org/nnethercote/2014/11/04/please-grow-your-buffers-exponentially/
+ Will FATAL if size_needed is <1.
+ @return For convenience, this function returns *buf.
+ */
+static inline void *ck_maybe_grow(void **buf, size_t *size,
+                                  size_t size_needed) {
+
+  /* Oops. found a bug? */
+  if (unlikely(size_needed < 1)) FATAL("cannot grow to non-positive size");
+
+  /* No need to realloc */
+  if (likely(*size >= size_needed)) return *buf;
+
+  /* No initial size was set */
+  if (size_needed < INITIAL_GROWTH_SIZE) size_needed = INITIAL_GROWTH_SIZE;
+
+  /* grow exponentially */
+  size_t next_size = next_pow2(size_needed);
+
+  /* handle overflow */
+  if (!next_size) { next_size = size_needed; }
+
+  /* alloc */
+  *buf = ck_realloc(*buf, next_size);
+  *size = next_size;
+
+  return *buf;
+
+}
+
+/* Exchanges the two buffer pointers together with their recorded sizes */
+static inline void swap_bufs(void **buf1, size_t *size1, void **buf2,
+                             size_t *size2) {
+
+  void * held_buf = *buf1;
+  size_t held_size = *size1;
+  *buf1 = *buf2;
+  *buf2 = held_buf;
+  *size1 = *size2;
+  *size2 = held_size;
+
+}
+
+#undef INITIAL_GROWTH_SIZE
+
 #endif                                               /* ! _HAVE_ALLOC_INL_H */
 
diff --git a/include/android-ashmem.h b/include/android-ashmem.h
index 3a0b9969..3a0b9969 100755..100644
--- a/include/android-ashmem.h
+++ b/include/android-ashmem.h
diff --git a/include/cmplog.h b/include/cmplog.h
index 36f8f2c5..74e6a3bb 100644
--- a/include/cmplog.h
+++ b/include/cmplog.h
@@ -29,6 +29,7 @@
 #define _AFL_CMPLOG_H
 
 #include "config.h"
+#include "forkserver.h"
 
 #define CMP_MAP_W 65536
 #define CMP_MAP_H 256
@@ -74,5 +75,9 @@ struct cmp_map {
 
 };
 
+/* Execs the child */
+
+void cmplog_exec_child(afl_forkserver_t *fsrv, char **argv);
+
 #endif
 
diff --git a/include/common.h b/include/common.h
index 28c11049..8dd66355 100644
--- a/include/common.h
+++ b/include/common.h
@@ -27,10 +27,17 @@
 #ifndef __AFLCOMMON_H
 #define __AFLCOMMON_H
 
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
 #include <sys/time.h>
 #include "types.h"
 #include "stdbool.h"
 
+/* STRINGIFY_VAL_SIZE_MAX will fit all stringify_ strings. */
+
+#define STRINGIFY_VAL_SIZE_MAX (16)
+
 void detect_file_args(char **argv, u8 *prog_in, u8 *use_stdin);
 void check_environment_vars(char **env);
 
@@ -41,31 +48,62 @@ char **get_qemu_argv(u8 *own_loc, u8 **target_path_p, int argc, char **argv);
 char **get_wine_argv(u8 *own_loc, u8 **target_path_p, int argc, char **argv);
 char * get_afl_env(char *env);
 
+extern u8  be_quiet;
+extern u8 *doc_path;                    /* path to documentation dir        */
+
 /* Get unix time in milliseconds */
 
-static u64 get_cur_time(void) {
+u64 get_cur_time(void);
 
-  struct timeval  tv;
-  struct timezone tz;
+/* Get unix time in microseconds */
 
-  gettimeofday(&tv, &tz);
+u64 get_cur_time_us(void);
 
-  return (tv.tv_sec * 1000ULL) + (tv.tv_usec / 1000);
+/* Describe integer. The buf should be
+   at least 6 bytes to fit all ints we randomly see.
+   Will return buf for convenience. */
 
-}
+u8 *stringify_int(u8 *buf, size_t len, u64 val);
 
-/* Get unix time in microseconds */
+/* Describe float. Similar to int. */
+
+u8 *stringify_float(u8 *buf, size_t len, double val);
+
+/* Describe integer as memory size. */
+
+u8 *stringify_mem_size(u8 *buf, size_t len, u64 val);
+
+/* Describe time delta as string.
+   Returns a pointer to buf for convenience. */
+
+u8 *stringify_time_diff(u8 *buf, size_t len, u64 cur_ms, u64 event_ms);
+
+/* Unsafe Describe integer. The buf sizes are not checked.
+   This is unsafe but fast.
+   Will return buf for convenience. */
+
+u8 *u_stringify_int(u8 *buf, u64 val);
+
+/* Unsafe describe float. Similar to unsafe int. */
+
+u8 *u_stringify_float(u8 *buf, double val);
 
-static u64 get_cur_time_us(void) {
+/* Unsafe describe integer as memory size. */
 
-  struct timeval  tv;
-  struct timezone tz;
+u8 *u_stringify_mem_size(u8 *buf, u64 val);
 
-  gettimeofday(&tv, &tz);
+/* Unsafe describe time delta as string.
+   Returns a pointer to buf for convenience. */
 
-  return (tv.tv_sec * 1000000ULL) + tv.tv_usec;
+u8 *u_stringify_time_diff(u8 *buf, u64 cur_ms, u64 event_ms);
 
-}
+/* Wrapper for select() and read(), reading exactly len bytes.
+  Returns the time that passed while reading.
+  stop_soon_p should point to a variable indicating ctrl+c was pressed.
+  If the wait times out, returns timeout_ms + 1.
+  Returns 0 if an error occurred (fd closed, signal, ...). */
+u32 read_timed(s32 fd, void *buf, size_t len, u32 timeout_ms,
+               volatile u8 *stop_soon_p);
 
 #endif
 
diff --git a/include/config.h b/include/config.h
index a87c5e84..f0274fd3 100644
--- a/include/config.h
+++ b/include/config.h
@@ -28,7 +28,7 @@
 /* Version string: */
 
 // c = release, d = volatile github dev, e = experimental branch
-#define VERSION "++2.62d"
+#define VERSION "++2.63d"
 
 /******************************************************
  *                                                    *
@@ -201,8 +201,8 @@
    (first value), and to keep in memory as candidates. The latter should be much
    higher than the former. */
 
-#define USE_AUTO_EXTRAS 50
-#define MAX_AUTO_EXTRAS (USE_AUTO_EXTRAS * 10)
+#define USE_AUTO_EXTRAS 128
+#define MAX_AUTO_EXTRAS (USE_AUTO_EXTRAS * 64)
 
 /* Scaling factor for the effector map used to skip some of the more
    expensive deterministic steps. The actual divisor is set to
@@ -400,5 +400,15 @@
 #endif
 #endif                           /* __APPLE__ || __FreeBSD__ || __OpenBSD__ */
 
+/* Extended forkserver option values. The map size is carried in bits 1-23
+   as (size - 1), so only sizes from 2 up to 0x800000 can be encoded. */
+#define FS_OPT_ENABLED 0x8f000001
+#define FS_OPT_MAPSIZE 0x40000000
+#define FS_OPT_SNAPSHOT 0x20000000
+#define FS_OPT_AUTODICT 0x10000000
+#define FS_OPT_GET_MAPSIZE(x) ((((x) & 0x00fffffe) >> 1) + 1)
+#define FS_OPT_SET_MAPSIZE(x) \
+  ((x) <= 1 || (x) > MAP_SIZE || (x) > 0x800000 ? 0 : (((x) - 1) << 1))
+
 #endif                                                  /* ! _HAVE_CONFIG_H */
 
diff --git a/include/debug.h b/include/debug.h
index b3865c19..8824ff6b 100644
--- a/include/debug.h
+++ b/include/debug.h
@@ -28,10 +28,23 @@
 #include "types.h"
 #include "config.h"
 
+/* __FUNCTION__ is non-iso. Like C99's __func__ it is a predefined identifier
+   and not a macro, so it cannot be probed with #ifdef; only provide a
+   fallback (the standard __func__) for compilers without the GNU extension. */
+#ifndef __GNUC__
+#ifndef __FUNCTION__
+#define __FUNCTION__ __func__
+#endif
+#endif
+
 /*******************
  * Terminal colors *
  *******************/
 
+#ifndef MESSAGES_TO_STDOUT
+#define MESSAGES_TO_STDOUT
+#endif
+
 #ifdef USE_COLOR
 
 #define cBLK "\x1b[0;30m"
diff --git a/include/forkserver.h b/include/forkserver.h
index 9802b216..6fbaf612 100644
--- a/include/forkserver.h
+++ b/include/forkserver.h
@@ -51,6 +51,8 @@ typedef struct afl_forkserver {
       fsrv_st_fd;                       /* Fork server status pipe (read)   */
 
   u32 exec_tmout;                       /* Configurable exec timeout (ms)   */
+  u32 map_size;                         /* map size used by the target      */
+  u32 snapshot;                         /* is snapshot feature used         */
   u64 mem_limit;                        /* Memory cap for child (MB)        */
 
   u8 *out_file,                         /* File to fuzz, if any             */
@@ -62,13 +64,27 @@ typedef struct afl_forkserver {
 
   u8 use_fauxsrv;                       /* Fauxsrv for non-forking targets? */
 
+  u32 prev_timed_out;                   /* if prev forkserver run timed out */
+
+  u8 qemu_mode;                         /* if running in qemu mode or not   */
+
+  char *cmplog_binary;                  /* the name of the cmplog binary    */
+
+  /* Function to kick off the forkserver child */
+  void (*init_child_func)(struct afl_forkserver *fsrv, char **argv);
+
+  u8 *function_opt;                     /* for autodictionary: afl ptr      */
+
+  void (*function_ptr)(void *afl_tmp, u8 *mem, u32 len);
+
 } afl_forkserver_t;
 
-void handle_timeout(int sig);
 void afl_fsrv_init(afl_forkserver_t *fsrv);
-void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv);
+void afl_fsrv_init_dup(afl_forkserver_t *fsrv_to, afl_forkserver_t *from);
+void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv,
+                    volatile u8 *stop_soon_p, u8 debug_child_output);
+void afl_fsrv_killall(void);
 void afl_fsrv_deinit(afl_forkserver_t *fsrv);
-void afl_fsrv_killall();
 
 #ifdef __APPLE__
 #define MSG_FORK_ON_APPLE                                                    \
diff --git a/include/list.h b/include/list.h
index 1190931f..bb985c4f 100644
--- a/include/list.h
+++ b/include/list.h
@@ -34,8 +34,8 @@
 #include "debug.h"
 #include "afl-prealloc.h"
 
-#define LIST_PREALLOC_SIZE \
-  (64)             /* How many elements to allocate before malloc is needed */
+/* How many elements to allocate before malloc is needed */
+#define LIST_PREALLOC_SIZE (64)
 
 typedef struct list_element {
 
@@ -50,23 +50,24 @@ typedef struct list_element {
 typedef struct list {
 
   element_t element_prealloc_buf[LIST_PREALLOC_SIZE];
-  u32       element_prealloc_count;
+  s32       element_prealloc_count;
 
 } list_t;
 
 static inline element_t *get_head(list_t *list) {
 
-  return &list->element_prealloc_buf[0];
+  /* The first element is the head */
+  return list->element_prealloc_buf;
 
 }
 
-static void list_free_el(list_t *list, element_t *el) {
+static inline void list_free_el(list_t *list, element_t *el) {
 
   PRE_FREE(el, list->element_prealloc_count);
 
 }
 
-static void list_append(list_t *list, void *el) {
+static inline void list_append(list_t *list, void *el) {
 
   element_t *head = get_head(list);
   if (!head->next) {
@@ -97,23 +98,23 @@ static void list_append(list_t *list, void *el) {
    A return from this block will return from calling func.
 */
 
-#define LIST_FOREACH(list, type, block)                    \
-  do {                                                     \
-                                                           \
-    list_t *   li = (list);                                \
-    element_t *head = get_head((li));                      \
-    element_t *el_box = (head)->next;                      \
-    if (!el_box) FATAL("foreach over uninitialized list"); \
-    while (el_box != head) {                               \
-                                                           \
-      type *el = (type *)((el_box)->data);                 \
-      /* get next so el_box can be unlinked */             \
-      element_t *next = el_box->next;                      \
-      {block};                                             \
-      el_box = next;                                       \
-                                                           \
-    }                                                      \
-                                                           \
+#define LIST_FOREACH(list, type, block)                            \
+  do {                                                             \
+                                                                   \
+    list_t *   li = (list);                                        \
+    element_t *head = get_head((li));                              \
+    element_t *el_box = (head)->next;                              \
+    if (!el_box) FATAL("foreach over uninitialized list");         \
+    while (el_box != head) {                                       \
+                                                                   \
+      __attribute__((unused)) type *el = (type *)((el_box)->data); \
+      /* get next so el_box can be unlinked */                     \
+      element_t *next = el_box->next;                              \
+      {block};                                                     \
+      el_box = next;                                               \
+                                                                   \
+    }                                                              \
+                                                                   \
   } while (0);
 
 /* In foreach: remove the current el from the list */
@@ -143,7 +144,7 @@ static void list_append(list_t *list, void *el) {
 
 /* remove an item from the list */
 
-static void list_remove(list_t *list, void *remove_me) {
+static inline void list_remove(list_t *list, void *remove_me) {
 
   LIST_FOREACH(list, void, {
 
@@ -165,7 +166,7 @@ static void list_remove(list_t *list, void *remove_me) {
 
 /* Returns true if el is in list */
 
-static bool list_contains(list_t *list, void *contains_me) {
+static inline bool list_contains(list_t *list, void *contains_me) {
 
   LIST_FOREACH(list, void, {
 
diff --git a/include/snapshot-inl.h b/include/snapshot-inl.h
new file mode 100644
index 00000000..b73a001e
--- /dev/null
+++ b/include/snapshot-inl.h
@@ -0,0 +1,69 @@
+/*
+   american fuzzy lop++ - snapshot helper routines
+   -----------------------------------------------
+
+   Originally written by Michal Zalewski
+
+   Forkserver design by Jann Horn <jannhorn@googlemail.com>
+
+   Now maintained by Marc Heuse <mh@mh-sec.de>,
+                     Heiko Eißfeldt <heiko.eissfeldt@hexco.de>,
+                     Andrea Fioraldi <andreafioraldi@gmail.com>,
+                     Dominik Maier <mail@dmnk.co>
+
+   Copyright 2016, 2017 Google Inc. All rights reserved.
+   Copyright 2019-2020 AFLplusplus Project. All rights reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at:
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ */
+
+// From AFL-Snapshot-LKM/include/afl_snapshot.h (must be kept synced)
+
+#ifndef _AFL_SNAPSHOT_INL_H
+#define _AFL_SNAPSHOT_INL_H
+
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#define AFL_SNAPSHOT_FILE_NAME "/dev/afl_snapshot"
+
+#define AFL_SNAPSHOT_IOCTL_MAGIC 44313
+
+#define AFL_SNAPSHOT_IOCTL_DO _IO(AFL_SNAPSHOT_IOCTL_MAGIC, 1)
+#define AFL_SNAPSHOT_IOCTL_CLEAN _IO(AFL_SNAPSHOT_IOCTL_MAGIC, 2)
+
+/* fd of the snapshot device, set by afl_snapshot_init() */
+static int afl_snapshot_dev_fd;
+
+/* Opens the snapshot device read-only and remembers the fd for the ioctl
+   helpers below. Returns open()'s result, i.e. the fd or -1 on error. */
+static int afl_snapshot_init(void) {
+
+  afl_snapshot_dev_fd = open(AFL_SNAPSHOT_FILE_NAME, O_RDONLY);
+  return afl_snapshot_dev_fd;
+
+}
+
+/* Issues AFL_SNAPSHOT_IOCTL_DO on the device. Returns ioctl()'s result. */
+static int afl_snapshot_do(void) {
+
+  return ioctl(afl_snapshot_dev_fd, AFL_SNAPSHOT_IOCTL_DO);
+
+}
+
+/* Issues AFL_SNAPSHOT_IOCTL_CLEAN on the device. Returns ioctl()'s result. */
+static int afl_snapshot_clean(void) {
+
+  return ioctl(afl_snapshot_dev_fd, AFL_SNAPSHOT_IOCTL_CLEAN);
+
+}
+
+#endif                                           /* ! _AFL_SNAPSHOT_INL_H */
+
diff --git a/include/types.h b/include/types.h
index bd4bd05d..f2a12953 100644
--- a/include/types.h
+++ b/include/types.h
@@ -46,7 +46,7 @@ typedef uint32_t u32;
 
  */
 
-#ifdef __x86_64__
+#if defined(__x86_64__) || defined(__aarch64__)
 typedef unsigned long long u64;
 #else
 typedef uint64_t u64;
@@ -58,8 +58,22 @@ typedef int32_t s32;
 typedef int64_t s64;
 
 #ifndef MIN
-#define MIN(_a, _b) ((_a) > (_b) ? (_b) : (_a))
-#define MAX(_a, _b) ((_a) > (_b) ? (_a) : (_b))
+#define MIN(a, b)           \
+  ({                        \
+                            \
+    __typeof__(a) _a = (a); \
+    __typeof__(b) _b = (b); \
+    _a < _b ? _a : _b;      \
+                            \
+  })
+#define MAX(a, b)           \
+  ({                        \
+                            \
+    __typeof__(a) _a = (a); \
+    __typeof__(b) _b = (b); \
+    _a > _b ? _a : _b;      \
+                            \
+  })
 #endif                                                              /* !MIN */
 
 #define SWAP16(_x)                    \
@@ -120,9 +134,13 @@ typedef int64_t s64;
 #define likely(_x) (_x)
 #define unlikely(_x) (_x)
 #else
+#ifndef likely
 #define likely(_x) __builtin_expect(!!(_x), 1)
+#endif
+#ifndef unlikely
 #define unlikely(_x) __builtin_expect(!!(_x), 0)
 #endif
+#endif
 
 #endif                                                   /* ! _HAVE_TYPES_H */
 
diff --git a/libdislocator/Makefile b/libdislocator/Makefile
index 07d98a0d..37fd4fd6 100644
--- a/libdislocator/Makefile
+++ b/libdislocator/Makefile
@@ -18,18 +18,17 @@ HELPER_PATH  = $(PREFIX)/lib/afl
 
 VERSION     = $(shell grep '^\#define VERSION ' ../config.h | cut -d '"' -f2)
 
-CFLAGS      ?= -O3 -funroll-loops
-CFLAGS      += -I ../include/ -Wall -D_FORTIFY_SOURCE=2 -g -Wno-pointer-sign
+CFLAGS      ?= -O3 -funroll-loops -D_FORTIFY_SOURCE=2
+CFLAGS += -I ../include/ -Wall -g -Wno-pointer-sign
 
-ifdef USEHUGEPAGE
-	CFLAGS += -DUSEHUGEPAGE
-endif
+CFLAGS_ADD=$(USEHUGEPAGE:1=-DUSEHUGEPAGE)
+CFLAGS += $(CFLAGS_ADD)
 
 all: libdislocator.so
 
 VPATH = ..
 libdislocator.so: libdislocator.so.c ../config.h
-	$(CC) $(CFLAGS) -shared -fPIC $< -o ../$@ $(LDFLAGS)
+	$(CC) $(CFLAGS) -shared -fPIC libdislocator.so.c -o ../$@ $(LDFLAGS)
 
 .NOTPARALLEL: clean
 
diff --git a/libtokencap/Makefile b/libtokencap/Makefile
index 4889479b..75a8d337 100644
--- a/libtokencap/Makefile
+++ b/libtokencap/Makefile
@@ -20,30 +20,51 @@ MAN_PATH    ?= $(PREFIX)/man/man8
 
 VERSION     = $(shell grep '^\#define VERSION ' ../config.h | cut -d '"' -f2)
 
-CFLAGS      ?= -O3 -funroll-loops
-CFLAGS      += -I ../include/ -Wall -D_FORTIFY_SOURCE=2 -g -Wno-pointer-sign
-
-ifeq "$(shell uname)" "Linux"
-  TARGETS = libtokencap.so
-  LDFLAGS     += -ldl
-endif
-ifeq "$(shell uname)" "Darwin"
-  TARGETS = libtokencap.so
-  LDFLAGS     += -ldl
-endif
-ifeq "$(shell uname)" "FreeBSD"
-  TARGETS = libtokencap.so
-endif
-ifeq "$(shell uname)" "OpenBSD"
-  TARGETS = libtokencap.so
-endif
-ifeq "$(shell uname)" "NetBSD"
-  TARGETS = libtokencap.so
-endif
-ifeq "$(shell uname)" "DragonFly"
-  TARGETS = libtokencap.so
-  LDFLAGS     += -ldl
-endif
+CFLAGS      ?= -O3 -funroll-loops -D_FORTIFY_SOURCE=2
+CFLAGS += -I ../include/ -Wall -g -Wno-pointer-sign
+
+
+UNAME_S =$(shell uname -s)# GNU make
+UNAME_S:sh=uname -s       # BSD make
+_UNIQ=_QINU_
+
+     _OS_DL = $(_UNIQ)$(UNAME_S)
+    __OS_DL =     $(_OS_DL:$(_UNIQ)Linux=$(_UNIQ))
+   ___OS_DL =    $(__OS_DL:$(_UNIQ)Darwin=$(_UNIQ))
+  ____OS_DL =   $(___OS_DL:$(_UNIQ)DragonFly=$(_UNIQ))
+ _____OS_DL =  $(____OS_DL:$(_UNIQ)$(UNAME_S)=)
+______OS_DL = $(_____OS_DL:$(_UNIQ)="-ldl")
+
+   _OS_TARGET = $(____OS_DL:$(_UNIQ)FreeBSD=$(_UNIQ))
+  __OS_TARGET =   $(_OS_TARGET:$(_UNIQ)OpenBSD=$(_UNIQ))
+ ___OS_TARGET =  $(__OS_TARGET:$(_UNIQ)NetBSD=$(_UNIQ))
+____OS_TARGET = $(___OS_TARGET:$(_UNIQ)$(UNAME_S)=)
+
+TARGETS       = $(____OS_TARGET:$(_UNIQ)=libtokencap.so)
+
+LDFLAGS     += $(______OS_DL)
+
+#ifeq "$(shell uname)" "Linux"
+#  TARGETS = libtokencap.so
+#  LDFLAGS     += -ldl
+#endif
+#ifeq "$(shell uname)" "Darwin"
+#  TARGETS = libtokencap.so
+#  LDFLAGS     += -ldl
+#endif
+#ifeq "$(shell uname)" "FreeBSD"
+#  TARGETS = libtokencap.so
+#endif
+#ifeq "$(shell uname)" "OpenBSD"
+#  TARGETS = libtokencap.so
+#endif
+#ifeq "$(shell uname)" "NetBSD"
+#  TARGETS = libtokencap.so
+#endif
+#ifeq "$(shell uname)" "DragonFly"
+#  TARGETS = libtokencap.so
+#  LDFLAGS     += -ldl
+#endif
 all: $(TARGETS)
 
 VPATH = ..
@@ -52,6 +73,16 @@ libtokencap.so: libtokencap.so.c ../config.h
 
 .NOTPARALLEL: clean
 
+debug:
+	@echo $(UNAME_S)$(_UNIQ) | hexdump -C
+	@echo from $(____OS_DL) : $(_UNIQ)$(UNAME_S) = -\> $(_____OS_DL)
+	@echo from $(_____OS_DL) : $(_UNIQ) = -ldl -\> $(______OS_DL)
+	@echo from $(____OS_DL) : $(_UNIQ)FreeBSD = $(_UNIQ) -\> $(_OS_TARGET)
+	@echo from $(_OS_TARGET) : $(_UNIQ)OpenBSD = $(_UNIQ) -\> $(__OS_TARGET)
+	@echo from $(__OS_TARGET) : $(_UNIQ)NetBSD = $(_UNIQ) -\> $(___OS_TARGET)
+	@echo from $(___OS_TARGET) : $(_UNIQ)$(UNAME_S) = -\> $(____OS_TARGET)
+	@echo from $(____OS_TARGET) : $(_UNIQ) = libtokencap.so -\> $(TARGETS)
+
 clean:
 	rm -f *.o *.so *~ a.out core core.[1-9][0-9]*
 	rm -f ../libtokencap.so
@@ -60,4 +91,3 @@ install: all
 	install -m 755 -d $${DESTDIR}$(HELPER_PATH)
 	install -m 755 ../libtokencap.so $${DESTDIR}$(HELPER_PATH)
 	install -m 644 -T README.md $${DESTDIR}$(DOC_PATH)/README.tokencap.md
-
diff --git a/llvm_mode/GNUmakefile b/llvm_mode/GNUmakefile
new file mode 100644
index 00000000..d6a00580
--- /dev/null
+++ b/llvm_mode/GNUmakefile
@@ -0,0 +1,364 @@
+#
+# american fuzzy lop++ - LLVM instrumentation
+# -----------------------------------------
+#
+# Written by Laszlo Szekeres <lszekeres@google.com> and
+#            Michal Zalewski
+#
+# LLVM integration design comes from Laszlo Szekeres.
+#
+# Copyright 2015, 2016 Google Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+
+# For Heiko:
+#TEST_MMAP=1
+HASH=\#
+
+PREFIX      ?= /usr/local
+HELPER_PATH ?= $(PREFIX)/lib/afl
+BIN_PATH    ?= $(PREFIX)/bin
+DOC_PATH    ?= $(PREFIX)/share/doc/afl
+MISC_PATH   ?= $(PREFIX)/share/afl
+MAN_PATH    ?= $(PREFIX)/man/man8
+
+VERSION     = $(shell grep '^$(HASH)define VERSION ' ../config.h | cut -d '"' -f2)
+
+ifeq "$(shell uname)" "OpenBSD"
+  LLVM_CONFIG ?= $(BIN_PATH)/llvm-config
+  HAS_OPT = $(shell test -x $(BIN_PATH)/opt && echo 0 || echo 1)
+  ifeq "$(HAS_OPT)" "1"
+    $(error llvm_mode needs a complete llvm installation (versions 3.8.0 up to 11) -> e.g. "pkg_add llvm-7.0.1p9")
+  endif
+else
+  LLVM_CONFIG ?= llvm-config
+endif
+
+LLVMVER  = $(shell $(LLVM_CONFIG) --version 2>/dev/null )
+LLVM_UNSUPPORTED = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^3\.[0-7]|^1[2-9]' && echo 1 || echo 0 )
+LLVM_NEW_API = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^1[0-9]' && echo 1 || echo 0 )
+LLVM_MAJOR = $(shell $(LLVM_CONFIG) --version 2>/dev/null | sed 's/\..*//')
+LLVM_BINDIR = $(shell $(LLVM_CONFIG) --bindir 2>/dev/null)
+LLVM_STDCXX = gnu++11
+LLVM_APPLE = $(shell clang -v 2>&1 | grep -iq apple && echo 1 || echo 0)
+LLVM_LTO   = 0
+
+ifeq "$(LLVMVER)" ""
+  $(warning [!] llvm_mode needs llvm-config, which was not found)
+endif
+
+ifeq "$(LLVM_UNSUPPORTED)" "1"
+  $(warning llvm_mode only supports llvm versions 3.8.0 up to 11)
+endif
+
+ifeq "$(LLVM_MAJOR)" "9"
+  $(info [+] llvm_mode detected llvm 9, enabling neverZero implementation)
+endif
+
+ifeq "$(LLVM_NEW_API)" "1"
+  $(info [+] llvm_mode detected llvm 10+, enabling neverZero implementation and c++14)
+  LLVM_STDCXX = c++14
+endif
+
+ifeq "$(LLVM_MAJOR)" "11"
+  $(info [+] llvm_mode detected llvm 11, enabling afl-clang-lto LTO implementation)
+  LLVM_LTO = 1
+endif
+
+ifeq "$(LLVM_LTO)" "0"
+  $(info [+] llvm_mode detected llvm < 11, afl-clang-lto LTO will not be build.)
+endif
+
+ifeq "$(LLVM_APPLE)" "1"
+  $(warning llvm_mode will not compile with Xcode clang...)
+endif
+
+# We were using llvm-config --bindir to get the location of clang, but
+# this seems to be busted on some distros, so using the one in $PATH is
+# probably better.
+
+CC         = $(LLVM_BINDIR)/clang
+CXX        = $(LLVM_BINDIR)/clang++
+
+ifeq "$(shell test -e $(CC) || echo 1 )" "1"
+  # llvm-config --bindir may not be providing a valid path, so ...
+  ifeq "$(shell test -e '$(BIN_PATH)/clang' && echo 1)" "1"
+    # we found one in the local install directory (BIN_PATH), let's use it
+    CC         = $(BIN_PATH)/clang
+    CXX        = $(BIN_PATH)/clang++
+  else
+    # hope for the best
+    $(warning we have trouble finding clang/clang++ - llvm-config is not helping us)
+    CC         = clang
+    CXX        = clang++
+  endif
+endif
+
+# sanity check.
+# Are versions of clang --version and llvm-config --version equal? (each component may be multi-digit, e.g. 10.0.0)
+CLANGVER = $(shell $(CC) --version | sed -E -ne '/^.*version\ ([0-9]+\.[0-9]+\.[0-9]+).*/s//\1/p')
+
+ifneq "$(CLANGVER)" "$(LLVMVER)"
+  CC = $(shell $(LLVM_CONFIG) --bindir)/clang
+  CXX = $(shell $(LLVM_CONFIG) --bindir)/clang++
+endif
+
+# After we set CC/CXX we can start makefile magic tests
+
+ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -march=native -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
+	CFLAGS_OPT = -march=native
+endif
+
+ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -flto=full -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
+        AFL_CLANG_FLTO ?= -flto=full
+else
+ ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -flto=thin -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
+        AFL_CLANG_FLTO ?= -flto=thin
+ else
+  ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -flto -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
+        AFL_CLANG_FLTO ?= -flto
+  endif
+ endif
+endif
+
+ifeq "$(LLVM_LTO)" "1"
+  ifneq "$(AFL_CLANG_FLTO)" ""
+    ifeq "$(AFL_REAL_LD)" ""
+      ifneq "$(shell readlink $(LLVM_BINDIR)/ld.lld 2>&1)" ""
+        AFL_REAL_LD = $(LLVM_BINDIR)/ld.lld
+      else
+        $(warning ld.lld not found, can not enable LTO mode)
+        LLVM_LTO = 0
+      endif
+    endif
+  endif
+endif
+
+AFL_CLANG_FUSELD=
+ifneq "$(AFL_CLANG_FLTO)" ""
+ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -fuse-ld=`command -v ld` -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
+  AFL_CLANG_FUSELD=1
+endif
+endif
+
+CFLAGS          ?= -O3 -funroll-loops -D_FORTIFY_SOURCE=2
+override CFLAGS += -Wall \
+               -g -Wno-pointer-sign -I ../include/ \
+               -DAFL_PATH=\"$(HELPER_PATH)\" -DBIN_PATH=\"$(BIN_PATH)\" \
+               -DLLVM_BINDIR=\"$(LLVM_BINDIR)\" -DVERSION=\"$(VERSION)\" \
+               -DLLVM_VERSION=\"$(LLVMVER)\"  -DAFL_CLANG_FLTO=\"$(AFL_CLANG_FLTO)\" \
+               -DAFL_REAL_LD=\"$(AFL_REAL_LD)\" -DAFL_CLANG_FUSELD=\"$(AFL_CLANG_FUSELD)\" -Wno-unused-function
+ifdef AFL_TRACE_PC
+  $(info Compile option AFL_TRACE_PC is deprecated, just set AFL_LLVM_INSTRUMENT=PCGUARD to activate when compiling targets )
+endif
+
+CXXFLAGS          ?= -O3 -funroll-loops -D_FORTIFY_SOURCE=2
+override CXXFLAGS += -Wall -g -I ../include/ \
+                     -DVERSION=\"$(VERSION)\" -Wno-variadic-macros
+
+CLANG_CFL    = `$(LLVM_CONFIG) --cxxflags` -Wl,-znodelete -fno-rtti -fpic $(CXXFLAGS)
+CLANG_LFL    = `$(LLVM_CONFIG) --ldflags` $(LDFLAGS)
+
+
+# User teor2345 reports that this is required to make things work on MacOS X.
+ifeq "$(shell uname)" "Darwin"
+  CLANG_LFL += -Wl,-flat_namespace -Wl,-undefined,suppress
+endif
+
+ifeq "$(shell uname)" "OpenBSD"
+  CLANG_LFL += `$(LLVM_CONFIG) --libdir`/libLLVM.so
+endif
+
+ifeq "$(shell echo '$(HASH)include <sys/ipc.h>@$(HASH)include <sys/shm.h>@int main() { int _id = shmget(IPC_PRIVATE, 65536, IPC_CREAT | IPC_EXCL | 0600); shmctl(_id, IPC_RMID, 0); return 0;}' | tr @ '\n' | $(CC) -x c - -o .test2 2>/dev/null && echo 1 || echo 0 ; rm -f .test2 )" "1"
+        SHMAT_OK=1
+else
+        SHMAT_OK=0
+        CFLAGS+=-DUSEMMAP=1
+        LDFLAGS += -lrt
+endif
+
+ifeq "$(TEST_MMAP)" "1"
+        SHMAT_OK=0
+        CFLAGS+=-DUSEMMAP=1
+        LDFLAGS += -lrt
+endif
+
+  PROGS      = ../afl-clang-fast ../afl-llvm-pass.so ../afl-llvm-lto-whitelist.so ../afl-llvm-lto-instrumentation.so ../libLLVMInsTrim.so ../afl-llvm-rt.o ../afl-llvm-rt-32.o ../afl-llvm-rt-64.o ../compare-transform-pass.so ../split-compares-pass.so ../split-switches-pass.so ../cmplog-routines-pass.so ../cmplog-instructions-pass.so
+
+# If prerequisites are not given, warn, do not build anything, and exit with code 0
+ifeq "$(LLVMVER)" ""
+  NO_BUILD = 1
+endif
+
+ifneq "$(LLVM_UNSUPPORTED)$(LLVM_APPLE)" "00"
+  NO_BUILD = 1
+endif
+
+ifeq "$(NO_BUILD)" "1"
+  TARGETS = no_build
+else
+  TARGETS = test_shm test_deps $(PROGS) afl-clang-fast.8 test_build all_done
+endif
+
+LLVM_MIN_4_0_1 = $(shell awk 'function tonum(ver, a) {split(ver,a,"."); return a[1]*1000000+a[2]*1000+a[3]} BEGIN { exit tonum(ARGV[1]) >= tonum(ARGV[2]) }' $(LLVMVER) 4.0.1; echo $$?)
+
+all: $(TARGETS)
+
+ifeq "$(SHMAT_OK)" "1"
+
+test_shm:
+	@echo "[+] shmat seems to be working."
+	@rm -f .test2
+
+else
+
+test_shm:
+	@echo "[-] shmat seems not to be working, switching to mmap implementation"
+
+endif
+
+no_build:
+	@printf "%b\\n" "\\033[0;31mPrerequisites are not met, skipping build llvm_mode\\033[0m"
+
+test_deps:
+	@echo "[*] Checking for working 'llvm-config'..."
+ ifneq "$(LLVM_APPLE)" "1"
+	@type $(LLVM_CONFIG) >/dev/null 2>&1 || ( echo "[-] Oops, can't find 'llvm-config'. Install clang or set \$$LLVM_CONFIG or \$$PATH beforehand."; echo "    (Sometimes, the binary will be named llvm-config-3.5 or something like that.)"; exit 1 )
+ endif
+	@echo "[*] Checking for working '$(CC)'..."
+	@type $(CC) >/dev/null 2>&1 || ( echo "[-] Oops, can't find '$(CC)'. Make sure that it's in your \$$PATH (or set \$$CC and \$$CXX)."; exit 1 )
+	@echo "[*] Checking for matching versions of '$(CC)' and '$(LLVM_CONFIG)'"
+ifneq "$(CLANGVER)" "$(LLVMVER)"
+	@echo "[!] WARNING: we have llvm-config version $(LLVMVER) and a clang version $(CLANGVER)"
+	@echo "[!] Retrying with the clang compiler from llvm: CC=`$(LLVM_CONFIG) --bindir`/clang"
+else
+	@echo "[*] We have llvm-config version $(LLVMVER) with a clang version $(CLANGVER), good."
+endif
+	@echo "[*] Checking for '../afl-showmap'..."
+	@test -f ../afl-showmap || ( echo "[-] Oops, can't find '../afl-showmap'. Be sure to compile AFL first."; exit 1 )
+	@echo "[+] All set and ready to build."
+
+afl-common.o: ../src/afl-common.c
+	$(CC) $(CFLAGS) -c $< -o $@ $(LDFLAGS)
+
+../afl-clang-fast: afl-clang-fast.c afl-common.o | test_deps
+	$(CC) $(CFLAGS) $< afl-common.o -o $@ $(LDFLAGS) -DCFLAGS_OPT=\"$(CFLAGS_OPT)\"
+	ln -sf afl-clang-fast ../afl-clang-fast++
+ifneq "$(AFL_CLANG_FLTO)" ""
+ifeq "$(LLVM_LTO)" "1"
+	ln -sf afl-clang-fast ../afl-clang-lto
+	ln -sf afl-clang-fast ../afl-clang-lto++
+endif
+endif
+
+../libLLVMInsTrim.so: LLVMInsTrim.so.cc MarkNodes.cc | test_deps
+	-$(CXX) $(CLANG_CFL) -DLLVMInsTrim_EXPORTS -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< MarkNodes.cc -o $@ $(CLANG_LFL)
+
+../afl-llvm-pass.so: afl-llvm-pass.so.cc | test_deps
+ifeq "$(LLVM_MIN_4_0_1)" "0"
+	$(info [!] N-gram branch coverage instrumentation is not available for llvm version $(LLVMVER))
+endif
+	$(CXX) $(CLANG_CFL) -DLLVMInsTrim_EXPORTS -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< -o $@ $(CLANG_LFL)
+
+../afl-llvm-lto-whitelist.so: afl-llvm-lto-whitelist.so.cc
+ifeq "$(LLVM_LTO)" "1"
+	$(CXX) $(CLANG_CFL) -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< -o $@ $(CLANG_LFL)
+endif
+
+../afl-llvm-lto-instrumentation.so: afl-llvm-lto-instrumentation.so.cc
+ifeq "$(LLVM_LTO)" "1"
+	$(CXX) $(CLANG_CFL) -Wno-writable-strings -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< -o $@ $(CLANG_LFL)
+	$(CC) $(CFLAGS) -Wno-unused-result -O0 $(AFL_CLANG_FLTO) -fPIC -c afl-llvm-rt-lto.o.c -o ../afl-llvm-rt-lto.o
+	@$(CC) $(CFLAGS) -Wno-unused-result -O0 $(AFL_CLANG_FLTO) -m64 -fPIC -c afl-llvm-rt-lto.o.c -o ../afl-llvm-rt-lto-64.o 2>/dev/null; if [ "$$?" = "0" ]; then : ; fi
+	@$(CC) $(CFLAGS) -Wno-unused-result -O0 $(AFL_CLANG_FLTO) -m32 -fPIC -c afl-llvm-rt-lto.o.c -o ../afl-llvm-rt-lto-32.o 2>/dev/null; if [ "$$?" = "0" ]; then : ; fi
+endif
+
+# laf
+../split-switches-pass.so:	split-switches-pass.so.cc | test_deps
+	$(CXX) $(CLANG_CFL) -shared $< -o $@ $(CLANG_LFL)
+../compare-transform-pass.so:	compare-transform-pass.so.cc | test_deps
+	$(CXX) $(CLANG_CFL) -shared $< -o $@ $(CLANG_LFL)
+../split-compares-pass.so:	split-compares-pass.so.cc | test_deps
+	$(CXX) $(CLANG_CFL) -shared $< -o $@ $(CLANG_LFL)
+# /laf
+
+../cmplog-routines-pass.so:	cmplog-routines-pass.cc | test_deps
+	$(CXX) $(CLANG_CFL) -shared $< -o $@ $(CLANG_LFL)
+
+../cmplog-instructions-pass.so:	cmplog-instructions-pass.cc | test_deps
+	$(CXX) $(CLANG_CFL) -shared $< -o $@ $(CLANG_LFL)
+
+../afl-llvm-rt.o: afl-llvm-rt.o.c | test_deps
+	$(CC) $(CFLAGS) -Wno-unused-result -fPIC -c $< -o $@
+
+../afl-llvm-rt-32.o: afl-llvm-rt.o.c | test_deps
+	@printf "[*] Building 32-bit variant of the runtime (-m32)... "
+	@$(CC) $(CFLAGS) -Wno-unused-result -m32 -fPIC -c $< -o $@ 2>/dev/null; if [ "$$?" = "0" ]; then echo "success!"; else echo "failed (that's fine)"; fi
+
+../afl-llvm-rt-64.o: afl-llvm-rt.o.c | test_deps
+	@printf "[*] Building 64-bit variant of the runtime (-m64)... "
+	@$(CC) $(CFLAGS) -Wno-unused-result -m64 -fPIC -c $< -o $@ 2>/dev/null; if [ "$$?" = "0" ]; then echo "success!"; else echo "failed (that's fine)"; fi
+
+test_build: $(PROGS)
+	@echo "[*] Testing the CC wrapper and instrumentation output..."
+	unset AFL_USE_ASAN AFL_USE_MSAN AFL_INST_RATIO; AFL_QUIET=1 AFL_PATH=. AFL_CC=$(CC) AFL_LLVM_LAF_SPLIT_SWITCHES=1 AFL_LLVM_LAF_TRANSFORM_COMPARES=1 AFL_LLVM_LAF_SPLIT_COMPARES=1 ../afl-clang-fast $(CFLAGS) ../test-instr.c -o test-instr $(LDFLAGS)
+	ASAN_OPTIONS=detect_leaks=0 ../afl-showmap -m none -q -o .test-instr0 ./test-instr < /dev/null
+	echo 1 | ASAN_OPTIONS=detect_leaks=0 ../afl-showmap -m none -q -o .test-instr1 ./test-instr
+	@rm -f test-instr
+	@cmp -s .test-instr0 .test-instr1; DR="$$?"; rm -f .test-instr0 .test-instr1; if [ "$$DR" = "0" ]; then echo; echo "Oops, the instrumentation does not seem to be behaving correctly!"; echo; echo "Please post to https://github.com/AFLplusplus/AFLplusplus/issues to troubleshoot the issue."; echo; exit 1; fi
+	@echo "[+] All right, the instrumentation seems to be working!"
+
+all_done: test_build
+	@echo "[+] All done! You can now use '../afl-clang-fast' to compile programs."
+
+.NOTPARALLEL: clean
+
+install: all
+	install -d -m 755 $${DESTDIR}$(BIN_PATH) $${DESTDIR}$(HELPER_PATH) $${DESTDIR}$(DOC_PATH) $${DESTDIR}$(MISC_PATH)
+	if [ -f ../afl-clang-fast -a -f ../libLLVMInsTrim.so -a -f ../afl-llvm-rt.o ]; then set -e; install -m 755 ../afl-clang-fast $${DESTDIR}$(BIN_PATH); ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang-fast++; install -m 755 ../libLLVMInsTrim.so ../afl-llvm-pass.so ../afl-llvm-rt.o $${DESTDIR}$(HELPER_PATH); fi
+	if [ -f ../afl-clang-lto ]; then set -e; ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang-lto; ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang-lto++; install -m 755 ../afl-llvm-lto-instrumentation.so ../afl-llvm-rt-lto*.o ../afl-llvm-lto-whitelist.so $${DESTDIR}$(HELPER_PATH); fi
+	if [ -f ../afl-llvm-rt-32.o ]; then set -e; install -m 755 ../afl-llvm-rt-32.o $${DESTDIR}$(HELPER_PATH); fi
+	if [ -f ../afl-llvm-rt-64.o ]; then set -e; install -m 755 ../afl-llvm-rt-64.o $${DESTDIR}$(HELPER_PATH); fi
+	if [ -f ../compare-transform-pass.so ]; then set -e; install -m 755 ../compare-transform-pass.so $${DESTDIR}$(HELPER_PATH); fi
+	if [ -f ../split-compares-pass.so ]; then set -e; install -m 755 ../split-compares-pass.so $${DESTDIR}$(HELPER_PATH); fi
+	if [ -f ../split-switches-pass.so ]; then set -e; install -m 755 ../split-switches-pass.so $${DESTDIR}$(HELPER_PATH); fi
+	if [ -f ../cmplog-instructions-pass.so ]; then set -e; install -m 755 ../cmplog-*-pass.so $${DESTDIR}$(HELPER_PATH); fi
+	set -e; if [ -f ../afl-clang-fast ] ; then ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang ; ln -sf afl-clang-fast $${DESTDIR}$(BIN_PATH)/afl-clang++ ; else ln -sf afl-gcc $${DESTDIR}$(BIN_PATH)/afl-clang ; ln -sf afl-gcc $${DESTDIR}$(BIN_PATH)/afl-clang++; fi
+	install -m 644 README.*.md $${DESTDIR}$(DOC_PATH)/
+	install -m 644 -T README.md $${DESTDIR}$(DOC_PATH)/README.llvm_mode.md
+
+vpath  % ..
+%.8: %
+	@echo .TH $* 8 `date "+%Y-%m-%d"` "afl++" > ../$@
+	@echo .SH NAME >> ../$@
+	@echo .B $* >> ../$@
+	@echo >> ../$@
+	@echo .SH SYNOPSIS >> ../$@
+	@../$* -h 2>&1 | head -n 3 | tail -n 1 | sed 's/^\.\///' >> ../$@
+	@echo >> ../$@
+	@echo .SH OPTIONS >> ../$@
+	@echo .nf >> ../$@
+	@../$* -h 2>&1 | tail -n +4 >> ../$@
+	@echo >> ../$@
+	@echo .SH AUTHOR >> ../$@
+	@echo "afl++ was written by Michal \"lcamtuf\" Zalewski and is maintained by Marc \"van Hauser\" Heuse <mh@mh-sec.de>, Heiko \"hexcoder-\" Eissfeldt <heiko.eissfeldt@hexco.de>, Andrea Fioraldi <andreafioraldi@gmail.com> and Dominik Maier <domenukk@gmail.com>" >> ../$@
+	@echo  The homepage of afl++ is: https://github.com/AFLplusplus/AFLplusplus >> ../$@
+	@echo >> ../$@
+	@echo .SH LICENSE >> ../$@
+	@echo Apache License Version 2.0, January 2004 >> ../$@
+	ln -sf afl-clang-fast.8 ../afl-clang-fast++.8
+ifneq "$(AFL_CLANG_FLTO)" ""
+ifeq "$(LLVM_LTO)" "1"
+	ln -sf afl-clang-fast.8 ../afl-clang-lto.8
+	ln -sf afl-clang-fast.8 ../afl-clang-lto++.8
+endif
+endif
+
+clean:
+	rm -f *.o *.so *~ a.out core core.[1-9][0-9]* .test2 test-instr .test-instr0 .test-instr1 *.dwo
+	rm -f $(PROGS) afl-common.o ../afl-clang-fast++ ../afl-clang-lto ../afl-clang-lto++ ../afl-clang*.8 ../ld ../afl-ld ../afl-llvm-rt*.o
diff --git a/llvm_mode/LLVMInsTrim.so.cc b/llvm_mode/LLVMInsTrim.so.cc
index a94eb907..8b23942c 100644
--- a/llvm_mode/LLVMInsTrim.so.cc
+++ b/llvm_mode/LLVMInsTrim.so.cc
@@ -509,10 +509,11 @@ struct InsTrim : public ModulePass {
     if (!be_quiet) {
 
       char modeline[100];
-      snprintf(modeline, sizeof(modeline), "%s%s%s%s",
+      snprintf(modeline, sizeof(modeline), "%s%s%s%s%s",
                getenv("AFL_HARDEN") ? "hardened" : "non-hardened",
                getenv("AFL_USE_ASAN") ? ", ASAN" : "",
                getenv("AFL_USE_MSAN") ? ", MSAN" : "",
+               getenv("AFL_USE_CFISAN") ? ", CFISAN" : "",
                getenv("AFL_USE_UBSAN") ? ", UBSAN" : "");
 
       OKF("Instrumented %u locations (%llu, %llu) (%s mode)\n", total_instr,
diff --git a/llvm_mode/Makefile b/llvm_mode/Makefile
index 5ce0e579..0b306dde 100644
--- a/llvm_mode/Makefile
+++ b/llvm_mode/Makefile
@@ -1,349 +1,2 @@
-#
-# american fuzzy lop++ - LLVM instrumentation
-# -----------------------------------------
-#
-# Written by Laszlo Szekeres <lszekeres@google.com> and
-#            Michal Zalewski
-#
-# LLVM integration design comes from Laszlo Szekeres.
-#
-# Copyright 2015, 2016 Google Inc. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at:
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-
-# For Heiko:
-#TEST_MMAP=1
-HASH=\#
-
-PREFIX      ?= /usr/local
-HELPER_PATH  = $(PREFIX)/lib/afl
-BIN_PATH     = $(PREFIX)/bin
-
-VERSION     = $(shell grep '^$(HASH)define VERSION ' ../config.h | cut -d '"' -f2)
-
-ifeq "$(shell uname)" "OpenBSD"
-  LLVM_CONFIG ?= $(BIN_PATH)/llvm-config
-  HAS_OPT = $(shell test -x $(BIN_PATH)/opt && echo 0 || echo 1)
-  ifeq "$(HAS_OPT)" "1"
-    $(error llvm_mode needs a complete llvm installation (versions 3.8.0 up to 11) -> e.g. "pkg_add llvm-7.0.1p9")
-  endif
-else
-  LLVM_CONFIG ?= llvm-config
-endif
-
-LLVMVER  = $(shell $(LLVM_CONFIG) --version 2>/dev/null )
-LLVM_UNSUPPORTED = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^3\.[0-7]|^1[2-9]' && echo 1 || echo 0 )
-LLVM_NEW_API = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^1[0-9]' && echo 1 || echo 0 )
-LLVM_MAJOR = $(shell $(LLVM_CONFIG) --version 2>/dev/null | sed 's/\..*//')
-LLVM_BINDIR = $(shell $(LLVM_CONFIG) --bindir 2>/dev/null)
-LLVM_STDCXX = gnu++11
-LLVM_APPLE = $(shell clang -v 2>&1 | grep -iq apple && echo 1 || echo 0)
-LLVM_LTO   = 0
-
-ifeq "$(LLVMVER)" ""
-  $(warning [!] llvm_mode needs llvm-config, which was not found)
-endif
-
-ifeq "$(LLVM_UNSUPPORTED)" "1"
-  $(warning llvm_mode only supports llvm versions 3.8.0 up to 11)
-endif
-
-ifeq "$(LLVM_MAJOR)" "9"
-  $(info [+] llvm_mode detected llvm 9, enabling neverZero implementation)
-  $(info [+] llvm_mode detected llvm 9, enabling afl-clang-lto LTO implementation)
-  LLVM_LTO = 1
-endif
-
-ifeq "$(LLVM_NEW_API)" "1"
-  $(info [+] llvm_mode detected llvm 10+, enabling neverZero implementation and c++14)
-  $(info [+] llvm_mode detected llvm 9, enabling afl-clang-lto LTO implementation)
-  LLVM_STDCXX = c++14
-  LLVM_LTO = 1
-endif
-
-ifeq "$(LLVM_LTO)" "0"
-  $(info [+] llvm_mode detected llvm < 9, afl-clang-lto LTO will not be build.)
-endif
-
-ifeq "$(LLVM_APPLE)" "1"
-  $(warning llvm_mode will not compile with Xcode clang...)
-endif
-
-# We were using llvm-config --bindir to get the location of clang, but
-# this seems to be busted on some distros, so using the one in $PATH is
-# probably better.
-
-CC         = $(LLVM_BINDIR)/clang
-CXX        = $(LLVM_BINDIR)/clang++
-
-ifeq "$(shell test -e $(CC) || echo 1 )" "1"
-  # llvm-config --bindir may not providing a valid path, so ...
-  ifeq "$(shell test -e '$(BIN_DIR)/clang' && echo 1)" "1"
-    # we found one in the local install directory, lets use these
-    CC         = $(BIN_DIR)/clang
-    CXX        = $(BIN_DIR)/clang++
-  else
-    # hope for the best
-    $(warning we have trouble finding clang/clang++ - llvm-config is not helping us)
-    CC         = clang
-    CXX        = clang++
-  endif
-endif
-
-# sanity check.
-# Are versions of clang --version and llvm-config --version equal?
-CLANGVER = $(shell $(CC) --version | sed -E -ne '/^.*version\ ([0-9]\.[0-9]\.[0-9]).*/s//\1/p')
-
-ifneq "$(CLANGVER)" "$(LLVMVER)"
-  CC = $(shell $(LLVM_CONFIG) --bindir)/clang
-  CXX = $(shell $(LLVM_CONFIG) --bindir)/clang++
-endif
-
-# After we set CC/CXX we can start makefile magic tests
-
-ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -march=native -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
-	CFLAGS_OPT = -march=native
-endif
-
-ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -flto=full -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
-        AFL_CLANG_FLTO ?= -flto=full
-else
- ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -flto=thin -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
-        AFL_CLANG_FLTO ?= -flto=thin
- else
-  ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -flto -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
-        AFL_CLANG_FLTO ?= -flto
-  endif
- endif
-endif
-
-ifneq "$(AFL_CLANG_FLTO)" ""
-ifeq "$(AFL_REAL_LD)" ""
-  AFL_REAL_LD = $(shell readlink /bin/ld 2>/dev/null)
-  ifeq "$(AFL_REAL_LD)" ""
-    AFL_REAL_LD = $(shell readlink /usr/bin/ld 2>/dev/null)
-  endif
-endif
-endif
-
-CFLAGS          ?= -O3 -funroll-loops
-override CFLAGS = -Wall \
-               -D_FORTIFY_SOURCE=2 -g -Wno-pointer-sign -I ../include/ \
-               -DAFL_PATH=\"$(HELPER_PATH)\" -DBIN_PATH=\"$(BIN_PATH)\" \
-               -DLLVM_BINDIR=\"$(LLVM_BINDIR)\" -DVERSION=\"$(VERSION)\" \
-               -DLLVM_VERSION=\"$(LLVMVER)\"  -DAFL_CLANG_FLTO=\"$(AFL_CLANG_FLTO)\" \
-               -DAFL_REAL_LD=\"$(AFL_REAL_LD)\" -Wno-unused-function
-ifdef AFL_TRACE_PC
-  CFLAGS    += -DUSE_TRACE_PC=1
-endif
-
-CXXFLAGS          ?= -O3 -funroll-loops
-override CXXFLAGS += -Wall -D_FORTIFY_SOURCE=2 -g -I ../include/ \
-                     -DVERSION=\"$(VERSION)\" -Wno-variadic-macros
-
-CLANG_CFL    = `$(LLVM_CONFIG) --cxxflags` -Wl,-znodelete -fno-rtti -fpic $(CXXFLAGS)
-CLANG_LFL    = `$(LLVM_CONFIG) --ldflags` $(LDFLAGS)
-
-
-# User teor2345 reports that this is required to make things work on MacOS X.
-ifeq "$(shell uname)" "Darwin"
-  CLANG_LFL += -Wl,-flat_namespace -Wl,-undefined,suppress
-endif
-
-ifeq "$(shell uname)" "OpenBSD"
-  CLANG_LFL += `$(LLVM_CONFIG) --libdir`/libLLVM.so
-endif
-
-ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -x c - -fuse-ld=`type ld | awk '{print $$NF}'` -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
-  CFLAGS += -DAFL_CLANG_FUSELD=1
-endif
-
-ifeq "$(shell echo '$(HASH)include <sys/ipc.h>@$(HASH)include <sys/shm.h>@int main() { int _id = shmget(IPC_PRIVATE, 65536, IPC_CREAT | IPC_EXCL | 0600); shmctl(_id, IPC_RMID, 0); return 0;}' | tr @ '\n' | $(CC) -x c - -o .test2 2>/dev/null && echo 1 || echo 0 ; rm -f .test2 )" "1"
-        SHMAT_OK=1
-else
-        SHMAT_OK=0
-        CFLAGS+=-DUSEMMAP=1
-        LDFLAGS += -lrt
-endif
-
-ifeq "$(TEST_MMAP)" "1"
-        SHMAT_OK=0
-        CFLAGS+=-DUSEMMAP=1
-        LDFLAGS += -lrt
-endif
-
-ifndef AFL_TRACE_PC
-  PROGS      = ../afl-clang-fast ../afl-ld ../afl-llvm-pass.so ../afl-llvm-lto-whitelist.so ../afl-llvm-lto-instrumentation.so ../libLLVMInsTrim.so ../afl-llvm-rt.o ../afl-llvm-rt-32.o ../afl-llvm-rt-64.o ../compare-transform-pass.so ../split-compares-pass.so ../split-switches-pass.so ../cmplog-routines-pass.so ../cmplog-instructions-pass.so
-else
-  PROGS      = ../afl-clang-fast ../afl-llvm-rt.o ../afl-llvm-rt-32.o ../afl-llvm-rt-64.o ../compare-transform-pass.so ../split-compares-pass.so ../split-switches-pass.so ../cmplog-routines-pass.so ../cmplog-instructions-pass.so
-endif
-
-# If prerequisites are not given, warn, do not build anything, and exit with code 0
-ifeq "$(LLVMVER)" ""
-  NO_BUILD = 1
-endif
-
-ifneq "$(LLVM_UNSUPPORTED)$(LLVM_APPLE)" "00"
-  NO_BUILD = 1
-endif
-
-ifeq "$(NO_BUILD)" "1"
-  TARGETS = no_build
-else
-  TARGETS = test_shm test_deps $(PROGS) afl-clang-fast.8 test_build all_done
-endif
-
-all: $(TARGETS)
-
-ifeq "$(SHMAT_OK)" "1"
-
-test_shm:
-	@echo "[+] shmat seems to be working."
-	@rm -f .test2
-
-else
-
-test_shm:
-	@echo "[-] shmat seems not to be working, switching to mmap implementation"
-
-endif
-
-no_build:
-	@printf "%b\\n" "\\033[0;31mPrerequisites are not met, skipping build llvm_mode\\033[0m"
-
-test_deps:
-ifndef AFL_TRACE_PC
-	@echo "[*] Checking for working 'llvm-config'..."
- ifneq "$(LLVM_APPLE)" "1"
-	@type $(LLVM_CONFIG) >/dev/null 2>&1 || ( echo "[-] Oops, can't find 'llvm-config'. Install clang or set \$$LLVM_CONFIG or \$$PATH beforehand."; echo "    (Sometimes, the binary will be named llvm-config-3.5 or something like that.)"; exit 1 )
- endif
-else
-	@echo "[!] Note: using -fsanitize=trace-pc mode (this will fail with older LLVM)."
-endif
-	@echo "[*] Checking for working '$(CC)'..."
-	@type $(CC) >/dev/null 2>&1 || ( echo "[-] Oops, can't find '$(CC)'. Make sure that it's in your \$$PATH (or set \$$CC and \$$CXX)."; exit 1 )
-	@echo "[*] Checking for matching versions of '$(CC)' and '$(LLVM_CONFIG)'"
-ifneq "$(CLANGVER)" "$(LLVMVER)"
-	@echo "[!] WARNING: we have llvm-config version $(LLVMVER) and a clang version $(CLANGVER)"
-	@echo "[!] Retrying with the clang compiler from llvm: CC=`llvm-config --bindir`/clang"
-else
-	@echo "[*] We have llvm-config version $(LLVMVER) with a clang version $(CLANGVER), good."
-endif
-	@echo "[*] Checking for '../afl-showmap'..."
-	@test -f ../afl-showmap || ( echo "[-] Oops, can't find '../afl-showmap'. Be sure to compile AFL first."; exit 1 )
-	@echo "[+] All set and ready to build."
-
-afl-common.o: ../src/afl-common.c
-	$(CC) $(CFLAGS) -c $< -o $@ $(LDFLAGS)
-
-../afl-clang-fast: afl-clang-fast.c afl-common.o | test_deps
-	$(CC) $(CFLAGS) $< afl-common.o -o $@ $(LDFLAGS) -DCFLAGS_OPT=\"$(CFLAGS_OPT)\"
-	ln -sf afl-clang-fast ../afl-clang-fast++
-ifneq "$(AFL_CLANG_FLTO)" ""
-ifeq "$(LLVM_LTO)" "1"
-	ln -sf afl-clang-fast ../afl-clang-lto
-	ln -sf afl-clang-fast ../afl-clang-lto++
-endif
-endif
-
-../afl-ld: afl-ld.c
-ifneq "$(AFL_CLANG_FLTO)" ""
-ifeq "$(LLVM_LTO)" "1"
-	$(CC) $(CFLAGS) $< -o $@ $(LDFLAGS)
-	ln -sf afl-ld ../ld
-	@rm -f .test-instr
-	@-export AFL_QUIET=1 AFL_PATH=.. PATH="..:$(PATH)" ; ../afl-clang-lto -Wl,--afl -o .test-instr ../test-instr.c && echo "[+] afl-clang-lto and afl-ld seem to work fine :)" || echo "[!] WARNING: clang seems to have a hardcoded "'/bin/ld'" - check README.lto"
-	@rm -f .test-instr
-endif
-endif
-../libLLVMInsTrim.so: LLVMInsTrim.so.cc MarkNodes.cc | test_deps
-	-$(CXX) $(CLANG_CFL) -DLLVMInsTrim_EXPORTS -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< MarkNodes.cc -o $@ $(CLANG_LFL)
-
-../afl-llvm-pass.so: afl-llvm-pass.so.cc | test_deps
-	$(CXX) $(CLANG_CFL) -DLLVMInsTrim_EXPORTS -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< -o $@ $(CLANG_LFL)
-
-../afl-llvm-lto-whitelist.so: afl-llvm-lto-whitelist.so.cc
-ifeq "$(LLVM_LTO)" "1"
-	$(CXX) $(CLANG_CFL) -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< -o $@ $(CLANG_LFL)
-endif
-
-../afl-llvm-lto-instrumentation.so: afl-llvm-lto-instrumentation.so.cc MarkNodes.cc
-ifeq "$(LLVM_LTO)" "1"
-	$(CXX) $(CLANG_CFL) -Wno-writable-strings -fno-rtti -fPIC -std=$(LLVM_STDCXX) -shared $< MarkNodes.cc -o $@ $(CLANG_LFL)
-endif
-
-# laf
-../split-switches-pass.so:	split-switches-pass.so.cc | test_deps
-	$(CXX) $(CLANG_CFL) -shared $< -o $@ $(CLANG_LFL)
-../compare-transform-pass.so:	compare-transform-pass.so.cc | test_deps
-	$(CXX) $(CLANG_CFL) -shared $< -o $@ $(CLANG_LFL)
-../split-compares-pass.so:	split-compares-pass.so.cc | test_deps
-	$(CXX) $(CLANG_CFL) -shared $< -o $@ $(CLANG_LFL)
-# /laf
-
-../cmplog-routines-pass.so:	cmplog-routines-pass.cc | test_deps
-	$(CXX) $(CLANG_CFL) -shared $< -o $@ $(CLANG_LFL)
-
-../cmplog-instructions-pass.so:	cmplog-instructions-pass.cc | test_deps
-	$(CXX) $(CLANG_CFL) -shared $< -o $@ $(CLANG_LFL)
-
-../afl-llvm-rt.o: afl-llvm-rt.o.c | test_deps
-	$(CC) $(CFLAGS) -fPIC -c $< -o $@
-
-../afl-llvm-rt-32.o: afl-llvm-rt.o.c | test_deps
-	@printf "[*] Building 32-bit variant of the runtime (-m32)... "
-	@$(CC) $(CFLAGS) -m32 -fPIC -c $< -o $@ 2>/dev/null; if [ "$$?" = "0" ]; then echo "success!"; else echo "failed (that's fine)"; fi
-
-../afl-llvm-rt-64.o: afl-llvm-rt.o.c | test_deps
-	@printf "[*] Building 64-bit variant of the runtime (-m64)... "
-	@$(CC) $(CFLAGS) -m64 -fPIC -c $< -o $@ 2>/dev/null; if [ "$$?" = "0" ]; then echo "success!"; else echo "failed (that's fine)"; fi
-
-test_build: $(PROGS)
-	@echo "[*] Testing the CC wrapper and instrumentation output..."
-	unset AFL_USE_ASAN AFL_USE_MSAN AFL_INST_RATIO; AFL_QUIET=1 AFL_PATH=. AFL_CC=$(CC) AFL_LLVM_LAF_SPLIT_SWITCHES=1 AFL_LLVM_LAF_TRANSFORM_COMPARES=1 AFL_LLVM_LAF_SPLIT_COMPARES=1 ../afl-clang-fast $(CFLAGS) ../test-instr.c -o test-instr $(LDFLAGS)
-	ASAN_OPTIONS=detect_leaks=0 ../afl-showmap -m none -q -o .test-instr0 ./test-instr < /dev/null
-	echo 1 | ASAN_OPTIONS=detect_leaks=0 ../afl-showmap -m none -q -o .test-instr1 ./test-instr
-	@rm -f test-instr
-	@cmp -s .test-instr0 .test-instr1; DR="$$?"; rm -f .test-instr0 .test-instr1; if [ "$$DR" = "0" ]; then echo; echo "Oops, the instrumentation does not seem to be behaving correctly!"; echo; echo "Please post to https://github.com/AFLplusplus/AFLplusplus/issues to troubleshoot the issue."; echo; exit 1; fi
-	@echo "[+] All right, the instrumentation seems to be working!"
-
-all_done: test_build
-	@echo "[+] All done! You can now use '../afl-clang-fast' to compile programs."
-
-.NOTPARALLEL: clean
-
-vpath  % ..
-%.8: %
-	@echo .TH $* 8 `date "+%Y-%m-%d"` "afl++" > ../$@
-	@echo .SH NAME >> ../$@
-	@echo .B $* >> ../$@
-	@echo >> ../$@
-	@echo .SH SYNOPSIS >> ../$@
-	@../$* -h 2>&1 | head -n 3 | tail -n 1 | sed 's/^\.\///' >> ../$@
-	@echo >> ../$@
-	@echo .SH OPTIONS >> ../$@
-	@echo .nf >> ../$@
-	@../$* -h 2>&1 | tail -n +4 >> ../$@
-	@echo >> ../$@
-	@echo .SH AUTHOR >> ../$@
-	@echo "afl++ was written by Michal \"lcamtuf\" Zalewski and is maintained by Marc \"van Hauser\" Heuse <mh@mh-sec.de>, Heiko \"hexcoder-\" Eissfeldt <heiko.eissfeldt@hexco.de> and Andrea Fioraldi <andreafioraldi@gmail.com>" >> ../$@
-	@echo  The homepage of afl++ is: https://github.com/AFLplusplus/AFLplusplus >> ../$@
-	@echo >> ../$@
-	@echo .SH LICENSE >> ../$@
-	@echo Apache License Version 2.0, January 2004 >> ../$@
-	ln -sf afl-clang-fast.8 ../afl-clang-fast++.8
-ifneq "$(AFL_CLANG_FLTO)" ""
-ifeq "$(LLVM_LTO)" "0"
-	ln -sf afl-clang-fast.8 ../afl-clang-lto.8
-	ln -sf afl-clang-fast.8 ../afl-clang-lto++.8
-endif
-endif
-
-clean:
-	rm -f *.o *.so *~ a.out core core.[1-9][0-9]* .test2 test-instr .test-instr0 .test-instr1 *.dwo
-	rm -f $(PROGS) afl-common.o ../afl-clang-fast++ ../afl-clang-lto ../afl-clang-lto++ ../afl-clang*.8
+all:
+	@echo please use GNU make, thanks!
diff --git a/llvm_mode/README.ctx.md b/llvm_mode/README.ctx.md
new file mode 100644
index 00000000..14255313
--- /dev/null
+++ b/llvm_mode/README.ctx.md
@@ -0,0 +1,22 @@
+# AFL Context Sensitive Branch Coverage
+
+## What is this?
+
+This is an LLVM-based implementation of the context sensitive branch coverage.
+
+Basically every function gets its own ID and that ID is combined with the
+edges of the called functions.
+
+So if both function A and function B call a function C, the coverage
+collected in C will be different.
+
+In math the coverage is collected as follows:
+`map[current_location_ID ^ previous_location_ID >> 1 ^ previous_callee_ID] += 1`
+
+## Usage
+
+Set the `AFL_LLVM_INSTRUMENT=CTX` or `AFL_LLVM_CTX=1` environment variable.
+
+It is highly recommended to increase the MAP_SIZE_POW2 definition in
+config.h to at least 18 and maybe up to 20 for this as otherwise too
+many map collisions occur.
diff --git a/llvm_mode/README.instrim.md b/llvm_mode/README.instrim.md
index 5c3f32c8..b905af11 100644
--- a/llvm_mode/README.instrim.md
+++ b/llvm_mode/README.instrim.md
@@ -5,13 +5,12 @@ InsTrim: Lightweight Instrumentation for Coverage-guided Fuzzing
 ## Introduction
 
 InsTrim uses CFG and markers to instrument just what is necessary in the
-binary in llvm_mode. It is about 20-25% faster but as a cost has a lower
-path discovery.
+binary in llvm_mode. It is about 10-15% faster without disadvantages.
 
 ## Usage
 
-Set the environment variable `AFL_LLVM_INSTRIM=1` during compilation of
-the target.
+Set the environment variable `AFL_LLVM_INSTRUMENT=CFG` or `AFL_LLVM_INSTRIM=1`
+during compilation of the target.
 
 There is also an advanced mode which instruments loops in a way so that
 afl-fuzz can see which loop path has been selected but not being able to
diff --git a/llvm_mode/README.lto.md b/llvm_mode/README.lto.md
index 28b3b045..51b50544 100644
--- a/llvm_mode/README.lto.md
+++ b/llvm_mode/README.lto.md
@@ -2,16 +2,17 @@
 
 ## TLDR;
 
-1. This compile mode is very frickle if it works it is amazing, if it fails
-   - well use afl-clang-fast
+This version requires a current llvm 11 compiled from the github master.
 
-2. Use afl-clang-lto/afl-clang-lto++ because it is faster and gives better
+1. Use afl-clang-lto/afl-clang-lto++ because it is faster and gives better
    coverage than anything else that is out there in the AFL world
 
-3. You can use it together with llvm_mode: laf-intel and whitelisting
+2. You can use it together with llvm_mode: laf-intel and whitelisting
    features and can be combined with cmplog/Redqueen
 
-4. It only works with llvm 9 (and likely 10+ but is not tested there yet)
+3. It only works with llvm 11 (current github master state)
+
+4. AUTODICTIONARY feature! see below
 
 ## Introduction and problem description
 
@@ -34,33 +35,43 @@ and many dead ends until we got to this:
  * Our compiler (afl-clang-lto/afl-clang-lto++) takes care of setting the
    correct LTO options and runs our own afl-ld linker instead of the system
    linker
- * Our linker collects all LTO files to link and instruments them so that
+ * The LLVM linker collects all LTO files to link and instruments them so that
    we have non-colliding edge overage
  * We use a new (for afl) edge coverage - which is the same as in llvm
    -fsanitize=coverage edge coverage mode :)
- * after inserting our instrumentation in all interesting edges we link
-   all parts of the program together to our executable
 
 The result:
- * 10-15% speed gain compared to llvm_mode
+ * 10-20% speed gain compared to llvm_mode
  * guaranteed non-colliding edge coverage :-)
  * The compile time especially for libraries can be longer
 
 Example build output from a libtiff build:
 ```
-/bin/bash ../libtool  --tag=CC   --mode=link afl-clang-lto  -g -O2 -Wall -W   -o thumbnail thumbnail.o ../libtiff/libtiff.la ../port/libport.la -llzma -ljbig -ljpeg -lz -lm 
 libtool: link: afl-clang-lto -g -O2 -Wall -W -o thumbnail thumbnail.o  ../libtiff/.libs/libtiff.a ../port/.libs/libport.a -llzma -ljbig -ljpeg -lz -lm
-afl-clang-lto++2.62d by Marc "vanHauser" Heuse <mh@mh-sec.de>
-afl-ld++2.62d by Marc "vanHauser" Heuse <mh@mh-sec.de> (level 0)
-[+] Running ar unpacker on /prg/tests/lto/tiff-4.0.4/tools/../libtiff/.libs/libtiff.a into /tmp/.afl-3914343-1583339800.dir
-[+] Running ar unpacker on /prg/tests/lto/tiff-4.0.4/tools/../port/.libs/libport.a into /tmp/.afl-3914343-1583339800.dir
-[+] Running bitcode linker, creating /tmp/.afl-3914343-1583339800-1.ll
-[+] Performing optimization via opt, creating /tmp/.afl-3914343-1583339800-2.bc
-[+] Performing instrumentation via opt, creating /tmp/.afl-3914343-1583339800-3.bc
-afl-llvm-lto++2.62d by Marc "vanHauser" Heuse <mh@mh-sec.de>
-[+] Instrumented 15833 locations with no collisions (on average 1767 collisions would be in afl-gcc/afl-clang-fast) (non-hardened mode).
-[+] Running real linker /bin/x86_64-linux-gnu-ld
-[+] Linker was successful
+afl-clang-lto++2.63d by Marc "vanHauser" Heuse <mh@mh-sec.de> in mode LTO
+afl-llvm-lto++2.63d by Marc "vanHauser" Heuse <mh@mh-sec.de>
+AUTODICTIONARY: 11 strings found
+[+] Instrumented 12071 locations with no collisions (on average 1046 collisions would be in afl-gcc/afl-clang-fast) (non-hardened mode).
+```
+
+## Building llvm 11
+
+```
+$ sudo apt install binutils-dev  # this is *essential*!
+$ git clone https://github.com/llvm/llvm-project
+$ cd llvm-project
+$ mkdir build
+$ cd build
+$ cmake -DLLVM_ENABLE_PROJECTS='clang;clang-tools-extra;compiler-rt;libclc;libcxx;libcxxabi;libunwind;lld' -DCMAKE_BUILD_TYPE=Release -DLLVM_BINUTILS_INCDIR=/usr/include/ ../llvm/
+$ make -j $(nproc)
+$ export PATH=`pwd`/bin:$PATH
+$ export LLVM_CONFIG=`pwd`/bin/llvm-config
+$ cd /path/to/AFLplusplus/
+$ make
+$ cd llvm_mode
+$ make
+$ cd ..
+$ make install
 ```
 
 ## How to use afl-clang-lto
@@ -77,6 +88,13 @@ CC=afl-clang-lto CXX=afl-clang-lto++ ./configure
 make
 ```
 
+## AUTODICTIONARY feature
+
+Setting `AFL_LLVM_LTO_AUTODICTIONARY` will generate a dictionary in the
+target binary based on string compare and memory compare functions.
+afl-fuzz will automatically get these transmitted when starting to fuzz.
+This improves coverage on a lot of targets.
+
 ## Potential issues
 
 ### compiling libraries fails
@@ -94,145 +112,16 @@ AR=llvm-ar RANLIB=llvm-ranlib CC=afl-clang-lto CXX=afl-clang-lto++ ./configure -
 ```
 and on some target you have to to AR=/RANLIB= even for make as the configure script does not save it ...
 
-### "linking globals named '...': symbol multiply defined" error
-
-The target program is using multiple global variables or functions with the
-same name. This is a common error when compiling a project with LTO, and
-the fix is `-Wl,--allow-multiple-definition` - however llvm-link which we
-need to link all llvm IR LTO files does not support this - yet (hopefully).
-Hence if you see this error either you have to remove the duplicate global
-variable (think `#ifdef` ...) or you are out of luck. :-(
-
-### "expected top-level entity" + binary ouput error
-
-This happens if multiple .a archives are to be linked and they contain the
-same object filenames, the first in LTO form, the other in ELF form.
-This can not be fixed programmatically, but can be fixed by hand.
-You can try to delete the file from either archive
-(`llvm-ar d <archive>.a <file>.o`) or performing the llvm-linking, optimizing
-and instrumentation by hand (see below).
-
-### "undefined reference to ..."
-
-This *can* be the opposite situation of the "expected top-level entity" error -
-the library with the ELF file is before the LTO library.
-However it can also be a bug in the program - try to compile it normally. If 
-fails then it is a bug in the program.
-Solutions: You can try to delete the file from either archive, e.g.
-(`llvm-ar d <archive>.a <file>.o`) or performing the llvm-linking, optimizing
-and instrumentation by hand (see below).
-
-### "File format not recognized"
-
-This happens if the build system has fixed LDFLAGS, CPPFLAGS, CXXFLAGS and/or
-CFLAGS. Ensure that they all contain the `-flto` flag that afl-clang-lto was
-compiled with (you can see that by typing `afl-clang-lto -h` and inspecting
-the last line of the help output) and add them otherwise
-
-### clang is hardcoded to /bin/ld
-
-Some clang packages have 'ld' hardcoded to /bin/ld. This is an issue as this
-prevents "our" afl-ld being called.
-
--fuse-ld=/path/to/afl-ld should be set through makefile magic in llvm_mode - 
-if it is supported - however if this fails you can try:
-```
-LDFLAGS=-fuse-ld=</path/to/afl-ld
-```
-
-As workaround attempt #2 you will have to switch /bin/ld:
-```
-  mv /bin/ld /bin/ld.orig
-  cp afl-ld /bin/ld
-```
-This can result in two problems though:
-
- !1!
-  When compiling afl-ld, the build process looks at where the /bin/ld link
-  is going to. So when the workaround was applied and a recompiling afl-ld
-  is performed then the link is gone and the new afl-ld clueless where
-  the real ld is.
-  In this case set AFL_REAL_LD=/bin/ld.orig
-
- !2! 
- When you install an updated gcc/clang/... package, your OS might restore
- the ld link.
-
-### Performing the steps by hand
-
-It is possible to perform all the steps afl-ld by hand to workaround issues
-in the target.
-
-1. Recompile with AFL_DEBUG=1 and collect the afl-clang-lto command that fails
-   e.g.: `AFL_DEBUG=1 make 2>&1 | grep afl-clang-lto | tail -n 1`
-
-2. run this command prepended with AFL_DEBUG=1 and collect the afl-ld command
-   parameters, e.g. `AFL_DEBUG=1 afl-clang-lto[++] .... | grep /afl/ld`
-
-3. for every .a archive you want to instrument unpack it into a seperate
-   directory, e.g.
-   `mkdir archive1.dir ; cd archive1.dir ; llvm-link x ../<archive>.a`
-
-4. run `file archive*.dir/*.o` and make two lists, one containing all ELF files
-   and one containing all LLVM IR bitcode files.
-   You do the same for all .o files of the ../afl/ld command options
-
-5. Create a single bitcode file by using llvm-link, e.g.
-   `llvm-link -o all-bitcode.bc <list of all LLVM IR .o files>`
-   If this fails it is game over - or you modify the source code
-
-6. Run the optimizer on the new bitcode file:
-   `opt -O3 --polly -o all-optimized.bc all-bitcode.bc`
-
-7. Instrument the optimized bitcode file:
-   `opt --load=$AFL_PATH/afl-llvm-lto-instrumentation.so --disable-opt --afl-lto all-optimized.bc -o all-instrumented.bc
-
-8. If the parameter `--allow-multiple-definition` is not in the list, add it
-   as first command line option.
-
-9. Link everything together.
-   a) You use the afl-ld command and instead of e.g. `/usr/local/lib/afl/ld`
-      you replace that with `ld`, the real linker.
-   b) Every .a archive you instrumented files from you remove the <archive>.a
-      or -l<archive> from the command
-   c) If you have entries in your ELF files list (see step 4), you put them to
-      the command line - but them in the same order!
-   d) put the all-instrumented.bc before the first library or .o file
-   e) run the command and hope it compiles, if it doesn't you have to analyze
-      what the issue is and fix that in the approriate step above.
-
-Yes this is long and complicated. That is why there is afl-ld doing this and
-that why this can easily fail and not all different ways how it *can* fail can
-be implemented ...
-
 ### compiling programs still fail
 
 afl-clang-lto is still work in progress.
-Complex targets are still likely not to compile and this needs to be fixed.
 Please report issues at:
 [https://github.com/AFLplusplus/AFLplusplus/issues/226](https://github.com/AFLplusplus/AFLplusplus/issues/226)
 
-Known issues:
-* ffmpeg
-* bogofilter
-* libjpeg-turbo-1.3.1
-
 ## Upcoming Work
 
-1. Currently the LTO whitelist feature does not allow to not instrument main, start and init functions
-2. Modify the forkserver + afl-fuzz so that only the necessary map size is
-   loaded and used - and communicated to afl-fuzz too.
-   Result: faster fork in the target and faster map analysis in afl-fuzz
-   => more speed :-)
-
-## Tested and working targets
-
-* libpng-1.2.53
-* libxml2-2.9.2
-* tiff-4.0.4
-* unrar-nonfree-5.6.6
-* exiv 0.27
-* jpeg-6b
+1. Currently the LTO whitelist feature does not allow to not instrument main,
+   start and init functions
 
 ## History
 
@@ -252,11 +141,17 @@ very difficult with a program that has so many paths and therefore so many
 dependencies. At lot of stratgies were implemented - and failed.
 And then sat solvers were tried, but with over 10.000 variables that turned
 out to be a dead-end too.
+
 The final idea to solve this came from domenukk who proposed to insert a block
 into an edge and then just use incremental counters ... and this worked!
 After some trials and errors to implement this vanhauser-thc found out that
 there is actually an llvm function for this: SplitEdge() :-)
+
 Still more problems came up though as this only works without bugs from
 llvm 9 onwards, and with high optimization the link optimization ruins
 the instrumented control flow graph.
-As long as there are no larger changes in llvm this all should work well now ...
+
+This is all now fixed with llvm 11. The llvm's own linker is now able to
+load passes and this bypasses all problems we had.
+
+Happy end :)
diff --git a/llvm_mode/README.md b/llvm_mode/README.md
index e6c47c9c..607350fb 100644
--- a/llvm_mode/README.md
+++ b/llvm_mode/README.md
@@ -92,13 +92,33 @@ which C/C++ files to actually instrument. See [README.whitelist](README.whitelis
 
 For splitting memcmp, strncmp, etc. please see [README.laf-intel](README.laf-intel.md)
 
-Then there is an optimized instrumentation strategy that uses CFGs and
-markers to just instrument what is needed. This increases speed by 20-25%
-however has a lower path discovery.
-If you want to use this, set AFL_LLVM_INSTRIM=1
+Then there are different ways of instrumenting the target:
+
+1. There is an optimized instrumentation strategy that uses CFGs and
+markers to just instrument what is needed. This increases speed by 10-15%
+without any disadvantages.
+If you want to use this, set AFL_LLVM_INSTRUMENT=CFG or AFL_LLVM_INSTRIM=1
 See [README.instrim](README.instrim.md)
 
-A new instrumentation called CmpLog is also available as an alternative to
+2. An even better instrumentation strategy uses LTO and link time
+instrumentation. Note that not all targets can compile in this mode, however
+if it works it is the best option you can use.
+Simply use afl-clang-lto/afl-clang-lto++ to use this option.
+See [README.lto](README.lto.md)
+
+3. Alternatively you can choose a completely different coverage method:
+
+3a. N-GRAM coverage - which combines the previous visited edges with the
+current one. This explodes the map but on the other hand has proven to be
+effective for fuzzing.
+See [README.ngram](README.ngram.md)
+
+3b. Context sensitive coverage - which combines the visited edges with an
+individual caller ID (the function that called the current one).
+See [README.ctx](README.ctx.md)
+
+Then - additionally to one of the instrumentation options above - there is
+a very effective new instrumentation option called CmpLog as an alternative to
 laf-intel that allow AFL++ to apply mutations similar to Redqueen.
 See [README.cmplog](README.cmplog.md)
 
@@ -109,12 +129,18 @@ is not optimal and was only fixed in llvm 9.
 You can set this with AFL_LLVM_NOT_ZERO=1
 See [README.neverzero](README.neverzero.md)
 
-## 4) Gotchas, feedback, bugs
+## 4) Snapshot feature
+
+To speed up fuzzing you can use a linux loadable kernel module which enables
+a snapshot feature.
+See [README.snapshot](README.snapshot.md)
+
+## 5) Gotchas, feedback, bugs
 
 This is an early-stage mechanism, so field reports are welcome. You can send bug
 reports to <afl-users@googlegroups.com>.
 
-## 5) Bonus feature #1: deferred initialization
+## 6) Bonus feature #1: deferred initialization
 
 AFL tries to optimize performance by executing the targeted binary just once,
 stopping it just before main(), and then cloning this "master" process to get
@@ -162,7 +188,7 @@ will keep working normally when compiled with a tool other than afl-clang-fast.
 Finally, recompile the program with afl-clang-fast (afl-gcc or afl-clang will
 *not* generate a deferred-initialization binary) - and you should be all set!
 
-## 6) Bonus feature #2: persistent mode
+## 7) Bonus feature #2: persistent mode
 
 Some libraries provide APIs that are stateless, or whose state can be reset in
 between processing different input files. When such a reset is performed, a
diff --git a/llvm_mode/README.ngram.md b/llvm_mode/README.ngram.md
new file mode 100644
index 00000000..de3ba432
--- /dev/null
+++ b/llvm_mode/README.ngram.md
@@ -0,0 +1,28 @@
+# AFL N-Gram Branch Coverage
+
+## Source
+
+This is an LLVM-based implementation of the n-gram branch coverage proposed in
+the paper ["Be Sensitive and Collaborative: Analyzing Impact of Coverage Metrics
+in Greybox Fuzzing"](https://www.usenix.org/system/files/raid2019-wang-jinghan.pdf),
+by Jinghan Wang, et al.
+
+Note that the original implementation (available
+[here](https://github.com/bitsecurerlab/afl-sensitive))
+is built on top of AFL's QEMU mode.
+This is essentially a port that uses LLVM vectorized instructions to achieve
+the same results when compiling source code.
+
+In math the branch coverage is performed as follows:
+`map[current_location ^ prev_location[0] >> 1 ^ prev_location[1] >> 1 ^ ... up to n-1] += 1`
+
+## Usage
+
+The size of `n` (i.e., the number of branches to remember) is an option
+that is specified either in the `AFL_LLVM_INSTRUMENT=NGRAM-{value}` or the
+`AFL_LLVM_NGRAM_SIZE` environment variable.
+Good values are 2, 4 or 8, valid are 2-16.
+
+It is highly recommended to increase the MAP_SIZE_POW2 definition in
+config.h to at least 18 and maybe up to 20 for this as otherwise too
+many map collisions occur.
diff --git a/llvm_mode/README.snapshot.md b/llvm_mode/README.snapshot.md
new file mode 100644
index 00000000..9c12a8ba
--- /dev/null
+++ b/llvm_mode/README.snapshot.md
@@ -0,0 +1,16 @@
+# AFL++ snapshot feature
+
+Snapshotting is a feature that makes a snapshot from a process and then
+restores its state, which is faster than forking it again.
+
+All targets compiled with llvm_mode are automatically enabled for the
+snapshot feature.
+
+To use the snapshot feature for fuzzing compile and load this kernel
+module: [https://github.com/AFLplusplus/AFL-Snapshot-LKM](https://github.com/AFLplusplus/AFL-Snapshot-LKM)
+
+Note that it has little value for persistent (__AFL_LOOP) fuzzing.
+
+## Notes
+
+Snapshot does not work with multithreaded targets yet. Still in WIP, it is now usable only for single threaded applications.
diff --git a/llvm_mode/afl-clang-fast.c b/llvm_mode/afl-clang-fast.c
index 313a2533..b7ef1858 100644
--- a/llvm_mode/afl-clang-fast.c
+++ b/llvm_mode/afl-clang-fast.c
@@ -29,11 +29,13 @@
 #include "types.h"
 #include "debug.h"
 #include "alloc-inl.h"
+#include "llvm-ngram-coverage.h"
 
 #include <stdio.h>
 #include <unistd.h>
 #include <stdlib.h>
 #include <string.h>
+#include <strings.h>
 #include <limits.h>
 #include <assert.h>
 
@@ -41,14 +43,33 @@ static u8 * obj_path;                  /* Path to runtime libraries         */
 static u8 **cc_params;                 /* Parameters passed to the real CC  */
 static u32  cc_par_cnt = 1;            /* Param count, including argv0      */
 static u8   llvm_fullpath[PATH_MAX];
-static u8   lto_mode;
+static u8   instrument_mode;
 static u8 * lto_flag = AFL_CLANG_FLTO;
 static u8 * march_opt = CFLAGS_OPT;
 static u8   debug;
 static u8   cwd[4096];
 static u8   cmplog_mode;
 u8          use_stdin = 0;                                         /* dummy */
-u8          be_quiet = 0;
+
+enum {  // instrumentation modes; entries with the same value are aliases
+
+  INSTRUMENT_CLASSIC = 0,
+  INSTRUMENT_AFL = 0,
+  INSTRUMENT_DEFAULT = 0,
+  INSTRUMENT_PCGUARD = 1,
+  INSTRUMENT_INSTRIM = 2,
+  INSTRUMENT_CFG = 2,
+  INSTRUMENT_LTO = 3,
+  INSTRUMENT_CTX = 4,
+  INSTRUMENT_NGRAM = 5  // base value: mode = 5 + ngram size (2-16), i.e. 7-21
+
+};
+
+char instrument_mode_string[6][16] = {  // names listed in enum value order 0-4
+
+    "DEFAULT", "PCGUARD", "CFG", "LTO", "CTX",
+
+};
 
 u8 *getthecwd() {
 
@@ -135,9 +156,9 @@ static void find_obj(u8 *argv0) {
 
 /* Copy argv to cc_params, making the necessary edits. */
 
-static void edit_params(u32 argc, char **argv) {
+static void edit_params(u32 argc, char **argv, char **envp) {
 
-  u8  fortify_set = 0, asan_set = 0, x_set = 0, maybe_linking = 1, bit_mode = 0;
+  u8  fortify_set = 0, asan_set = 0, x_set = 0, bit_mode = 0;
   u8  has_llvm_config = 0;
   u8 *name;
 
@@ -151,20 +172,11 @@ static void edit_params(u32 argc, char **argv) {
 
   has_llvm_config = (strlen(LLVM_BINDIR) > 0);
 
-  if (!strncmp(name, "afl-clang-lto", strlen("afl-clang-lto"))) {
-
-#ifdef USE_TRACE_PC
-    FATAL("afl-clang-lto does not work with TRACE_PC mode");
-#endif
+  if (instrument_mode == INSTRUMENT_LTO)
     if (lto_flag[0] != '-')
       FATAL(
-          "afl-clang-lto not possible because Makefile magic did not identify "
-          "the correct -flto flag");
-    if (getenv("AFL_LLVM_INSTRIM") != NULL)
-      FATAL("afl-clang-lto does not work with InsTrim mode");
-    lto_mode = 1;
-
-  }
+          "Using afl-clang-lto is not possible because Makefile magic did not "
+          "identify the correct -flto flag");
 
   if (!strcmp(name, "afl-clang-fast++") || !strcmp(name, "afl-clang-lto++")) {
 
@@ -173,7 +185,7 @@ static void edit_params(u32 argc, char **argv) {
       snprintf(llvm_fullpath, sizeof(llvm_fullpath), "%s/clang++", LLVM_BINDIR);
     else
       sprintf(llvm_fullpath, "clang++");
-    cc_params[0] = alt_cxx ? alt_cxx : (u8 *)llvm_fullpath;
+    cc_params[0] = alt_cxx && *alt_cxx ? alt_cxx : (u8 *)llvm_fullpath;
 
   } else {
 
@@ -182,7 +194,7 @@ static void edit_params(u32 argc, char **argv) {
       snprintf(llvm_fullpath, sizeof(llvm_fullpath), "%s/clang", LLVM_BINDIR);
     else
       sprintf(llvm_fullpath, "clang");
-    cc_params[0] = alt_cc ? alt_cc : (u8 *)llvm_fullpath;
+    cc_params[0] = alt_cc && *alt_cc ? alt_cc : (u8 *)llvm_fullpath;
 
   }
 
@@ -255,23 +267,7 @@ static void edit_params(u32 argc, char **argv) {
 
   }
 
-#ifdef USE_TRACE_PC
-
-  cc_params[cc_par_cnt++] =
-      "-fsanitize-coverage=trace-pc-guard";  // edge coverage by default
-  // cc_params[cc_par_cnt++] = "-mllvm";
-  // cc_params[cc_par_cnt++] =
-  // "-fsanitize-coverage=trace-cmp,trace-div,trace-gep";
-  // cc_params[cc_par_cnt++] = "-sanitizer-coverage-block-threshold=0";
-#else
-
-  if (lto_mode) {
-
-    char *old_path = getenv("PATH");
-    char *new_path = alloc_printf("%s:%s", AFL_PATH, old_path);
-
-    setenv("PATH", new_path, 1);
-    setenv("AFL_LD", "1", 1);
+  if (instrument_mode == INSTRUMENT_LTO) {
 
     if (getenv("AFL_LLVM_WHITELIST") != NULL) {
 
@@ -283,43 +279,38 @@ static void edit_params(u32 argc, char **argv) {
 
     }
 
-#ifdef AFL_CLANG_FUSELD
-    cc_params[cc_par_cnt++] = alloc_printf("-fuse-ld=%s/afl-ld", AFL_PATH);
-#endif
-
-    cc_params[cc_par_cnt++] = "-B";
-    cc_params[cc_par_cnt++] = AFL_PATH;
-
+    cc_params[cc_par_cnt++] = alloc_printf("-fuse-ld=%s", AFL_REAL_LD);
+    cc_params[cc_par_cnt++] = "-Wl,--allow-multiple-definition";
+    cc_params[cc_par_cnt++] = alloc_printf(
+        "-Wl,-mllvm=-load=%s/afl-llvm-lto-instrumentation.so", obj_path);
     cc_params[cc_par_cnt++] = lto_flag;
 
-  } else
+  } else {
 
-      if (getenv("USE_TRACE_PC") || getenv("AFL_USE_TRACE_PC") ||
-          getenv("AFL_LLVM_USE_TRACE_PC") || getenv("AFL_TRACE_PC")) {
+    if (instrument_mode == INSTRUMENT_PCGUARD) {
 
-    cc_params[cc_par_cnt++] =
-        "-fsanitize-coverage=trace-pc-guard";  // edge coverage by default
+      cc_params[cc_par_cnt++] =
+          "-fsanitize-coverage=trace-pc-guard";  // edge coverage by default
 
-  } else {
+    } else {
 
-    cc_params[cc_par_cnt++] = "-Xclang";
-    cc_params[cc_par_cnt++] = "-load";
-    cc_params[cc_par_cnt++] = "-Xclang";
-    if (getenv("AFL_LLVM_INSTRIM") != NULL || getenv("INSTRIM_LIB") != NULL)
-      cc_params[cc_par_cnt++] = alloc_printf("%s/libLLVMInsTrim.so", obj_path);
-    else
-      cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-pass.so", obj_path);
+      cc_params[cc_par_cnt++] = "-Xclang";
+      cc_params[cc_par_cnt++] = "-load";
+      cc_params[cc_par_cnt++] = "-Xclang";
+      if (instrument_mode == INSTRUMENT_CFG)
+        cc_params[cc_par_cnt++] =
+            alloc_printf("%s/libLLVMInsTrim.so", obj_path);
+      else
+        cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-pass.so", obj_path);
 
-  }
+    }
 
-#endif                                                     /* ^USE_TRACE_PC */
+  }
 
   cc_params[cc_par_cnt++] = "-Qunused-arguments";
 
   /* Detect stray -v calls from ./configure scripts. */
 
-  if (argc == 1 && !strcmp(argv[1], "-v")) maybe_linking = 0;
-
   while (--argc) {
 
     u8 *cur = *(++argv);
@@ -330,16 +321,11 @@ static void edit_params(u32 argc, char **argv) {
 
     if (!strcmp(cur, "-x")) x_set = 1;
 
-    if (!strcmp(cur, "-c") || !strcmp(cur, "-S") || !strcmp(cur, "-E"))
-      maybe_linking = 0;
-
     if (!strcmp(cur, "-fsanitize=address") || !strcmp(cur, "-fsanitize=memory"))
       asan_set = 1;
 
     if (strstr(cur, "FORTIFY_SOURCE")) fortify_set = 1;
 
-    if (!strcmp(cur, "-shared")) maybe_linking = 0;
-
     if (!strcmp(cur, "-Wl,-z,defs") || !strcmp(cur, "-Wl,--no-undefined"))
       continue;
 
@@ -389,14 +375,21 @@ static void edit_params(u32 argc, char **argv) {
 
   }
 
-#ifdef USE_TRACE_PC
+  if (getenv("AFL_USE_CFISAN")) {
 
-  if (getenv("USE_TRACE_PC") || getenv("AFL_USE_TRACE_PC") ||
-      getenv("AFL_LLVM_USE_TRACE_PC") || getenv("AFL_TRACE_PC"))
-    if (getenv("AFL_INST_RATIO"))
-      FATAL("AFL_INST_RATIO not available at compile time with 'trace-pc'.");
+    if (instrument_mode != INSTRUMENT_LTO) {
+
+      uint32_t i = 0, found = 0;
+      while (envp[i] != NULL && !found)
+        if (strncmp("-flto", envp[i++], 5) == 0) found = 1;
+      if (!found) cc_params[cc_par_cnt++] = "-flto";
+
+    }
+
+    cc_params[cc_par_cnt++] = "-fsanitize=cfi";
+    cc_params[cc_par_cnt++] = "-fvisibility=hidden";
 
-#endif                                                      /* USE_TRACE_PC */
+  }
 
   if (!getenv("AFL_DONT_OPTIMIZE")) {
 
@@ -408,7 +401,11 @@ static void edit_params(u32 argc, char **argv) {
 
   }
 
-  if (getenv("AFL_NO_BUILTIN")) {
+  if (getenv("AFL_NO_BUILTIN") || getenv("AFL_LLVM_LAF_TRANSFORM_COMPARES") ||
+      getenv("LAF_TRANSFORM_COMPARES") ||
+      (instrument_mode == INSTRUMENT_LTO &&
+       (getenv("AFL_LLVM_LTO_AUTODICTIONARY") ||
+        getenv("AFL_LLVM_AUTODICTIONARY")))) {
 
     cc_params[cc_par_cnt++] = "-fno-builtin-strcmp";
     cc_params[cc_par_cnt++] = "-fno-builtin-strncmp";
@@ -477,44 +474,57 @@ static void edit_params(u32 argc, char **argv) {
 #endif                                                        /* ^__APPLE__ */
       "_I(); } while (0)";
 
-  if (maybe_linking) {
+  if (x_set) {
 
-    if (x_set) {
+    cc_params[cc_par_cnt++] = "-x";
+    cc_params[cc_par_cnt++] = "none";
 
-      cc_params[cc_par_cnt++] = "-x";
-      cc_params[cc_par_cnt++] = "none";
-
-    }
+  }
 
 #ifndef __ANDROID__
-    switch (bit_mode) {
-
-      case 0:
-        cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-rt.o", obj_path);
-        break;
-
-      case 32:
-        cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-rt-32.o", obj_path);
-
+  switch (bit_mode) {
+
+    case 0:
+      cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-rt.o", obj_path);
+      if (instrument_mode == INSTRUMENT_LTO)
+        cc_params[cc_par_cnt++] =
+            alloc_printf("%s/afl-llvm-rt-lto.o", obj_path);
+      break;
+
+    case 32:
+      cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-rt-32.o", obj_path);
+      if (access(cc_params[cc_par_cnt - 1], R_OK))
+        FATAL("-m32 is not supported by your compiler");
+      if (instrument_mode == INSTRUMENT_LTO) {
+
+        cc_params[cc_par_cnt++] =
+            alloc_printf("%s/afl-llvm-rt-lto-32.o", obj_path);
         if (access(cc_params[cc_par_cnt - 1], R_OK))
           FATAL("-m32 is not supported by your compiler");
 
-        break;
+      }
 
-      case 64:
-        cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-rt-64.o", obj_path);
+      break;
 
+    case 64:
+      cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-rt-64.o", obj_path);
+      if (access(cc_params[cc_par_cnt - 1], R_OK))
+        FATAL("-m64 is not supported by your compiler");
+      if (instrument_mode == INSTRUMENT_LTO) {
+
+        cc_params[cc_par_cnt++] =
+            alloc_printf("%s/afl-llvm-rt-lto-64.o", obj_path);
         if (access(cc_params[cc_par_cnt - 1], R_OK))
           FATAL("-m64 is not supported by your compiler");
 
-        break;
-
-    }
+      }
 
-#endif
+      break;
 
   }
 
+#endif
+
   cc_params[cc_par_cnt] = NULL;
 
 }
@@ -524,7 +534,7 @@ static void edit_params(u32 argc, char **argv) {
 int main(int argc, char **argv, char **envp) {
 
   int   i;
-  char *callname = "afl-clang-fast";
+  char *callname = "afl-clang-fast", *ptr;
 
   if (getenv("AFL_DEBUG")) {
 
@@ -535,87 +545,222 @@ int main(int argc, char **argv, char **envp) {
 
     be_quiet = 1;
 
-  if (strstr(argv[0], "afl-clang-lto") != NULL) callname = "afl-clang-lto";
+#ifdef USE_TRACE_PC
+  instrument_mode = INSTRUMENT_PCGUARD;
+#endif
 
-  if (argc < 2 || strcmp(argv[1], "-h") == 0) {
+  if ((ptr = getenv("AFL_LLVM_INSTRUMENT")) != NULL) {
+    /* Mode keywords are matched case-insensitively by prefix. */
+    if (strncasecmp(ptr, "cfg", strlen("cfg")) == 0 ||
+        strncasecmp(ptr, "instrim", strlen("instrim")) == 0)
+      instrument_mode = INSTRUMENT_CFG;
+    else if (strncasecmp(ptr, "pc-guard", strlen("pc-guard")) == 0 ||
+             strncasecmp(ptr, "pcguard", strlen("pcguard")) == 0)
+      instrument_mode = INSTRUMENT_PCGUARD;
+    else if (strncasecmp(ptr, "lto", strlen("lto")) == 0)
+      instrument_mode = INSTRUMENT_LTO;
+    else if (strncasecmp(ptr, "ctx", strlen("ctx")) == 0) {
+
+      instrument_mode = INSTRUMENT_CTX;
+      setenv("AFL_LLVM_CTX", "1", 1);
+
+    } else if (strncasecmp(ptr, "ngram", strlen("ngram")) == 0) {
+      /* Size follows the keyword (e.g. "ngram-4") or AFL_LLVM_NGRAM_SIZE. */
+      ptr += strlen("ngram");
+      while (*ptr && (*ptr < '0' || *ptr > '9'))
+        ptr++;
+      if (!*ptr)
+        if ((ptr = getenv("AFL_LLVM_NGRAM_SIZE")) == NULL)
+          FATAL(
+              "you must set the NGRAM size with (e.g. for value 2) "
+              "AFL_LLVM_INSTRUMENT=ngram-2");
+      instrument_mode = INSTRUMENT_NGRAM + atoi(ptr);
+      if (instrument_mode < INSTRUMENT_NGRAM + 2 ||
+          instrument_mode > INSTRUMENT_NGRAM + NGRAM_SIZE_MAX)
+        FATAL(
+            "NGRAM instrumentation mode must be between 2 and NGRAM_SIZE_MAX "
+            "(%u)",
+            NGRAM_SIZE_MAX);
+      /* Export the validated size through the environment. */
+      ptr = alloc_printf("%u", instrument_mode - INSTRUMENT_NGRAM);
+      setenv("AFL_LLVM_NGRAM_SIZE", ptr, 1);
+
+    } else if (strncasecmp(ptr, "classic", strlen("classic")) != 0 &&
+               /* reject only values matching none of the default aliases */
+               strncasecmp(ptr, "default", strlen("default")) != 0 &&
+               strncasecmp(ptr, "afl", strlen("afl")) != 0)
+      FATAL("unknown AFL_LLVM_INSTRUMENT value: %s", ptr);
 
-#ifdef USE_TRACE_PC
-    printf(cCYA "afl-clang-fast" VERSION cRST
-                " [tpcg] by <lszekeres@google.com>\n")
-#else
-    if (strstr(argv[0], "afl-clang-lto") == NULL)
+  }
+
+  if (getenv("USE_TRACE_PC") || getenv("AFL_USE_TRACE_PC") ||
+      getenv("AFL_LLVM_USE_TRACE_PC") || getenv("AFL_TRACE_PC")) {
+    /* Legacy switches: pick PCGUARD if mode is still 0, abort on a clash. */
+    if (instrument_mode == 0)
+      instrument_mode = INSTRUMENT_PCGUARD;
+    else if (instrument_mode != INSTRUMENT_PCGUARD)
+      FATAL("you can not set AFL_LLVM_INSTRUMENT and AFL_TRACE_PC together");
+
+  }
+
+  if (getenv("AFL_LLVM_INSTRIM") || getenv("INSTRIM") ||
+      getenv("INSTRIM_LIB")) {
+    /* Legacy switches: pick CFG if mode is still 0, abort on a clash. */
+    if (instrument_mode == 0)
+      instrument_mode = INSTRUMENT_CFG;
+    else if (instrument_mode != INSTRUMENT_CFG)
+      FATAL(
+          "you can not set AFL_LLVM_INSTRUMENT and AFL_LLVM_INSTRIM together");
+
+  }
+
+  if (getenv("AFL_LLVM_CTX")) {
+    /* Legacy switch: pick CTX if mode is still 0, abort on a clash. */
+    if (instrument_mode == 0)
+      instrument_mode = INSTRUMENT_CTX;
+    else if (instrument_mode != INSTRUMENT_CTX)
+      FATAL("you can not set AFL_LLVM_INSTRUMENT and AFL_LLVM_CTX together");
+
+  }
+
+  if (getenv("AFL_LLVM_NGRAM_SIZE")) {
+    /* Legacy variable: must agree with any mode AFL_LLVM_INSTRUMENT chose. */
+    if (instrument_mode == 0) {
+
+      instrument_mode = INSTRUMENT_NGRAM + atoi(getenv("AFL_LLVM_NGRAM_SIZE"));
+      if (instrument_mode < INSTRUMENT_NGRAM + 2 ||
+          instrument_mode > INSTRUMENT_NGRAM + NGRAM_SIZE_MAX)
+        FATAL(
+            "NGRAM instrumentation mode must be between 2 and NGRAM_SIZE_MAX "
+            "(%u)",
+            NGRAM_SIZE_MAX);
+    } else if (instrument_mode < INSTRUMENT_NGRAM ||
+               instrument_mode !=
+                   INSTRUMENT_NGRAM + atoi(getenv("AFL_LLVM_NGRAM_SIZE")))
+      FATAL(
+          "you can not set AFL_LLVM_INSTRUMENT and AFL_LLVM_NGRAM_SIZE "
+          "together");
+
+  }
+  /* ptr now names the selected mode for the banners and help text below. */
+  if (instrument_mode < INSTRUMENT_NGRAM)
+    ptr = instrument_mode_string[instrument_mode];
+  else
+    ptr = alloc_printf("NGRAM-%u", instrument_mode - INSTRUMENT_NGRAM);
+
+  if (strstr(argv[0], "afl-clang-lto") != NULL) {
+    /* Invoked as afl-clang-lto: use LTO unless a different mode was set. */
+    if (instrument_mode == 0 || instrument_mode == INSTRUMENT_LTO) {
 
-      printf(cCYA "afl-clang-fast" VERSION cRST " by <lszekeres@google.com>\n");
+      callname = "afl-clang-lto";
+      instrument_mode = INSTRUMENT_LTO;
+      ptr = instrument_mode_string[instrument_mode];
 
-    else {
+    } else {
 
-      printf(cCYA "afl-clang-lto" VERSION cRST
-                  "  by Marc \"vanHauser\" Heuse <mh@mh-sec.de>\n");
+      if (!be_quiet)
+        WARNF("afl-clang-lto called with mode %s, using that mode instead",
+              ptr);
 
     }
 
-#endif                                                     /* ^USE_TRACE_PC */
-
-        SAYF(
-            "\n"
-            "%s[++] [options]\n"
-            "\n"
-            "This is a helper application for afl-fuzz. It serves as a drop-in "
-            "replacement\n"
-            "for clang, letting you recompile third-party code with the "
-            "required "
-            "runtime\n"
-            "instrumentation. A common use pattern would be one of the "
-            "following:\n\n"
-
-            "  CC=%s/afl-clang-fast ./configure\n"
-            "  CXX=%s/afl-clang-fast++ ./configure\n\n"
-
-            "In contrast to the traditional afl-clang tool, this version is "
-            "implemented as\n"
-            "an LLVM pass and tends to offer improved performance with slow "
-            "programs.\n\n"
-
-            "Environment variables used:\n"
-            "AFL_CC: path to the C compiler to use\n"
-            "AFL_CXX: path to the C++ compiler to use\n"
-            "AFL_PATH: path to instrumenting pass and runtime "
-            "(afl-llvm-rt.*o)\n"
-            "AFL_DONT_OPTIMIZE: disable optimization instead of -O3\n"
-            "AFL_NO_BUILTIN: compile for use with libtokencap.so\n"
-            "AFL_INST_RATIO: percentage of branches to instrument\n"
-            "AFL_QUIET: suppress verbose output\n"
-            "AFL_DEBUG: enable developer debugging output\n"
-            "AFL_HARDEN: adds code hardening to catch memory bugs\n"
-            "AFL_USE_ASAN: activate address sanitizer\n"
-            "AFL_USE_MSAN: activate memory sanitizer\n"
-            "AFL_USE_UBSAN: activate undefined behaviour sanitizer\n"
-            "AFL_LLVM_WHITELIST: enable whitelisting (selective "
-            "instrumentation)\n"
-            "AFL_LLVM_NOT_ZERO: use cycling trace counters that skip zero\n"
-            "AFL_LLVM_USE_TRACE_PC: use LLVM trace-pc-guard instrumentation\n"
-            "AFL_LLVM_LAF_SPLIT_COMPARES: enable cascaded comparisons\n"
-            "AFL_LLVM_LAF_SPLIT_SWITCHES: casc. comp. in 'switch'\n"
-            "AFL_LLVM_LAF_TRANSFORM_COMPARES: transform library comparison "
-            "function calls\n"
-            " to cascaded comparisons\n"
-            "AFL_LLVM_LAF_SPLIT_FLOATS: transform floating point comp. to "
-            "cascaded "
-            "comp.\n"
-            "AFL_LLVM_LAF_SPLIT_COMPARES_BITW: size limit (default 8)\n"
-            "AFL_LLVM_INSTRIM: use light weight instrumentation InsTrim\n"
-            "AFL_LLVM_INSTRIM_LOOPHEAD: optimize loop tracing for speed\n"
-            "AFL_LLVM_CMPLOG: log operands of comparisons (RedQueen mutator)\n"
-            "\nafl-clang-fast was built for llvm %s with the llvm binary path "
-            "of "
-            "\"%s\".\n",
-            callname, BIN_PATH, BIN_PATH, LLVM_VERSION, LLVM_BINDIR);
-
-    if (strcmp(callname, "afl-clang-lto") == 0)
-      SAYF(
-          "Compiled with linker target \"%s\" and LTO flags \"%s\"\n\n"
-          "If anything fails - be sure to read README.lto.md!\n\n",
-          AFL_REAL_LD, AFL_CLANG_FLTO);
+  }
+  /* Without AFL_CLANG_FLTO defined at build time, LTO mode cannot be used. */
+#ifndef AFL_CLANG_FLTO
+  if (instrument_mode == INSTRUMENT_LTO)
+    FATAL("instrumentation mode LTO specified but LLVM support not available");
+#endif
+
+  if (argc < 2 || strcmp(argv[1], "-h") == 0) {
+    /* No arguments or -h: print the banner for the active mode, then usage. */
+    if (instrument_mode != INSTRUMENT_LTO)
+      printf("afl-clang-fast" VERSION " by <lszekeres@google.com> in %s mode\n",
+             ptr);
+    else
+      printf("afl-clang-lto" VERSION
+             "  by Marc \"vanHauser\" Heuse <mh@mh-sec.de> in %s mode\n",
+             ptr);
+
+    SAYF(
+        "\n"
+        "%s[++] [options]\n"
+        "\n"
+        "This is a helper application for afl-fuzz. It serves as a drop-in "
+        "replacement\n"
+        "for clang, letting you recompile third-party code with the "
+        "required "
+        "runtime\n"
+        "instrumentation. A common use pattern would be one of the "
+        "following:\n\n"
+
+        "  CC=%s/afl-clang-fast ./configure\n"
+        "  CXX=%s/afl-clang-fast++ ./configure\n\n"
+
+        "In contrast to the traditional afl-clang tool, this version is "
+        "implemented as\n"
+        "an LLVM pass and tends to offer improved performance with slow "
+        "programs.\n\n"
+
+        "Environment variables used:\n"
+        "AFL_CC: path to the C compiler to use\n"
+        "AFL_CXX: path to the C++ compiler to use\n"
+        "AFL_PATH: path to instrumenting pass and runtime "
+        "(afl-llvm-rt.*o)\n"
+        "AFL_DONT_OPTIMIZE: disable optimization instead of -O3\n"
+        "AFL_NO_BUILTIN: compile for use with libtokencap.so\n"
+        "AFL_INST_RATIO: percentage of branches to instrument\n"
+        "AFL_QUIET: suppress verbose output\n"
+        "AFL_DEBUG: enable developer debugging output\n"
+        "AFL_HARDEN: adds code hardening to catch memory bugs\n"
+        "AFL_USE_ASAN: activate address sanitizer\n"
+        "AFL_USE_MSAN: activate memory sanitizer\n"
+        "AFL_USE_UBSAN: activate undefined behaviour sanitizer\n"
+        "AFL_USE_CFISAN: activate control flow sanitizer\n"
+        "AFL_LLVM_WHITELIST: enable whitelisting (selective "
+        "instrumentation)\n"
+        "AFL_LLVM_NOT_ZERO: use cycling trace counters that skip zero\n"
+        "AFL_LLVM_LAF_SPLIT_COMPARES: enable cascaded comparisons\n"
+        "AFL_LLVM_LAF_SPLIT_SWITCHES: casc. comp. in 'switch'\n"
+        "AFL_LLVM_LAF_TRANSFORM_COMPARES: transform library comparison "
+        "function calls\n"
+        " to cascaded comparisons\n"
+        "AFL_LLVM_LAF_SPLIT_FLOATS: transform floating point comp. to "
+        "cascaded "
+        "comp.\n"
+        "AFL_LLVM_LAF_SPLIT_COMPARES_BITW: size limit (default 8)\n",
+        callname, BIN_PATH, BIN_PATH);
+    /* Mode selection: AFL_LLVM_INSTRUMENT plus the older per-mode variables. */
+    SAYF(
+        "\nafl-clang-fast specific environment variables:\n"
+        "AFL_LLVM_CMPLOG: log operands of comparisons (RedQueen mutator)\n"
+        "AFL_LLVM_INSTRUMENT: set instrumentation mode: DEFAULT, CFG "
+        "(INSTRIM), PCGUARD, LTO, CTX, NGRAM-2 ... NGRAM-16\n"
+        "You can also use the old environment variables:\n"
+        "AFL_LLVM_CTX: use context sensitive coverage\n"
+        "AFL_LLVM_USE_TRACE_PC: use LLVM trace-pc-guard instrumentation\n"
+        "AFL_LLVM_NGRAM_SIZE: use ngram prev_loc count coverage\n"
+        "AFL_LLVM_INSTRIM: use light weight instrumentation InsTrim\n"
+        "AFL_LLVM_INSTRIM_LOOPHEAD: optimize loop tracing for speed (sub "
+        "option to INSTRIM)\n");
+
+#ifdef AFL_CLANG_FLTO
+    SAYF(
+        "\nafl-clang-lto specific environment variables:\n"
+        "AFL_LLVM_LTO_STARTID: from which ID to start counting from for a "
+        "bb\n"
+        "AFL_LLVM_LTO_DONTWRITEID: don't write the highest ID used to a "
+        "global var\n"
+        "AFL_REAL_LD: use this lld linker instead of the compiled in path\n"
+        "\nafl-clang-lto was built with linker target \"%s\" and LTO flags "
+        "\"%s\"\n"
+        "If anything fails - be sure to read README.lto.md!\n",
+        AFL_REAL_LD, AFL_CLANG_FLTO);
+#endif
+
+    SAYF(
+        "\nafl-clang-fast was built for llvm %s with the llvm binary path "
+        "of \"%s\".\n",
+        LLVM_VERSION, LLVM_BINDIR);
 
     SAYF("\n");
 
@@ -625,22 +770,17 @@ int main(int argc, char **argv, char **envp) {
 
              getenv("AFL_DEBUG") != NULL) {
 
-#ifdef USE_TRACE_PC
-    SAYF(cCYA "afl-clang-fast" VERSION cRST
-              " [tpcg] by <lszekeres@google.com>\n");
-#warning \
-    "You do not need to specifically compile with USE_TRACE_PC anymore, setting the environment variable AFL_LLVM_USE_TRACE_PC is enough."
-#else
-    if (strstr(argv[0], "afl-clang-lto") == NULL)
+    if (instrument_mode != INSTRUMENT_LTO)
 
-      SAYF(cCYA "afl-clang-fast" VERSION cRST " by <lszekeres@google.com>\n");
+      SAYF(cCYA "afl-clang-fast" VERSION cRST
+                " by <lszekeres@google.com> in %s mode\n",
+           ptr);
 
     else
 
       SAYF(cCYA "afl-clang-lto" VERSION cRST
-                " by Marc \"vanHauser\" Heuse <mh@mh-sec.de>\n");
-
-#endif                                                     /* ^USE_TRACE_PC */
+                " by Marc \"vanHauser\" Heuse <mh@mh-sec.de> in %s mode\n",
+           ptr);
 
   }
 
@@ -663,7 +803,7 @@ int main(int argc, char **argv, char **envp) {
   find_obj(argv[0]);
 #endif
 
-  edit_params(argc, argv);
+  edit_params(argc, argv, envp);
 
   if (debug) {
 
diff --git a/llvm_mode/afl-ld.c b/llvm_mode/afl-ld.c
deleted file mode 100644
index eb46c85c..00000000
--- a/llvm_mode/afl-ld.c
+++ /dev/null
@@ -1,839 +0,0 @@
-/*
-  american fuzzy lop++ - wrapper for GNU ld
-  -----------------------------------------
-
-  Written by Marc Heuse <mh@mh-sec.de> for afl++
-
-  Maintained by Marc Heuse <mh@mh-sec.de>,
-                Heiko Eißfeldt <heiko.eissfeldt@hexco.de>
-                Andrea Fioraldi <andreafioraldi@gmail.com>
-                Dominik Maier <domenukk@gmail.com>
-
-  Copyright 2019-2020 AFLplusplus Project. All rights reserved.
-
-  Licensed under the Apache License, Version 2.0 (the "License");
-  you may not use this file except in compliance with the License.
-  You may obtain a copy of the License at:
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-  The sole purpose of this wrapper is to preprocess clang LTO files before
-  linking by ld and perform the instrumentation on the whole program.
-
-*/
-
-#define AFL_MAIN
-
-#include "config.h"
-#include "types.h"
-#include "debug.h"
-#include "alloc-inl.h"
-
-#include <stdio.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-#include <ctype.h>
-#include <fcntl.h>
-
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <sys/time.h>
-
-#include <dirent.h>
-
-#define MAX_PARAM_COUNT 4096
-
-static u8 **ld_params,              /* Parameters passed to the real 'ld'   */
-    **link_params,                  /* Parameters passed to 'llvm-link'     */
-    **opt_params,                   /* Parameters passed to 'opt' opt       */
-    **inst_params;                  /* Parameters passed to 'opt' inst      */
-
-static u8 *input_file;              /* Originally specified input file      */
-static u8 *final_file,              /* Instrumented file for the real 'ld'  */
-    *linked_file,                   /* file where we link all files         */
-    *modified_file;                 /* file that was optimized before instr */
-static u8 *afl_path = AFL_PATH;
-static u8 *real_ld = AFL_REAL_LD;
-static u8  cwd[4096];
-static u8 *tmp_dir;
-static u8 *ar_dir;
-static u8  ar_dir_cnt;
-static u8 *libdirs[254];
-static u8  libdir_cnt;
-
-static u8 be_quiet,                 /* Quiet mode (no stderr output)        */
-    debug,                          /* AFL_DEBUG                            */
-    passthrough,                    /* AFL_LD_PASSTHROUGH - no link+optimize*/
-    we_link,                        /* we have bc/ll -> link + optimize     */
-    just_version;                   /* Just show version?                   */
-
-static u32 ld_param_cnt = 1,        /* Number of params to 'ld'             */
-    link_param_cnt = 1,             /* Number of params to 'llvm-link'      */
-    opt_param_cnt = 1,              /* Number of params to 'opt' opt        */
-    inst_param_cnt = 1;             /* Number of params to 'opt' instr      */
-
-/* This function wipes a directory - our AR unpack directory in this case */
-static u8 wipe_directory(u8 *path) {
-
-  DIR *          d;
-  struct dirent *d_ent;
-
-  d = opendir(path);
-
-  if (!d) return 0;
-
-  while ((d_ent = readdir(d))) {
-
-    if (strcmp(d_ent->d_name, ".") != 0 && strcmp(d_ent->d_name, "..") != 0) {
-
-      u8 *fname = alloc_printf("%s/%s", path, d_ent->d_name);
-      if (unlink(fname)) PFATAL("Unable to delete '%s'", fname);
-      ck_free(fname);
-
-    }
-
-  }
-
-  closedir(d);
-
-  return !!rmdir(path);
-
-}
-
-/* remove temporary files on fatal errors */
-static void at_exit_handler(void) {
-
-  if (!getenv("AFL_KEEP_ASSEMBLY")) {
-
-    if (linked_file) {
-
-      unlink(linked_file);
-      linked_file = NULL;
-
-    }
-
-    if (modified_file) {
-
-      unlink(modified_file);
-      modified_file = NULL;
-
-    }
-
-    if (final_file) {
-
-      unlink(final_file);
-      final_file = NULL;
-
-    }
-
-    if (ar_dir != NULL) {
-
-      wipe_directory(ar_dir);
-      ar_dir = NULL;
-
-    }
-
-  }
-
-}
-
-/* This function checks if the parameter is a) an existing file and b)
-   if it is a BC or LL file, if both are true it returns 1 and 0 otherwise */
-int is_llvm_file(const char *file) {
-
-  int fd;
-  u8  buf[5];
-
-  if ((fd = open(file, O_RDONLY)) < 0) {
-
-    if (debug) SAYF(cMGN "[D] " cRST "File %s not found", file);
-    return 0;
-
-  }
-
-  if (read(fd, buf, 4) != 4) return 0;
-  buf[sizeof(buf) - 1] = 0;
-
-  close(fd);
-
-  if (strncmp(buf, "; Mo", 4) == 0) return 1;
-
-  if (buf[0] == 'B' && buf[1] == 'C' && buf[2] == 0xc0 && buf[3] == 0xde)
-    return 1;
-
-  return 0;
-
-}
-
-/* Return the current working directory, not thread safe ;-) */
-u8 *getthecwd() {
-
-  static u8 fail[] = "";
-  if (getcwd(cwd, sizeof(cwd)) == NULL) return fail;
-  return cwd;
-
-}
-
-/* Check if an ar extracted file is already in the parameter list */
-int is_duplicate(u8 **params, u32 ld_param_cnt, u8 *ar_file) {
-
-  for (uint32_t i = 0; i < ld_param_cnt; i++)
-    if (params[i] != NULL)
-      if (strcmp(params[i], ar_file) == 0) return 1;
-
-  return 0;
-
-}
-
-/* Examine and modify parameters to pass to 'ld', 'llvm-link' and 'llmv-ar'.
-   Note that the file name is always the last parameter passed by GCC,
-   so we exploit this property to keep the code "simple". */
-static void edit_params(int argc, char **argv) {
-
-  u32 i, have_lto = 0, libdir_index;
-  u8  libdir_file[4096];
-
-  if (tmp_dir == NULL) {
-
-    tmp_dir = getenv("TMPDIR");
-    if (!tmp_dir) tmp_dir = getenv("TEMP");
-    if (!tmp_dir) tmp_dir = getenv("TMP");
-    if (!tmp_dir) tmp_dir = "/tmp";
-
-  }
-
-  linked_file =
-      alloc_printf("%s/.afl-%u-%u-1.ll", tmp_dir, getpid(), (u32)time(NULL));
-  modified_file =
-      alloc_printf("%s/.afl-%u-%u-2.bc", tmp_dir, getpid(), (u32)time(NULL));
-  final_file =
-      alloc_printf("%s/.afl-%u-%u-3.bc", tmp_dir, getpid(), (u32)time(NULL));
-
-  ld_params = ck_alloc(4096 * sizeof(u8 *));
-  link_params = ck_alloc(4096 * sizeof(u8 *));
-  inst_params = ck_alloc(12 * sizeof(u8 *));
-  opt_params = ck_alloc(12 * sizeof(u8 *));
-
-  ld_params[0] = (u8 *)real_ld;
-  ld_params[ld_param_cnt++] = "--allow-multiple-definition";
-
-  link_params[0] = alloc_printf("%s/%s", LLVM_BINDIR, "llvm-link");
-  link_params[link_param_cnt++] = "-S";  // we create the linked file as .ll
-  link_params[link_param_cnt++] = "-o";
-  link_params[link_param_cnt++] = linked_file;
-
-  opt_params[0] = alloc_printf("%s/%s", LLVM_BINDIR, "opt");
-  if (getenv("AFL_DONT_OPTIMIZE") == NULL)
-    opt_params[opt_param_cnt++] = "-O3";
-  else
-    opt_params[opt_param_cnt++] = "-O0";
-
-  // opt_params[opt_param_cnt++] = "-S"; // only when debugging
-  opt_params[opt_param_cnt++] = linked_file;  // input: .ll file
-  opt_params[opt_param_cnt++] = "-o";
-  opt_params[opt_param_cnt++] = modified_file;  // output: .bc file
-
-  inst_params[0] = alloc_printf("%s/%s", LLVM_BINDIR, "opt");
-  inst_params[inst_param_cnt++] =
-      alloc_printf("--load=%s/afl-llvm-lto-instrumentation.so", afl_path);
-  // inst_params[inst_param_cnt++] = "-S"; // only when debugging
-  inst_params[inst_param_cnt++] = "--disable-opt";
-  inst_params[inst_param_cnt++] = "--afl-lto";
-  inst_params[inst_param_cnt++] = modified_file;  // input: .bc file
-  inst_params[inst_param_cnt++] = "-o";
-  inst_params[inst_param_cnt++] = final_file;  // output: .bc file
-
-  // first we must collect all library search paths
-  for (i = 1; i < argc; i++)
-    if (strlen(argv[i]) > 2 && argv[i][0] == '-' && argv[i][1] == 'L')
-      libdirs[libdir_cnt++] = argv[i] + 2;
-
-  // then we inspect all options to the target linker
-  for (i = 1; i < argc; i++) {
-
-    if (ld_param_cnt >= MAX_PARAM_COUNT || link_param_cnt >= MAX_PARAM_COUNT)
-      FATAL(
-          "Too many command line parameters because of unpacking .a archives, "
-          "this would need to be done by hand ... sorry! :-(");
-
-    if (strncmp(argv[i], "-flto", 5) == 0) have_lto = 1;
-
-    if (!strcmp(argv[i], "-version")) {
-
-      just_version = 1;
-      ld_params[1] = argv[i];
-      ld_params[2] = NULL;
-      final_file = input_file;
-      return;
-
-    }
-
-    if (strcmp(argv[i], "--afl") == 0) {
-
-      if (!be_quiet) OKF("afl++ test command line flag detected, exiting.");
-      exit(0);
-
-    }
-
-    // if a -l library is linked and no .so is found but an .a archive is there
-    // then the archive will be used. So we have to emulate this and check
-    // if an archive will be used and if yes we will instrument it too
-    libdir_file[0] = 0;
-    libdir_index = libdir_cnt;
-    if (strncmp(argv[i], "-l", 2) == 0 && libdir_cnt > 0 &&
-        strncmp(argv[i], "-lgcc", 5) != 0) {
-
-      u8 found = 0;
-
-      for (uint32_t j = 0; j < libdir_cnt && !found; j++) {
-
-        snprintf(libdir_file, sizeof(libdir_file), "%s/lib%s%s", libdirs[j],
-                 argv[i] + 2, ".so");
-        if (access(libdir_file, R_OK) != 0) {  // no .so found?
-
-          snprintf(libdir_file, sizeof(libdir_file), "%s/lib%s%s", libdirs[j],
-                   argv[i] + 2, ".a");
-          if (access(libdir_file, R_OK) == 0) {  // but .a found?
-
-            libdir_index = j;
-            found = 1;
-            if (debug) SAYF(cMGN "[D] " cRST "Found %s\n", libdir_file);
-
-          }
-
-        } else {
-
-          found = 1;
-          if (debug) SAYF(cMGN "[D] " cRST "Found %s\n", libdir_file);
-
-        }
-
-      }
-
-    }
-
-    // is the parameter an .a AR archive? If so, unpack and check its files
-    if (libdir_index < libdir_cnt ||
-        (argv[i][0] != '-' && strlen(argv[i]) > 2 &&
-         argv[i][strlen(argv[i]) - 1] == 'a' &&
-         argv[i][strlen(argv[i]) - 2] == '.')) {
-
-      // This gets a bit odd. I encountered several .a files being linked and
-      // where the same "foo.o" was in both .a archives. llvm-link does not
-      // like this so we have to work around that ...
-
-      u8             this_wd[4096], *this_ar;
-      u8             ar_params_cnt = 4;
-      u8 *           ar_params[ar_params_cnt];
-      u8 *           file = argv[i];
-      s32            pid, status;
-      DIR *          arx;
-      struct dirent *dir_ent;
-
-      if (libdir_index < libdir_cnt) file = libdir_file;
-
-      if (ar_dir_cnt == 0) {  // first archive, we setup up the basics
-
-        ar_dir = alloc_printf("%s/.afl-%u-%u.dir", tmp_dir, getpid(),
-                              (u32)time(NULL));
-        if (mkdir(ar_dir, 0700) != 0)
-          FATAL("can not create temporary directory %s", ar_dir);
-
-      }
-
-      if (getcwd(this_wd, sizeof(this_wd)) == NULL)
-        FATAL("can not get the current working directory");
-      if (chdir(ar_dir) != 0)
-        FATAL("can not chdir to temporary directory %s", ar_dir);
-      if (file[0] == '/')
-        this_ar = file;
-      else
-        this_ar = alloc_printf("%s/%s", this_wd, file);
-      ar_params[0] = alloc_printf("%s/%s", LLVM_BINDIR, "llvm-ar");
-      ar_params[1] = "x";
-      ar_params[2] = this_ar;
-      ar_params[3] = NULL;
-
-      if (!be_quiet) OKF("Running ar unpacker on %s into %s", this_ar, ar_dir);
-
-      if (debug) {
-
-        SAYF(cMGN "[D]" cRST " cd \"%s\";", getthecwd());
-        for (uint32_t j = 0; j < ar_params_cnt; j++)
-          SAYF(" \"%s\"", ar_params[j]);
-        SAYF("\n");
-
-      }
-
-      if (!(pid = fork())) {
-
-        execvp(ar_params[0], (char **)ar_params);
-        FATAL("Oops, failed to execute '%s'", ar_params[0]);
-
-      }
-
-      if (pid < 0) FATAL("fork() failed");
-      if (waitpid(pid, &status, 0) <= 0) FATAL("waitpid() failed");
-      if (WEXITSTATUS(status) != 0) exit(WEXITSTATUS(status));
-
-      if (chdir(this_wd) != 0)
-        FATAL("can not chdir back to our working directory %s", this_wd);
-
-      if (!(arx = opendir(ar_dir))) FATAL("can not open directory %s", ar_dir);
-
-      while ((dir_ent = readdir(arx)) != NULL) {
-
-        u8 *ar_file = alloc_printf("%s/%s", ar_dir, dir_ent->d_name);
-
-        if (dir_ent->d_name[strlen(dir_ent->d_name) - 1] == 'o' &&
-            dir_ent->d_name[strlen(dir_ent->d_name) - 2] == '.') {
-
-          if (passthrough || is_llvm_file(ar_file) == 0) {
-
-            if (is_duplicate(ld_params, ld_param_cnt, ar_file) == 0) {
-
-              ld_params[ld_param_cnt++] = ar_file;
-              if (debug)
-                SAYF(cMGN "[D] " cRST "not a LTO link file: %s\n", ar_file);
-
-            }
-
-          } else {
-
-            if (is_duplicate(link_params, link_param_cnt, ar_file) == 0) {
-
-              if (we_link == 0) {  // we have to honor order ...
-
-                ld_params[ld_param_cnt++] = final_file;
-                we_link = 1;
-
-              }
-
-              link_params[link_param_cnt++] = ar_file;
-              if (debug) SAYF(cMGN "[D] " cRST "is a link file: %s\n", ar_file);
-
-            }
-
-          }
-
-        } else
-
-            if (dir_ent->d_name[0] != '.' && !be_quiet)
-          WARNF("Unusual file found in ar archive %s: %s", argv[i], ar_file);
-
-      }
-
-      closedir(arx);
-      ar_dir_cnt++;
-
-      continue;
-
-    }
-
-    if (passthrough || argv[i][0] == '-' || is_llvm_file(argv[i]) == 0) {
-
-      // -O3 fucks up the CFG and instrumentation, so we downgrade to O2
-      // which is as we want things. Lets hope this is not too different
-      // in the various llvm versions!
-      if (strncmp(argv[i], "-plugin-opt=O", 13) == 0 &&
-          !getenv("AFL_DONT_OPTIMIZE"))
-        ld_params[ld_param_cnt++] = "-plugin-opt=O2";
-      else
-        ld_params[ld_param_cnt++] = argv[i];
-
-    } else {
-
-      if (we_link == 0) {  // we have to honor order ...
-        ld_params[ld_param_cnt++] = final_file;
-        we_link = 1;
-
-      }
-
-      link_params[link_param_cnt++] = argv[i];
-
-    }
-
-  }
-
-  // if (have_lto == 0) ld_params[ld_param_cnt++] = AFL_CLANG_FLTO; // maybe we
-  // should not ...
-  ld_params[ld_param_cnt] = NULL;
-  link_params[link_param_cnt] = NULL;
-  opt_params[opt_param_cnt] = NULL;
-  inst_params[inst_param_cnt] = NULL;
-
-}
-
-/* clean AFL_PATH from PATH */
-
-void clean_path() {
-
-  char *tmp, *newpath = NULL, *path = getenv("PATH");
-  u8    done = 0;
-
-  if (debug)
-    SAYF(cMGN "[D]" cRST " old PATH=%s, AFL_PATH=%s\n", path, AFL_PATH);
-
-  // wipe AFL paths from PATH that we set
-  // we added two paths so we remove the two paths
-  while (!done) {
-
-    if (*path == 0)
-      done = 1;
-    else if (*path++ == ':')
-      done = 1;
-
-  }
-
-  while (*path == ':')
-    path++;
-
-  // AFL_PATH could be additionally in PATH so check and remove to not call our
-  // 'ld'
-  const size_t pathlen = strlen(path);
-  const size_t afl_pathlen = strlen(AFL_PATH);
-  newpath = malloc(pathlen + 1);
-  if (strcmp(AFL_PATH, "/bin") != 0 && strcmp(AFL_PATH, "/usr/bin") != 0 &&
-      afl_pathlen > 1 && (tmp = strstr(path, AFL_PATH)) != NULL &&  // it exists
-      (tmp == path ||
-       (tmp > path &&
-        tmp[-1] == ':')) &&  // either starts with it or has a colon before
-      (tmp + afl_pathlen == path + pathlen ||
-       (tmp + afl_pathlen <
-        path + (pathlen && tmp[afl_pathlen] ==
-                               ':'))  // end with it or has a colon at the end
-       )) {
-
-    int one_colon = 1;
-
-    if (tmp > path) {
-
-      memcpy(newpath, path, tmp - path);
-      newpath[tmp - path - 1] = 0;  // remove ':'
-      one_colon = 0;
-
-    }
-
-    if (tmp + afl_pathlen < path + pathlen) tmp += afl_pathlen + one_colon;
-
-    setenv("PATH", newpath, 1);
-
-  } else
-
-    setenv("PATH", path, 1);
-
-  if (debug) SAYF(cMGN "[D]" cRST " new PATH=%s\n", getenv("PATH"));
-  free(newpath);
-
-}
-
-/* Main entry point */
-
-int main(int argc, char **argv) {
-
-  s32 pid, i;
-  int status;
-  u8 *ptr, exe[4096], exe2[4096], proc[32], val[2] = " ";
-  int have_afl_ld_caller = 0;
-
-  if (isatty(2) && !getenv("AFL_QUIET") && !getenv("AFL_DEBUG")) {
-
-    if (getenv("AFL_LD") != NULL)
-      SAYF(cCYA "afl-ld" VERSION cRST
-                " by Marc \"vanHauser\" Heuse <mh@mh-sec.de> (level %d)\n",
-           have_afl_ld_caller);
-
-  } else
-
-    be_quiet = 1;
-
-  if (getenv("AFL_DEBUG") != NULL) debug = 1;
-  if (getenv("AFL_PATH") != NULL) afl_path = getenv("AFL_PATH");
-  if (getenv("AFL_LD_PASSTHROUGH") != NULL) passthrough = 1;
-  if (getenv("AFL_REAL_LD") != NULL) real_ld = getenv("AFL_REAL_LD");
-  if (real_ld == NULL || strlen(real_ld) < 2) real_ld = "/bin/ld";
-  if (real_ld != NULL && real_ld[0] != '/')
-    real_ld = alloc_printf("/bin/%s", real_ld);
-
-  if ((ptr = getenv("AFL_LD_CALLER")) != NULL) have_afl_ld_caller = atoi(ptr);
-  val[0] = 0x31 + have_afl_ld_caller;
-  setenv("AFL_LD_CALLER", val, 1);
-
-  if (debug) {
-
-    SAYF(cMGN "[D] " cRST
-              "AFL_LD=%s, set AFL_LD_CALLER=%s, have_afl_ld_caller=%d, "
-              "real_ld=%s\n",
-         getenv("AFL_LD"), val, have_afl_ld_caller, real_ld);
-    SAYF(cMGN "[D]" cRST " cd \"%s\";", getthecwd());
-    for (i = 0; i < argc; i++)
-      SAYF(" \"%s\"", argv[i]);
-    SAYF("\n");
-
-  }
-
-  sprintf(proc, "/proc/%d/exe", getpid());
-  if (readlink(proc, exe, sizeof(exe) - 1) > 0) {
-
-    if (readlink(real_ld, exe2, sizeof(exe2) - 1) < 1) exe2[0] = 0;
-    exe[sizeof(exe) - 1] = 0;
-    exe[sizeof(exe2) - 1] = 0;
-    if (strcmp(exe, real_ld) == 0 || strcmp(exe, exe2) == 0)
-      PFATAL(cLRD "[!] " cRST
-                  "Error: real 'ld' path points to afl-ld, set AFL_REAL_LD to "
-                  "the real 'ld' program!");
-
-  }
-
-  if (have_afl_ld_caller > 1)
-    PFATAL(cLRD "[!] " cRST
-                "Error: afl-ld calls itself in a loop, set AFL_REAL_LD to the "
-                "real 'ld' program!");
-
-  if (argc < 2) {
-
-    SAYF(
-        "\n"
-        "This is a helper application for afl-fuzz. It is a wrapper around GNU "
-        "'ld',\n"
-        "executed by the toolchain whenever using "
-        "afl-clang-lto/afl-clang-lto++.\n"
-        "You probably don't want to run this program directly.\n\n"
-
-        "Environment variables:\n"
-        "  AFL_LD_PASSTHROUGH   do not link+optimize == no instrumentation\n"
-        "  AFL_REAL_LD          point to the real ld if necessary\n"
-
-        "\nafl-ld was compiled with the fixed real 'ld' path of %s and the "
-        "clang "
-        "bin path of %s\n\n",
-        real_ld, LLVM_BINDIR);
-
-    exit(1);
-
-  }
-
-  if (getenv("AFL_LD") == NULL) {
-
-    /* if someone install clang/ld into the same directory as afl++ then
-       they are out of luck ... */
-
-    if (have_afl_ld_caller == 1) { clean_path(); }
-
-    if (real_ld != NULL && strlen(real_ld) > 1) execvp(real_ld, argv);
-    execvp("ld", argv);  // fallback
-    PFATAL("Oops, failed to execute 'ld' - check your PATH");
-
-  }
-
-  atexit(at_exit_handler);  // ensure to wipe temp files if things fail
-
-  edit_params(argc, argv);  // here most of the magic happens :-)
-
-  if (debug)
-    SAYF(cMGN "[D] " cRST
-              "param counts: ar:%u lib:%u ld:%u link:%u opt:%u instr:%u\n",
-         ar_dir_cnt, libdir_cnt, ld_param_cnt, link_param_cnt, opt_param_cnt,
-         inst_param_cnt);
-
-  if (!just_version) {
-
-    if (we_link == 0) {
-
-      if (!getenv("AFL_QUIET"))
-        WARNF("No LTO input file found, cannot instrument!");
-
-    } else {
-
-      /* first we link all files */
-      if (!be_quiet) OKF("Running bitcode linker, creating %s", linked_file);
-
-      if (debug) {
-
-        SAYF(cMGN "[D]" cRST " cd \"%s\";", getthecwd());
-        for (i = 0; i < link_param_cnt; i++)
-          SAYF(" \"%s\"", link_params[i]);
-        SAYF("\n");
-
-      }
-
-      if (!(pid = fork())) {
-
-        execvp(link_params[0], (char **)link_params);
-        FATAL("Oops, failed to execute '%s'", link_params[0]);
-
-      }
-
-      if (pid < 0) PFATAL("fork() failed");
-      if (waitpid(pid, &status, 0) <= 0) PFATAL("waitpid() failed");
-      if (WEXITSTATUS(status) != 0) {
-
-        SAYF(bSTOP RESET_G1 CURSOR_SHOW cRST cLRD
-             "\n[-] PROGRAM ABORT : " cRST);
-        SAYF(
-            "llvm-link failed! Probable causes:\n\n"
-            " #1  If the error is \"linking globals named '...': symbol "
-            "multiply defined\"\n"
-            "     then there is nothing we can do - llvm-link is missing an "
-            "important feature\n\n"
-            " #2  If the error is \"expected top-level entity\" and then "
-            "binary output, this\n"
-            "     is because the same file is present in different .a archives "
-            "in different\n"
-            "     formats. This can be fixed by manual doing the steps afl-ld "
-            "is doing but\n"
-            "     programmatically - sorry!\n\n");
-        exit(WEXITSTATUS(status));
-
-      }
-
-      /* then we perform an optimization on the collected objects files */
-      if (!be_quiet)
-        OKF("Performing optimization via opt, creating %s", modified_file);
-      if (debug) {
-
-        SAYF(cMGN "[D]" cRST " cd \"%s\";", getthecwd());
-        for (i = 0; i < opt_param_cnt; i++)
-          SAYF(" \"%s\"", opt_params[i]);
-        SAYF("\n");
-
-      }
-
-      if (!(pid = fork())) {
-
-        execvp(opt_params[0], (char **)opt_params);
-        FATAL("Oops, failed to execute '%s'", opt_params[0]);
-
-      }
-
-      if (pid < 0) PFATAL("fork() failed");
-      if (waitpid(pid, &status, 0) <= 0) PFATAL("waitpid() failed");
-      if (WEXITSTATUS(status) != 0) exit(WEXITSTATUS(status));
-
-      /* then we run the instrumentation through the optimizer */
-      if (!be_quiet)
-        OKF("Performing instrumentation via opt, creating %s", final_file);
-      if (debug) {
-
-        SAYF(cMGN "[D]" cRST " cd \"%s\";", getthecwd());
-        for (i = 0; i < inst_param_cnt; i++)
-          SAYF(" \"%s\"", inst_params[i]);
-        SAYF("\n");
-
-      }
-
-      if (!(pid = fork())) {
-
-        execvp(inst_params[0], (char **)inst_params);
-        FATAL("Oops, failed to execute '%s'", inst_params[0]);
-
-      }
-
-      if (pid < 0) PFATAL("fork() failed");
-      if (waitpid(pid, &status, 0) <= 0) PFATAL("waitpid() failed");
-      if (WEXITSTATUS(status) != 0) exit(WEXITSTATUS(status));
-
-    }
-
-    /* next step - run the linker! :-) */
-
-  }
-
-  if (!be_quiet) OKF("Running real linker %s", real_ld);
-  if (debug) {
-
-    SAYF(cMGN "[D]" cRST " cd \"%s\";", getthecwd());
-    for (i = 0; i < ld_param_cnt; i++)
-      SAYF(" \"%s\"", ld_params[i]);
-    SAYF("\n");
-
-  }
-
-  if (!(pid = fork())) {
-
-    clean_path();
-
-    unsetenv("AFL_LD");
-
-    if (strlen(real_ld) > 1) execvp(real_ld, (char **)ld_params);
-    execvp("ld", (char **)ld_params);  // fallback
-    FATAL("Oops, failed to execute 'ld' - check your PATH");
-
-  }
-
-  if (pid < 0) PFATAL("fork() failed");
-
-  if (waitpid(pid, &status, 0) <= 0) PFATAL("waitpid() failed");
-  if (debug) SAYF(cMGN "[D] " cRST "linker result: %d\n", status);
-
-  if (!just_version) {
-
-    if (!getenv("AFL_KEEP_ASSEMBLY")) {
-
-      if (linked_file) {
-
-        unlink(linked_file);
-        linked_file = NULL;
-
-      }
-
-      if (modified_file) {
-
-        unlink(modified_file);
-        modified_file = NULL;
-
-      }
-
-      if (final_file) {
-
-        unlink(final_file);
-        final_file = NULL;
-
-      }
-
-      if (ar_dir != NULL) {
-
-        wipe_directory(ar_dir);
-        ar_dir = NULL;
-
-      }
-
-    } else {
-
-      if (!be_quiet) {
-
-        SAYF(
-            "[!] afl-ld: keeping link file %s, optimized bitcode %s and "
-            "instrumented bitcode %s",
-            linked_file, modified_file, final_file);
-        if (ar_dir_cnt > 0 && ar_dir)
-          SAYF(" and ar archive unpack directory %s", ar_dir);
-        SAYF("\n");
-
-      }
-
-    }
-
-    if (status == 0) {
-
-      if (!be_quiet) OKF("Linker was successful");
-
-    } else {
-
-      SAYF(cLRD "[-] " cRST
-                "Linker failed, please investigate and send a bug report. Most "
-                "likely an 'ld' option is incompatible with %s. Try "
-                "AFL_KEEP_ASSEMBLY=1 and AFL_DEBUG=1 for replaying.\n",
-           AFL_CLANG_FLTO);
-
-    }
-
-  }
-
-  exit(WEXITSTATUS(status));
-
-}
-
diff --git a/llvm_mode/afl-llvm-lto-instrumentation.so.cc b/llvm_mode/afl-llvm-lto-instrumentation.so.cc
index febb8950..c5e7a2b7 100644
--- a/llvm_mode/afl-llvm-lto-instrumentation.so.cc
+++ b/llvm_mode/afl-llvm-lto-instrumentation.so.cc
@@ -23,12 +23,6 @@
 
  */
 
-// CONFIG OPTION:
-// If #define USE_SPLIT is used, then the llvm::SplitEdge function is used
-// instead of our own implementation. Ours looks better and will
-// compile everywhere. But it is not working for complex code. yet. damn.
-#define USE_SPLIT
-
 #define AFL_LLVM_PASS
 
 #include "config.h"
@@ -44,31 +38,24 @@
 #include <sys/time.h>
 
 #include "llvm/Config/llvm-config.h"
-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 5
-typedef long double max_align_t;
-#endif
-
+#include "llvm/ADT/Statistic.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Verifier.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
-
-#ifdef USE_SPLIT
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemorySSAUpdater.h"
-#endif
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Pass.h"
 
-#if LLVM_VERSION_MAJOR > 3 || \
-    (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 4)
-#include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/CFG.h"
-#else
-#include "llvm/DebugInfo.h"
-#include "llvm/Support/CFG.h"
-#endif
+#include <set>
 
 using namespace llvm;
 
@@ -91,7 +78,6 @@ class AFLLTOPass : public ModulePass {
 
   }
 
-#ifdef USE_SPLIT
   void getAnalysisUsage(AnalysisUsage &AU) const override {
 
     ModulePass::getAnalysisUsage(AU);
@@ -100,8 +86,6 @@ class AFLLTOPass : public ModulePass {
 
   }
 
-#endif
-
   // Calculate the number of average collisions that would occur if all
   // location IDs would be assigned randomly (like normal afl/afl++).
   // This uses the "balls in bins" algorithm.
@@ -168,7 +152,7 @@ class AFLLTOPass : public ModulePass {
   bool runOnModule(Module &M) override;
 
  protected:
-  int      afl_global_id = 1, debug = 0;
+  int      afl_global_id = 1, debug = 0, autodictionary = 0;
   uint32_t be_quiet = 0, inst_blocks = 0, inst_funcs = 0, total_instr = 0;
 
 };
@@ -177,22 +161,19 @@ class AFLLTOPass : public ModulePass {
 
 bool AFLLTOPass::runOnModule(Module &M) {
 
-  LLVMContext &C = M.getContext();
+  LLVMContext &                    C = M.getContext();
+  std::vector<std::string>         dictionary;
+  std::vector<CallInst *>          calls;
+  DenseMap<Value *, std::string *> valueMap;
 
-  IntegerType *   Int8Ty = IntegerType::getInt8Ty(C);
-  IntegerType *   Int32Ty = IntegerType::getInt32Ty(C);
-  struct timeval  tv;
-  struct timezone tz;
-  u32             rand_seed;
+  IntegerType *Int8Ty = IntegerType::getInt8Ty(C);
+  IntegerType *Int32Ty = IntegerType::getInt32Ty(C);
 
-  /* Setup random() so we get Actually Random(TM) outputs from AFL_R() */
-  gettimeofday(&tv, &tz);
-  rand_seed = tv.tv_sec ^ tv.tv_usec ^ getpid();
-  AFL_SR(rand_seed);
+  if (getenv("AFL_DEBUG")) debug = 1;
 
   /* Show a banner */
 
-  if ((isatty(2) && !getenv("AFL_QUIET")) || getenv("AFL_DEBUG") != NULL) {
+  if ((isatty(2) && !getenv("AFL_QUIET")) || debug) {
 
     SAYF(cCYA "afl-llvm-lto" VERSION cRST
               " by Marc \"vanHauser\" Heuse <mh@mh-sec.de>\n");
@@ -201,9 +182,9 @@ bool AFLLTOPass::runOnModule(Module &M) {
 
     be_quiet = 1;
 
-#if LLVM_VERSION_MAJOR < 9
-  char *neverZero_counters_str = getenv("AFL_LLVM_NOT_ZERO");
-#endif
+  if (getenv("AFL_LLVM_AUTODICTIONARY") ||
+      getenv("AFL_LLVM_LTO_AUTODICTIONARY"))
+    autodictionary = 1;
 
   /* Get globals for the SHM region and the previous location. Note that
      __afl_prev_loc is thread-local. */
@@ -224,14 +205,320 @@ bool AFLLTOPass::runOnModule(Module &M) {
     if (F.size() < 2) continue;
     if (isBlacklisted(&F)) continue;
 
-#ifdef USE_SPLIT
-      // DominatorTree &DT =
-      // getAnalysis<DominatorTreeWrapperPass>(F).getDomTree(); LoopInfo & LI =
-      // getAnalysis<LoopInfoWrapperPass>(F).getLoopInfo();
-#endif
-
     std::vector<BasicBlock *> InsBlocks;
 
+    if (autodictionary) {
+
+      /*  Some implementation notes.
+       *
+       *  We try to handle 3 cases:
+       *  - memcmp("foo", arg, 3) <- literal string
+       *  - static char globalvar[] = "foo";
+       *    memcmp(globalvar, arg, 3) <- global variable
+       *  - char localvar[] = "foo";
+       *    memcmp(locallvar, arg, 3) <- local variable
+       *
+       *  The local variable case is the hardest. We can only detect that
+       *  case if there is no reassignment or change in the variable.
+       *  And it might not work across llvm version.
+       *  What we do is hooking the initializer function for local variables
+       *  (llvm.memcpy.p0i8.p0i8.i64) and note the string and the assigned
+       *  variable. And if that variable is then used in a compare function
+       *  we use that noted string.
+       *  This seems not to work for tokens that have a size <= 4 :-(
+       *
+       *  - if the compared length is smaller than the string length we
+       *    save the full string. This is likely better for fuzzing but
+       *    might be wrong in a few cases depending on optimizers
+       *
+       *  - not using StringRef because there is a bug in the llvm 11
+       *    checkout I am using which sometimes points to wrong strings
+       *
+       *  Over and out. Took me a full day. damn. mh/vh
+       */
+
+      for (auto &BB : F) {
+
+        for (auto &IN : BB) {
+
+          CallInst *callInst = nullptr;
+
+          if ((callInst = dyn_cast<CallInst>(&IN))) {
+
+            bool     isStrcmp = true;
+            bool     isMemcmp = true;
+            bool     isStrncmp = true;
+            bool     isStrcasecmp = true;
+            bool     isStrncasecmp = true;
+            bool     isIntMemcpy = true;
+            bool     addedNull = false;  // set once a trailing NUL was appended
+            uint32_t optLen = 0;  // token/compare length; 8 bits would wrap at 256
+
+            Function *Callee = callInst->getCalledFunction();
+            if (!Callee) continue;
+            if (callInst->getCallingConv() != llvm::CallingConv::C) continue;
+            std::string FuncName = Callee->getName().str();
+            isStrcmp &= !FuncName.compare("strcmp");
+            isMemcmp &= !FuncName.compare("memcmp");
+            isStrncmp &= !FuncName.compare("strncmp");
+            isStrcasecmp &= !FuncName.compare("strcasecmp");
+            isStrncasecmp &= !FuncName.compare("strncasecmp");
+            isIntMemcpy &= !FuncName.compare("llvm.memcpy.p0i8.p0i8.i64");
+
+            if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp &&
+                !isStrncasecmp && !isIntMemcpy)
+              continue;
+
+            /* Verify the strcmp/memcmp/strncmp/strcasecmp/strncasecmp function
+             * prototype */
+            FunctionType *FT = Callee->getFunctionType();
+
+            isStrcmp &= FT->getNumParams() == 2 &&
+                        FT->getReturnType()->isIntegerTy(32) &&
+                        FT->getParamType(0) == FT->getParamType(1) &&
+                        FT->getParamType(0) ==
+                            IntegerType::getInt8PtrTy(M.getContext());
+            isStrcasecmp &= FT->getNumParams() == 2 &&
+                            FT->getReturnType()->isIntegerTy(32) &&
+                            FT->getParamType(0) == FT->getParamType(1) &&
+                            FT->getParamType(0) ==
+                                IntegerType::getInt8PtrTy(M.getContext());
+            isMemcmp &= FT->getNumParams() == 3 &&
+                        FT->getReturnType()->isIntegerTy(32) &&
+                        FT->getParamType(0)->isPointerTy() &&
+                        FT->getParamType(1)->isPointerTy() &&
+                        FT->getParamType(2)->isIntegerTy();
+            isStrncmp &= FT->getNumParams() == 3 &&
+                         FT->getReturnType()->isIntegerTy(32) &&
+                         FT->getParamType(0) == FT->getParamType(1) &&
+                         FT->getParamType(0) ==
+                             IntegerType::getInt8PtrTy(M.getContext()) &&
+                         FT->getParamType(2)->isIntegerTy();
+            isStrncasecmp &= FT->getNumParams() == 3 &&
+                             FT->getReturnType()->isIntegerTy(32) &&
+                             FT->getParamType(0) == FT->getParamType(1) &&
+                             FT->getParamType(0) ==
+                                 IntegerType::getInt8PtrTy(M.getContext()) &&
+                             FT->getParamType(2)->isIntegerTy();
+
+            if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp &&
+                !isStrncasecmp && !isIntMemcpy)
+              continue;
+
+            /* is a str{n,}{case,}cmp/memcmp, check if we have
+             * str{case,}cmp(x, "const") or str{case,}cmp("const", x)
+             * strn{case,}cmp(x, "const", ..) or strn{case,}cmp("const", x, ..)
+             * memcmp(x, "const", ..) or memcmp("const", x, ..) */
+            Value *Str1P = callInst->getArgOperand(0),
+                  *Str2P = callInst->getArgOperand(1);
+            std::string Str1, Str2;
+            StringRef   TmpStr;
+            bool        HasStr1 = getConstantStringInfo(Str1P, TmpStr);
+            if (TmpStr.empty())
+              HasStr1 = false;
+            else
+              Str1 = TmpStr.str();
+            bool HasStr2 = getConstantStringInfo(Str2P, TmpStr);
+            if (TmpStr.empty())
+              HasStr2 = false;
+            else
+              Str2 = TmpStr.str();
+
+            if (debug)
+              fprintf(stderr, "F:%s %p(%s)->\"%s\"(%s) %p(%s)->\"%s\"(%s)\n",
+                      FuncName.c_str(), Str1P, Str1P->getName().str().c_str(),
+                      Str1.c_str(), HasStr1 == true ? "true" : "false", Str2P,
+                      Str2P->getName().str().c_str(), Str2.c_str(),
+                      HasStr2 == true ? "true" : "false");
+
+            // we handle the 2nd parameter first because of llvm memcpy
+            if (!HasStr2) {
+
+              auto *Ptr = dyn_cast<ConstantExpr>(Str2P);
+              if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) {
+
+                if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) {
+
+                  if (auto *Array =
+                          dyn_cast<ConstantDataArray>(Var->getInitializer())) {
+
+                    HasStr2 = true;
+                    Str2 = Array->getAsString().str();
+
+                  }
+
+                }
+
+              }
+
+            }
+
+            // for the internal memcpy routine we only care for the second
+            // parameter and are not reporting anything.
+            if (isIntMemcpy == true) {
+
+              if (HasStr2 == true) {
+
+                Value *      op2 = callInst->getArgOperand(2);
+                ConstantInt *ilen = dyn_cast<ConstantInt>(op2);
+                if (ilen) {
+
+                  uint64_t literalLength = Str2.size();
+                  uint64_t optLength = ilen->getZExtValue();
+                  if (literalLength + 1 == optLength) {
+
+                    Str2.append("\0", 1);  // add null byte
+                    addedNull = true;
+
+                  }
+
+                }
+
+                valueMap[Str1P] = new std::string(Str2);
+
+                if (debug)
+                  fprintf(stderr, "Saved: %s for %p\n", Str2.c_str(), Str1P);
+                continue;
+
+              }
+
+              continue;
+
+            }
+
+            // Neither a literal nor a global variable?
+            // maybe it is a local variable that we saved
+            if (!HasStr2) {
+
+              std::string *strng = valueMap[Str2P];
+              if (strng && !strng->empty()) {
+
+                Str2 = *strng;
+                HasStr2 = true;
+                if (debug)
+                  fprintf(stderr, "Filled2: %s for %p\n", strng->c_str(),
+                          Str2P);
+
+              }
+
+            }
+
+            if (!HasStr1) {
+
+              auto Ptr = dyn_cast<ConstantExpr>(Str1P);
+
+              if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) {
+
+                if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) {
+
+                  if (auto *Array =
+                          dyn_cast<ConstantDataArray>(Var->getInitializer())) {
+
+                    HasStr1 = true;
+                    Str1 = Array->getAsString().str();
+
+                  }
+
+                }
+
+              }
+
+            }
+
+            // Neither a literal nor a global variable?
+            // maybe it is a local variable that we saved
+            if (!HasStr1) {
+
+              std::string *strng = valueMap[Str1P];
+              if (strng && !strng->empty()) {
+
+                Str1 = *strng;
+                HasStr1 = true;
+                if (debug)
+                  fprintf(stderr, "Filled1: %s for %p\n", strng->c_str(),
+                          Str1P);
+
+              }
+
+            }
+
+            /* handle cases of one string is const, one string is variable */
+            if (!(HasStr1 ^ HasStr2)) continue;
+
+            std::string thestring;
+
+            if (HasStr1)
+              thestring = Str1;
+            else
+              thestring = Str2;
+
+            optLen = thestring.length();
+
+            if (isMemcmp || isStrncmp || isStrncasecmp) {
+
+              Value *      op2 = callInst->getArgOperand(2);
+              ConstantInt *ilen = dyn_cast<ConstantInt>(op2);
+              if (ilen) {
+
+                uint64_t literalLength = optLen;
+                optLen = ilen->getZExtValue();
+                if (literalLength + 1 == optLen) {  // add null byte
+                  thestring.append("\0", 1);
+                  addedNull = true;
+
+                }
+
+              }
+
+            }
+
+            // add null byte if this is a string compare function and a null
+            // was not already added
+            if (addedNull == false && !isMemcmp) {
+
+              thestring.append("\0", 1);  // add null byte
+              optLen++;
+
+            }
+
+            if (!be_quiet) {
+
+              // print the token, escaping bytes <= 0x20 or >= 0x7f as \xNN
+              fprintf(stderr, "%s: length %u/%u \"", FuncName.c_str(), optLen,
+                      (unsigned int)thestring.length());
+              for (size_t i = 0; i < thestring.length(); i++) {
+
+                uint8_t c = thestring[i];
+                if (c <= 32 || c >= 127)
+                  fprintf(stderr, "\\x%02x", c);
+                else
+                  fprintf(stderr, "%c", c);
+
+              }
+
+              fprintf(stderr, "\"\n");
+
+            }
+
+            // we take the longer string, even if the compare was to a
+            // shorter part. Note that depending on the optimizer of the
+            // compiler this can be wrong, but it is more likely that this
+            // is helping the fuzzer
+            if (optLen != thestring.length()) optLen = thestring.length();
+            if (optLen > MAX_AUTO_EXTRA) optLen = MAX_AUTO_EXTRA;
+            if (optLen < MIN_AUTO_EXTRA)  // too short? skip
+              continue;
+
+            dictionary.push_back(thestring.substr(0, optLen));
+
+          }
+
+        }
+
+      }
+
+    }
+
     for (auto &BB : F) {
 
       uint32_t succ = 0;
@@ -274,11 +561,7 @@ bool AFLLTOPass::runOnModule(Module &M) {
 
         for (uint32_t j = 0; j < Successors.size(); j++) {
 
-#ifdef USE_SPLIT
           BasicBlock *newBB = llvm::SplitEdge(origBB, Successors[j]);
-#else
-          BasicBlock *newBB = BasicBlock::Create(C, "", &F, nullptr);
-#endif
 
           if (!newBB) {
 
@@ -287,12 +570,8 @@ bool AFLLTOPass::runOnModule(Module &M) {
 
           }
 
-#ifdef USE_SPLIT
           BasicBlock::iterator IP = newBB->getFirstInsertionPt();
           IRBuilder<>          IRB(&(*IP));
-#else
-          IRBuilder<> IRB(&(*newBB));
-#endif
 
           /* Set the ID of the inserted basic block */
 
@@ -313,38 +592,12 @@ bool AFLLTOPass::runOnModule(Module &M) {
 
           Value *Incr = IRB.CreateAdd(Counter, One);
 
-#if LLVM_VERSION_MAJOR < 9
-          if (neverZero_counters_str !=
-              NULL) {  // with llvm 9 we make this the default as the bug in
-                       // llvm is then fixed
-#endif
-            auto cf = IRB.CreateICmpEQ(Incr, Zero);
-            auto carry = IRB.CreateZExt(cf, Int8Ty);
-            Incr = IRB.CreateAdd(Incr, carry);
-#if LLVM_VERSION_MAJOR < 9
-
-          }
-
-#endif
+          auto cf = IRB.CreateICmpEQ(Incr, Zero);
+          auto carry = IRB.CreateZExt(cf, Int8Ty);
+          Incr = IRB.CreateAdd(Incr, carry);
           IRB.CreateStore(Incr, MapPtrIdx)
               ->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
 
-#ifdef USE_SPLIT
-          // nothing
-#else
-
-          // Unconditional jump to the destination BB
-
-          IRB.CreateBr(Successors[j]);
-
-          // Replace the original destination to this newly inserted BB
-
-          origBB->replacePhiUsesWith(Successors[j], newBB);
-          BasicBlock *S = Successors[j];
-          S->replacePhiUsesWith(origBB, newBB);
-          TI->setSuccessor(j, newBB);
-
-#endif
           // done :)
 
           inst_blocks++;
@@ -355,31 +608,148 @@ bool AFLLTOPass::runOnModule(Module &M) {
 
     }
 
+    // save highest location ID to global variable
+    // do this after each function to fail faster
+    if (afl_global_id > MAP_SIZE) {
+
+      uint32_t pow2map = 1, map = afl_global_id;
+      while ((map = map >> 1))
+        pow2map++;
+      FATAL(
+          "We have %u blocks to instrument but the map size is only %u! Edit "
+          "config.h and set MAP_SIZE_POW2 from %u to %u, then recompile "
+          "afl-fuzz and llvm_mode.",
+          afl_global_id, MAP_SIZE, MAP_SIZE_POW2, pow2map);
+
+    }
+
   }
 
-  // save highest location ID to global variable
+  if (getenv("AFL_LLVM_LTO_DONTWRITEID") == NULL || dictionary.size()) {
 
-  if (afl_global_id > MAP_SIZE) {
+    // yes we could create our own function, insert it into ctors ...
+    // but this would be a pain in the butt ... so we use afl-llvm-rt-lto.o
 
-    uint32_t pow2map = 1, map = afl_global_id;
-    while ((map = map >> 1))
-      pow2map++;
-    FATAL(
-        "We have %u blocks to instrument but the map size is only %u! Edit "
-        "config.h and set MAP_SIZE_POW2 from %u to %u, then recompile "
-        "afl-fuzz and llvm_mode.",
-        afl_global_id, MAP_SIZE, MAP_SIZE_POW2, pow2map);
+    Function *f = M.getFunction("__afl_auto_init_globals");
 
-  }
+    if (!f) {
+
+      fprintf(stderr,
+              "Error: init function could not be found (this should not "
+              "happen)\n");
+      exit(-1);
+
+    }
+
+    if (f->empty()) {  // declaration without body: there is no entry block
+
+      fprintf(stderr,
+              "Error: init function does not have an EntryBlock (this should "
+              "not happen)\n");
+      exit(-1);
+
+    }
+
+    BasicBlock *         bb = &f->getEntryBlock();  // instrumentation goes here
+    BasicBlock::iterator IP = bb->getFirstInsertionPt();
+    IRBuilder<>          IRB(&(*IP));
 
-  if (getenv("AFL_LLVM_LTO_DONTWRITEID") == NULL) {
+    if (getenv("AFL_LLVM_LTO_DONTWRITEID") == NULL) {
 
-    GlobalVariable *AFLFinalLoc = new GlobalVariable(
-        M, Int32Ty, true, GlobalValue::ExternalLinkage, 0, "__afl_final_loc", 0,
-        GlobalVariable::GeneralDynamicTLSModel, 0, false);
-    ConstantInt *const_loc = ConstantInt::get(Int32Ty, afl_global_id);
-    AFLFinalLoc->setAlignment(4);
-    AFLFinalLoc->setInitializer(const_loc);
+      uint32_t write_loc = afl_global_id;
+
+      if (afl_global_id % 8) write_loc = (((afl_global_id + 8) >> 3) << 3);
+
+      if (write_loc <= MAP_SIZE && write_loc <= 0x800000) {
+
+        GlobalVariable *AFLFinalLoc = new GlobalVariable(
+            M, Int32Ty, true, GlobalValue::ExternalLinkage, 0,
+            "__afl_final_loc", 0, GlobalVariable::GeneralDynamicTLSModel, 0,
+            false);
+        ConstantInt *const_loc = ConstantInt::get(Int32Ty, write_loc);
+        StoreInst *  StoreFinalLoc = IRB.CreateStore(const_loc, AFLFinalLoc);
+        StoreFinalLoc->setMetadata(M.getMDKindID("nosanitize"),
+                                   MDNode::get(C, None));
+
+      }
+
+    }
+
+    if (dictionary.size()) {
+
+      size_t memlen = 0, count = 0, offset = 0;
+      char * ptr;
+      // pass 1: count the tokens and sum their payload bytes
+      for (const auto &token : dictionary) {
+
+        memlen += token.length();
+        count++;
+
+      }
+
+      if (!be_quiet)
+        printf("AUTODICTIONARY: %zu string%s found\n", count,
+               count == 1 ? "" : "s");
+
+      if (count) {
+        // one extra byte per token for its length prefix
+        if ((ptr = (char *)malloc(memlen + count)) == NULL) {
+
+          fprintf(stderr, "Error: malloc for %zu bytes failed!\n",
+                  memlen + count);
+          exit(-1);
+
+        }
+
+        count = 0;
+        // pass 2: serialize as <1 byte length><bytes>, capped in size and count
+        for (const auto &token : dictionary) {
+
+          if (offset + token.length() < 0xfffff0 && count < MAX_AUTO_EXTRAS) {
+
+            ptr[offset++] = (uint8_t)token.length();
+            memcpy(ptr + offset, token.c_str(), token.length());
+            offset += token.length();
+            count++;
+
+          }
+
+        }
+        // the init function stores the serialized size into __afl_dictionary_len
+        GlobalVariable *AFLDictionaryLen = new GlobalVariable(
+            M, Int32Ty, false, GlobalValue::ExternalLinkage, 0,
+            "__afl_dictionary_len", 0, GlobalVariable::GeneralDynamicTLSModel,
+            0, false);
+        ConstantInt *const_len = ConstantInt::get(Int32Ty, offset);
+        StoreInst *StoreDictLen = IRB.CreateStore(const_len, AFLDictionaryLen);
+        StoreDictLen->setMetadata(M.getMDKindID("nosanitize"),
+                                  MDNode::get(C, None));
+
+        ArrayType *ArrayTy = ArrayType::get(IntegerType::get(C, 8), offset);
+        // stack view instead of leaked heap ArrayRefs; the constant is created
+        // once, already marked constant and with its initializer set
+        ArrayRef<char> DictBytes((char *)ptr, offset);
+        Constant *     DictInit = ConstantDataArray::get(C, DictBytes);
+        GlobalVariable *AFLInternalDictionary = new GlobalVariable(
+            M, ArrayTy, true, GlobalValue::ExternalLinkage, DictInit,
+            "__afl_internal_dictionary", 0,
+            GlobalVariable::GeneralDynamicTLSModel, 0, false);
+        free(ptr);  // ConstantDataArray::get copied the bytes into the context
+
+        GlobalVariable *AFLDictionary = new GlobalVariable(
+            M, PointerType::get(Int8Ty, 0), false, GlobalValue::ExternalLinkage,
+            0, "__afl_dictionary");
+        // the init function stores the array's address into __afl_dictionary
+        Value *AFLDictOff = IRB.CreateGEP(AFLInternalDictionary, Zero);
+        Value *AFLDictPtr =
+            IRB.CreatePointerCast(AFLDictOff, PointerType::get(Int8Ty, 0));
+        StoreInst *StoreDict = IRB.CreateStore(AFLDictPtr, AFLDictionary);
+        StoreDict->setMetadata(M.getMDKindID("nosanitize"),
+                               MDNode::get(C, None));
+
+      }
+
+    }
 
   }
 
@@ -392,10 +762,11 @@ bool AFLLTOPass::runOnModule(Module &M) {
     else {
 
       char modeline[100];
-      snprintf(modeline, sizeof(modeline), "%s%s%s%s",
+      snprintf(modeline, sizeof(modeline), "%s%s%s%s%s",
                getenv("AFL_HARDEN") ? "hardened" : "non-hardened",
                getenv("AFL_USE_ASAN") ? ", ASAN" : "",
                getenv("AFL_USE_MSAN") ? ", MSAN" : "",
+               getenv("AFL_USE_CFISAN") ? ", CFISAN" : "",
                getenv("AFL_USE_UBSAN") ? ", UBSAN" : "");
       OKF("Instrumented %u locations with no collisions (on average %llu "
           "collisions would be in afl-gcc/afl-clang-fast) (%s mode).",
@@ -422,5 +793,5 @@ static RegisterPass<AFLLTOPass> X("afl-lto", "afl++ LTO instrumentation pass",
                                   false, false);
 
 static RegisterStandardPasses RegisterAFLLTOPass(
-    PassManagerBuilder::EP_OptimizerLast, registerAFLLTOPass);
+    PassManagerBuilder::EP_FullLinkTimeOptimizationLast, registerAFLLTOPass);
 
diff --git a/llvm_mode/afl-llvm-pass.so.cc b/llvm_mode/afl-llvm-pass.so.cc
index 133c64b4..b4249802 100644
--- a/llvm_mode/afl-llvm-pass.so.cc
+++ b/llvm_mode/afl-llvm-pass.so.cc
@@ -2,12 +2,15 @@
    american fuzzy lop++ - LLVM-mode instrumentation pass
    ---------------------------------------------------
 
-   Written by Laszlo Szekeres <lszekeres@google.com> and
+   Written by Laszlo Szekeres <lszekeres@google.com>,
+              Adrian Herrera <adrian.herrera@anu.edu.au>,
               Michal Zalewski
 
    LLVM integration design comes from Laszlo Szekeres. C bits copied-and-pasted
    from afl-as.c are Michal's fault.
 
+   NGRAM previous location coverage comes from Adrian Herrera.
+
    Copyright 2015, 2016 Google Inc. All rights reserved.
    Copyright 2019-2020 AFLplusplus Project. All rights reserved.
 
@@ -27,7 +30,6 @@
 
 #include "config.h"
 #include "debug.h"
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
@@ -47,6 +49,7 @@ typedef long double max_align_t;
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
 
 #if LLVM_VERSION_MAJOR > 3 || \
@@ -58,6 +61,8 @@ typedef long double max_align_t;
 #include "llvm/Support/CFG.h"
 #endif
 
+#include "llvm-ngram-coverage.h"
+
 using namespace llvm;
 
 namespace {
@@ -118,6 +123,9 @@ class AFLCoverage : public ModulePass {
 
  protected:
   std::list<std::string> myWhitelist;
+  uint32_t               ngram_size = 0;
+  uint32_t               debug = 0;
+  char *                 ctx_str = NULL;
 
 };
 
@@ -125,12 +133,40 @@ class AFLCoverage : public ModulePass {
 
 char AFLCoverage::ID = 0;
 
+/* local PowerOf2Ceil(), compiled only for LLVM < 3.9.1 (needed up to 3.9.0) */
+#if LLVM_VERSION_MAJOR == 3 && \
+    (LLVM_VERSION_MINOR < 9 || \
+     (LLVM_VERSION_MINOR == 9 && LLVM_VERSION_PATCH < 1))
+uint64_t PowerOf2Ceil(unsigned in) {
+  // returns the smallest power of two >= in (for in >= 1)
+  uint64_t in64 = in - 1;  // in == 0 wraps; with 32-bit unsigned result is 2^32
+  in64 |= (in64 >> 1);     // smear the highest set bit into all lower bits ...
+  in64 |= (in64 >> 2);
+  in64 |= (in64 >> 4);
+  in64 |= (in64 >> 8);
+  in64 |= (in64 >> 16);
+  in64 |= (in64 >> 32);
+  return in64 + 1;  // ... so that adding one gives the next power of two
+
+}
+
+#endif
+
+/* vector intrinsics (required for NGRAM coverage) need LLVM >= 4.0.1 */
+#if LLVM_VERSION_MAJOR > 4 || \
+    (LLVM_VERSION_MAJOR == 4 && LLVM_VERSION_PATCH >= 1)
+#define AFL_HAVE_VECTOR_INTRINSICS 1
+#endif
 bool AFLCoverage::runOnModule(Module &M) {
 
   LLVMContext &C = M.getContext();
 
-  IntegerType *   Int8Ty = IntegerType::getInt8Ty(C);
-  IntegerType *   Int32Ty = IntegerType::getInt32Ty(C);
+  IntegerType *Int8Ty = IntegerType::getInt8Ty(C);
+  IntegerType *Int32Ty = IntegerType::getInt32Ty(C);
+#ifdef AFL_HAVE_VECTOR_INTRINSICS
+  IntegerType *IntLocTy =
+      IntegerType::getIntNTy(C, sizeof(PREV_LOC_T) * CHAR_BIT);
+#endif
   struct timeval  tv;
   struct timezone tz;
   u32             rand_seed;
@@ -145,9 +181,12 @@ bool AFLCoverage::runOnModule(Module &M) {
 
   char be_quiet = 0;
 
+  if (getenv("AFL_DEBUG")) debug = 1;
+
   if ((isatty(2) && !getenv("AFL_QUIET")) || getenv("AFL_DEBUG") != NULL) {
 
-    SAYF(cCYA "afl-llvm-pass" VERSION cRST " by <lszekeres@google.com>\n");
+    SAYF(cCYA "afl-llvm-pass" VERSION cRST
+              " by <lszekeres@google.com> and <adrian.herrera@anu.edu.au>\n");
 
   } else
 
@@ -170,32 +209,171 @@ bool AFLCoverage::runOnModule(Module &M) {
   char *neverZero_counters_str = getenv("AFL_LLVM_NOT_ZERO");
 #endif
 
+  unsigned PrevLocSize;
+
+  char *ngram_size_str = getenv("AFL_LLVM_NGRAM_SIZE");
+  if (!ngram_size_str) ngram_size_str = getenv("AFL_NGRAM_SIZE");
+  ctx_str = getenv("AFL_LLVM_CTX");
+
+#ifdef AFL_HAVE_VECTOR_INTRINSICS
+  /* Decide previous location vector size (must be a power of two) */
+  VectorType *PrevLocTy;
+
+  if (ngram_size_str)
+    if (sscanf(ngram_size_str, "%u", &ngram_size) != 1 || ngram_size < 2 ||
+        ngram_size > NGRAM_SIZE_MAX)
+      FATAL(
+          "Bad value of AFL_NGRAM_SIZE (must be between 2 and NGRAM_SIZE_MAX "
+          "(%u))",
+          NGRAM_SIZE_MAX);
+
+  if (ngram_size == 1) ngram_size = 0;
+  if (ngram_size)
+    PrevLocSize = ngram_size - 1;
+  else
+#else
+  if (ngram_size_str)
+    FATAL("Sorry, NGRAM branch coverage is not supported with llvm version %s!",
+          LLVM_VERSION_STRING);
+#endif
+    PrevLocSize = 1;
+
+#ifdef AFL_HAVE_VECTOR_INTRINSICS
+  uint64_t PrevLocVecSize = PowerOf2Ceil(PrevLocSize);
+  if (ngram_size) PrevLocTy = VectorType::get(IntLocTy, PrevLocVecSize);
+#endif
+
+  if (ctx_str && ngram_size_str)
+    FATAL("you must decide between NGRAM and CTX instrumentation");
+
   /* Get globals for the SHM region and the previous location. Note that
      __afl_prev_loc is thread-local. */
 
   GlobalVariable *AFLMapPtr =
       new GlobalVariable(M, PointerType::get(Int8Ty, 0), false,
                          GlobalValue::ExternalLinkage, 0, "__afl_area_ptr");
+  GlobalVariable *AFLPrevLoc;
+  GlobalVariable *AFLContext;
 
+  if (ctx_str)
 #ifdef __ANDROID__
-  GlobalVariable *AFLPrevLoc = new GlobalVariable(
-      M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_loc");
+    AFLContext = new GlobalVariable(
+        M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_ctx");
 #else
-  GlobalVariable *AFLPrevLoc = new GlobalVariable(
+    AFLContext = new GlobalVariable(
+        M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_ctx", 0,
+        GlobalVariable::GeneralDynamicTLSModel, 0, false);
+#endif
+
+#ifdef AFL_HAVE_VECTOR_INTRINSICS
+  if (ngram_size)
+#ifdef __ANDROID__
+    AFLPrevLoc = new GlobalVariable(
+        M, PrevLocTy, /* isConstant */ false, GlobalValue::ExternalLinkage,
+        /* Initializer */ nullptr, "__afl_prev_loc");
+#else
+    AFLPrevLoc = new GlobalVariable(
+        M, PrevLocTy, /* isConstant */ false, GlobalValue::ExternalLinkage,
+        /* Initializer */ nullptr, "__afl_prev_loc",
+        /* InsertBefore */ nullptr, GlobalVariable::GeneralDynamicTLSModel,
+        /* AddressSpace */ 0, /* IsExternallyInitialized */ false);
+#endif
+  else
+#endif
+#ifdef __ANDROID__
+    AFLPrevLoc = new GlobalVariable(
+        M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_loc");
+#else
+  AFLPrevLoc = new GlobalVariable(
       M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_loc", 0,
       GlobalVariable::GeneralDynamicTLSModel, 0, false);
 #endif
+
+#ifdef AFL_HAVE_VECTOR_INTRINSICS
+  /* Create the vector shuffle mask for updating the previous block history.
+     Note that the first element of the vector will store cur_loc, so just set
+     it to undef to allow the optimizer to do its thing. */
+
+  SmallVector<Constant *, 32> PrevLocShuffle = {UndefValue::get(Int32Ty)};
+
+  for (unsigned I = 0; I < PrevLocSize - 1; ++I)
+    PrevLocShuffle.push_back(ConstantInt::get(Int32Ty, I));
+
+  for (unsigned I = PrevLocSize; I < PrevLocVecSize; ++I)
+    PrevLocShuffle.push_back(ConstantInt::get(Int32Ty, PrevLocSize));
+
+  Constant *PrevLocShuffleMask = ConstantVector::get(PrevLocShuffle);
+#endif
+
+  // other constants we need
   ConstantInt *Zero = ConstantInt::get(Int8Ty, 0);
   ConstantInt *One = ConstantInt::get(Int8Ty, 1);
 
+  LoadInst *PrevCtx;  // CTX sensitive coverage
+
   /* Instrument all the things! */
 
   int inst_blocks = 0;
 
   for (auto &F : M) {
 
+    int has_calls = 0;
+    if (debug)
+      fprintf(stderr, "FUNCTION: %s (%zu)\n", F.getName().str().c_str(),
+              F.size());
+
     if (isBlacklisted(&F)) continue;
 
+    // AllocaInst *CallingContext = nullptr;
+
+    if (ctx_str && F.size() > 1) {  // Context sensitive coverage
+      // load the context ID of the previous function and write it to a local
+      // variable on the stack
+      auto                 bb = &F.getEntryBlock();
+      BasicBlock::iterator IP = bb->getFirstInsertionPt();
+      IRBuilder<>          IRB(&(*IP));
+      PrevCtx = IRB.CreateLoad(AFLContext);
+      PrevCtx->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
+
+      // does the function have calls? and is any of the calls larger than one
+      // basic block?
+      has_calls = 0;
+      for (auto &BB : F) {
+
+        if (has_calls) break;
+        for (auto &IN : BB) {
+
+          CallInst *callInst = nullptr;
+          if ((callInst = dyn_cast<CallInst>(&IN))) {
+
+            Function *Callee = callInst->getCalledFunction();
+            if (!Callee || Callee->size() < 2)
+              continue;
+            else {
+
+              has_calls = 1;
+              break;
+
+            }
+
+          }
+
+        }
+
+      }
+
+      // if yes we store a context ID for this function in the global var
+      if (has_calls) {
+
+        ConstantInt *NewCtx = ConstantInt::get(Int32Ty, AFL_R(MAP_SIZE));
+        StoreInst *  StoreCtx = IRB.CreateStore(NewCtx, AFLContext);
+        StoreCtx->setMetadata(M.getMDKindID("nosanitize"),
+                              MDNode::get(C, None));
+
+      }
+
+    }
+
     for (auto &BB : F) {
 
       BasicBlock::iterator IP = BB.getFirstInsertionPt();
@@ -310,6 +488,22 @@ bool AFLCoverage::runOnModule(Module &M) {
 
       }
 
+      // in CTX mode we have to restore the original context for the caller -
+      // she might be calling other functions which need the correct CTX
+      if (ctx_str && has_calls) {
+
+        Instruction *Inst = BB.getTerminator();
+        if (isa<ReturnInst>(Inst) || isa<ResumeInst>(Inst)) {
+
+          IRBuilder<> Post_IRB(Inst);
+          StoreInst * RestoreCtx = Post_IRB.CreateStore(PrevCtx, AFLContext);
+          RestoreCtx->setMetadata(M.getMDKindID("nosanitize"),
+                                  MDNode::get(C, None));
+
+        }
+
+      }
+
       if (AFL_R(100) >= inst_ratio) continue;
 
       /* Make up cur_loc */
@@ -356,20 +550,50 @@ bool AFLCoverage::runOnModule(Module &M) {
       // fprintf(stderr, " == %d\n", more_than_one);
       if (more_than_one != 1) continue;
 #endif
-      ConstantInt *CurLoc = ConstantInt::get(Int32Ty, cur_loc);
+
+      ConstantInt *CurLoc;
+
+#ifdef AFL_HAVE_VECTOR_INTRINSICS
+      if (ngram_size)
+        CurLoc = ConstantInt::get(IntLocTy, cur_loc);
+      else
+#endif
+        CurLoc = ConstantInt::get(Int32Ty, cur_loc);
 
       /* Load prev_loc */
 
       LoadInst *PrevLoc = IRB.CreateLoad(AFLPrevLoc);
       PrevLoc->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
-      Value *PrevLocCasted = IRB.CreateZExt(PrevLoc, IRB.getInt32Ty());
+      Value *PrevLocTrans;
+
+#ifdef AFL_HAVE_VECTOR_INTRINSICS
+      /* "For efficiency, we propose to hash the tuple as a key into the
+         hit_count map as (prev_block_trans << 1) ^ curr_block_trans, where
+         prev_block_trans = (block_trans_1 ^ ... ^ block_trans_(n-1)" */
+
+      if (ngram_size)
+        PrevLocTrans = IRB.CreateXorReduce(PrevLoc);
+      else
+#endif
+          if (ctx_str)
+        PrevLocTrans = IRB.CreateZExt(IRB.CreateXor(PrevLoc, PrevCtx), Int32Ty);
+      else
+        PrevLocTrans = IRB.CreateZExt(PrevLoc, IRB.getInt32Ty());
 
       /* Load SHM pointer */
 
       LoadInst *MapPtr = IRB.CreateLoad(AFLMapPtr);
       MapPtr->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
-      Value *MapPtrIdx =
-          IRB.CreateGEP(MapPtr, IRB.CreateXor(PrevLocCasted, CurLoc));
+
+      Value *MapPtrIdx;
+#ifdef AFL_HAVE_VECTOR_INTRINSICS
+      if (ngram_size)
+        MapPtrIdx = IRB.CreateGEP(
+            MapPtr,
+            IRB.CreateZExt(IRB.CreateXor(PrevLocTrans, CurLoc), Int32Ty));
+      else
+#endif
+        MapPtrIdx = IRB.CreateGEP(MapPtr, IRB.CreateXor(PrevLocTrans, CurLoc));
 
       /* Update bitmap */
 
@@ -449,11 +673,31 @@ bool AFLCoverage::runOnModule(Module &M) {
       IRB.CreateStore(Incr, MapPtrIdx)
           ->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
 
-      /* Set prev_loc to cur_loc >> 1 */
+      /* Update prev_loc history vector (by placing cur_loc at the head of the
+         vector and shuffle the other elements back by one) */
+
+      StoreInst *Store;
+
+#ifdef AFL_HAVE_VECTOR_INTRINSICS
+      if (ngram_size) {
+
+        Value *ShuffledPrevLoc = IRB.CreateShuffleVector(
+            PrevLoc, UndefValue::get(PrevLocTy), PrevLocShuffleMask);
+        Value *UpdatedPrevLoc = IRB.CreateInsertElement(
+            ShuffledPrevLoc, IRB.CreateLShr(CurLoc, (uint64_t)1), (uint64_t)0);
+
+        Store = IRB.CreateStore(UpdatedPrevLoc, AFLPrevLoc);
+        Store->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
 
-      StoreInst *Store =
-          IRB.CreateStore(ConstantInt::get(Int32Ty, cur_loc >> 1), AFLPrevLoc);
-      Store->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
+      } else
+
+#endif
+      {  /* non-NGRAM: set prev_loc to cur_loc >> 1, marked nosanitize too */
+
+        Store = IRB.CreateStore(ConstantInt::get(Int32Ty, cur_loc >> 1),
+                                AFLPrevLoc);
+        Store->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
+      }
 
       inst_blocks++;
 
@@ -470,10 +714,11 @@ bool AFLCoverage::runOnModule(Module &M) {
     else {
 
       char modeline[100];
-      snprintf(modeline, sizeof(modeline), "%s%s%s%s",
+      snprintf(modeline, sizeof(modeline), "%s%s%s%s%s",
                getenv("AFL_HARDEN") ? "hardened" : "non-hardened",
                getenv("AFL_USE_ASAN") ? ", ASAN" : "",
                getenv("AFL_USE_MSAN") ? ", MSAN" : "",
+               getenv("AFL_USE_CFISAN") ? ", CFISAN" : "",
                getenv("AFL_USE_UBSAN") ? ", UBSAN" : "");
       OKF("Instrumented %u locations (%s mode, ratio %u%%).", inst_blocks,
           modeline, inst_ratio);
diff --git a/llvm_mode/afl-llvm-rt-lto.o.c b/llvm_mode/afl-llvm-rt-lto.o.c
new file mode 100644
index 00000000..5921f968
--- /dev/null
+++ b/llvm_mode/afl-llvm-rt-lto.o.c
@@ -0,0 +1,23 @@
+/*
+   american fuzzy lop++ - LLVM instrumentation bootstrap
+   -----------------------------------------------------
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at:
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+*/
+
+// global flag, written by the constructor below to keep that function alive
+unsigned char __afl_lto_mode = 0;
+
+/* Constructor (priority 0): sets __afl_lto_mode to 1 when the image loads. */
+
+__attribute__((constructor(0))) void __afl_auto_init_globals(void) {
+
+  __afl_lto_mode = 1;
+
+}
+
diff --git a/llvm_mode/afl-llvm-rt.o.c b/llvm_mode/afl-llvm-rt.o.c
index 5f9a5534..3ad9eab4 100644
--- a/llvm_mode/afl-llvm-rt.o.c
+++ b/llvm_mode/afl-llvm-rt.o.c
@@ -26,6 +26,7 @@
 #include "config.h"
 #include "types.h"
 #include "cmplog.h"
+#include "llvm-ngram-coverage.h"
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -41,15 +42,15 @@
 #include <sys/wait.h>
 #include <sys/types.h>
 
+#ifdef __linux__
+#include "snapshot-inl.h"
+#endif
+
 /* This is a somewhat ugly hack for the experimental 'trace-pc-guard' mode.
    Basically, we need to make sure that the forkserver is initialized after
    the LLVM-generated runtime initialization pass, not before. */
 
-#ifdef USE_TRACE_PC
 #define CONST_PRIO 5
-#else
-#define CONST_PRIO 0
-#endif                                                     /* ^USE_TRACE_PC */
 
 #include <sys/mman.h>
 #include <fcntl.h>
@@ -60,17 +61,23 @@
 
 u8  __afl_area_initial[MAP_SIZE];
 u8 *__afl_area_ptr = __afl_area_initial;
+u8 *__afl_dictionary;
 
 #ifdef __ANDROID__
-u32 __afl_prev_loc;
-u32 __afl_final_loc;
+PREV_LOC_T __afl_prev_loc[NGRAM_SIZE_MAX];
+u32        __afl_final_loc;
+u32        __afl_prev_ctx;
+u32        __afl_cmp_counter;
+u32        __afl_dictionary_len;
 #else
-__thread u32 __afl_prev_loc;
-__thread u32 __afl_final_loc;
+__thread PREV_LOC_T __afl_prev_loc[NGRAM_SIZE_MAX];
+__thread u32        __afl_final_loc;
+__thread u32        __afl_prev_ctx;
+__thread u32        __afl_cmp_counter;
+__thread u32        __afl_dictionary_len;
 #endif
 
 struct cmp_map *__afl_cmp_map;
-__thread u32    __afl_cmp_counter;
 
 /* Running in persistent mode? */
 
@@ -92,6 +99,10 @@ static void __afl_map_shm(void) {
     const char *   shm_file_path = id_str;
     int            shm_fd = -1;
     unsigned char *shm_base = NULL;
+    unsigned int   map_size = MAP_SIZE;
+    /* a usable __afl_final_loc (1 < x < MAP_SIZE) overrides the default */
+    if (__afl_final_loc > 1 && __afl_final_loc < MAP_SIZE)
+      map_size = __afl_final_loc;
 
     /* create the shared memory segment as if it was a file */
     shm_fd = shm_open(shm_file_path, O_RDWR, 0600);
@@ -103,7 +114,7 @@ static void __afl_map_shm(void) {
     }
 
     /* map the shared memory segment to the address space of the process */
-    shm_base = mmap(0, MAP_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0);
+    shm_base = mmap(0, map_size, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0);
     if (shm_base == MAP_FAILED) {
 
       close(shm_fd);
@@ -176,30 +187,264 @@ static void __afl_map_shm(void) {
 
 }
 
+#ifdef __linux__
+static void __afl_start_snapshots(void) {
+
+  static u8 tmp[4] = {0, 0, 0, 0};
+  s32       child_pid;
+  u32       status = 0;
+  u32       map_size = MAP_SIZE;
+  u32       already_read_first = 0;
+  u32       was_killed;
+
+  if (__afl_final_loc > 1 && __afl_final_loc < MAP_SIZE)
+    map_size = __afl_final_loc;
+
+  u8 child_stopped = 0;
+
+  void (*old_sigchld_handler)(int) = 0;  // = signal(SIGCHLD, SIG_DFL);
+
+  /* Phone home and tell the parent that we're OK. If parent isn't there,
+     assume we're not running in forkserver mode and just execute program. */
+
+  status |= (FS_OPT_ENABLED | FS_OPT_SNAPSHOT);
+  if (map_size <= 0x800000)
+    status |= (FS_OPT_SET_MAPSIZE(map_size) | FS_OPT_MAPSIZE);
+  if (__afl_dictionary_len > 0 && __afl_dictionary) status |= FS_OPT_AUTODICT;
+  memcpy(tmp, &status, 4);
+
+  if (write(FORKSRV_FD + 1, tmp, 4) != 4) return;
+
+  if (__afl_dictionary_len > 0 && __afl_dictionary) {
+
+    if (read(FORKSRV_FD, &was_killed, 4) != 4) _exit(1);
+
+    if ((was_killed & (FS_OPT_ENABLED | FS_OPT_AUTODICT)) ==
+        (FS_OPT_ENABLED | FS_OPT_AUTODICT)) {
+
+      // great lets pass the dictionary through the forkserver FD
+      u32 len = __afl_dictionary_len, offset = 0;
+      s32 ret;
+
+      if (write(FORKSRV_FD + 1, &len, 4) != 4) {
+
+        write(2, "Error: could not send dictionary len\n",
+              strlen("Error: could not send dictionary len\n"));
+        _exit(1);
+
+      }
+
+      while (len != 0) {
+
+        ret = write(FORKSRV_FD + 1, __afl_dictionary + offset, len);
+
+        if (ret < 1) {
+
+          write(2, "Error: could not send dictionary\n",
+                strlen("Error: could not send dictionary\n"));
+          _exit(1);
+
+        }
+
+        len -= ret;
+        offset += ret;
+
+      }
+
+    } else {
+
+      // uh this forkserver master does not understand extended option passing
+      // or does not want the dictionary
+      already_read_first = 1;
+
+    }
+
+  }
+
+  while (1) {
+
+    int status;
+
+    if (already_read_first) {
+
+      already_read_first = 0;
+
+    } else {
+
+      /* Wait for parent by reading from the pipe. Abort if read fails. */
+      if (read(FORKSRV_FD, &was_killed, 4) != 4) _exit(1);
+
+    }
+
+    /* If we stopped the child in persistent mode, but there was a race
+       condition and afl-fuzz already issued SIGKILL, write off the old
+       process. */
+
+    if (child_stopped && was_killed) {
+
+      child_stopped = 0;
+      if (waitpid(child_pid, &status, 0) < 0) _exit(1);
+
+    }
+
+    if (!child_stopped) {
+
+      /* Once woken up, create a clone of our process. */
+
+      child_pid = fork();
+      if (child_pid < 0) _exit(1);
+
+      /* In child process: close fds, resume execution. */
+
+      if (!child_pid) {
+
+        signal(SIGCHLD, old_sigchld_handler);
+
+        close(FORKSRV_FD);
+        close(FORKSRV_FD + 1);
+
+        if (!afl_snapshot_do()) { raise(SIGSTOP); }
+
+        __afl_area_ptr[0] = 1;
+        memset(__afl_prev_loc, 0, NGRAM_SIZE_MAX * sizeof(PREV_LOC_T));
+
+        return;
+
+      }
+
+    } else {
+
+      /* Special handling for persistent mode: if the child is alive but
+         currently stopped, simply restart it with SIGCONT. */
+
+      kill(child_pid, SIGCONT);
+      child_stopped = 0;
+
+    }
+
+    /* In parent process: write PID to pipe, then wait for child. */
+
+    if (write(FORKSRV_FD + 1, &child_pid, 4) != 4) _exit(1);
+
+    if (waitpid(child_pid, &status, WUNTRACED) < 0) _exit(1);
+
+    /* In persistent mode, the child stops itself with SIGSTOP to indicate
+       a successful run. In this case, we want to wake it up without forking
+       again. */
+
+    if (WIFSTOPPED(status)) child_stopped = 1;
+
+    /* Relay wait status to pipe, then loop back. */
+
+    if (write(FORKSRV_FD + 1, &status, 4) != 4) _exit(1);
+
+  }
+
+}
+
+#endif
+
 /* Fork server logic. */
 
 static void __afl_start_forkserver(void) {
 
-  static u8 tmp[4];
-  s32       child_pid;
+#ifdef __linux__
+  if (!is_persistent && !__afl_cmp_map && !getenv("AFL_NO_SNAPSHOT") &&
+      afl_snapshot_init() >= 0) {
+
+    __afl_start_snapshots();
+    return;
+
+  }
+
+#endif
+
+  u8  tmp[4] = {0, 0, 0, 0};
+  s32 child_pid;
+  u32 status = 0;
+  u32 map_size = MAP_SIZE;
+  u32 already_read_first = 0;
+  u32 was_killed;
+
+  if (__afl_final_loc > 1 && __afl_final_loc < MAP_SIZE)
+    map_size = __afl_final_loc;
 
   u8 child_stopped = 0;
 
   void (*old_sigchld_handler)(int) = 0;  // = signal(SIGCHLD, SIG_DFL);
 
+  if (map_size <= 0x800000)
+    status |= (FS_OPT_SET_MAPSIZE(map_size) | FS_OPT_MAPSIZE);
+  if (__afl_dictionary_len > 0 && __afl_dictionary) status |= FS_OPT_AUTODICT;
+  if (status) status |= (FS_OPT_ENABLED);
+  memcpy(tmp, &status, 4);
+
   /* Phone home and tell the parent that we're OK. If parent isn't there,
      assume we're not running in forkserver mode and just execute program. */
 
   if (write(FORKSRV_FD + 1, tmp, 4) != 4) return;
 
+  if (__afl_dictionary_len > 0 && __afl_dictionary) {
+
+    if (read(FORKSRV_FD, &was_killed, 4) != 4) _exit(1);
+
+    if ((was_killed & (FS_OPT_ENABLED | FS_OPT_AUTODICT)) ==
+        (FS_OPT_ENABLED | FS_OPT_AUTODICT)) {
+
+      // great lets pass the dictionary through the forkserver FD
+      u32 len = __afl_dictionary_len, offset = 0;
+      s32 ret;
+
+      if (write(FORKSRV_FD + 1, &len, 4) != 4) {
+
+        write(2, "Error: could not send dictionary len\n",
+              strlen("Error: could not send dictionary len\n"));
+        _exit(1);
+
+      }
+
+      while (len != 0) {
+
+        ret = write(FORKSRV_FD + 1, __afl_dictionary + offset, len);
+
+        if (ret < 1) {
+
+          write(2, "Error: could not send dictionary\n",
+                strlen("Error: could not send dictionary\n"));
+          _exit(1);
+
+        }
+
+        len -= ret;
+        offset += ret;
+
+      }
+
+    } else {
+
+      // uh this forkserver master does not understand extended option passing
+      // or does not want the dictionary
+      already_read_first = 1;
+
+    }
+
+  }
+
   while (1) {
 
-    u32 was_killed;
     int status;
 
     /* Wait for parent by reading from the pipe. Abort if read fails. */
 
-    if (read(FORKSRV_FD, &was_killed, 4) != 4) _exit(1);
+    if (already_read_first) {
+
+      already_read_first = 0;
+
+    } else {
+
+      if (read(FORKSRV_FD, &was_killed, 4) != 4) _exit(1);
+
+    }
 
     /* If we stopped the child in persistent mode, but there was a race
        condition and afl-fuzz already issued SIGKILL, write off the old
@@ -267,8 +512,12 @@ static void __afl_start_forkserver(void) {
 
 int __afl_persistent_loop(unsigned int max_cnt) {
 
-  static u8  first_pass = 1;
-  static u32 cycle_cnt;
+  static u8    first_pass = 1;
+  static u32   cycle_cnt;
+  unsigned int map_size = MAP_SIZE;
+
+  if (__afl_final_loc > 1 && __afl_final_loc < MAP_SIZE)
+    map_size = __afl_final_loc;
 
   if (first_pass) {
 
@@ -279,9 +528,9 @@ int __afl_persistent_loop(unsigned int max_cnt) {
 
     if (is_persistent) {
 
-      memset(__afl_area_ptr, 0, MAP_SIZE);
+      memset(__afl_area_ptr, 0, map_size);
       __afl_area_ptr[0] = 1;
-      __afl_prev_loc = 0;
+      memset(__afl_prev_loc, 0, NGRAM_SIZE_MAX * sizeof(PREV_LOC_T));
 
     }
 
@@ -298,7 +547,7 @@ int __afl_persistent_loop(unsigned int max_cnt) {
       raise(SIGSTOP);
 
       __afl_area_ptr[0] = 1;
-      __afl_prev_loc = 0;
+      memset(__afl_prev_loc, 0, NGRAM_SIZE_MAX * sizeof(PREV_LOC_T));
 
       return 1;
 
diff --git a/llvm_mode/compare-transform-pass.so.cc b/llvm_mode/compare-transform-pass.so.cc
index 2ca70659..84a9b8d9 100644
--- a/llvm_mode/compare-transform-pass.so.cc
+++ b/llvm_mode/compare-transform-pass.so.cc
@@ -112,11 +112,12 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
                                      const bool processStrcasecmp,
                                      const bool processStrncasecmp) {
 
-  std::vector<CallInst *> calls;
-  LLVMContext &           C = M.getContext();
-  IntegerType *           Int8Ty = IntegerType::getInt8Ty(C);
-  IntegerType *           Int32Ty = IntegerType::getInt32Ty(C);
-  IntegerType *           Int64Ty = IntegerType::getInt64Ty(C);
+  DenseMap<Value *, std::string *> valueMap;
+  std::vector<CallInst *>          calls;
+  LLVMContext &                    C = M.getContext();
+  IntegerType *                    Int8Ty = IntegerType::getInt8Ty(C);
+  IntegerType *                    Int32Ty = IntegerType::getInt32Ty(C);
+  IntegerType *                    Int64Ty = IntegerType::getInt64Ty(C);
 
 #if LLVM_VERSION_MAJOR < 9
   Constant *
@@ -263,6 +264,8 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
           bool isStrncmp = processStrncmp;
           bool isStrcasecmp = processStrcasecmp;
           bool isStrncasecmp = processStrncasecmp;
+          bool isIntMemcpy = true;
+          bool indirect = false;
 
           Function *Callee = callInst->getCalledFunction();
           if (!Callee) continue;
@@ -273,9 +276,10 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
           isStrncmp &= !FuncName.compare(StringRef("strncmp"));
           isStrcasecmp &= !FuncName.compare(StringRef("strcasecmp"));
           isStrncasecmp &= !FuncName.compare(StringRef("strncasecmp"));
+          isIntMemcpy &= !FuncName.compare("llvm.memcpy.p0i8.p0i8.i64");
 
           if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp &&
-              !isStrncasecmp)
+              !isStrncasecmp && !isIntMemcpy)
             continue;
 
           /* Verify the strcmp/memcmp/strncmp/strcasecmp/strncasecmp function
@@ -309,7 +313,7 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
                            FT->getParamType(2)->isIntegerTy();
 
           if (!isStrcmp && !isMemcmp && !isStrncmp && !isStrcasecmp &&
-              !isStrncasecmp)
+              !isStrncasecmp && !isIntMemcpy)
             continue;
 
           /* is a str{n,}{case,}cmp/memcmp, check if we have
@@ -322,6 +326,97 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
           bool      HasStr1 = getConstantStringInfo(Str1P, Str1);
           bool      HasStr2 = getConstantStringInfo(Str2P, Str2);
 
+          if (isIntMemcpy && HasStr2) {
+
+            valueMap[Str1P] = new std::string(Str2.str());
+            // fprintf(stderr, "saved %s for %p\n", Str2.str().c_str(), Str1P);
+            continue;
+
+          }
+
+          // not literal? try globals with an initializer (declarations skipped)
+          if (!(HasStr1 ^ HasStr2)) {
+
+            auto *Ptr = dyn_cast<ConstantExpr>(Str2P);
+            if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) {
+
+              if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) {
+
+                if (auto *Array = !Var->hasInitializer() ? nullptr :
+                        dyn_cast<ConstantDataArray>(Var->getInitializer())) {
+
+                  HasStr2 = true;
+                  Str2 = Array->getAsString();
+                  valueMap[Str2P] = new std::string(Str2.str());
+                  // fprintf(stderr, "glo2 %s\n", Str2.str().c_str());
+
+                }
+
+              }
+
+            }
+
+            if (!HasStr2) {
+
+              auto *Ptr = dyn_cast<ConstantExpr>(Str1P);
+              if (Ptr && Ptr->isGEPWithNoNotionalOverIndexing()) {
+
+                if (auto *Var = dyn_cast<GlobalVariable>(Ptr->getOperand(0))) {
+
+                  if (auto *Array = !Var->hasInitializer() ? nullptr :
+                          dyn_cast<ConstantDataArray>(Var->getInitializer())) {
+
+                    HasStr1 = true;
+                    Str1 = Array->getAsString();
+                    valueMap[Str1P] = new std::string(Str1.str());
+                    // fprintf(stderr, "glo1 %s\n", Str1.str().c_str());
+
+                  }
+
+                }
+
+              }
+
+            } else if (isIntMemcpy) {
+
+              valueMap[Str1P] = new std::string(Str2.str());
+              // fprintf(stderr, "saved\n");
+
+            }
+
+            if ((HasStr1 ^ HasStr2)) indirect = true;
+
+          }
+
+          if (isIntMemcpy) continue;
+
+          if (!(HasStr1 ^ HasStr2)) {
+
+            // do we have a saved local variable initialization?
+            std::string *val = valueMap[Str1P];
+            if (val && !val->empty()) {
+
+              Str1 = StringRef(*val);
+              HasStr1 = true;
+              indirect = true;
+              // fprintf(stderr, "loaded1 %s\n", Str1.str().c_str());
+
+            } else {
+
+              val = valueMap[Str2P];
+              if (val && !val->empty()) {
+
+                Str2 = StringRef(*val);
+                HasStr2 = true;
+                indirect = true;
+                // fprintf(stderr, "loaded2 %s\n", Str2.str().c_str());
+
+              }
+
+            }
+
+          }
+
           /* handle cases of one string is const, one string is variable */
           if (!(HasStr1 ^ HasStr2)) continue;
 
@@ -334,9 +429,8 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
             if (!ilen) continue;
             /* final precaution: if size of compare is larger than constant
              * string skip it*/
-            uint64_t literalLength =
-                HasStr1 ? GetStringLength(Str1P) : GetStringLength(Str2P);
-            if (literalLength < ilen->getZExtValue()) continue;
+            uint64_t literalLength = HasStr1 ? Str1.size() : Str2.size();
+            if (literalLength + 1 < ilen->getZExtValue()) continue;
 
           }
 
@@ -363,9 +457,9 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
     std::string TmpConstStr;
     Value *     VarStr;
     bool        HasStr1 = getConstantStringInfo(Str1P, Str1);
-    getConstantStringInfo(Str2P, Str2);
-    uint64_t constLen, sizedLen;
-    bool     isMemcmp =
+    bool        HasStr2 = getConstantStringInfo(Str2P, Str2);
+    uint64_t    constLen, sizedLen;
+    bool        isMemcmp =
         !callInst->getCalledFunction()->getName().compare(StringRef("memcmp"));
     bool isSizedcmp = isMemcmp ||
                       !callInst->getCalledFunction()->getName().compare(
@@ -389,6 +483,29 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
 
     }
 
+    if (!(HasStr1 ^ HasStr2)) {
+
+      // do we have a saved local or global variable initialization?
+      std::string *val = valueMap[Str1P];
+      if (val && !val->empty()) {
+
+        Str1 = StringRef(*val);
+        HasStr1 = true;
+
+      } else {
+
+        val = valueMap[Str2P];
+        if (val && !val->empty()) {
+
+          Str2 = StringRef(*val);
+          HasStr2 = true;
+
+        }
+
+      }
+
+    }
+
     if (HasStr1) {
 
       TmpConstStr = Str1.str();
diff --git a/llvm_mode/llvm-ngram-coverage.h b/llvm_mode/llvm-ngram-coverage.h
new file mode 100644
index 00000000..12b666e9
--- /dev/null
+++ b/llvm_mode/llvm-ngram-coverage.h
@@ -0,0 +1,18 @@
+#ifndef AFL_NGRAM_CONFIG_H
+#define AFL_NGRAM_CONFIG_H
+
+#include "../config.h"
+
+#if (MAP_SIZE_POW2 <= 16)
+typedef u16 PREV_LOC_T;
+#elif (MAP_SIZE_POW2 <= 32)
+typedef u32 PREV_LOC_T;
+#else
+typedef u64 PREV_LOC_T;
+#endif
+
+/* Maximum ngram size */
+#define NGRAM_SIZE_MAX 16U
+
+#endif
+
diff --git a/llvm_mode/split-compares-pass.so.cc b/llvm_mode/split-compares-pass.so.cc
index d296ba3d..f0615f85 100644
--- a/llvm_mode/split-compares-pass.so.cc
+++ b/llvm_mode/split-compares-pass.so.cc
@@ -1235,8 +1235,8 @@ bool SplitComparesTransform::runOnModule(Module &M) {
 
   int bitw = 64;
 
-  char *bitw_env = getenv("LAF_SPLIT_COMPARES_BITW");
-  if (!bitw_env) bitw_env = getenv("AFL_LLVM_LAF_SPLIT_COMPARES_BITW");
+  char *bitw_env = getenv("AFL_LLVM_LAF_SPLIT_COMPARES_BITW");
+  if (!bitw_env) bitw_env = getenv("LAF_SPLIT_COMPARES_BITW");
   if (bitw_env) { bitw = atoi(bitw_env); }
 
   enableFPSplit = getenv("AFL_LLVM_LAF_SPLIT_FLOATS") != NULL;
diff --git a/qbdi_mode/build.sh b/qbdi_mode/build.sh
index c2912e94..b10971d9 100755
--- a/qbdi_mode/build.sh
+++ b/qbdi_mode/build.sh
@@ -52,6 +52,6 @@ ${compiler_prefix}${CC} -shared -o libdemo.so demo-so.c -w -g
 echo "[+] Building afl-fuzz for Android"
 # build afl-fuzz
 cd ..
-${compiler_prefix}${CC} -DANDROID_DISABLE_FANCY=1 -O3 -funroll-loops -Wall -D_FORTIFY_SOURCE=2 -g -Wno-pointer-sign -I include/ -DAFL_PATH=\"/usr/local/lib/afl\" -DBIN_PATH=\"/usr/local/bin\" -DDOC_PATH=\"/usr/local/share/doc/afl\" -Wno-unused-function src/afl-fuzz-misc.c src/afl-fuzz-extras.c src/afl-fuzz-queue.c src/afl-fuzz-one.c src/afl-fuzz-python.c src/afl-fuzz-stats.c src/afl-fuzz-init.c src/afl-fuzz.c src/afl-fuzz-bitmap.c src/afl-fuzz-run.c src/afl-fuzz-globals.c src/afl-common.c src/afl-sharedmem.c src/afl-forkserver.c -o qbdi_mode/afl-fuzz  -ldl -w
+${compiler_prefix}${CC} -DANDROID_DISABLE_FANCY=1 -O3 -funroll-loops -Wall -D_FORTIFY_SOURCE=2 -g -Wno-pointer-sign -I include/ -DAFL_PATH=\"/usr/local/lib/afl\" -DBIN_PATH=\"/usr/local/bin\" -DDOC_PATH=\"/usr/local/share/doc/afl\" -Wno-unused-function src/afl-fuzz-*.c src/afl-fuzz.c src/afl-common.c src/afl-sharedmem.c src/afl-forkserver.c -o qbdi_mode/afl-fuzz  -ldl -w
 
 echo "[+] All done. Enjoy!"
diff --git a/qemu_mode/build_qemu_support.sh b/qemu_mode/build_qemu_support.sh
index 09c04e7f..118f6ebd 100755
--- a/qemu_mode/build_qemu_support.sh
+++ b/qemu_mode/build_qemu_support.sh
@@ -62,15 +62,15 @@ if [ ! -f "../afl-showmap" ]; then
 
 fi
 
+PREREQ_NOTFOUND=
+for i in libtool wget python automake autoconf sha384sum bison flex iconv patch pkg-config; do
 
-for i in libtool wget python automake autoconf sha384sum bison iconv; do
-
-  T=`which "$i" 2>/dev/null`
+  T=`command -v "$i" 2>/dev/null`
 
   if [ "$T" = "" ]; then
 
     echo "[-] Error: '$i' not found, please install first."
-    exit 1
+    PREREQ_NOTFOUND=1
 
   fi
 
@@ -79,17 +79,21 @@ done
 if [ ! -d "/usr/include/glib-2.0/" -a ! -d "/usr/local/include/glib-2.0/" ]; then
 
   echo "[-] Error: devel version of 'glib2' not found, please install first."
-  exit 1
+  PREREQ_NOTFOUND=1
 
 fi
 
 if echo "$CC" | grep -qF /afl-; then
 
   echo "[-] Error: do not use afl-gcc or afl-clang to compile this tool."
-  exit 1
+  PREREQ_NOTFOUND=1
 
 fi
 
+if [ "$PREREQ_NOTFOUND" = "1" ]; then
+  exit 1
+fi
+
 echo "[+] All checks passed!"
 
 ARCHIVE="`basename -- "$QEMU_URL"`"
@@ -139,8 +143,16 @@ echo "[*] Configuring QEMU for $CPU_TARGET..."
 
 ORIG_CPU_TARGET="$CPU_TARGET"
 
-test "$CPU_TARGET" = "" && CPU_TARGET="`uname -m`"
-test "$CPU_TARGET" = "i686" && CPU_TARGET="i386"
+if [ "$ORIG_CPU_TARGET" = "" ]; then
+  CPU_TARGET="`uname -m`"
+  test "$CPU_TARGET" = "i686" && CPU_TARGET="i386"
+  test "$CPU_TARGET" = "arm64v8" && CPU_TARGET="aarch64"
+  case "$CPU_TARGET" in 
+    *arm*)
+      CPU_TARGET="arm"
+      ;;
+  esac
+fi
 
 cd qemu-$VERSION || exit 1
 
@@ -162,6 +174,8 @@ patch -p1 <../patches/configure.diff || exit 1
 patch -p1 <../patches/tcg-runtime.diff || exit 1
 patch -p1 <../patches/tcg-runtime-head.diff || exit 1
 patch -p1 <../patches/translator.diff || exit 1
+patch -p1 <../patches/__init__.py.diff || exit 1
+patch -p1 <../patches/make_strncpy_safe.diff || exit 1
 
 echo "[+] Patching done."
 
diff --git a/qemu_mode/patches/__init__.py.diff b/qemu_mode/patches/__init__.py.diff
new file mode 100644
index 00000000..7e189b99
--- /dev/null
+++ b/qemu_mode/patches/__init__.py.diff
@@ -0,0 +1,17 @@
+--- a/scripts/tracetool/__init__.py	2020-03-28 13:42:21.937700726 +0100
++++ b/scripts/tracetool/__init__.py	2020-03-28 13:41:50.991034257 +0100
+@@ -447,12 +447,12 @@
+     import tracetool
+ 
+     format = str(format)
+-    if len(format) is 0:
++    if len(format) == 0:
+         raise TracetoolError("format not set")
+     if not tracetool.format.exists(format):
+         raise TracetoolError("unknown format: %s" % format)
+ 
+-    if len(backends) is 0:
++    if len(backends) == 0:
+         raise TracetoolError("no backends specified")
+     for backend in backends:
+         if not tracetool.backend.exists(backend):
diff --git a/qemu_mode/patches/afl-qemu-cpu-inl.h b/qemu_mode/patches/afl-qemu-cpu-inl.h
index 9fdc23ea..ee3ec44e 100644
--- a/qemu_mode/patches/afl-qemu-cpu-inl.h
+++ b/qemu_mode/patches/afl-qemu-cpu-inl.h
@@ -293,7 +293,8 @@ static void print_mappings(void) {
 
 void afl_forkserver(CPUState *cpu) {
 
-  static unsigned char tmp[4];
+  u32                  map_size = 0;
+  static unsigned char tmp[4] = {0, 0, 0, 0};
 
   if (forkserver_installed == 1) return;
   forkserver_installed = 1;
@@ -306,6 +307,15 @@ void afl_forkserver(CPUState *cpu) {
   int   t_fd[2];
   u8    child_stopped = 0;
 
+  // if in the future qemu has non-colliding coverage then switch MAP_SIZE
+  // with the max ID value
+  if (MAP_SIZE <= 0x800000) {
+
+    map_size = (FS_OPT_ENABLED | FS_OPT_MAPSIZE | FS_OPT_SET_MAPSIZE(MAP_SIZE));
+    memcpy(tmp, &map_size, 4);
+
+  }
+
   /* Tell the parent that we're alive. If the parent doesn't want
      to talk, assume that we're not running in forkserver mode. */
 
@@ -413,7 +423,7 @@ void afl_forkserver(CPUState *cpu) {
 void afl_persistent_loop(void) {
 
   static u32            cycle_cnt;
-  static struct afl_tsl exit_cmd_tsl = {{-1, 0, 0, 0}, NULL};
+  static struct afl_tsl exit_cmd_tsl = {{-1, 0, 0, 0}, '\0'};
 
   if (!afl_fork_child) return;
 
diff --git a/qemu_mode/patches/afl-qemu-tcg-inl.h b/qemu_mode/patches/afl-qemu-tcg-inl.h
index 430574e5..f7c662db 100644
--- a/qemu_mode/patches/afl-qemu-tcg-inl.h
+++ b/qemu_mode/patches/afl-qemu-tcg-inl.h
@@ -30,6 +30,7 @@
    have a look at afl-showmap.c.
 
  */
+void afl_gen_tcg_plain_call(void *func);
 
 void afl_gen_tcg_plain_call(void *func) {
 
diff --git a/qemu_mode/patches/afl-qemu-tcg-runtime-inl.h b/qemu_mode/patches/afl-qemu-tcg-runtime-inl.h
index b7cd71bb..1526f09c 100644
--- a/qemu_mode/patches/afl-qemu-tcg-runtime-inl.h
+++ b/qemu_mode/patches/afl-qemu-tcg-runtime-inl.h
@@ -185,8 +185,8 @@ void HELPER(afl_cmplog_rtn)(CPUArchState *env) {
   if (!area_is_mapped(stack, sizeof(target_ulong) * 2)) return;
 
   // when this hook is executed, the retaddr is not on stack yet
-  void *ptr1 = g2h(stack[0]);
-  void *ptr2 = g2h(stack[1]);
+  void *    ptr1 = g2h(stack[0]);
+  void *    ptr2 = g2h(stack[1]);
 
 #else
 
diff --git a/qemu_mode/patches/make_strncpy_safe.diff b/qemu_mode/patches/make_strncpy_safe.diff
new file mode 100644
index 00000000..38c7d248
--- /dev/null
+++ b/qemu_mode/patches/make_strncpy_safe.diff
@@ -0,0 +1,31 @@
+--- a/util/qemu-sockets.c	2020-03-28 13:55:09.511029429 +0100
++++ b/util/qemu-sockets.c	2020-03-28 14:01:12.147693937 +0100
+@@ -877,7 +877,7 @@
+ 
+     memset(&un, 0, sizeof(un));
+     un.sun_family = AF_UNIX;
+-    strncpy(un.sun_path, path, sizeof(un.sun_path));
++    strncpy(un.sun_path, path, sizeof(un.sun_path) - 1);
+ 
+     if (bind(sock, (struct sockaddr*) &un, sizeof(un)) < 0) {
+         error_setg_errno(errp, errno, "Failed to bind socket to %s", path);
+@@ -922,7 +922,7 @@
+ 
+     memset(&un, 0, sizeof(un));
+     un.sun_family = AF_UNIX;
+-    strncpy(un.sun_path, saddr->path, sizeof(un.sun_path));
++    strncpy(un.sun_path, saddr->path, sizeof(un.sun_path) - 1);
+ 
+     /* connect to peer */
+     do {
+--- a/block/sheepdog.c	2020-03-28 14:01:57.164360270 +0100
++++ b/block/sheepdog.c	2020-03-28 14:02:52.781026597 +0100
+@@ -1236,7 +1236,7 @@
+      * don't want the send_req to read uninitialized data.
+      */
+     strncpy(buf, filename, SD_MAX_VDI_LEN);
+-    strncpy(buf + SD_MAX_VDI_LEN, tag, SD_MAX_VDI_TAG_LEN);
++    strncpy(buf + SD_MAX_VDI_LEN, tag, SD_MAX_VDI_TAG_LEN - 1);
+ 
+     memset(&hdr, 0, sizeof(hdr));
+     if (lock) {
diff --git a/qemu_mode/unsigaction/Makefile b/qemu_mode/unsigaction/Makefile
index 31fa8c55..d5063dab 100644
--- a/qemu_mode/unsigaction/Makefile
+++ b/qemu_mode/unsigaction/Makefile
@@ -12,23 +12,28 @@
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
+.POSIX:
 
-ifndef AFL_NO_X86
+_UNIQ=_QINU_
 
-all: lib_i386 lib_amd64
+# Portable (no GNU conditionals) selection of the default targets:
+#   AFL_NO_X86 unset/empty -> TARGETS expands to both shared objects
+#   AFL_NO_X86=1           -> TARGETS is empty and nothing is built
+TARGETCANDIDATES=unsigaction32.so unsigaction64.so
+_TARGETS=$(_UNIQ)$(AFL_NO_X86)$(_UNIQ)
+__TARGETS=$(_TARGETS:$(_UNIQ)1$(_UNIQ)=)
+TARGETS=$(__TARGETS:$(_UNIQ)$(_UNIQ)=$(TARGETCANDIDATES))
 
-lib_i386:
+all:  $(TARGETS)
+	@if [ "$(AFL_NO_X86)" != "" ]; then echo "[!] Note: skipping compilation of unsigaction (AFL_NO_X86 set)."; fi
+
+# Both objects list the source as prerequisite so editing it triggers a rebuild.
+# The 32-bit build is best effort: its recipe reports a failure but never fails.
+unsigaction32.so: unsigaction.c
 	@$(CC) -m32 -fPIC -shared unsigaction.c -o unsigaction32.so 2>/dev/null ; if [ "$$?" = "0" ]; then echo "unsigaction32 build success"; else echo "unsigaction32 build failure (that's fine)"; fi
 
-lib_amd64:
-	$(CC) -fPIC -shared unsigaction.c -o unsigaction64.so
+unsigaction64.so: unsigaction.c
+	$(CC) -m64 -fPIC -shared unsigaction.c -o unsigaction64.so
 
 clean:
 	rm -f unsigaction32.so unsigaction64.so
-
-else
-
-all:
-	@echo "[!] Note: skipping compilation of unsigaction (AFL_NO_X86 set)."
-
-endif
diff --git a/src/README.md b/src/README.md
new file mode 100644
index 00000000..6da534c3
--- /dev/null
+++ b/src/README.md
@@ -0,0 +1,24 @@
+# Source Folder
+
+Quick explanation about the files here:
+
+- `afl-analyze.c`		- afl-analyze binary tool
+- `afl-as.c`		- afl-as binary tool
+- `afl-gotcpu.c`		- afl-gotcpu binary tool
+- `afl-showmap.c`		- afl-showmap binary tool
+- `afl-tmin.c`		- afl-tmin binary tool
+- `afl-fuzz.c`		- afl-fuzz binary tool (just main() and usage())
+- `afl-fuzz-bitmap.c`	- afl-fuzz bitmap handling
+- `afl-fuzz-extras.c`	- afl-fuzz the *extra* function calls
+- `afl-fuzz-state.c`	- afl-fuzz state and globals
+- `afl-fuzz-init.c`		- afl-fuzz initialization
+- `afl-fuzz-misc.c`		- afl-fuzz misc functions
+- `afl-fuzz-one.c`          - afl-fuzz fuzzer_one big loop, this is where the mutation is happening
+- `afl-fuzz-python.c`	- afl-fuzz the python mutator extension
+- `afl-fuzz-queue.c`	- afl-fuzz handling the queue
+- `afl-fuzz-run.c`		- afl-fuzz running the target
+- `afl-fuzz-stats.c`	- afl-fuzz writing the statistics file
+- `afl-gcc.c`		- afl-gcc binary tool (deprecated)
+- `afl-common.c`		- common functions, used by afl-analyze, afl-fuzz, afl-showmap and afl-tmin
+- `afl-forkserver.c`	- forkserver implementation, used by afl-fuzz and afl-tmin
+- `afl-sharedmem.c`	- sharedmem implementation, used by afl-fuzz and afl-tmin
diff --git a/src/README.src b/src/README.src
deleted file mode 100644
index 244f5ddd..00000000
--- a/src/README.src
+++ /dev/null
@@ -1,22 +0,0 @@
-Quick explanation about the files here:
-
-afl-analyze.c		- afl-analyze binary tool
-afl-as.c		- afl-as binary tool
-afl-gotcpu.c		- afl-gotcpu binary tool
-afl-showmap.c		- afl-showmap binary tool
-afl-tmin.c		- afl-tmin binary tool
-afl-fuzz.c		- afl-fuzz binary tool (just main() and usage())
-afl-fuzz-bitmap.c	- afl-fuzz bitmap handling
-afl-fuzz-extras.c	- afl-fuzz the *extra* function calls
-afl-fuzz-globals.c	- afl-fuzz global variables
-afl-fuzz-init.c		- afl-fuzz initialization
-afl-fuzz-misc.c		- afl-fuzz misc functions
-afl-fuzz-one.c          - afl-fuzz fuzzer_one big loop, this is where the mutation is happening
-afl-fuzz-python.c	- afl-fuzz the python mutator extension
-afl-fuzz-queue.c	- afl-fuzz handling the queue
-afl-fuzz-run.c		- afl-fuzz running the target
-afl-fuzz-stats.c	- afl-fuzz writing the statistics file
-afl-gcc.c		- afl-gcc binary tool (deprecated)
-afl-common.c		- common functions, used by afl-analyze, afl-fuzz, afl-showmap and afl-tmin
-afl-forkserver.c	- forkserver implementation, used by afl-fuzz and afl-tmin
-afl-sharedmem.c		- sharedmem implementation, used by afl-fuzz and afl-tmin
diff --git a/src/afl-analyze.c b/src/afl-analyze.c
index 2148cdf0..510ec94a 100644
--- a/src/afl-analyze.c
+++ b/src/afl-analyze.c
@@ -36,6 +36,7 @@
 #include "hash.h"
 #include "sharedmem.h"
 #include "common.h"
+#include "forkserver.h"
 
 #include <stdio.h>
 #include <unistd.h>
@@ -57,11 +58,10 @@
 
 static s32 child_pid;                  /* PID of the tested program         */
 
-u8 *trace_bits;                        /* SHM with instrumentation bitmap   */
+static u8 *trace_bits;                 /* SHM with instrumentation bitmap   */
 
 static u8 *in_file,                    /* Analyzer input test case          */
-    *prog_in,                          /* Targeted program input file       */
-    *doc_path;                         /* Path to docs                      */
+    *prog_in;                          /* Targeted program input file       */
 
 static u8 *in_data;                    /* Input data for analysis           */
 
@@ -75,16 +75,15 @@ static u64 mem_limit = MEM_LIMIT;      /* Memory limit (MB)                 */
 
 static s32 dev_null_fd = -1;           /* FD to /dev/null                   */
 
-u8 edges_only,                         /* Ignore hit counts?                */
+static u8 edges_only,                  /* Ignore hit counts?                */
     use_hex_offsets,                   /* Show hex offsets?                 */
-    be_quiet, use_stdin = 1;           /* Use stdin for program input?      */
+    use_stdin = 1;                     /* Use stdin for program input?      */
 
 static volatile u8 stop_soon,          /* Ctrl-C pressed?                   */
     child_timed_out;                   /* Child timed out?                  */
 
-static u8 qemu_mode;
-
 static u8 *target_path;
+static u8  qemu_mode;
 
 /* Constants used for describing byte behavior. */
 
@@ -207,15 +206,6 @@ static s32 write_to_file(u8 *path, u8 *mem, u32 len) {
 
 }
 
-/* Handle timeout signal. */
-
-static void handle_timeout(int sig) {
-
-  child_timed_out = 1;
-  if (child_pid > 0) kill(child_pid, SIGKILL);
-
-}
-
 /* Execute target application. Returns exec checksum, or 0 if program
    times out. */
 
@@ -770,11 +760,6 @@ static void setup_signal_handlers(void) {
   sigaction(SIGINT, &sa, NULL);
   sigaction(SIGTERM, &sa, NULL);
 
-  /* Exec timeout notifications. */
-
-  sa.sa_handler = handle_timeout;
-  sigaction(SIGALRM, &sa, NULL);
-
 }
 
 /* Display usage hints. */
diff --git a/src/afl-common.c b/src/afl-common.c
index 1aa15442..7eba6ae4 100644
--- a/src/afl-common.c
+++ b/src/afl-common.c
@@ -30,6 +30,7 @@
 #include "debug.h"
 #include "alloc-inl.h"
 #include "envs.h"
+#include "common.h"
 
 /* Detect @@ in args. */
 #ifndef __glibc__
@@ -37,8 +38,11 @@
 #endif
 #include <limits.h>
 
-extern u8 be_quiet;
-char *    afl_environment_variables[] = {
+u8  be_quiet = 0;
+u8 *doc_path = "";
+u8  last_intr = 0;
+
+char *afl_environment_variables[] = {
 
     "AFL_ALIGNED_ALLOC", "AFL_ALLOW_TMP", "AFL_ANALYZE_HEX", "AFL_AS",
     "AFL_AUTORESUME", "AFL_AS_FORCE_INSTRUMENT", "AFL_BENCH_JUST_ONE",
@@ -54,10 +58,12 @@ char *    afl_environment_variables[] = {
     "AFL_INST_LIBS", "AFL_INST_RATIO", "AFL_KEEP_TRACES", "AFL_KEEP_ASSEMBLY",
     "AFL_LD_HARD_FAIL", "AFL_LD_LIMIT_MB", "AFL_LD_NO_CALLOC_OVER",
     "AFL_LD_PRELOAD", "AFL_LD_VERBOSE", "AFL_LLVM_CMPLOG", "AFL_LLVM_INSTRIM",
-    "AFL_LLVM_INSTRIM_LOOPHEAD", "AFL_LLVM_INSTRIM_SKIPSINGLEBLOCK",
-    "AFL_LLVM_LAF_SPLIT_COMPARES", "AFL_LLVM_LAF_SPLIT_COMPARES_BITW",
-    "AFL_LLVM_LAF_SPLIT_FLOATS", "AFL_LLVM_LAF_SPLIT_SWITCHES",
-    "AFL_LLVM_LAF_TRANSFORM_COMPARES", "AFL_LLVM_NOT_ZERO",
+    "AFL_LLVM_CTX", "AFL_LLVM_INSTRUMENT", "AFL_LLVM_INSTRIM_LOOPHEAD",
+    "AFL_LLVM_LTO_AUTODICTIONARY", "AFL_LLVM_AUTODICTIONARY",
+    "AFL_LLVM_INSTRIM_SKIPSINGLEBLOCK", "AFL_LLVM_LAF_SPLIT_COMPARES",
+    "AFL_LLVM_LAF_SPLIT_COMPARES_BITW", "AFL_LLVM_LAF_SPLIT_FLOATS",
+    "AFL_LLVM_LAF_SPLIT_SWITCHES", "AFL_LLVM_LAF_TRANSFORM_COMPARES",
+    "AFL_LLVM_NGRAM_SIZE", "AFL_NGRAM_SIZE", "AFL_LLVM_NOT_ZERO",
     "AFL_LLVM_WHITELIST", "AFL_NO_AFFINITY", "AFL_LLVM_LTO_STARTID",
     "AFL_LLVM_LTO_DONTWRITEID", "AFL_NO_ARITH", "AFL_NO_BUILTIN",
     "AFL_NO_CPU_RED", "AFL_NO_FORKSRV", "AFL_NO_UI",
@@ -73,7 +79,8 @@ char *    afl_environment_variables[] = {
     "AFL_SHUFFLE_QUEUE", "AFL_SKIP_BIN_CHECK", "AFL_SKIP_CPUFREQ",
     "AFL_SKIP_CRASHES", "AFL_TMIN_EXACT", "AFL_TMPDIR", "AFL_TOKEN_FILE",
     "AFL_TRACE_PC", "AFL_USE_ASAN", "AFL_USE_MSAN", "AFL_USE_TRACE_PC",
-    "AFL_USE_UBSAN", "AFL_WINE_PATH", NULL};
+    "AFL_USE_UBSAN", "AFL_USE_CFISAN", "AFL_WINE_PATH", "AFL_NO_SNAPSHOT",
+    NULL};
 
 void detect_file_args(char **argv, u8 *prog_in, u8 *use_stdin) {
 
@@ -134,7 +141,7 @@ void detect_file_args(char **argv, u8 *prog_in, u8 *use_stdin) {
 
 char **argv_cpy_dup(int argc, char **argv) {
 
-  u32 i = 0;
+  int i = 0;
 
   char **ret = ck_alloc((argc + 1) * sizeof(char *));
 
@@ -228,7 +235,7 @@ char **get_qemu_argv(u8 *own_loc, u8 **target_path_p, int argc, char **argv) {
        "Oops, unable to find the 'afl-qemu-trace' binary. The binary must be "
        "built\n"
        "    separately by following the instructions in "
-       "afl->qemu_mode/README.md. "
+       "qemu_mode/README.md. "
        "If you\n"
        "    already have the binary installed, you may need to specify "
        "AFL_PATH in the\n"
@@ -325,7 +332,7 @@ char **get_wine_argv(u8 *own_loc, u8 **target_path_p, int argc, char **argv) {
        "Oops, unable to find the '%s' binary. The binary must be "
        "built\n"
        "    separately by following the instructions in "
-       "afl->qemu_mode/README.md. "
+       "qemu_mode/README.md. "
        "If you\n"
        "    already have the binary installed, you may need to specify "
        "AFL_PATH in the\n"
@@ -348,7 +355,7 @@ void check_environment_vars(char **envp) {
   if (be_quiet) return;
 
   int   index = 0, found = 0;
-  char *env;
+  char *env, *val;
   while ((env = envp[index++]) != NULL) {
 
     if (strncmp(env, "ALF_", 4) == 0) {
@@ -362,10 +369,21 @@ void check_environment_vars(char **envp) {
       while (match == 0 && afl_environment_variables[i] != NULL)
         if (strncmp(env, afl_environment_variables[i],
                     strlen(afl_environment_variables[i])) == 0 &&
-            env[strlen(afl_environment_variables[i])] == '=')
+            env[strlen(afl_environment_variables[i])] == '=') {
+
           match = 1;
-        else
+          if ((val = getenv(afl_environment_variables[i])) && !*val)
+            WARNF(
+                "AFL environment variable %s defined but is empty, this can "
+                "lead to unexpected consequences",
+                afl_environment_variables[i]);
+
+        } else {
+
           i++;
+
+        }
+
       if (match == 0) {
 
         WARNF("Mistyped AFL environment variable: %s", env);
@@ -393,3 +411,394 @@ char *get_afl_env(char *env) {
 
 }
 
+u64 get_cur_time(void) {
+
+  struct timeval  tv;
+  struct timezone tz;
+
+  gettimeofday(&tv, &tz);
+
+  return (tv.tv_sec * 1000ULL) + (tv.tv_usec / 1000);
+
+}
+
+/* Get unix time in microseconds */
+
+u64 get_cur_time_us(void) {
+
+  struct timeval  tv;
+  struct timezone tz;
+
+  gettimeofday(&tv, &tz);
+
+  return (tv.tv_sec * 1000000ULL) + tv.tv_usec;
+
+}
+
+/* Describe integer. The buf should be
+   at least 6 bytes to fit all ints we randomly see.
+   Will return buf for convenience. */
+
+u8 *stringify_int(u8 *buf, size_t len, u64 val) {
+\
+#define CHK_FORMAT(_divisor, _limit_mult, _fmt, _cast)     \
+  do {                                                     \
+                                                           \
+    if (val < (_divisor) * (_limit_mult)) {                \
+                                                           \
+      snprintf(buf, len, _fmt, ((_cast)val) / (_divisor)); \
+      return buf;                                          \
+                                                           \
+    }                                                      \
+                                                           \
+  } while (0)
+
+  /* 0-9999 */
+  CHK_FORMAT(1, 10000, "%llu", u64);
+
+  /* 10.0k - 99.9k */
+  CHK_FORMAT(1000, 99.95, "%0.01fk", double);
+
+  /* 100k - 999k */
+  CHK_FORMAT(1000, 1000, "%lluk", u64);
+
+  /* 1.00M - 9.99M */
+  CHK_FORMAT(1000 * 1000, 9.995, "%0.02fM", double);
+
+  /* 10.0M - 99.9M */
+  CHK_FORMAT(1000 * 1000, 99.95, "%0.01fM", double);
+
+  /* 100M - 999M */
+  CHK_FORMAT(1000 * 1000, 1000, "%lluM", u64);
+
+  /* 1.00G - 9.99G */
+  CHK_FORMAT(1000LL * 1000 * 1000, 9.995, "%0.02fG", double);
+
+  /* 10.0G - 99.9G */
+  CHK_FORMAT(1000LL * 1000 * 1000, 99.95, "%0.01fG", double);
+
+  /* 100G - 999G */
+  CHK_FORMAT(1000LL * 1000 * 1000, 1000, "%lluG", u64);
+
+  /* 1.00T - 9.99T */
+  CHK_FORMAT(1000LL * 1000 * 1000 * 1000, 9.995, "%0.02fT", double);
+
+  /* 10.0T - 99.9T */
+  CHK_FORMAT(1000LL * 1000 * 1000 * 1000, 99.95, "%0.01fT", double);
+
+  /* 100T+ */
+  strncpy(buf, "infty", len);
+  buf[len - 1] = '\0';
+
+  return buf;
+
+}
+
+/* Describe float. Similar as int. */
+
+u8 *stringify_float(u8 *buf, size_t len, double val) {
+
+  if (val < 99.995) {
+
+    snprintf(buf, len, "%0.02f", val);
+
+  } else if (val < 999.95) {
+
+    snprintf(buf, len, "%0.01f", val);
+
+  } else {
+
+    stringify_int(buf, len, (u64)val);
+
+  }
+
+  return buf;
+
+}
+
+/* Describe integer as memory size. */
+
+u8 *stringify_mem_size(u8 *buf, size_t len, u64 val) {
+
+  /* 0-9999 */
+  CHK_FORMAT(1, 10000, "%llu B", u64);
+
+  /* 10.0k - 99.9k */
+  CHK_FORMAT(1024, 99.95, "%0.01f kB", double);
+
+  /* 100k - 999k */
+  CHK_FORMAT(1024, 1000, "%llu kB", u64);
+
+  /* 1.00M - 9.99M */
+  CHK_FORMAT(1024 * 1024, 9.995, "%0.02f MB", double);
+
+  /* 10.0M - 99.9M */
+  CHK_FORMAT(1024 * 1024, 99.95, "%0.01f MB", double);
+
+  /* 100M - 999M */
+  CHK_FORMAT(1024 * 1024, 1000, "%llu MB", u64);
+
+  /* 1.00G - 9.99G */
+  CHK_FORMAT(1024LL * 1024 * 1024, 9.995, "%0.02f GB", double);
+
+  /* 10.0G - 99.9G */
+  CHK_FORMAT(1024LL * 1024 * 1024, 99.95, "%0.01f GB", double);
+
+  /* 100G - 999G */
+  CHK_FORMAT(1024LL * 1024 * 1024, 1000, "%llu GB", u64);
+
+  /* 1.00T - 9.99T */
+  CHK_FORMAT(1024LL * 1024 * 1024 * 1024, 9.995, "%0.02f TB", double);
+
+  /* 10.0T - 99.9T */
+  CHK_FORMAT(1024LL * 1024 * 1024 * 1024, 99.95, "%0.01f TB", double);
+
+#undef CHK_FORMAT
+
+  /* 100T+ */
+  strncpy(buf, "infty", len - 1);
+  buf[len - 1] = '\0';
+
+  return buf;
+
+}
+
+/* Describe time delta as string.
+   Returns a pointer to buf for convenience. */
+
+u8 *stringify_time_diff(u8 *buf, size_t len, u64 cur_ms, u64 event_ms) {
+
+  u64 delta;
+  s32 t_d, t_h, t_m, t_s;
+  u8  val_buf[STRINGIFY_VAL_SIZE_MAX];
+
+  if (!event_ms) {
+
+    snprintf(buf, len, "none seen yet");
+
+  } else {
+
+    delta = cur_ms - event_ms;
+
+    t_d = delta / 1000 / 60 / 60 / 24;
+    t_h = (delta / 1000 / 60 / 60) % 24;
+    t_m = (delta / 1000 / 60) % 60;
+    t_s = (delta / 1000) % 60;
+
+    stringify_int(val_buf, sizeof(val_buf), t_d);
+    snprintf(buf, len, "%s days, %d hrs, %d min, %d sec", val_buf, t_h, t_m,
+             t_s);
+
+  }
+
+  return buf;
+
+}
+
+/* Unsafe Describe integer. The buf sizes are not checked.
+   This is unsafe but fast.
+   Will return buf for convenience. */
+
+u8 *u_stringify_int(u8 *buf, u64 val) {
+\
+#define CHK_FORMAT(_divisor, _limit_mult, _fmt, _cast) \
+  do {                                                 \
+                                                       \
+    if (val < (_divisor) * (_limit_mult)) {            \
+                                                       \
+      sprintf(buf, _fmt, ((_cast)val) / (_divisor));   \
+      return buf;                                      \
+                                                       \
+    }                                                  \
+                                                       \
+  } while (0)
+
+  /* 0-9999 */
+  CHK_FORMAT(1, 10000, "%llu", u64);
+
+  /* 10.0k - 99.9k */
+  CHK_FORMAT(1000, 99.95, "%0.01fk", double);
+
+  /* 100k - 999k */
+  CHK_FORMAT(1000, 1000, "%lluk", u64);
+
+  /* 1.00M - 9.99M */
+  CHK_FORMAT(1000 * 1000, 9.995, "%0.02fM", double);
+
+  /* 10.0M - 99.9M */
+  CHK_FORMAT(1000 * 1000, 99.95, "%0.01fM", double);
+
+  /* 100M - 999M */
+  CHK_FORMAT(1000 * 1000, 1000, "%lluM", u64);
+
+  /* 1.00G - 9.99G */
+  CHK_FORMAT(1000LL * 1000 * 1000, 9.995, "%0.02fG", double);
+
+  /* 10.0G - 99.9G */
+  CHK_FORMAT(1000LL * 1000 * 1000, 99.95, "%0.01fG", double);
+
+  /* 100G - 999G */
+  CHK_FORMAT(1000LL * 1000 * 1000, 1000, "%lluG", u64);
+
+  /* 1.00T - 9.99T */
+  CHK_FORMAT(1000LL * 1000 * 1000 * 1000, 9.995, "%0.02fT", double);
+
+  /* 10.0T - 99.9T */
+  CHK_FORMAT(1000LL * 1000 * 1000 * 1000, 99.95, "%0.01fT", double);
+
+  /* 100T+ */
+  strcpy(buf, "infty");
+
+  return buf;
+
+}
+
+/* Unsafe describe float. Similar as unsafe int. */
+
+u8 *u_stringify_float(u8 *buf, double val) {
+
+  if (val < 99.995) {
+
+    sprintf(buf, "%0.02f", val);
+
+  } else if (val < 999.95) {
+
+    sprintf(buf, "%0.01f", val);
+
+  } else {
+
+    return u_stringify_int(buf, (u64)val);
+
+  }
+
+  return buf;
+
+}
+
+/* Unsafe describe integer as memory size. */
+
+u8 *u_stringify_mem_size(u8 *buf, u64 val) {
+
+  /* 0-9999 */
+  CHK_FORMAT(1, 10000, "%llu B", u64);
+
+  /* 10.0k - 99.9k */
+  CHK_FORMAT(1024, 99.95, "%0.01f kB", double);
+
+  /* 100k - 999k */
+  CHK_FORMAT(1024, 1000, "%llu kB", u64);
+
+  /* 1.00M - 9.99M */
+  CHK_FORMAT(1024 * 1024, 9.995, "%0.02f MB", double);
+
+  /* 10.0M - 99.9M */
+  CHK_FORMAT(1024 * 1024, 99.95, "%0.01f MB", double);
+
+  /* 100M - 999M */
+  CHK_FORMAT(1024 * 1024, 1000, "%llu MB", u64);
+
+  /* 1.00G - 9.99G */
+  CHK_FORMAT(1024LL * 1024 * 1024, 9.995, "%0.02f GB", double);
+
+  /* 10.0G - 99.9G */
+  CHK_FORMAT(1024LL * 1024 * 1024, 99.95, "%0.01f GB", double);
+
+  /* 100G - 999G */
+  CHK_FORMAT(1024LL * 1024 * 1024, 1000, "%llu GB", u64);
+
+  /* 1.00T - 9.99T */
+  CHK_FORMAT(1024LL * 1024 * 1024 * 1024, 9.995, "%0.02f TB", double);
+
+  /* 10.0T - 99.9T */
+  CHK_FORMAT(1024LL * 1024 * 1024 * 1024, 99.95, "%0.01f TB", double);
+
+#undef CHK_FORMAT
+
+  /* 100T+ */
+  strcpy(buf, "infty");
+
+  return buf;
+
+}
+
+/* Unsafe describe time delta as string.
+   Returns a pointer to buf for convenience. */
+
+u8 *u_stringify_time_diff(u8 *buf, u64 cur_ms, u64 event_ms) {
+
+  u64 delta;
+  s32 t_d, t_h, t_m, t_s;
+  u8  val_buf[STRINGIFY_VAL_SIZE_MAX];
+
+  if (!event_ms) {
+
+    sprintf(buf, "none seen yet");
+
+  } else {
+
+    delta = cur_ms - event_ms;
+
+    t_d = delta / 1000 / 60 / 60 / 24;
+    t_h = (delta / 1000 / 60 / 60) % 24;
+    t_m = (delta / 1000 / 60) % 60;
+    t_s = (delta / 1000) % 60;
+
+    u_stringify_int(val_buf, t_d);
+    sprintf(buf, "%s days, %d hrs, %d min, %d sec", val_buf, t_h, t_m, t_s);
+
+  }
+
+  return buf;
+
+}
+
+/* Wrapper for select() and read(), reading exactly len bytes into buf.
+  Returns the time spent waiting in ms (at least 1), derived from timeout;
+  If the wait times out, returns timeout_ms + 1;
+  Returns 0 if an error occurred (fd closed, read error, ctrl+c, ...); */
+u32 read_timed(s32 fd, void *buf, size_t len, u32 timeout_ms,
+               volatile u8 *stop_soon_p) {
+
+  struct timeval timeout;
+  fd_set         readfds;
+  FD_ZERO(&readfds);
+  FD_SET(fd, &readfds);
+
+  timeout.tv_sec = (timeout_ms / 1000);
+  timeout.tv_usec = (timeout_ms % 1000) * 1000;
+
+  size_t  read_total = 0;
+  ssize_t len_read = 0;
+
+  while (read_total < len) {
+
+    /* Wait for data. read_total tracks progress so short reads add up. */
+    int sret = select(fd + 1, &readfds, NULL, NULL, &timeout);
+
+    if (!sret) {
+
+      /* Timed out: reported as a value greater than timeout_ms. */
+      return timeout_ms + 1;
+
+    } else if (sret < 0) {
+
+      /* Retry select for all signals other than ctrl+c */
+      if (errno == EINTR && !*stop_soon_p) { continue; }
+      return 0;
+
+    }
+
+    len_read = read(fd, ((u8 *)buf) + read_total, len - read_total);
+    if (len_read <= 0) { return 0; }
+    read_total += len_read;
+
+  }
+
+  s32 exec_ms =
+      MIN(timeout_ms,
+          ((u64)timeout_ms - (timeout.tv_sec * 1000 + timeout.tv_usec / 1000)));
+  return exec_ms > 0 ? exec_ms
+                     : 1;  // at least 1 milli must have passed (0 is an error)
+
+}
+
diff --git a/src/afl-forkserver.c b/src/afl-forkserver.c
index c7a3475f..28f664fa 100644
--- a/src/afl-forkserver.c
+++ b/src/afl-forkserver.c
@@ -49,90 +49,11 @@
 
 /* Describe integer as memory size. */
 
-extern u8 *doc_path;
-
-u8 *forkserver_DMS(u64 val) {
-
-  static u8 tmp[12][16];
-  static u8 cur;
-
-#define CHK_FORMAT(_divisor, _limit_mult, _fmt, _cast)    \
-  do {                                                    \
-                                                          \
-    if (val < (_divisor) * (_limit_mult)) {               \
-                                                          \
-      sprintf(tmp[cur], _fmt, ((_cast)val) / (_divisor)); \
-      return tmp[cur];                                    \
-                                                          \
-    }                                                     \
-                                                          \
-  } while (0)
-
-  cur = (cur + 1) % 12;
-
-  /* 0-9999 */
-  CHK_FORMAT(1, 10000, "%llu B", u64);
-
-  /* 10.0k - 99.9k */
-  CHK_FORMAT(1024, 99.95, "%0.01f kB", double);
-
-  /* 100k - 999k */
-  CHK_FORMAT(1024, 1000, "%llu kB", u64);
-
-  /* 1.00M - 9.99M */
-  CHK_FORMAT(1024 * 1024, 9.995, "%0.02f MB", double);
-
-  /* 10.0M - 99.9M */
-  CHK_FORMAT(1024 * 1024, 99.95, "%0.01f MB", double);
-
-  /* 100M - 999M */
-  CHK_FORMAT(1024 * 1024, 1000, "%llu MB", u64);
-
-  /* 1.00G - 9.99G */
-  CHK_FORMAT(1024LL * 1024 * 1024, 9.995, "%0.02f GB", double);
-
-  /* 10.0G - 99.9G */
-  CHK_FORMAT(1024LL * 1024 * 1024, 99.95, "%0.01f GB", double);
-
-  /* 100G - 999G */
-  CHK_FORMAT(1024LL * 1024 * 1024, 1000, "%llu GB", u64);
-
-  /* 1.00T - 9.99G */
-  CHK_FORMAT(1024LL * 1024 * 1024 * 1024, 9.995, "%0.02f TB", double);
-
-  /* 10.0T - 99.9T */
-  CHK_FORMAT(1024LL * 1024 * 1024 * 1024, 99.95, "%0.01f TB", double);
-
-#undef CHK_FORMAT
-
-  /* 100T+ */
-  strcpy(tmp[cur], "infty");
-  return tmp[cur];
-
-}
-
 list_t fsrv_list = {.element_prealloc_count = 0};
 
-/* the timeout handler */
-
-void handle_timeout(int sig) {
-
-  LIST_FOREACH(&fsrv_list, afl_forkserver_t, {
-
-    // TODO: We need a proper timer to handle multiple timeouts
-    if (el->child_pid > 0) {
+static void fsrv_exec_child(afl_forkserver_t *fsrv, char **argv) {
 
-      el->child_timed_out = 1;
-      kill(el->child_pid, SIGKILL);
-
-    } else if (el->child_pid == -1 && el->fsrv_pid > 0) {
-
-      el->child_timed_out = 1;
-      kill(el->fsrv_pid, SIGKILL);
-
-    }
-
-  });
+  execv(fsrv->target_path, argv);
 
 }
 
@@ -153,21 +74,51 @@ void afl_fsrv_init(afl_forkserver_t *fsrv) {
   fsrv->exec_tmout = EXEC_TIMEOUT;
   fsrv->mem_limit = MEM_LIMIT;
   fsrv->child_pid = -1;
-  fsrv->out_dir_fd = -1;
-
+  fsrv->map_size = MAP_SIZE;
   fsrv->use_fauxsrv = 0;
+  fsrv->prev_timed_out = 0;
+
+  fsrv->init_child_func = fsrv_exec_child;
 
   list_append(&fsrv_list, fsrv);
 
 }
 
+/* Initialize a new forkserver instance, duplicating "global" settings */
+void afl_fsrv_init_dup(afl_forkserver_t *fsrv_to, afl_forkserver_t *from) {
+
+  fsrv_to->use_stdin = from->use_stdin;
+  fsrv_to->dev_null_fd = from->dev_null_fd;
+  fsrv_to->exec_tmout = from->exec_tmout;
+  fsrv_to->mem_limit = from->mem_limit;
+  fsrv_to->map_size = from->map_size;
+
+#ifndef HAVE_ARC4RANDOM
+  fsrv_to->dev_urandom_fd = from->dev_urandom_fd;
+#endif
+
+  // These are forkserver specific.
+  fsrv_to->out_fd = -1;
+  fsrv_to->out_dir_fd = -1;
+  fsrv_to->child_pid = -1;
+  fsrv_to->use_fauxsrv = 0;
+  fsrv_to->prev_timed_out = 0;
+
+  fsrv_to->init_child_func = fsrv_exec_child;
+
+  list_append(&fsrv_list, fsrv_to);
+
+}
+
 /* Internal forkserver for dumb_mode=1 and non-forkserver mode runs.
   It execvs for each fork, forwarding exit codes and child pids to afl. */
 
 static void afl_fauxsrv_execv(afl_forkserver_t *fsrv, char **argv) {
 
-  static unsigned char tmp[4] = {0};
-  pid_t                child_pid = -1;
+  unsigned char tmp[4] = {0, 0, 0, 0};
+  pid_t         child_pid = -1;
+
+  if (!be_quiet) ACTF("Using Fauxserver:");
 
   /* Phone home and tell the parent that we're OK. If parent isn't there,
      assume we're not running in forkserver mode and just execute program. */
@@ -244,16 +195,25 @@ static void afl_fauxsrv_execv(afl_forkserver_t *fsrv, char **argv) {
    cloning a stopped child. So, we just execute once, and then send commands
    through a pipe. The other part of this logic is in afl-as.h / llvm_mode */
 
-void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv) {
+void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv,
+                    volatile u8 *stop_soon_p, u8 debug_child_output) {
+
+  int st_pipe[2], ctl_pipe[2];
+  int status;
+  s32 rlen;
+
+  if (!be_quiet) ACTF("Spinning up the fork server...");
+
+  if (fsrv->use_fauxsrv) {
 
-  struct timeval timeout;
-  int            st_pipe[2], ctl_pipe[2];
-  int            status;
-  s32            rlen;
+    /* TODO: Come up with some nice way to initialize this all */
 
-  if (fsrv->use_fauxsrv) ACTF("Using Fauxserver:");
+    if (fsrv->init_child_func != fsrv_exec_child)
+      FATAL("Different forkserver not compatible with fauxserver");
 
-  if (!getenv("AFL_QUIET")) ACTF("Spinning up the fork server...");
+    fsrv->init_child_func = afl_fauxsrv_execv;
+
+  }
 
   if (pipe(st_pipe) || pipe(ctl_pipe)) PFATAL("pipe() failed");
 
@@ -305,7 +265,7 @@ void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv) {
 
     setsid();
 
-    if (!get_afl_env("AFL_DEBUG_CHILD_OUTPUT")) {
+    if (!(debug_child_output)) {
 
       dup2(fsrv->dev_null_fd, 1);
       dup2(fsrv->dev_null_fd, 2);
@@ -338,7 +298,7 @@ void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv) {
 #ifndef HAVE_ARC4RANDOM
     close(fsrv->dev_urandom_fd);
 #endif
-    close(fsrv->plot_file == NULL ? -1 : fileno(fsrv->plot_file));
+    if (fsrv->plot_file != NULL) fclose(fsrv->plot_file);
 
     /* This should improve performance a bit, since it stops the linker from
        doing extra work post-fork(). */
@@ -367,15 +327,7 @@ void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv) {
            "msan_track_origins=0",
            0);
 
-    if (fsrv->use_fauxsrv) {
-
-      afl_fauxsrv_execv(fsrv, argv);
-
-    } else {
-
-      execv(fsrv->target_path, argv);
-
-    }
+    fsrv->init_child_func(fsrv, argv);
 
     /* Use a distinctive bitmap signature to tell the parent about execv()
        falling through. */
@@ -400,24 +352,22 @@ void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv) {
   rlen = 0;
   if (fsrv->exec_tmout) {
 
-    fd_set readfds;
+    u32 elapsed = read_timed(fsrv->fsrv_st_fd, &status, 4,
+                             fsrv->exec_tmout * FORK_WAIT_MULT, stop_soon_p);
 
-    FD_ZERO(&readfds);
-    FD_SET(fsrv->fsrv_st_fd, &readfds);
-    timeout.tv_sec = ((fsrv->exec_tmout * FORK_WAIT_MULT) / 1000);
-    timeout.tv_usec = ((fsrv->exec_tmout * FORK_WAIT_MULT) % 1000) * 1000;
+    if (!elapsed) {
 
-    int sret = select(fsrv->fsrv_st_fd + 1, &readfds, NULL, NULL, &timeout);
+      kill(fsrv->fsrv_pid, SIGKILL);
 
-    if (sret == 0) {
+    } else if (elapsed > fsrv->exec_tmout * FORK_WAIT_MULT) {
 
       fsrv->child_timed_out = 1;
-      kill(fsrv->child_pid, SIGKILL);
+      kill(fsrv->fsrv_pid, SIGKILL);
 
     } else {
-
-      rlen = read(fsrv->fsrv_st_fd, &status, 4);
-
+
+      rlen = 4;
+
     }
 
   } else {
@@ -431,7 +381,98 @@ void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv) {
 
   if (rlen == 4) {
 
-    if (!getenv("AFL_QUIET")) OKF("All right - fork server is up.");
+    if (!be_quiet) OKF("All right - fork server is up.");
+
+    if ((status & FS_OPT_ENABLED) == FS_OPT_ENABLED) {
+
+      if (!be_quiet)
+        ACTF("Extended forkserver functions received (%08x).", status);
+
+      if ((status & FS_OPT_SNAPSHOT) == FS_OPT_SNAPSHOT) {
+
+        fsrv->snapshot = 1;
+        if (!be_quiet) ACTF("Using SNAPSHOT feature.");
+
+      }
+
+      if ((status & FS_OPT_MAPSIZE) == FS_OPT_MAPSIZE) {
+
+        fsrv->map_size = FS_OPT_GET_MAPSIZE(status);
+        if (fsrv->map_size % 8)  // should not happen
+          fsrv->map_size = (((fsrv->map_size + 8) >> 3) << 3);
+        if (!be_quiet) ACTF("Target map size: %u", fsrv->map_size);
+        if (fsrv->map_size > MAP_SIZE)
+          FATAL(
+              "Target's coverage map size of %u is larger than the one this "
+              "afl++ is compiled with (%u)\n",
+              fsrv->map_size, MAP_SIZE);
+
+      }
+
+      if ((status & FS_OPT_AUTODICT) == FS_OPT_AUTODICT) {
+
+        if (fsrv->function_ptr == NULL || fsrv->function_opt == NULL) {
+
+          // this is not afl-fuzz - we deny and return
+          status = (0xffffffff ^ (FS_OPT_ENABLED | FS_OPT_AUTODICT));
+          if (write(fsrv->fsrv_ctl_fd, &status, 4) != 4)
+            FATAL("Writing to forkserver failed.");
+          return;
+
+        }
+
+        if (!be_quiet) ACTF("Using AUTODICT feature.");
+        status = (FS_OPT_ENABLED | FS_OPT_AUTODICT);
+        if (write(fsrv->fsrv_ctl_fd, &status, 4) != 4)
+          FATAL("Writing to forkserver failed.");
+        if (read(fsrv->fsrv_st_fd, &status, 4) != 4)
+          FATAL("Reading from forkserver failed.");
+
+        if (status < 2 || (u32)status > 0xffffff)
+          FATAL("Dictionary has an illegal size: %d", status);
+
+        u32 len = status, offset = 0, count = 0;
+        u8 *dict = ck_alloc(len);
+        if (dict == NULL)
+          FATAL("Could not allocate %u bytes of autodictionary memmory", len);
+
+        while (len != 0) {
+
+          rlen = read(fsrv->fsrv_st_fd, dict + offset, len);
+          if (rlen > 0) {
+
+            len -= rlen;
+            offset += rlen;
+
+          } else {
+
+            FATAL(
+                "Reading autodictionary fail at position %u with %u bytes "
+                "left.",
+                offset, len);
+
+          }
+
+        }
+
+        /* The blob is a sequence of <1-byte length><token bytes> records. */
+        offset = 0;
+        while (offset < status && (u8)dict[offset] + offset < status) {
+
+          fsrv->function_ptr(fsrv->function_opt, dict + offset + 1,
+                             (u8)dict[offset]);
+          offset += (1 + dict[offset]);
+          count++;
+
+        }
+
+        if (!be_quiet) ACTF("Loaded %u autodictionary entries", count);
+        ck_free(dict);
+
+      }
+
+    }
+
     return;
 
   }
@@ -476,6 +517,8 @@ void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv) {
 
     } else {
 
+      u8 val_buf[STRINGIFY_VAL_SIZE_MAX];
+
       SAYF("\n" cLRD "[-] " cRST
            "Whoops, the target binary crashed suddenly, "
            "before receiving any input\n"
@@ -508,7 +551,8 @@ void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv) {
            "options\n"
            "      fail, poke <afl-users@googlegroups.com> for troubleshooting "
            "tips.\n",
-           forkserver_DMS(fsrv->mem_limit << 20), fsrv->mem_limit - 1);
+           stringify_mem_size(val_buf, sizeof(val_buf), fsrv->mem_limit << 20),
+           fsrv->mem_limit - 1);
 
     }
 
@@ -543,6 +587,8 @@ void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv) {
 
   } else {
 
+    u8 val_buf[STRINGIFY_VAL_SIZE_MAX];
+
     SAYF(
         "\n" cLRD "[-] " cRST
         "Hmm, looks like the target binary terminated "
@@ -574,7 +620,8 @@ void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv) {
               "never\n"
               "      reached before the program terminates.\n\n"
             : "",
-        forkserver_DMS(fsrv->mem_limit << 20), fsrv->mem_limit - 1);
+        stringify_mem_size(val_buf, sizeof(val_buf), fsrv->mem_limit << 20),
+        fsrv->mem_limit - 1);
 
   }
 
@@ -582,11 +629,19 @@ void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv) {
 
 }
 
+static void afl_fsrv_kill(afl_forkserver_t *fsrv) {
+
+  /* pid <= 0 means "not started": never kill()/waitpid() on such a value. */
+  if (fsrv->child_pid > 0) kill(fsrv->child_pid, SIGKILL);
+  if (fsrv->fsrv_pid <= 0 || kill(fsrv->fsrv_pid, SIGKILL)) return;
+  if (waitpid(fsrv->fsrv_pid, NULL, 0) <= 0) { WARNF("error waitpid\n"); }
+}
+
 void afl_fsrv_killall() {
 
   LIST_FOREACH(&fsrv_list, afl_forkserver_t, {
 
-    if (el->child_pid > 0) kill(el->child_pid, SIGKILL);
+    afl_fsrv_kill(el);
 
   });
 
@@ -594,6 +649,7 @@ void afl_fsrv_killall() {
 
 void afl_fsrv_deinit(afl_forkserver_t *fsrv) {
 
+  afl_fsrv_kill(fsrv);
   list_remove(&fsrv_list, fsrv);
 
 }
diff --git a/src/afl-fuzz-bitmap.c b/src/afl-fuzz-bitmap.c
index 7e2d3212..b6a494db 100644
--- a/src/afl-fuzz-bitmap.c
+++ b/src/afl-fuzz-bitmap.c
@@ -24,6 +24,7 @@
  */
 
 #include "afl-fuzz.h"
+#include <limits.h>
 
 /* Write bitmap to file. The bitmap is useful mostly for the secret
    -B option, to focus a separate fuzzing session on a particular
@@ -31,13 +32,13 @@
 
 void write_bitmap(afl_state_t *afl) {
 
-  u8 *fname;
+  u8  fname[PATH_MAX];
   s32 fd;
 
   if (!afl->bitmap_changed) return;
   afl->bitmap_changed = 0;
 
-  fname = alloc_printf("%s/fuzz_bitmap", afl->out_dir);
+  snprintf(fname, PATH_MAX, "%s/fuzz_bitmap", afl->out_dir);
   fd = open(fname, O_WRONLY | O_CREAT | O_TRUNC, 0600);
 
   if (fd < 0) PFATAL("Unable to open '%s'", fname);
@@ -45,7 +46,6 @@ void write_bitmap(afl_state_t *afl) {
   ck_write(fd, afl->virgin_bits, MAP_SIZE, fname);
 
   close(fd);
-  ck_free(fname);
 
 }
 
@@ -78,16 +78,17 @@ u8 has_new_bits(afl_state_t *afl, u8 *virgin_map) {
   u64 *current = (u64 *)afl->fsrv.trace_bits;
   u64 *virgin = (u64 *)virgin_map;
 
-  u32 i = (MAP_SIZE >> 3);
+  u32 i = (afl->fsrv.map_size >> 3);
 
 #else
 
   u32 *current = (u32 *)afl->fsrv.trace_bits;
   u32 *virgin = (u32 *)virgin_map;
 
-  u32 i = (MAP_SIZE >> 2);
+  u32 i = (afl->fsrv.map_size >> 2);
 
 #endif                                                     /* ^WORD_SIZE_64 */
+  if (i == 0) i = 1;
 
   u8 ret = 0;
 
@@ -138,7 +139,8 @@ u8 has_new_bits(afl_state_t *afl, u8 *virgin_map) {
 
   }
 
-  if (ret && virgin_map == afl->virgin_bits) afl->bitmap_changed = 1;
+  if (unlikely(ret) && unlikely(virgin_map == afl->virgin_bits))
+    afl->bitmap_changed = 1;
 
   return ret;
 
@@ -147,12 +149,14 @@ u8 has_new_bits(afl_state_t *afl, u8 *virgin_map) {
 /* Count the number of bits set in the provided bitmap. Used for the status
    screen several times every second, does not have to be fast. */
 
-u32 count_bits(u8 *mem) {
+u32 count_bits(afl_state_t *afl, u8 *mem) {
 
   u32 *ptr = (u32 *)mem;
-  u32  i = (MAP_SIZE >> 2);
+  u32  i = (afl->fsrv.map_size >> 2);
   u32  ret = 0;
 
+  if (i == 0) i = 1;
+
   while (i--) {
 
     u32 v = *(ptr++);
@@ -177,27 +181,27 @@ u32 count_bits(u8 *mem) {
 
 }
 
-#define FF(_b) (0xff << ((_b) << 3))
-
 /* Count the number of bytes set in the bitmap. Called fairly sporadically,
    mostly to update the status screen or calibrate and examine confirmed
    new paths. */
 
-u32 count_bytes(u8 *mem) {
+u32 count_bytes(afl_state_t *afl, u8 *mem) {
 
   u32 *ptr = (u32 *)mem;
-  u32  i = (MAP_SIZE >> 2);
+  u32  i = (afl->fsrv.map_size >> 2);
   u32  ret = 0;
 
+  if (i == 0) i = 1;
+
   while (i--) {
 
     u32 v = *(ptr++);
 
     if (!v) continue;
-    if (v & FF(0)) ++ret;
-    if (v & FF(1)) ++ret;
-    if (v & FF(2)) ++ret;
-    if (v & FF(3)) ++ret;
+    if (v & 0x000000ff) ++ret;
+    if (v & 0x0000ff00) ++ret;
+    if (v & 0x00ff0000) ++ret;
+    if (v & 0xff000000) ++ret;
 
   }
 
@@ -208,12 +212,14 @@ u32 count_bytes(u8 *mem) {
 /* Count the number of non-255 bytes set in the bitmap. Used strictly for the
    status screen, several calls per second or so. */
 
-u32 count_non_255_bytes(u8 *mem) {
+u32 count_non_255_bytes(afl_state_t *afl, u8 *mem) {
 
   u32 *ptr = (u32 *)mem;
-  u32  i = (MAP_SIZE >> 2);
+  u32  i = (afl->fsrv.map_size >> 2);
   u32  ret = 0;
 
+  if (i == 0) i = 1;
+
   while (i--) {
 
     u32 v = *(ptr++);
@@ -222,10 +228,10 @@ u32 count_non_255_bytes(u8 *mem) {
        case. */
 
     if (v == 0xffffffff) continue;
-    if ((v & FF(0)) != FF(0)) ++ret;
-    if ((v & FF(1)) != FF(1)) ++ret;
-    if ((v & FF(2)) != FF(2)) ++ret;
-    if ((v & FF(3)) != FF(3)) ++ret;
+    if ((v & 0x000000ff) != 0x000000ff) ++ret;
+    if ((v & 0x0000ff00) != 0x0000ff00) ++ret;
+    if ((v & 0x00ff0000) != 0x00ff0000) ++ret;
+    if ((v & 0xff000000) != 0xff000000) ++ret;
 
   }
 
@@ -246,9 +252,11 @@ const u8 simplify_lookup[256] = {
 
 #ifdef WORD_SIZE_64
 
-void simplify_trace(u64 *mem) {
+void simplify_trace(afl_state_t *afl, u64 *mem) {
 
-  u32 i = MAP_SIZE >> 3;
+  u32 i = (afl->fsrv.map_size >> 3);
+
+  if (i == 0) i = 1;
 
   while (i--) {
 
@@ -279,9 +287,11 @@ void simplify_trace(u64 *mem) {
 
 #else
 
-void simplify_trace(u32 *mem) {
+void simplify_trace(afl_state_t *afl, u32 *mem) {
+
+  u32 i = (afl->fsrv.map_size >> 2);
 
-  u32 i = MAP_SIZE >> 2;
+  if (i == 0) i = 1;
 
   while (i--) {
 
@@ -341,9 +351,11 @@ void init_count_class16(void) {
 
 #ifdef WORD_SIZE_64
 
-void classify_counts(u64 *mem) {
+void classify_counts(afl_state_t *afl, u64 *mem) {
+
+  u32 i = (afl->fsrv.map_size >> 3);
 
-  u32 i = MAP_SIZE >> 3;
+  if (i == 0) i = 1;
 
   while (i--) {
 
@@ -368,9 +380,11 @@ void classify_counts(u64 *mem) {
 
 #else
 
-void classify_counts(u32 *mem) {
+void classify_counts(afl_state_t *afl, u32 *mem) {
 
-  u32 i = MAP_SIZE >> 2;
+  u32 i = (afl->fsrv.map_size >> 2);
+
+  if (i == 0) i = 1;
 
   while (i--) {
 
@@ -397,11 +411,11 @@ void classify_counts(u32 *mem) {
    count information here. This is called only sporadically, for some
    new paths. */
 
-void minimize_bits(u8 *dst, u8 *src) {
+void minimize_bits(afl_state_t *afl, u8 *dst, u8 *src) {
 
   u32 i = 0;
 
-  while (i < MAP_SIZE) {
+  while (i < afl->fsrv.map_size) {
 
     if (*(src++)) dst[i >> 3] |= 1 << (i & 7);
     ++i;
@@ -413,13 +427,13 @@ void minimize_bits(u8 *dst, u8 *src) {
 #ifndef SIMPLE_FILES
 
 /* Construct a file name for a new test case, capturing the operation
-   that led to its discovery. Uses a static buffer. */
+   that led to its discovery. Returns a ptr to afl->describe_op_buf_256. */
 
 u8 *describe_op(afl_state_t *afl, u8 hnb) {
 
   u8 *ret = afl->describe_op_buf_256;
 
-  if (afl->syncing_party) {
+  if (unlikely(afl->syncing_party)) {
 
     sprintf(ret, "sync:%s,src:%06u", afl->syncing_party, afl->syncing_case);
 
@@ -461,20 +475,23 @@ u8 *describe_op(afl_state_t *afl, u8 hnb) {
 
 static void write_crash_readme(afl_state_t *afl) {
 
-  u8 *  fn = alloc_printf("%s/crashes/README.txt", afl->out_dir);
+  u8    fn[PATH_MAX];
   s32   fd;
   FILE *f;
 
+  u8 val_buf[STRINGIFY_VAL_SIZE_MAX];
+
+  snprintf(fn, PATH_MAX, "%s/crashes/README.txt", afl->out_dir);
+
   fd = open(fn, O_WRONLY | O_CREAT | O_EXCL, 0600);
-  ck_free(fn);
 
   /* Do not die on errors here - that would be impolite. */
 
-  if (fd < 0) return;
+  if (unlikely(fd < 0)) return;
 
   f = fdopen(fd, "w");
 
-  if (!f) {
+  if (unlikely(!f)) {
 
     close(fd);
     return;
@@ -501,7 +518,9 @@ static void write_crash_readme(afl_state_t *afl) {
 
       "  https://github.com/AFLplusplus/AFLplusplus\n\n",
 
-      afl->orig_cmdline, DMS(afl->fsrv.mem_limit << 20));  /* ignore errors */
+      afl->orig_cmdline,
+      stringify_mem_size(val_buf, sizeof(val_buf),
+                         afl->fsrv.mem_limit << 20));      /* ignore errors */
 
   fclose(f);
 
@@ -513,15 +532,17 @@ static void write_crash_readme(afl_state_t *afl) {
 
 u8 save_if_interesting(afl_state_t *afl, void *mem, u32 len, u8 fault) {
 
-  if (len == 0) return 0;
+  if (unlikely(len == 0)) return 0;
 
-  u8 *fn = "";
+  u8 *queue_fn = "";
   u8  hnb;
   s32 fd;
   u8  keeping = 0, res;
 
+  u8 fn[PATH_MAX];
+
   /* Update path frequency. */
-  u32 cksum = hash32(afl->fsrv.trace_bits, MAP_SIZE, HASH_CONST);
+  u32 cksum = hash32(afl->fsrv.trace_bits, afl->fsrv.map_size, HASH_CONST);
 
   struct queue_entry *q = afl->queue;
   while (q) {
@@ -537,30 +558,31 @@ u8 save_if_interesting(afl_state_t *afl, void *mem, u32 len, u8 fault) {
 
   }
 
-  if (fault == afl->crash_mode) {
+  if (unlikely(fault == afl->crash_mode)) {
 
     /* Keep only if there are new bits in the map, add to queue for
        future fuzzing, etc. */
 
     if (!(hnb = has_new_bits(afl, afl->virgin_bits))) {
 
-      if (afl->crash_mode) ++afl->total_crashes;
+      if (unlikely(afl->crash_mode)) ++afl->total_crashes;
       return 0;
 
     }
 
 #ifndef SIMPLE_FILES
 
-    fn = alloc_printf("%s/queue/id:%06u,%s", afl->out_dir, afl->queued_paths,
-                      describe_op(afl, hnb));
+    queue_fn = alloc_printf("%s/queue/id:%06u,%s", afl->out_dir,
+                            afl->queued_paths, describe_op(afl, hnb));
 
 #else
 
-    fn = alloc_printf("%s/queue/id_%06u", afl->out_dir, afl->queued_paths);
+    queue_fn =
+        alloc_printf("%s/queue/id_%06u", afl->out_dir, afl->queued_paths);
 
 #endif                                                    /* ^!SIMPLE_FILES */
 
-    add_to_queue(afl, fn, len, 0);
+    add_to_queue(afl, queue_fn, len, 0);
 
     if (hnb == 2) {
 
@@ -576,11 +598,12 @@ u8 save_if_interesting(afl_state_t *afl, void *mem, u32 len, u8 fault) {
 
     res = calibrate_case(afl, afl->queue_top, mem, afl->queue_cycle - 1, 0);
 
-    if (res == FAULT_ERROR) FATAL("Unable to execute target application");
+    if (unlikely(res == FAULT_ERROR))
+      FATAL("Unable to execute target application");
 
-    fd = open(fn, O_WRONLY | O_CREAT | O_EXCL, 0600);
-    if (fd < 0) PFATAL("Unable to create '%s'", fn);
-    ck_write(fd, mem, len, fn);
+    fd = open(queue_fn, O_WRONLY | O_CREAT | O_EXCL, 0600);
+    if (unlikely(fd < 0)) PFATAL("Unable to create '%s'", queue_fn);
+    ck_write(fd, mem, len, queue_fn);
     close(fd);
 
     keeping = 1;
@@ -600,12 +623,12 @@ u8 save_if_interesting(afl_state_t *afl, void *mem, u32 len, u8 fault) {
 
       if (afl->unique_hangs >= KEEP_UNIQUE_HANG) return keeping;
 
-      if (!afl->dumb_mode) {
+      if (likely(!afl->dumb_mode)) {
 
 #ifdef WORD_SIZE_64
-        simplify_trace((u64 *)afl->fsrv.trace_bits);
+        simplify_trace(afl, (u64 *)afl->fsrv.trace_bits);
 #else
-        simplify_trace((u32 *)afl->fsrv.trace_bits);
+        simplify_trace(afl, (u32 *)afl->fsrv.trace_bits);
 #endif                                                     /* ^WORD_SIZE_64 */
 
         if (!has_new_bits(afl, afl->virgin_tmout)) return keeping;
@@ -622,7 +645,7 @@ u8 save_if_interesting(afl_state_t *afl, void *mem, u32 len, u8 fault) {
 
         u8 new_fault;
         write_to_testcase(afl, mem, len);
-        new_fault = run_target(afl, afl->hang_tmout);
+        new_fault = run_target(afl, &afl->fsrv, afl->hang_tmout);
 
         /* A corner case that one user reported bumping into: increasing the
            timeout actually uncovers a crash. Make sure we don't discard it if
@@ -636,12 +659,13 @@ u8 save_if_interesting(afl_state_t *afl, void *mem, u32 len, u8 fault) {
 
 #ifndef SIMPLE_FILES
 
-      fn = alloc_printf("%s/hangs/id:%06llu,%s", afl->out_dir,
-                        afl->unique_hangs, describe_op(afl, 0));
+      snprintf(fn, PATH_MAX, "%s/hangs/id:%06llu,%s", afl->out_dir,
+               afl->unique_hangs, describe_op(afl, 0));
 
 #else
 
-      fn = alloc_printf("%s/hangs/id_%06llu", afl->out_dir, afl->unique_hangs);
+      snprintf(fn, PATH_MAX, "%s/hangs/id_%06llu", afl->out_dir,
+               afl->unique_hangs);
 
 #endif                                                    /* ^!SIMPLE_FILES */
 
@@ -663,38 +687,37 @@ u8 save_if_interesting(afl_state_t *afl, void *mem, u32 len, u8 fault) {
 
       if (afl->unique_crashes >= KEEP_UNIQUE_CRASH) return keeping;
 
-      if (!afl->dumb_mode) {
+      if (likely(!afl->dumb_mode)) {
 
 #ifdef WORD_SIZE_64
-        simplify_trace((u64 *)afl->fsrv.trace_bits);
+        simplify_trace(afl, (u64 *)afl->fsrv.trace_bits);
 #else
-        simplify_trace((u32 *)afl->fsrv.trace_bits);
+        simplify_trace(afl, (u32 *)afl->fsrv.trace_bits);
 #endif                                                     /* ^WORD_SIZE_64 */
 
         if (!has_new_bits(afl, afl->virgin_crash)) return keeping;
 
       }
 
-      if (!afl->unique_crashes) write_crash_readme(afl);
+      if (unlikely(!afl->unique_crashes)) write_crash_readme(afl);
 
 #ifndef SIMPLE_FILES
 
-      fn = alloc_printf("%s/crashes/id:%06llu,sig:%02u,%s", afl->out_dir,
-                        afl->unique_crashes, afl->kill_signal,
-                        describe_op(afl, 0));
+      snprintf(fn, PATH_MAX, "%s/crashes/id:%06llu,sig:%02u,%s", afl->out_dir,
+               afl->unique_crashes, afl->kill_signal, describe_op(afl, 0));
 
 #else
 
-      fn = alloc_printf("%s/crashes/id_%06llu_%02u", afl->out_dir,
-                        afl->unique_crashes, afl->kill_signal);
+      snprintf(fn, PATH_MAX, "%s/crashes/id_%06llu_%02u", afl->out_dir,
+               afl->unique_crashes, afl->kill_signal);
 
 #endif                                                    /* ^!SIMPLE_FILES */
 
       ++afl->unique_crashes;
-      if (afl->infoexec) {  // if the user wants to be informed on new crashes -
-                            // do
+      if (unlikely(afl->infoexec)) {
+
+        // if the user wants to be informed on new crashes - do that
 #if !TARGET_OS_IPHONE
-        // that
         if (system(afl->infoexec) == -1)
           hnb += 0;  // we dont care if system errors, but we dont want a
                      // compiler warning either
@@ -719,12 +742,10 @@ u8 save_if_interesting(afl_state_t *afl, void *mem, u32 len, u8 fault) {
      test case, too. */
 
   fd = open(fn, O_WRONLY | O_CREAT | O_EXCL, 0600);
-  if (fd < 0) PFATAL("Unable to create '%s'", fn);
+  if (unlikely(fd < 0)) PFATAL("Unable to create '%s'", fn);
   ck_write(fd, mem, len, fn);
   close(fd);
 
-  ck_free(fn);
-
   return keeping;
 
 }
diff --git a/src/afl-fuzz-cmplog.c b/src/afl-fuzz-cmplog.c
index 1600af53..e2747097 100644
--- a/src/afl-fuzz-cmplog.c
+++ b/src/afl-fuzz-cmplog.c
@@ -29,590 +29,18 @@
 #include "afl-fuzz.h"
 #include "cmplog.h"
 
-void init_cmplog_forkserver(afl_state_t *afl) {
+typedef struct cmplog_data {
 
-  static struct timeval timeout;
-  int                   st_pipe[2], ctl_pipe[2];
-  int                   status;
-  s32                   rlen;
+} cmplog_data_t;
 
-  ACTF("Spinning up the cmplog fork server...");
+void cmplog_exec_child(afl_forkserver_t *fsrv, char **argv) {
 
-  if (pipe(st_pipe) || pipe(ctl_pipe)) PFATAL("pipe() failed");
+  setenv("___AFL_EINS_ZWEI_POLIZEI___", "1", 1);
 
-  afl->fsrv.child_timed_out = 0;
-  afl->cmplog_fsrv_pid = fork();
+  if (!fsrv->qemu_mode && argv[0] != fsrv->cmplog_binary)
+    argv[0] = fsrv->cmplog_binary;
 
-  if (afl->cmplog_fsrv_pid < 0) PFATAL("fork() failed");
-
-  if (!afl->cmplog_fsrv_pid) {
-
-    /* CHILD PROCESS */
-
-    struct rlimit r;
-
-    /* Umpf. On OpenBSD, the default fd limit for root users is set to
-       soft 128. Let's try to fix that... */
-
-    if (!getrlimit(RLIMIT_NOFILE, &r) && r.rlim_cur < FORKSRV_FD + 2) {
-
-      r.rlim_cur = FORKSRV_FD + 2;
-      setrlimit(RLIMIT_NOFILE, &r);                        /* Ignore errors */
-
-    }
-
-    if (afl->fsrv.mem_limit) {
-
-      r.rlim_max = r.rlim_cur = ((rlim_t)afl->fsrv.mem_limit) << 20;
-
-#ifdef RLIMIT_AS
-      setrlimit(RLIMIT_AS, &r);                            /* Ignore errors */
-#else
-      /* This takes care of OpenBSD, which doesn't have RLIMIT_AS, but
-         according to reliable sources, RLIMIT_DATA covers anonymous
-         maps - so we should be getting good protection against OOM bugs. */
-
-      setrlimit(RLIMIT_DATA, &r);                          /* Ignore errors */
-#endif                                                        /* ^RLIMIT_AS */
-
-    }
-
-    /* Dumping cores is slow and can lead to anomalies if SIGKILL is delivered
-       before the dump is complete. */
-
-    //    r.rlim_max = r.rlim_cur = 0;
-    //    setrlimit(RLIMIT_CORE, &r);                      /* Ignore errors */
-
-    /* Isolate the process and configure standard descriptors. If
-       afl->fsrv.out_file is specified, stdin is /dev/null; otherwise,
-       afl->fsrv.out_fd is cloned instead. */
-
-    setsid();
-
-    if (!(afl->afl_env.afl_debug_child_output)) {
-
-      dup2(afl->fsrv.dev_null_fd, 1);
-      dup2(afl->fsrv.dev_null_fd, 2);
-
-    }
-
-    if (!afl->fsrv.use_stdin) {
-
-      dup2(afl->fsrv.dev_null_fd, 0);
-
-    } else {
-
-      dup2(afl->fsrv.out_fd, 0);
-      close(afl->fsrv.out_fd);
-
-    }
-
-    /* Set up control and status pipes, close the unneeded original fds. */
-
-    if (dup2(ctl_pipe[0], FORKSRV_FD) < 0) PFATAL("dup2() failed");
-    if (dup2(st_pipe[1], FORKSRV_FD + 1) < 0) PFATAL("dup2() failed");
-
-    close(ctl_pipe[0]);
-    close(ctl_pipe[1]);
-    close(st_pipe[0]);
-    close(st_pipe[1]);
-
-    close(afl->fsrv.out_dir_fd);
-    close(afl->fsrv.dev_null_fd);
-#ifndef HAVE_ARC4RANDOM
-    close(afl->fsrv.dev_urandom_fd);
-#endif
-    close(afl->fsrv.plot_file == NULL ? -1 : fileno(afl->fsrv.plot_file));
-
-    /* This should improve performance a bit, since it stops the linker from
-       doing extra work post-fork(). */
-
-    if (!getenv("LD_BIND_LAZY")) setenv("LD_BIND_NOW", "1", 0);
-
-    /* Set sane defaults for ASAN if nothing else specified. */
-
-    setenv("ASAN_OPTIONS",
-           "abort_on_error=1:"
-           "detect_leaks=0:"
-           "malloc_context_size=0:"
-           "symbolize=0:"
-           "allocator_may_return_null=1",
-           0);
-
-    /* MSAN is tricky, because it doesn't support abort_on_error=1 at this
-       point. So, we do this in a very hacky way. */
-
-    setenv("MSAN_OPTIONS",
-           "exit_code=" STRINGIFY(MSAN_ERROR) ":"
-           "symbolize=0:"
-           "abort_on_error=1:"
-           "malloc_context_size=0:"
-           "allocator_may_return_null=1:"
-           "msan_track_origins=0",
-           0);
-
-    setenv("___AFL_EINS_ZWEI_POLIZEI___", "1", 1);
-
-    if (!afl->qemu_mode && afl->argv[0] != afl->cmplog_binary) {
-
-      ck_free(afl->argv[0]);
-      afl->argv[0] = afl->cmplog_binary;
-
-    }
-
-    execv(afl->argv[0], afl->argv);
-
-    /* Use a distinctive bitmap signature to tell the parent about execv()
-       falling through. */
-
-    *(u32 *)afl->fsrv.trace_bits = EXEC_FAIL_SIG;
-    exit(0);
-
-  }
-
-  /* PARENT PROCESS */
-
-  /* Close the unneeded endpoints. */
-
-  close(ctl_pipe[0]);
-  close(st_pipe[1]);
-
-  afl->cmplog_fsrv_ctl_fd = ctl_pipe[1];
-  afl->cmplog_fsrv_st_fd = st_pipe[0];
-
-  /* Wait for the fork server to come up, but don't wait too long. */
-
-  rlen = 0;
-  if (afl->fsrv.exec_tmout) {
-
-    fd_set readfds;
-    FD_ZERO(&readfds);
-    FD_SET(afl->cmplog_fsrv_st_fd, &readfds);
-    timeout.tv_sec = ((afl->fsrv.exec_tmout * FORK_WAIT_MULT) / 1000);
-    timeout.tv_usec = ((afl->fsrv.exec_tmout * FORK_WAIT_MULT) % 1000) * 1000;
-
-    int sret =
-        select(afl->cmplog_fsrv_st_fd + 1, &readfds, NULL, NULL, &timeout);
-
-    if (sret == 0) {
-
-      kill(afl->cmplog_fsrv_pid, SIGKILL);
-
-    } else {
-
-      rlen = read(afl->cmplog_fsrv_st_fd, &status, 4);
-
-    }
-
-  } else {
-
-    rlen = read(afl->cmplog_fsrv_st_fd, &status, 4);
-
-  }
-
-  /* If we have a four-byte "hello" message from the server, we're all set.
-     Otherwise, try to figure out what went wrong. */
-
-  if (rlen == 4) {
-
-    OKF("All right - fork server is up.");
-    return;
-
-  }
-
-  if (afl->fsrv.child_timed_out)
-    FATAL(
-        "Timeout while initializing cmplog fork server (adjusting -t may "
-        "help)");
-
-  if (waitpid(afl->cmplog_fsrv_pid, &status, 0) <= 0)
-    PFATAL("waitpid() failed");
-
-  if (WIFSIGNALED(status)) {
-
-    if (afl->fsrv.mem_limit && afl->fsrv.mem_limit < 500 &&
-        afl->fsrv.uses_asan) {
-
-      SAYF("\n" cLRD "[-] " cRST
-           "Whoops, the target binary crashed suddenly, "
-           "before receiving any input\n"
-           "    from the fuzzer! Since it seems to be built with ASAN and you "
-           "have a\n"
-           "    restrictive memory limit configured, this is expected; please "
-           "read\n"
-           "    %s/notes_for_asan.md for help.\n",
-           doc_path);
-
-    } else if (!afl->fsrv.mem_limit) {
-
-      SAYF("\n" cLRD "[-] " cRST
-           "Whoops, the target binary crashed suddenly, "
-           "before receiving any input\n"
-           "    from the fuzzer! There are several probable explanations:\n\n"
-
-           "    - The binary is just buggy and explodes entirely on its own. "
-           "If so, you\n"
-           "      need to fix the underlying problem or find a better "
-           "replacement.\n\n"
-
-           MSG_FORK_ON_APPLE
-
-           "    - Less likely, there is a horrible bug in the fuzzer. If other "
-           "options\n"
-           "      fail, poke <afl-users@googlegroups.com> for troubleshooting "
-           "tips.\n");
-
-    } else {
-
-      SAYF("\n" cLRD "[-] " cRST
-           "Whoops, the target binary crashed suddenly, "
-           "before receiving any input\n"
-           "    from the fuzzer! There are several probable explanations:\n\n"
-
-           "    - The current memory limit (%s) is too restrictive, causing "
-           "the\n"
-           "      target to hit an OOM condition in the dynamic linker. Try "
-           "bumping up\n"
-           "      the limit with the -m setting in the command line. A simple "
-           "way confirm\n"
-           "      this diagnosis would be:\n\n"
-
-           MSG_ULIMIT_USAGE
-           " /path/to/fuzzed_app )\n\n"
-
-           "      Tip: you can use http://jwilk.net/software/recidivm to "
-           "quickly\n"
-           "      estimate the required amount of virtual memory for the "
-           "binary.\n\n"
-
-           "    - The binary is just buggy and explodes entirely on its own. "
-           "If so, you\n"
-           "      need to fix the underlying problem or find a better "
-           "replacement.\n\n"
-
-           MSG_FORK_ON_APPLE
-
-           "    - Less likely, there is a horrible bug in the fuzzer. If other "
-           "options\n"
-           "      fail, poke <afl-users@googlegroups.com> for troubleshooting "
-           "tips.\n",
-           DMS(afl->fsrv.mem_limit << 20), afl->fsrv.mem_limit - 1);
-
-    }
-
-    FATAL("Cmplog fork server crashed with signal %d", WTERMSIG(status));
-
-  }
-
-  if (*(u32 *)afl->fsrv.trace_bits == EXEC_FAIL_SIG)
-    FATAL("Unable to execute target application ('%s')", afl->argv[0]);
-
-  if (afl->fsrv.mem_limit && afl->fsrv.mem_limit < 500 && afl->fsrv.uses_asan) {
-
-    SAYF("\n" cLRD "[-] " cRST
-         "Hmm, looks like the target binary terminated "
-         "before we could complete a\n"
-         "    handshake with the injected code. Since it seems to be built "
-         "with ASAN and\n"
-         "    you have a restrictive memory limit configured, this is "
-         "expected; please\n"
-         "    read %s/notes_for_asan.md for help.\n",
-         doc_path);
-
-  } else if (!afl->fsrv.mem_limit) {
-
-    SAYF("\n" cLRD "[-] " cRST
-         "Hmm, looks like the target binary terminated "
-         "before we could complete a\n"
-         "    handshake with the injected code. Perhaps there is a horrible "
-         "bug in the\n"
-         "    fuzzer. Poke <afl-users@googlegroups.com> for troubleshooting "
-         "tips.\n");
-
-  } else {
-
-    SAYF(
-        "\n" cLRD "[-] " cRST
-        "Hmm, looks like the target binary terminated "
-        "before we could complete a\n"
-        "    handshake with the injected code. There are %s probable "
-        "explanations:\n\n"
-
-        "%s"
-        "    - The current memory limit (%s) is too restrictive, causing an "
-        "OOM\n"
-        "      fault in the dynamic linker. This can be fixed with the -m "
-        "option. A\n"
-        "      simple way to confirm the diagnosis may be:\n\n"
-
-        MSG_ULIMIT_USAGE
-        " /path/to/fuzzed_app )\n\n"
-
-        "      Tip: you can use http://jwilk.net/software/recidivm to quickly\n"
-        "      estimate the required amount of virtual memory for the "
-        "binary.\n\n"
-
-        "    - Less likely, there is a horrible bug in the fuzzer. If other "
-        "options\n"
-        "      fail, poke <afl-users@googlegroups.com> for troubleshooting "
-        "tips.\n",
-        getenv(DEFER_ENV_VAR) ? "three" : "two",
-        getenv(DEFER_ENV_VAR)
-            ? "    - You are using deferred forkserver, but __AFL_INIT() is "
-              "never\n"
-              "      reached before the program terminates.\n\n"
-            : "",
-        DMS(afl->fsrv.mem_limit << 20), afl->fsrv.mem_limit - 1);
-
-  }
-
-  FATAL("Cmplog fork server handshake failed");
-
-}
-
-u8 run_cmplog_target(afl_state_t *afl, u32 timeout) {
-
-  static struct itimerval it;
-  static u32              prev_timed_out = 0;
-  static u64              exec_ms = 0;
-
-  int status = 0;
-  u32 tb4;
-
-  afl->fsrv.child_timed_out = 0;
-
-  /* After this memset, afl->fsrv.trace_bits[] are effectively volatile, so we
-     must prevent any earlier operations from venturing into that
-     territory. */
-
-  memset(afl->fsrv.trace_bits, 0, MAP_SIZE);
-  MEM_BARRIER();
-
-  /* If we're running in "dumb" mode, we can't rely on the fork server
-     logic compiled into the target program, so we will just keep calling
-     execve(). There is a bit of code duplication between here and
-     init_forkserver(), but c'est la vie. */
-
-  if (afl->dumb_mode == 1 || afl->no_forkserver) {
-
-    afl->cmplog_child_pid = fork();
-
-    if (afl->cmplog_child_pid < 0) PFATAL("fork() failed");
-
-    if (!afl->cmplog_child_pid) {
-
-      struct rlimit r;
-
-      if (afl->fsrv.mem_limit) {
-
-        r.rlim_max = r.rlim_cur = ((rlim_t)afl->fsrv.mem_limit) << 20;
-
-#ifdef RLIMIT_AS
-
-        setrlimit(RLIMIT_AS, &r);                          /* Ignore errors */
-
-#else
-
-        setrlimit(RLIMIT_DATA, &r);                        /* Ignore errors */
-
-#endif                                                        /* ^RLIMIT_AS */
-
-      }
-
-      r.rlim_max = r.rlim_cur = 0;
-
-      setrlimit(RLIMIT_CORE, &r);                          /* Ignore errors */
-
-      /* Isolate the process and configure standard descriptors. If
-         afl->fsrv.out_file is specified, stdin is /dev/null; otherwise,
-         afl->fsrv.out_fd is cloned instead. */
-
-      setsid();
-
-      dup2(afl->fsrv.dev_null_fd, 1);
-      dup2(afl->fsrv.dev_null_fd, 2);
-
-      if (afl->fsrv.out_file) {
-
-        dup2(afl->fsrv.dev_null_fd, 0);
-
-      } else {
-
-        dup2(afl->fsrv.out_fd, 0);
-        close(afl->fsrv.out_fd);
-
-      }
-
-      /* On Linux, would be faster to use O_CLOEXEC. Maybe TODO. */
-
-      close(afl->fsrv.dev_null_fd);
-      close(afl->fsrv.out_dir_fd);
-#ifndef HAVE_ARC4RANDOM
-      close(afl->fsrv.dev_urandom_fd);
-#endif
-      close(fileno(afl->fsrv.plot_file));
-
-      /* Set sane defaults for ASAN if nothing else specified. */
-
-      setenv("ASAN_OPTIONS",
-             "abort_on_error=1:"
-             "detect_leaks=0:"
-             "symbolize=0:"
-             "allocator_may_return_null=1",
-             0);
-
-      setenv("MSAN_OPTIONS", "exit_code=" STRINGIFY(MSAN_ERROR) ":"
-                             "symbolize=0:"
-                             "msan_track_origins=0", 0);
-
-      setenv("___AFL_EINS_ZWEI_POLIZEI___", "1", 1);
-
-      if (!afl->qemu_mode && afl->argv[0] != afl->cmplog_binary) {
-
-        ck_free(afl->argv[0]);
-        afl->argv[0] = afl->cmplog_binary;
-
-      }
-
-      execv(afl->argv[0], afl->argv);
-
-      /* Use a distinctive bitmap value to tell the parent about execv()
-         falling through. */
-
-      *(u32 *)afl->fsrv.trace_bits = EXEC_FAIL_SIG;
-      exit(0);
-
-    }
-
-  } else {
-
-    s32 res;
-
-    /* In non-dumb mode, we have the fork server up and running, so simply
-       tell it to have at it, and then read back PID. */
-
-    if ((res = write(afl->cmplog_fsrv_ctl_fd, &prev_timed_out, 4)) != 4) {
-
-      if (afl->stop_soon) return 0;
-      RPFATAL(res,
-              "Unable to request new process from cmplog fork server (OOM?)");
-
-    }
-
-    if ((res = read(afl->cmplog_fsrv_st_fd, &afl->cmplog_child_pid, 4)) != 4) {
-
-      if (afl->stop_soon) return 0;
-      RPFATAL(res,
-              "Unable to request new process from cmplog fork server (OOM?)");
-
-    }
-
-    if (afl->cmplog_child_pid <= 0)
-      FATAL("Cmplog fork server is misbehaving (OOM?)");
-
-  }
-
-  /* Configure timeout, as requested by user, then wait for child to terminate.
-   */
-
-  it.it_value.tv_sec = (timeout / 1000);
-  it.it_value.tv_usec = (timeout % 1000) * 1000;
-
-  setitimer(ITIMER_REAL, &it, NULL);
-
-  /* The SIGALRM handler simply kills the afl->cmplog_child_pid and sets
-   * afl->fsrv.child_timed_out. */
-
-  if (afl->dumb_mode == 1 || afl->no_forkserver) {
-
-    if (waitpid(afl->cmplog_child_pid, &status, 0) <= 0)
-      PFATAL("waitpid() failed");
-
-  } else {
-
-    s32 res;
-
-    if ((res = read(afl->cmplog_fsrv_st_fd, &status, 4)) != 4) {
-
-      if (afl->stop_soon) return 0;
-      SAYF(
-          "\n" cLRD "[-] " cRST
-          "Unable to communicate with fork server. Some possible reasons:\n\n"
-          "    - You've run out of memory. Use -m to increase the the memory "
-          "limit\n"
-          "      to something higher than %lld.\n"
-          "    - The binary or one of the libraries it uses manages to create\n"
-          "      threads before the forkserver initializes.\n"
-          "    - The binary, at least in some circumstances, exits in a way "
-          "that\n"
-          "      also kills the parent process - raise() could be the "
-          "culprit.\n\n"
-          "If all else fails you can disable the fork server via "
-          "AFL_NO_FORKSRV=1.\n",
-          afl->fsrv.mem_limit);
-      RPFATAL(res, "Unable to communicate with fork server");
-
-    }
-
-  }
-
-  if (!WIFSTOPPED(status)) afl->cmplog_child_pid = 0;
-
-  getitimer(ITIMER_REAL, &it);
-  exec_ms =
-      (u64)timeout - (it.it_value.tv_sec * 1000 + it.it_value.tv_usec / 1000);
-  if (afl->slowest_exec_ms < exec_ms) afl->slowest_exec_ms = exec_ms;
-
-  it.it_value.tv_sec = 0;
-  it.it_value.tv_usec = 0;
-
-  setitimer(ITIMER_REAL, &it, NULL);
-
-  ++afl->total_execs;
-
-  /* Any subsequent operations on afl->fsrv.trace_bits must not be moved by the
-     compiler below this point. Past this location, afl->fsrv.trace_bits[]
-     behave very normally and do not have to be treated as volatile. */
-
-  MEM_BARRIER();
-
-  tb4 = *(u32 *)afl->fsrv.trace_bits;
-
-#ifdef WORD_SIZE_64
-  classify_counts((u64 *)afl->fsrv.trace_bits);
-#else
-  classify_counts((u32 *)afl->fsrv.trace_bits);
-#endif                                                     /* ^WORD_SIZE_64 */
-
-  prev_timed_out = afl->fsrv.child_timed_out;
-
-  /* Report outcome to caller. */
-
-  if (WIFSIGNALED(status) && !afl->stop_soon) {
-
-    afl->kill_signal = WTERMSIG(status);
-
-    if (afl->fsrv.child_timed_out && afl->kill_signal == SIGKILL)
-      return FAULT_TMOUT;
-
-    return FAULT_CRASH;
-
-  }
-
-  /* A somewhat nasty hack for MSAN, which doesn't support abort_on_error and
-     must use a special exit code. */
-
-  if (afl->fsrv.uses_asan && WEXITSTATUS(status) == MSAN_ERROR) {
-
-    afl->kill_signal = 0;
-    return FAULT_CRASH;
-
-  }
-
-  if ((afl->dumb_mode == 1 || afl->no_forkserver) && tb4 == EXEC_FAIL_SIG)
-    return FAULT_ERROR;
-
-  return FAULT_NONE;
+  execv(argv[0], argv);
 
 }
 
@@ -622,14 +50,19 @@ u8 common_fuzz_cmplog_stuff(afl_state_t *afl, u8 *out_buf, u32 len) {
 
   if (afl->post_handler) {
 
-    out_buf = afl->post_handler(out_buf, &len);
-    if (!out_buf || !len) return 0;
+    u8 *post_buf = NULL;
+
+    size_t post_len =
+        afl->post_handler(afl->post_data, out_buf, len, &post_buf);
+    if (!post_buf || !post_len) return 0;
+    out_buf = post_buf;
+    len = post_len;
 
   }
 
   write_to_testcase(afl, out_buf, len);
 
-  fault = run_cmplog_target(afl, afl->fsrv.exec_tmout);
+  fault = run_target(afl, &afl->cmplog_fsrv, afl->fsrv.exec_tmout);
 
   if (afl->stop_soon) return 1;
 
diff --git a/src/afl-fuzz-extras.c b/src/afl-fuzz-extras.c
index ff4c0ae2..55146dd9 100644
--- a/src/afl-fuzz-extras.c
+++ b/src/afl-fuzz-extras.c
@@ -55,6 +55,8 @@ void load_extras_file(afl_state_t *afl, u8 *fname, u32 *min_len, u32 *max_len,
   u8 *  lptr;
   u32   cur_line = 0;
 
+  u8 val_bufs[2][STRINGIFY_VAL_SIZE_MAX];
+
   f = fopen(fname, "r");
 
   if (!f) PFATAL("Unable to open '%s'", fname);
@@ -170,8 +172,10 @@ void load_extras_file(afl_state_t *afl, u8 *fname, u32 *min_len, u32 *max_len,
     afl->extras[afl->extras_cnt].len = klen;
 
     if (afl->extras[afl->extras_cnt].len > MAX_DICT_FILE)
-      FATAL("Keyword too big in line %u (%s, limit is %s)", cur_line, DMS(klen),
-            DMS(MAX_DICT_FILE));
+      FATAL(
+          "Keyword too big in line %u (%s, limit is %s)", cur_line,
+          stringify_mem_size(val_bufs[0], sizeof(val_bufs[0]), klen),
+          stringify_mem_size(val_bufs[1], sizeof(val_bufs[1]), MAX_DICT_FILE));
 
     if (*min_len > klen) *min_len = klen;
     if (*max_len < klen) *max_len = klen;
@@ -193,6 +197,8 @@ void load_extras(afl_state_t *afl, u8 *dir) {
   u32            min_len = MAX_DICT_FILE, max_len = 0, dict_level = 0;
   u8 *           x;
 
+  u8 val_bufs[2][STRINGIFY_VAL_SIZE_MAX];
+
   /* If the name ends with @, extract level and continue. */
 
   if ((x = strchr(dir, '@'))) {
@@ -238,8 +244,10 @@ void load_extras(afl_state_t *afl, u8 *dir) {
     }
 
     if (st.st_size > MAX_DICT_FILE)
-      FATAL("Extra '%s' is too big (%s, limit is %s)", fn, DMS(st.st_size),
-            DMS(MAX_DICT_FILE));
+      FATAL(
+          "Extra '%s' is too big (%s, limit is %s)", fn,
+          stringify_mem_size(val_bufs[0], sizeof(val_bufs[0]), st.st_size),
+          stringify_mem_size(val_bufs[1], sizeof(val_bufs[1]), MAX_DICT_FILE));
 
     if (min_len > st.st_size) min_len = st.st_size;
     if (max_len < st.st_size) max_len = st.st_size;
@@ -273,11 +281,12 @@ check_and_sort:
         compare_extras_len);
 
   OKF("Loaded %u extra tokens, size range %s to %s.", afl->extras_cnt,
-      DMS(min_len), DMS(max_len));
+      stringify_mem_size(val_bufs[0], sizeof(val_bufs[0]), min_len),
+      stringify_mem_size(val_bufs[1], sizeof(val_bufs[1]), max_len));
 
   if (max_len > 32)
     WARNF("Some tokens are relatively large (%s) - consider trimming.",
-          DMS(max_len));
+          stringify_mem_size(val_bufs[0], sizeof(val_bufs[0]), max_len));
 
   if (afl->extras_cnt > MAX_DET_EXTRAS)
     WARNF("More than %d tokens - will use them probabilistically.",
@@ -296,10 +305,14 @@ static inline u8 memcmp_nocase(u8 *m1, u8 *m2, u32 len) {
 }
 
 /* Maybe add automatic extra. */
+/* Ugly hack: afl state is transferred as void * because we import data via
+   afl-forkserver.c - which is shared with other afl tools that do not
+   have the afl state struct */
 
-void maybe_add_auto(afl_state_t *afl, u8 *mem, u32 len) {
+void maybe_add_auto(void *afl_tmp, u8 *mem, u32 len) {
 
-  u32 i;
+  afl_state_t *afl = (afl_state_t *)afl_tmp;
+  u32          i;
 
   /* Allow users to specify that they don't want auto dictionaries. */
 
@@ -378,7 +391,7 @@ void maybe_add_auto(afl_state_t *afl, u8 *mem, u32 len) {
 
   } else {
 
-    i = MAX_AUTO_EXTRAS / 2 + UR(afl, (MAX_AUTO_EXTRAS + 1) / 2);
+    i = MAX_AUTO_EXTRAS / 2 + rand_below(afl, (MAX_AUTO_EXTRAS + 1) / 2);
 
     ck_free(afl->a_extras[i].data);
 
@@ -442,7 +455,7 @@ void load_auto(afl_state_t *afl) {
     u8 *fn = alloc_printf("%s/.state/auto_extras/auto_%06u", afl->in_dir, i);
     s32 fd, len;
 
-    fd = open(fn, O_RDONLY, 0600);
+    fd = open(fn, O_RDONLY);
 
     if (fd < 0) {
 
@@ -460,7 +473,7 @@ void load_auto(afl_state_t *afl) {
     if (len < 0) PFATAL("Unable to read from '%s'", fn);
 
     if (len >= MIN_AUTO_EXTRA && len <= MAX_AUTO_EXTRA)
-      maybe_add_auto(afl, tmp, len);
+      maybe_add_auto((u8 *)afl, tmp, len);
 
     close(fd);
     ck_free(fn);
diff --git a/src/afl-fuzz-init.c b/src/afl-fuzz-init.c
index 6b5fa24f..54cc81ef 100644
--- a/src/afl-fuzz-init.c
+++ b/src/afl-fuzz-init.c
@@ -24,6 +24,7 @@
  */
 
 #include "afl-fuzz.h"
+#include <limits.h>
 
 #ifdef HAVE_AFFINITY
 
@@ -76,21 +77,16 @@ void bind_to_free_cpu(afl_state_t *afl) {
 
   while ((de = readdir(d))) {
 
-    u8 *  fn;
+    u8    fn[PATH_MAX];
     FILE *f;
     u8    tmp[MAX_LINE];
     u8    has_vmsize = 0;
 
     if (!isdigit(de->d_name[0])) continue;
 
-    fn = alloc_printf("/proc/%s/status", de->d_name);
+    snprintf(fn, PATH_MAX, "/proc/%s/status", de->d_name);
 
-    if (!(f = fopen(fn, "r"))) {
-
-      ck_free(fn);
-      continue;
-
-    }
+    if (!(f = fopen(fn, "r"))) { continue; }
 
     while (fgets(tmp, MAX_LINE, f)) {
 
@@ -111,7 +107,6 @@ void bind_to_free_cpu(afl_state_t *afl) {
 
     }
 
-    ck_free(fn);
     fclose(f);
 
   }
@@ -139,8 +134,15 @@ void bind_to_free_cpu(afl_state_t *afl) {
   for (i = 0; i < proccount; i++) {
 
 #if defined(__FreeBSD__)
-    if (procs[i].ki_oncpu < sizeof(cpu_used) && procs[i].ki_pctcpu > 60)
-      cpu_used[procs[i].ki_oncpu] = 1;
+    if (!strcmp(procs[i].ki_comm, "idle")) continue;
+
+    // fix when ki_oncpu = -1
+    int oncpu;
+    oncpu = procs[i].ki_oncpu;
+    if (oncpu == -1) oncpu = procs[i].ki_lastcpu;
+
+    if (oncpu != -1 && oncpu < sizeof(cpu_used) && procs[i].ki_pctcpu > 60)
+      cpu_used[oncpu] = 1;
 #elif defined(__DragonFly__)
     if (procs[i].kp_lwp.kl_cpuid < sizeof(cpu_used) &&
         procs[i].kp_lwp.kl_pctcpu > 10)
@@ -276,7 +278,9 @@ void setup_post(afl_state_t *afl) {
 
   void *dh;
   u8 *  fn = afl->afl_env.afl_post_library;
+  u8    tbuf[6];
   u32   tlen = 6;
+  strncpy(tbuf, "hello", tlen);
 
   if (!fn) return;
 
@@ -287,10 +291,20 @@ void setup_post(afl_state_t *afl) {
 
   afl->post_handler = dlsym(dh, "afl_postprocess");
   if (!afl->post_handler) FATAL("Symbol 'afl_postprocess' not found.");
+  afl->post_init = dlsym(dh, "afl_postprocess_init");
+  if (!afl->post_init) FATAL("Symbol 'afl_postprocess_init' not found.");
+  afl->post_deinit = dlsym(dh, "afl_postprocess_deinit");
+  if (!afl->post_deinit) FATAL("Symbol 'afl_postprocess_deinit' not found.");
 
   /* Do a quick test. It's better to segfault now than later =) */
 
-  afl->post_handler("hello", &tlen);
+  u8 *post_buf = NULL;
+  afl->post_data = afl->post_init(afl);
+  if (!afl->post_data) FATAL("Could not initialize post handler.");
+
+  size_t post_len = afl->post_handler(afl->post_data, tbuf, tlen, &post_buf);
+  if (!post_len || !post_buf)
+    SAYF("Empty return in test post handler for buf=\"hello\\0\".\n");
 
   OKF("Postprocessor installed successfully.");
 
@@ -304,7 +318,7 @@ static void shuffle_ptrs(afl_state_t *afl, void **ptrs, u32 cnt) {
 
   for (i = 0; i < cnt - 2; ++i) {
 
-    u32   j = i + UR(afl, cnt - i);
+    u32   j = i + rand_below(afl, cnt - i);
     void *s = ptrs[i];
     ptrs[i] = ptrs[j];
     ptrs[j] = s;
@@ -323,6 +337,8 @@ void read_testcases(afl_state_t *afl) {
   u32             i;
   u8 *            fn1;
 
+  u8 val_buf[2][STRINGIFY_VAL_SIZE_MAX];
+
   /* Auto-detect non-in-place resumption attempts. */
 
   fn1 = alloc_printf("%s/queue", afl->in_dir);
@@ -367,9 +383,10 @@ void read_testcases(afl_state_t *afl) {
 
     struct stat st;
 
+    u8 dfn[PATH_MAX];
+    snprintf(dfn, PATH_MAX, "%s/.state/deterministic_done/%s", afl->in_dir,
+             nl[i]->d_name);
     u8 *fn2 = alloc_printf("%s/%s", afl->in_dir, nl[i]->d_name);
-    u8 *dfn = alloc_printf("%s/.state/deterministic_done/%s", afl->in_dir,
-                           nl[i]->d_name);
 
     u8 passed_det = 0;
 
@@ -383,14 +400,14 @@ void read_testcases(afl_state_t *afl) {
     if (!S_ISREG(st.st_mode) || !st.st_size || strstr(fn2, "/README.txt")) {
 
       ck_free(fn2);
-      ck_free(dfn);
       continue;
 
     }
 
     if (st.st_size > MAX_FILE)
-      FATAL("Test case '%s' is too big (%s, limit is %s)", fn2, DMS(st.st_size),
-            DMS(MAX_FILE));
+      FATAL("Test case '%s' is too big (%s, limit is %s)", fn2,
+            stringify_mem_size(val_buf[0], sizeof(val_buf[0]), st.st_size),
+            stringify_mem_size(val_buf[1], sizeof(val_buf[1]), MAX_FILE));
 
     /* Check for metadata that indicates that deterministic fuzzing
        is complete for this entry. We don't want to repeat deterministic
@@ -398,7 +415,6 @@ void read_testcases(afl_state_t *afl) {
        and probably very time-consuming. */
 
     if (!access(dfn, F_OK)) passed_det = 1;
-    ck_free(dfn);
 
     add_to_queue(afl, fn2, st.st_size, passed_det);
 
@@ -432,11 +448,13 @@ static void check_map_coverage(afl_state_t *afl) {
 
   u32 i;
 
-  if (count_bytes(afl->fsrv.trace_bits) < 100) return;
+  if (count_bytes(afl, afl->fsrv.trace_bits) < 100) return;
 
   for (i = (1 << (MAP_SIZE_POW2 - 1)); i < MAP_SIZE; ++i)
     if (afl->fsrv.trace_bits[i]) return;
 
+  if (afl->fsrv.map_size != MAP_SIZE) return;
+
   WARNF("Recompile binary with newer version of afl to improve coverage!");
 
 }
@@ -553,6 +571,8 @@ void perform_dry_run(afl_state_t *afl) {
 
         if (afl->fsrv.mem_limit) {
 
+          u8 val_buf[STRINGIFY_VAL_SIZE_MAX];
+
           SAYF("\n" cLRD "[-] " cRST
                "Oops, the program crashed with one of the test cases provided. "
                "There are\n"
@@ -593,8 +613,9 @@ void perform_dry_run(afl_state_t *afl) {
                "other options\n"
                "      fail, poke <afl-users@googlegroups.com> for "
                "troubleshooting tips.\n",
-               DMS(afl->fsrv.mem_limit << 20), afl->fsrv.mem_limit - 1,
-               doc_path);
+               stringify_mem_size(val_buf, sizeof(val_buf),
+                                  afl->fsrv.mem_limit << 20),
+               afl->fsrv.mem_limit - 1, doc_path);
 
         } else {
 
@@ -797,7 +818,7 @@ void pivot_inputs(afl_state_t *afl) {
 
 u32 find_start_position(afl_state_t *afl) {
 
-  static u8 tmp[4096];                   /* Ought to be enough for anybody. */
+  u8 tmp[4096] = {0};                    /* Ought to be enough for anybody. */
 
   u8 *fn, *off;
   s32 fd, i;
@@ -834,7 +855,7 @@ u32 find_start_position(afl_state_t *afl) {
 
 void find_timeout(afl_state_t *afl) {
 
-  static u8 tmp[4096];                   /* Ought to be enough for anybody. */
+  u8 tmp[4096] = {0};                    /* Ought to be enough for anybody. */
 
   u8 *fn, *off;
   s32 fd, i;
@@ -902,7 +923,7 @@ static u8 delete_files(u8 *path, u8 *prefix) {
 
 double get_runnable_processes(void) {
 
-  static double res;
+  double res = 0;
 
 #if defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__) || \
     defined(__NetBSD__) || defined(__DragonFly__)
@@ -1049,7 +1070,7 @@ static void handle_existing_out_dir(afl_state_t *afl) {
 
     /* Let's see how much work is at stake. */
 
-    if (!afl->in_place_resume &&
+    if (!afl->in_place_resume && last_update > start_time2 &&
         last_update - start_time2 > OUTPUT_GRACE * 60) {
 
       SAYF("\n" cLRD "[-] " cRST
@@ -1787,7 +1808,7 @@ void fix_up_sync(afl_state_t *afl) {
 
 static void handle_resize(int sig) {
 
-  LIST_FOREACH(&afl_states, afl_state_t, { el->clear_screen; });
+  LIST_FOREACH(&afl_states, afl_state_t, { el->clear_screen = 1; });
 
 }
 
@@ -1832,8 +1853,6 @@ static void handle_stop_sig(int sig) {
 
     if (el->fsrv.child_pid > 0) kill(el->fsrv.child_pid, SIGKILL);
     if (el->fsrv.fsrv_pid > 0) kill(el->fsrv.fsrv_pid, SIGKILL);
-    if (el->cmplog_child_pid > 0) kill(el->cmplog_child_pid, SIGKILL);
-    if (el->cmplog_fsrv_pid > 0) kill(el->cmplog_fsrv_pid, SIGKILL);
 
   });
 
@@ -1967,7 +1986,7 @@ void check_binary(afl_state_t *afl, u8 *fname) {
 
 #endif                                                       /* ^!__APPLE__ */
 
-  if (!afl->qemu_mode && !afl->unicorn_mode && !afl->dumb_mode &&
+  if (!afl->fsrv.qemu_mode && !afl->unicorn_mode && !afl->dumb_mode &&
       !memmem(f_data, f_len, SHM_ENV_VAR, strlen(SHM_ENV_VAR) + 1)) {
 
     SAYF("\n" cLRD "[-] " cRST
@@ -1994,7 +2013,7 @@ void check_binary(afl_state_t *afl, u8 *fname) {
 
   }
 
-  if ((afl->qemu_mode) &&
+  if ((afl->fsrv.qemu_mode) &&
       memmem(f_data, f_len, SHM_ENV_VAR, strlen(SHM_ENV_VAR) + 1)) {
 
     SAYF("\n" cLRD "[-] " cRST
@@ -2008,7 +2027,7 @@ void check_binary(afl_state_t *afl, u8 *fname) {
 
   }
 
-  if (memmem(f_data, f_len, "libasan.so", 10) ||
+  if (memmem(f_data, f_len, "__asan_init", 11) ||
       memmem(f_data, f_len, "__msan_init", 11))
     afl->fsrv.uses_asan = 1;
 
@@ -2125,11 +2144,6 @@ void setup_signal_handlers(void) {
   sigaction(SIGINT, &sa, NULL);
   sigaction(SIGTERM, &sa, NULL);
 
-  /* Exec timeout notifications. */
-
-  sa.sa_handler = handle_timeout;
-  sigaction(SIGALRM, &sa, NULL);
-
   /* Window resize */
 
   sa.sa_handler = handle_resize;
diff --git a/src/afl-fuzz-misc.c b/src/afl-fuzz-misc.c
deleted file mode 100644
index 29e8bd82..00000000
--- a/src/afl-fuzz-misc.c
+++ /dev/null
@@ -1,186 +0,0 @@
-/*
-   american fuzzy lop++ - misc stuffs from Mordor
-   ----------------------------------------------
-
-   Originally written by Michal Zalewski
-
-   Now maintained by Marc Heuse <mh@mh-sec.de>,
-                        Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and
-                        Andrea Fioraldi <andreafioraldi@gmail.com>
-
-   Copyright 2016, 2017 Google Inc. All rights reserved.
-   Copyright 2019-2020 AFLplusplus Project. All rights reserved.
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at:
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
-   This is the real deal: the program takes an instrumented binary and
-   attempts a variety of basic fuzzing tricks, paying close attention to
-   how they affect the execution path.
-
- */
-
-#include "afl-fuzz.h"
-
-/* Describe integer. Uses 12 cyclic static buffers for return values. The value
-   returned should be five characters or less for all the integers we reasonably
-   expect to see. */
-
-u8 *DI(u64 val) {
-
-  static u8 tmp[12][16];
-  static u8 cur;
-
-  cur = (cur + 1) % 12;
-
-#define CHK_FORMAT(_divisor, _limit_mult, _fmt, _cast)    \
-  do {                                                    \
-                                                          \
-    if (val < (_divisor) * (_limit_mult)) {               \
-                                                          \
-      sprintf(tmp[cur], _fmt, ((_cast)val) / (_divisor)); \
-      return tmp[cur];                                    \
-                                                          \
-    }                                                     \
-                                                          \
-  } while (0)
-
-  /* 0-9999 */
-  CHK_FORMAT(1, 10000, "%llu", u64);
-
-  /* 10.0k - 99.9k */
-  CHK_FORMAT(1000, 99.95, "%0.01fk", double);
-
-  /* 100k - 999k */
-  CHK_FORMAT(1000, 1000, "%lluk", u64);
-
-  /* 1.00M - 9.99M */
-  CHK_FORMAT(1000 * 1000, 9.995, "%0.02fM", double);
-
-  /* 10.0M - 99.9M */
-  CHK_FORMAT(1000 * 1000, 99.95, "%0.01fM", double);
-
-  /* 100M - 999M */
-  CHK_FORMAT(1000 * 1000, 1000, "%lluM", u64);
-
-  /* 1.00G - 9.99G */
-  CHK_FORMAT(1000LL * 1000 * 1000, 9.995, "%0.02fG", double);
-
-  /* 10.0G - 99.9G */
-  CHK_FORMAT(1000LL * 1000 * 1000, 99.95, "%0.01fG", double);
-
-  /* 100G - 999G */
-  CHK_FORMAT(1000LL * 1000 * 1000, 1000, "%lluG", u64);
-
-  /* 1.00T - 9.99G */
-  CHK_FORMAT(1000LL * 1000 * 1000 * 1000, 9.995, "%0.02fT", double);
-
-  /* 10.0T - 99.9T */
-  CHK_FORMAT(1000LL * 1000 * 1000 * 1000, 99.95, "%0.01fT", double);
-
-  /* 100T+ */
-  strcpy(tmp[cur], "infty");
-  return tmp[cur];
-
-}
-
-/* Describe float. Similar to the above, except with a single
-   static buffer. */
-
-u8 *DF(double val) {
-
-  static u8 tmp[16];
-
-  if (val < 99.995) {
-
-    sprintf(tmp, "%0.02f", val);
-    return tmp;
-
-  }
-
-  if (val < 999.95) {
-
-    sprintf(tmp, "%0.01f", val);
-    return tmp;
-
-  }
-
-  return DI((u64)val);
-
-}
-
-/* Describe integer as memory size. */
-
-u8 *DMS(u64 val) {
-
-  static u8 tmp[12][16];
-  static u8 cur;
-
-  cur = (cur + 1) % 12;
-
-  /* 0-9999 */
-  CHK_FORMAT(1, 10000, "%llu B", u64);
-
-  /* 10.0k - 99.9k */
-  CHK_FORMAT(1024, 99.95, "%0.01f kB", double);
-
-  /* 100k - 999k */
-  CHK_FORMAT(1024, 1000, "%llu kB", u64);
-
-  /* 1.00M - 9.99M */
-  CHK_FORMAT(1024 * 1024, 9.995, "%0.02f MB", double);
-
-  /* 10.0M - 99.9M */
-  CHK_FORMAT(1024 * 1024, 99.95, "%0.01f MB", double);
-
-  /* 100M - 999M */
-  CHK_FORMAT(1024 * 1024, 1000, "%llu MB", u64);
-
-  /* 1.00G - 9.99G */
-  CHK_FORMAT(1024LL * 1024 * 1024, 9.995, "%0.02f GB", double);
-
-  /* 10.0G - 99.9G */
-  CHK_FORMAT(1024LL * 1024 * 1024, 99.95, "%0.01f GB", double);
-
-  /* 100G - 999G */
-  CHK_FORMAT(1024LL * 1024 * 1024, 1000, "%llu GB", u64);
-
-  /* 1.00T - 9.99G */
-  CHK_FORMAT(1024LL * 1024 * 1024 * 1024, 9.995, "%0.02f TB", double);
-
-  /* 10.0T - 99.9T */
-  CHK_FORMAT(1024LL * 1024 * 1024 * 1024, 99.95, "%0.01f TB", double);
-
-#undef CHK_FORMAT
-
-  /* 100T+ */
-  strcpy(tmp[cur], "infty");
-  return tmp[cur];
-
-}
-
-/* Describe time delta. Returns one static buffer, 34 chars of less. */
-
-u8 *DTD(u64 cur_ms, u64 event_ms) {
-
-  static u8 tmp[64];
-  u64       delta;
-  s32       t_d, t_h, t_m, t_s;
-
-  if (!event_ms) return "none seen yet";
-
-  delta = cur_ms - event_ms;
-
-  t_d = delta / 1000 / 60 / 60 / 24;
-  t_h = (delta / 1000 / 60 / 60) % 24;
-  t_m = (delta / 1000 / 60) % 60;
-  t_s = (delta / 1000) % 60;
-
-  sprintf(tmp, "%s days, %d hrs, %d min, %d sec", DI(t_d), t_h, t_m, t_s);
-  return tmp;
-
-}
-
diff --git a/src/afl-fuzz-mutators.c b/src/afl-fuzz-mutators.c
index 9071404d..efb1c117 100644
--- a/src/afl-fuzz-mutators.c
+++ b/src/afl-fuzz-mutators.c
@@ -7,6 +7,7 @@
    Now maintained by  Marc Heuse <mh@mh-sec.de>,
                         Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and
                         Andrea Fioraldi <andreafioraldi@gmail.com>
+                        Dominik Maier <mail@dmnk.co>
 
    Copyright 2016, 2017 Google Inc. All rights reserved.
    Copyright 2019-2020 AFLplusplus Project. All rights reserved.
@@ -27,13 +28,13 @@
 
 void load_custom_mutator(afl_state_t *, const char *);
 #ifdef USE_PYTHON
-void load_custom_mutator_py(afl_state_t *, const char *);
+void load_custom_mutator_py(afl_state_t *, char *);
 #endif
 
 void setup_custom_mutator(afl_state_t *afl) {
 
   /* Try mutator library first */
-  u8 *fn = getenv("AFL_CUSTOM_MUTATOR_LIBRARY");
+  u8 *fn = afl->afl_env.afl_custom_mutator_library;
 
   if (fn) {
 
@@ -51,7 +52,7 @@ void setup_custom_mutator(afl_state_t *afl) {
 
   /* Try Python module */
 #ifdef USE_PYTHON
-  u8 *module_name = getenv("AFL_PYTHON_MODULE");
+  u8 *module_name = afl->afl_env.afl_python_module;
 
   if (module_name) {
 
@@ -59,17 +60,14 @@ void setup_custom_mutator(afl_state_t *afl) {
       FATAL(
           "MOpt and Python mutator are mutually exclusive. We accept pull "
           "requests that integrates MOpt with the optional mutators "
-          "(custom/radamsa/redquenn/...).");
-
-    if (init_py_module(afl, module_name))
-      FATAL("Failed to initialize Python module");
+          "(custom/radamsa/redqueen/...).");
 
     load_custom_mutator_py(afl, module_name);
 
   }
 
 #else
-  if (getenv("AFL_PYTHON_MODULE"))
+  if (afl->afl_env.afl_python_module)
     FATAL("Your AFL binary was built without Python support");
 #endif
 
@@ -79,18 +77,20 @@ void destroy_custom_mutator(afl_state_t *afl) {
 
   if (afl->mutator) {
 
-    if (afl->mutator->dh)
-      dlclose(afl->mutator->dh);
-    else {
+    afl->mutator->afl_custom_deinit(afl->mutator->data);
 
-      /* Python mutator */
-#ifdef USE_PYTHON
-      finalize_py_module(afl);
-#endif
+    if (afl->mutator->dh) dlclose(afl->mutator->dh);
+
+    if (afl->mutator->pre_save_buf) {
+
+      ck_free(afl->mutator->pre_save_buf);
+      afl->mutator->pre_save_buf = NULL;
+      afl->mutator->pre_save_size = 0;
 
     }
 
     ck_free(afl->mutator);
+    afl->mutator = NULL;
 
   }
 
@@ -100,6 +100,8 @@ void load_custom_mutator(afl_state_t *afl, const char *fn) {
 
   void *dh;
   afl->mutator = ck_alloc(sizeof(struct custom_mutator));
+  afl->mutator->pre_save_buf = NULL;
+  afl->mutator->pre_save_size = 0;
 
   afl->mutator->name = fn;
   ACTF("Loading custom mutator library from '%s'...", fn);
@@ -109,10 +111,15 @@ void load_custom_mutator(afl_state_t *afl, const char *fn) {
   afl->mutator->dh = dh;
 
   /* Mutator */
-  /* "afl_custom_init", optional for backward compatibility */
+  /* "afl_custom_init", required */
   afl->mutator->afl_custom_init = dlsym(dh, "afl_custom_init");
   if (!afl->mutator->afl_custom_init)
-    WARNF("Symbol 'afl_custom_init' not found.");
+    FATAL("Symbol 'afl_custom_init' not found.");
+
+  /* "afl_custom_deinit", required */
+  afl->mutator->afl_custom_deinit = dlsym(dh, "afl_custom_deinit");
+  if (!afl->mutator->afl_custom_deinit)
+    FATAL("Symbol 'afl_custom_deinit' not found.");
 
   /* "afl_custom_fuzz" or "afl_custom_mutator", required */
   afl->mutator->afl_custom_fuzz = dlsym(dh, "afl_custom_fuzz");
@@ -186,59 +193,58 @@ void load_custom_mutator(afl_state_t *afl, const char *fn) {
 
   /* Initialize the custom mutator */
   if (afl->mutator->afl_custom_init)
-    afl->mutator->afl_custom_init(afl, UR(afl, 0xFFFFFFFF));
+    afl->mutator->data =
+        afl->mutator->afl_custom_init(afl, rand_below(afl, 0xFFFFFFFF));
 
 }
 
 u8 trim_case_custom(afl_state_t *afl, struct queue_entry *q, u8 *in_buf) {
 
-  static u8 tmp[64];
-  static u8 clean_trace[MAP_SIZE];
-
   u8  needs_write = 0, fault = 0;
   u32 trim_exec = 0;
   u32 orig_len = q->len;
 
-  afl->stage_name = tmp;
+  u8 val_buf[STRINGIFY_VAL_SIZE_MAX];
+
+  afl->stage_name = afl->stage_name_buf;
   afl->bytes_trim_in += q->len;
 
   /* Initialize trimming in the custom mutator */
   afl->stage_cur = 0;
-  afl->stage_max = afl->mutator->afl_custom_init_trim(afl, in_buf, q->len);
-
+  afl->stage_max =
+      afl->mutator->afl_custom_init_trim(afl->mutator->data, in_buf, q->len);
+  if (unlikely(afl->stage_max < 0))  /* compare inside the branch hint */
+    FATAL("custom_init_trim error ret: %d", afl->stage_max);
   if (afl->not_on_tty && afl->debug)
     SAYF("[Custom Trimming] START: Max %d iterations, %u bytes", afl->stage_max,
          q->len);
 
   while (afl->stage_cur < afl->stage_max) {
 
-    sprintf(tmp, "ptrim %s", DI(trim_exec));
+    u8 *retbuf = NULL;
 
-    u32 cksum;
+    sprintf(afl->stage_name_buf, "ptrim %s",
+            u_stringify_int(val_buf, trim_exec));
 
-    u8 *   retbuf = NULL;
-    size_t retlen = 0;
+    u32 cksum;
 
-    afl->mutator->afl_custom_trim(afl, &retbuf, &retlen);
+    size_t retlen = afl->mutator->afl_custom_trim(afl->mutator->data, &retbuf);
 
-    if (retlen > orig_len)
+    if (unlikely(!retbuf))
+      FATAL("custom_trim failed (ret %zd)", retlen);
+    else if (unlikely(retlen > orig_len))
       FATAL(
           "Trimmed data returned by custom mutator is larger than original "
           "data");
 
     write_to_testcase(afl, retbuf, retlen);
 
-    fault = run_target(afl, afl->fsrv.exec_tmout);
+    fault = run_target(afl, &afl->fsrv, afl->fsrv.exec_tmout);
     ++afl->trim_execs;
 
-    if (afl->stop_soon || fault == FAULT_ERROR) {
-
-      ck_free(retbuf);
-      goto abort_trimming;
-
-    }
+    if (afl->stop_soon || fault == FAULT_ERROR) { goto abort_trimming; }
 
-    cksum = hash32(afl->fsrv.trace_bits, MAP_SIZE, HASH_CONST);
+    cksum = hash32(afl->fsrv.trace_bits, afl->fsrv.map_size, HASH_CONST);
 
     if (cksum == q->exec_cksum) {
 
@@ -251,12 +257,14 @@ u8 trim_case_custom(afl_state_t *afl, struct queue_entry *q, u8 *in_buf) {
       if (!needs_write) {
 
         needs_write = 1;
-        memcpy(clean_trace, afl->fsrv.trace_bits, MAP_SIZE);
+        memcpy(afl->clean_trace_custom, afl->fsrv.trace_bits,
+               afl->fsrv.map_size);
 
       }
 
       /* Tell the custom mutator that the trimming was successful */
-      afl->stage_cur = afl->mutator->afl_custom_post_trim(afl, 1);
+      afl->stage_cur =
+          afl->mutator->afl_custom_post_trim(afl->mutator->data, 1);
 
       if (afl->not_on_tty && afl->debug)
         SAYF("[Custom Trimming] SUCCESS: %d/%d iterations (now at %u bytes)",
@@ -265,15 +273,16 @@ u8 trim_case_custom(afl_state_t *afl, struct queue_entry *q, u8 *in_buf) {
     } else {
 
       /* Tell the custom mutator that the trimming was unsuccessful */
-      afl->stage_cur = afl->mutator->afl_custom_post_trim(afl, 0);
+      afl->stage_cur =
+          afl->mutator->afl_custom_post_trim(afl->mutator->data, 0);
+      if (unlikely(afl->stage_cur < 0))
+        FATAL("Error ret in custom_post_trim: %d", afl->stage_cur);
       if (afl->not_on_tty && afl->debug)
         SAYF("[Custom Trimming] FAILURE: %d/%d iterations", afl->stage_cur,
              afl->stage_max);
 
     }
 
-    ck_free(retbuf);
-
     /* Since this can be slow, update the screen every now and then. */
 
     if (!(trim_exec++ % afl->stats_update_freq)) show_stats(afl);
@@ -299,7 +308,7 @@ u8 trim_case_custom(afl_state_t *afl, struct queue_entry *q, u8 *in_buf) {
     ck_write(fd, in_buf, q->len, q->fname);
     close(fd);
 
-    memcpy(afl->fsrv.trace_bits, clean_trace, MAP_SIZE);
+    memcpy(afl->fsrv.trace_bits, afl->clean_trace_custom, afl->fsrv.map_size);
     update_bitmap_score(afl, q);
 
   }
@@ -311,53 +320,3 @@ abort_trimming:
 
 }
 
-#ifdef USE_PYTHON
-void load_custom_mutator_py(afl_state_t *afl, const char *module_name) {
-
-  PyObject **py_functions = afl->py_functions;
-
-  afl->mutator = ck_alloc(sizeof(struct custom_mutator));
-
-  afl->mutator->name = module_name;
-  ACTF("Loading Python mutator library from '%s'...", module_name);
-
-  if (py_functions[PY_FUNC_INIT]) afl->mutator->afl_custom_init = init_py;
-
-  /* "afl_custom_fuzz" should not be NULL, but the interface of Python mutator
-     is quite different from the custom mutator. */
-  afl->mutator->afl_custom_fuzz = fuzz_py;
-
-  if (py_functions[PY_FUNC_PRE_SAVE])
-    afl->mutator->afl_custom_pre_save = pre_save_py;
-
-  if (py_functions[PY_FUNC_INIT_TRIM])
-    afl->mutator->afl_custom_init_trim = init_trim_py;
-
-  if (py_functions[PY_FUNC_POST_TRIM])
-    afl->mutator->afl_custom_post_trim = post_trim_py;
-
-  if (py_functions[PY_FUNC_TRIM]) afl->mutator->afl_custom_trim = trim_py;
-
-  if (py_functions[PY_FUNC_HAVOC_MUTATION])
-    afl->mutator->afl_custom_havoc_mutation = havoc_mutation_py;
-
-  if (py_functions[PY_FUNC_HAVOC_MUTATION_PROBABILITY])
-    afl->mutator->afl_custom_havoc_mutation_probability =
-        havoc_mutation_probability_py;
-
-  if (py_functions[PY_FUNC_QUEUE_GET])
-    afl->mutator->afl_custom_queue_get = queue_get_py;
-
-  if (py_functions[PY_FUNC_QUEUE_NEW_ENTRY])
-    afl->mutator->afl_custom_queue_new_entry = queue_new_entry_py;
-
-  OKF("Python mutator '%s' installed successfully.", module_name);
-
-  /* Initialize the custom mutator */
-  if (afl->mutator->afl_custom_init)
-    afl->mutator->afl_custom_init(afl, UR(afl, 0xFFFFFFFF));
-
-}
-
-#endif
-
diff --git a/src/afl-fuzz-one.c b/src/afl-fuzz-one.c
index c4d49ec1..4a039a1d 100644
--- a/src/afl-fuzz-one.c
+++ b/src/afl-fuzz-one.c
@@ -31,7 +31,7 @@ int select_algorithm(afl_state_t *afl) {
 
   int i_puppet, j_puppet;
 
-  double sele = ((double)(UR(afl, 10000)) * 0.0001);
+  double sele = ((double)(rand_below(afl, 10000)) * 0.0001);
   j_puppet = 0;
   for (i_puppet = 0; i_puppet < operator_num; ++i_puppet) {
 
@@ -67,9 +67,9 @@ static u32 choose_block_len(afl_state_t *afl, u32 limit) {
   u32 min_value, max_value;
   u32 rlim = MIN(afl->queue_cycle, 3);
 
-  if (!afl->run_over10m) rlim = 1;
+  if (unlikely(!afl->run_over10m)) rlim = 1;
 
-  switch (UR(afl, rlim)) {
+  switch (rand_below(afl, rlim)) {
 
     case 0:
       min_value = 1;
@@ -83,7 +83,7 @@ static u32 choose_block_len(afl_state_t *afl, u32 limit) {
 
     default:
 
-      if (UR(afl, 10)) {
+      if (rand_below(afl, 10)) {
 
         min_value = HAVOC_BLK_MEDIUM;
         max_value = HAVOC_BLK_LARGE;
@@ -99,7 +99,7 @@ static u32 choose_block_len(afl_state_t *afl, u32 limit) {
 
   if (min_value >= limit) min_value = 1;
 
-  return min_value + UR(afl, MIN(max_value, limit) - min_value + 1);
+  return min_value + rand_below(afl, MIN(max_value, limit) - min_value + 1);
 
 }
 
@@ -347,6 +347,9 @@ u8 fuzz_one_original(afl_state_t *afl) {
   u8  a_collect[MAX_AUTO_EXTRA];
   u32 a_len = 0;
 
+/* Not pretty, but saves a lot of writing */
+#define BUF_PARAMS(name) (void **)&afl->name##_buf, &afl->name##_size
+
 #ifdef IGNORE_FINDS
 
   /* In IGNORE_FINDS mode, skip any entries that weren't in the
@@ -356,16 +359,17 @@ u8 fuzz_one_original(afl_state_t *afl) {
 
 #else
 
-  if (afl->mutator && afl->mutator->afl_custom_queue_get) {
+  if (unlikely(afl->mutator) && unlikely(afl->mutator->afl_custom_queue_get)) {
 
     /* The custom mutator will decide to skip this test case or not. */
 
-    if (!afl->mutator->afl_custom_queue_get(afl, afl->queue_cur->fname))
+    if (!afl->mutator->afl_custom_queue_get(afl->mutator->data,
+                                            afl->queue_cur->fname))
       return 1;
 
   }
 
-  if (afl->pending_favored) {
+  if (likely(afl->pending_favored)) {
 
     /* If we have any favored, non-fuzzed new arrivals in the queue,
        possibly skip to them at the expense of already-fuzzed or non-favored
@@ -373,7 +377,7 @@ u8 fuzz_one_original(afl_state_t *afl) {
 
     if (((afl->queue_cur->was_fuzzed > 0 || afl->queue_cur->fuzz_level > 0) ||
          !afl->queue_cur->favored) &&
-        UR(afl, 100) < SKIP_TO_NEW_PROB)
+        rand_below(afl, 100) < SKIP_TO_NEW_PROB)
       return 1;
 
   } else if (!afl->dumb_mode && !afl->queue_cur->favored &&
@@ -387,11 +391,11 @@ u8 fuzz_one_original(afl_state_t *afl) {
     if (afl->queue_cycle > 1 &&
         (afl->queue_cur->fuzz_level == 0 || afl->queue_cur->was_fuzzed)) {
 
-      if (UR(afl, 100) < SKIP_NFAV_NEW_PROB) return 1;
+      if (rand_below(afl, 100) < SKIP_NFAV_NEW_PROB) return 1;
 
     } else {
 
-      if (UR(afl, 100) < SKIP_NFAV_OLD_PROB) return 1;
+      if (rand_below(afl, 100) < SKIP_NFAV_OLD_PROB) return 1;
 
     }
 
@@ -399,7 +403,7 @@ u8 fuzz_one_original(afl_state_t *afl) {
 
 #endif                                                     /* ^IGNORE_FINDS */
 
-  if (afl->not_on_tty) {
+  if (unlikely(afl->not_on_tty)) {
 
     ACTF("Fuzzing test case #%u (%u total, %llu uniq crashes found)...",
          afl->current_entry, afl->queued_paths, afl->unique_crashes);
@@ -411,13 +415,13 @@ u8 fuzz_one_original(afl_state_t *afl) {
 
   fd = open(afl->queue_cur->fname, O_RDONLY);
 
-  if (fd < 0) PFATAL("Unable to open '%s'", afl->queue_cur->fname);
+  if (unlikely(fd < 0)) PFATAL("Unable to open '%s'", afl->queue_cur->fname);
 
   len = afl->queue_cur->len;
 
   orig_in = in_buf = mmap(0, len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
 
-  if (orig_in == MAP_FAILED)
+  if (unlikely(orig_in == MAP_FAILED))
     PFATAL("Unable to mmap '%s' with len %d", afl->queue_cur->fname, len);
 
   close(fd);
@@ -426,7 +430,7 @@ u8 fuzz_one_original(afl_state_t *afl) {
      single byte anyway, so it wouldn't give us any performance or memory usage
      benefits. */
 
-  out_buf = ck_alloc_nozero(len);
+  out_buf = ck_maybe_grow(BUF_PARAMS(out), len);
 
   afl->subseq_tmouts = 0;
 
@@ -436,7 +440,7 @@ u8 fuzz_one_original(afl_state_t *afl) {
    * CALIBRATION (only if failed earlier on) *
    *******************************************/
 
-  if (afl->queue_cur->cal_failed) {
+  if (unlikely(afl->queue_cur->cal_failed)) {
 
     u8 res = FAULT_TMOUT;
 
@@ -445,11 +449,12 @@ u8 fuzz_one_original(afl_state_t *afl) {
       res =
           calibrate_case(afl, afl->queue_cur, in_buf, afl->queue_cycle - 1, 0);
 
-      if (res == FAULT_ERROR) FATAL("Unable to execute target application");
+      if (unlikely(res == FAULT_ERROR))
+        FATAL("Unable to execute target application");
 
     }
 
-    if (afl->stop_soon || res != afl->crash_mode) {
+    if (unlikely(afl->stop_soon) || res != afl->crash_mode) {
 
       ++afl->cur_skipped_paths;
       goto abandon_entry;
@@ -466,9 +471,10 @@ u8 fuzz_one_original(afl_state_t *afl) {
 
     u8 res = trim_case(afl, afl->queue_cur, in_buf);
 
-    if (res == FAULT_ERROR) FATAL("Unable to execute target application");
+    if (unlikely(res == FAULT_ERROR))
+      FATAL("Unable to execute target application");
 
-    if (afl->stop_soon) {
+    if (unlikely(afl->stop_soon)) {
 
       ++afl->cur_skipped_paths;
       goto abandon_entry;
@@ -491,9 +497,9 @@ u8 fuzz_one_original(afl_state_t *afl) {
 
   orig_perf = perf_score = calculate_score(afl, afl->queue_cur);
 
-  if (perf_score == 0) goto abandon_entry;
+  if (unlikely(perf_score == 0)) goto abandon_entry;
 
-  if (afl->use_radamsa > 1) goto radamsa_stage;
+  if (unlikely(afl->use_radamsa > 1)) goto radamsa_stage;
 
   if (afl->shm.cmplog_mode && !afl->queue_cur->fully_colorized) {
 
@@ -595,7 +601,7 @@ u8 fuzz_one_original(afl_state_t *afl) {
 
     if (!afl->dumb_mode && (afl->stage_cur & 7) == 7) {
 
-      u32 cksum = hash32(afl->fsrv.trace_bits, MAP_SIZE, HASH_CONST);
+      u32 cksum = hash32(afl->fsrv.trace_bits, afl->fsrv.map_size, HASH_CONST);
 
       if (afl->stage_cur == afl->stage_max - 1 && cksum == prev_cksum) {
 
@@ -607,7 +613,7 @@ u8 fuzz_one_original(afl_state_t *afl) {
         ++a_len;
 
         if (a_len >= MIN_AUTO_EXTRA && a_len <= MAX_AUTO_EXTRA)
-          maybe_add_auto(afl, a_collect, a_len);
+          maybe_add_auto((u8 *)afl, a_collect, a_len);
 
       } else if (cksum != prev_cksum) {
 
@@ -615,7 +621,7 @@ u8 fuzz_one_original(afl_state_t *afl) {
            worthwhile queued up, and collect that if the answer is yes. */
 
         if (a_len >= MIN_AUTO_EXTRA && a_len <= MAX_AUTO_EXTRA)
-          maybe_add_auto(afl, a_collect, a_len);
+          maybe_add_auto((u8 *)afl, a_collect, a_len);
 
         a_len = 0;
         prev_cksum = cksum;
@@ -716,7 +722,7 @@ u8 fuzz_one_original(afl_state_t *afl) {
   /* Initialize effector map for the next step (see comments below). Always
      flag first and last byte as doing something. */
 
-  eff_map = ck_alloc(EFF_ALEN(len));
+  eff_map = ck_maybe_grow(BUF_PARAMS(eff), EFF_ALEN(len));
   eff_map[0] = 1;
 
   if (EFF_APOS(len - 1) != 0) {
@@ -755,7 +761,7 @@ u8 fuzz_one_original(afl_state_t *afl) {
          without wasting time on checksums. */
 
       if (!afl->dumb_mode && len >= EFF_MIN_LEN)
-        cksum = hash32(afl->fsrv.trace_bits, MAP_SIZE, HASH_CONST);
+        cksum = hash32(afl->fsrv.trace_bits, afl->fsrv.map_size, HASH_CONST);
       else
         cksum = ~afl->queue_cur->exec_cksum;
 
@@ -1401,7 +1407,7 @@ skip_interest:
          map. */
 
       if ((afl->extras_cnt > MAX_DET_EXTRAS &&
-           UR(afl, afl->extras_cnt) >= MAX_DET_EXTRAS) ||
+           rand_below(afl, afl->extras_cnt) >= MAX_DET_EXTRAS) ||
           afl->extras[j].len > len - i ||
           !memcmp(afl->extras[j].data, out_buf + i, afl->extras[j].len) ||
           !memchr(eff_map + EFF_APOS(i), 1,
@@ -1440,7 +1446,7 @@ skip_interest:
 
   orig_hit_cnt = new_hit_cnt;
 
-  ex_tmp = ck_alloc(len + MAX_DICT_FILE);
+  ex_tmp = ck_maybe_grow(BUF_PARAMS(ex), len + MAX_DICT_FILE);
 
   for (i = 0; i <= len; ++i) {
 
@@ -1463,7 +1469,6 @@ skip_interest:
 
       if (common_fuzz_stuff(afl, ex_tmp, len + afl->extras[j].len)) {
 
-        ck_free(ex_tmp);
         goto abandon_entry;
 
       }
@@ -1477,8 +1482,6 @@ skip_interest:
 
   }
 
-  ck_free(ex_tmp);
-
   new_hit_cnt = afl->queued_paths + afl->unique_crashes;
 
   afl->stage_finds[STAGE_EXTRAS_UI] += new_hit_cnt - orig_hit_cnt;
@@ -1549,8 +1552,8 @@ custom_mutator_stage:
    * CUSTOM MUTATORS *
    *******************/
 
-  if (!afl->mutator) goto havoc_stage;
-  if (!afl->mutator->afl_custom_fuzz) goto havoc_stage;
+  if (likely(!afl->mutator)) goto havoc_stage;
+  if (likely(!afl->mutator->afl_custom_fuzz)) goto havoc_stage;
 
   afl->stage_name = "custom mutator";
   afl->stage_short = "custom";
@@ -1573,7 +1576,7 @@ custom_mutator_stage:
     /* Pick a random other queue entry for passing to external API */
     do {
 
-      tid = UR(afl, afl->queued_paths);
+      tid = rand_below(afl, afl->queued_paths);
 
     } while (tid == afl->current_entry && afl->queued_paths > 1);
 
@@ -1603,19 +1606,24 @@ custom_mutator_stage:
 
     /* Read the additional testcase into a new buffer. */
     fd = open(target->fname, O_RDONLY);
-    if (fd < 0) PFATAL("Unable to open '%s'", target->fname);
-    new_buf = ck_alloc_nozero(target->len);
+    if (unlikely(fd < 0)) PFATAL("Unable to open '%s'", target->fname);
+
+    new_buf = ck_maybe_grow(BUF_PARAMS(out_scratch), target->len);
     ck_read(fd, new_buf, target->len, target->fname);
     close(fd);
 
+    u8 *mutated_buf = NULL;
+
     size_t mutated_size = afl->mutator->afl_custom_fuzz(
-        afl, &out_buf, len, new_buf, target->len, max_seed_size);
+        afl->mutator->data, out_buf, len, &mutated_buf, new_buf, target->len,
+        max_seed_size);
 
-    ck_free(new_buf);
+    if (unlikely(!mutated_buf))
+      FATAL("Error in custom_fuzz. Size returned: %zd", mutated_size);
 
     if (mutated_size > 0) {
 
-      if (common_fuzz_stuff(afl, out_buf, (u32)mutated_size)) {
+      if (common_fuzz_stuff(afl, mutated_buf, (u32)mutated_size)) {
 
         goto abandon_entry;
 
@@ -1639,7 +1647,8 @@ custom_mutator_stage:
 
     }
 
-    if (mutated_size < len) out_buf = ck_realloc(out_buf, len);
+    /* `(afl->)out_buf` may have been changed by the call to custom_fuzz */
+    /* TODO: Only do this when `mutated_buf` == `out_buf`? Branch vs Memcpy. */
     memcpy(out_buf, in_buf, len);
 
   }
@@ -1649,7 +1658,7 @@ custom_mutator_stage:
   afl->stage_finds[STAGE_CUSTOM_MUTATOR] += new_hit_cnt - orig_hit_cnt;
   afl->stage_cycles[STAGE_CUSTOM_MUTATOR] += afl->stage_max;
 
-  if (afl->custom_only) {
+  if (likely(afl->custom_only)) {
 
     /* Skip other stages */
     ret_val = 0;
@@ -1679,8 +1688,8 @@ havoc_stage:
 
     perf_score = orig_perf;
 
-    snprintf(afl->stage_name_buf64, 64, "splice %u", splice_cycle);
-    afl->stage_name = afl->stage_name_buf64;
+    snprintf(afl->stage_name_buf, STAGE_BUF_SIZE, "splice %u", splice_cycle);
+    afl->stage_name = afl->stage_name_buf;
     afl->stage_short = "splice";
     afl->stage_max = SPLICE_HAVOC * perf_score / afl->havoc_div / 100;
 
@@ -1700,7 +1709,7 @@ havoc_stage:
   if (stacked_custom && afl->mutator->afl_custom_havoc_mutation_probability) {
 
     stacked_custom_prob =
-        afl->mutator->afl_custom_havoc_mutation_probability(afl);
+        afl->mutator->afl_custom_havoc_mutation_probability(afl->mutator->data);
     if (stacked_custom_prob > 100)
       FATAL(
           "The probability returned by afl_custom_havoc_mutation_propability "
@@ -1713,34 +1722,49 @@ havoc_stage:
 
   for (afl->stage_cur = 0; afl->stage_cur < afl->stage_max; ++afl->stage_cur) {
 
-    u32 use_stacking = 1 << (1 + UR(afl, HAVOC_STACK_POW2));
+    u32 use_stacking = 1 << (1 + rand_below(afl, HAVOC_STACK_POW2));
 
     afl->stage_cur_val = use_stacking;
 
     for (i = 0; i < use_stacking; ++i) {
 
-      if (stacked_custom && UR(afl, 100) < stacked_custom_prob) {
+      if (stacked_custom && rand_below(afl, 100) < stacked_custom_prob) {
+
+        u8 *   custom_havoc_buf = NULL;
+        size_t new_len = afl->mutator->afl_custom_havoc_mutation(
+            afl->mutator->data, out_buf, temp_len, &custom_havoc_buf, MAX_FILE);
+        if (unlikely(!custom_havoc_buf))
+          FATAL("Error in custom_havoc (return %zd)", new_len);
+        if (likely(new_len > 0 && custom_havoc_buf)) {
 
-        temp_len = afl->mutator->afl_custom_havoc_mutation(afl, &out_buf,
-                                                           temp_len, MAX_FILE);
+          temp_len = new_len;
+          if (out_buf != custom_havoc_buf) {
+            /* ck_maybe_grow() may relocate the buffer: refresh out_buf */
+            out_buf = ck_maybe_grow(BUF_PARAMS(out), temp_len);
+            memcpy(out_buf, custom_havoc_buf, temp_len);
+
+          }
+
+        }
 
       }
 
-      switch (UR(afl, 15 + ((afl->extras_cnt + afl->a_extras_cnt) ? 2 : 0))) {
+      switch (rand_below(
+          afl, 15 + ((afl->extras_cnt + afl->a_extras_cnt) ? 2 : 0))) {
 
         case 0:
 
           /* Flip a single bit somewhere. Spooky! */
 
-          FLIP_BIT(out_buf, UR(afl, temp_len << 3));
+          FLIP_BIT(out_buf, rand_below(afl, temp_len << 3));
           break;
 
         case 1:
 
           /* Set byte to interesting value. */
 
-          out_buf[UR(afl, temp_len)] =
-              interesting_8[UR(afl, sizeof(interesting_8))];
+          out_buf[rand_below(afl, temp_len)] =
+              interesting_8[rand_below(afl, sizeof(interesting_8))];
           break;
 
         case 2:
@@ -1749,15 +1773,15 @@ havoc_stage:
 
           if (temp_len < 2) break;
 
-          if (UR(afl, 2)) {
+          if (rand_below(afl, 2)) {
 
-            *(u16 *)(out_buf + UR(afl, temp_len - 1)) =
-                interesting_16[UR(afl, sizeof(interesting_16) >> 1)];
+            *(u16 *)(out_buf + rand_below(afl, temp_len - 1)) =
+                interesting_16[rand_below(afl, sizeof(interesting_16) >> 1)];
 
           } else {
 
-            *(u16 *)(out_buf + UR(afl, temp_len - 1)) =
-                SWAP16(interesting_16[UR(afl, sizeof(interesting_16) >> 1)]);
+            *(u16 *)(out_buf + rand_below(afl, temp_len - 1)) = SWAP16(
+                interesting_16[rand_below(afl, sizeof(interesting_16) >> 1)]);
 
           }
 
@@ -1769,15 +1793,15 @@ havoc_stage:
 
           if (temp_len < 4) break;
 
-          if (UR(afl, 2)) {
+          if (rand_below(afl, 2)) {
 
-            *(u32 *)(out_buf + UR(afl, temp_len - 3)) =
-                interesting_32[UR(afl, sizeof(interesting_32) >> 2)];
+            *(u32 *)(out_buf + rand_below(afl, temp_len - 3)) =
+                interesting_32[rand_below(afl, sizeof(interesting_32) >> 2)];
 
           } else {
 
-            *(u32 *)(out_buf + UR(afl, temp_len - 3)) =
-                SWAP32(interesting_32[UR(afl, sizeof(interesting_32) >> 2)]);
+            *(u32 *)(out_buf + rand_below(afl, temp_len - 3)) = SWAP32(
+                interesting_32[rand_below(afl, sizeof(interesting_32) >> 2)]);
 
           }
 
@@ -1787,14 +1811,14 @@ havoc_stage:
 
           /* Randomly subtract from byte. */
 
-          out_buf[UR(afl, temp_len)] -= 1 + UR(afl, ARITH_MAX);
+          out_buf[rand_below(afl, temp_len)] -= 1 + rand_below(afl, ARITH_MAX);
           break;
 
         case 5:
 
           /* Randomly add to byte. */
 
-          out_buf[UR(afl, temp_len)] += 1 + UR(afl, ARITH_MAX);
+          out_buf[rand_below(afl, temp_len)] += 1 + rand_below(afl, ARITH_MAX);
           break;
 
         case 6:
@@ -1803,16 +1827,16 @@ havoc_stage:
 
           if (temp_len < 2) break;
 
-          if (UR(afl, 2)) {
+          if (rand_below(afl, 2)) {
 
-            u32 pos = UR(afl, temp_len - 1);
+            u32 pos = rand_below(afl, temp_len - 1);
 
-            *(u16 *)(out_buf + pos) -= 1 + UR(afl, ARITH_MAX);
+            *(u16 *)(out_buf + pos) -= 1 + rand_below(afl, ARITH_MAX);
 
           } else {
 
-            u32 pos = UR(afl, temp_len - 1);
-            u16 num = 1 + UR(afl, ARITH_MAX);
+            u32 pos = rand_below(afl, temp_len - 1);
+            u16 num = 1 + rand_below(afl, ARITH_MAX);
 
             *(u16 *)(out_buf + pos) =
                 SWAP16(SWAP16(*(u16 *)(out_buf + pos)) - num);
@@ -1827,16 +1851,16 @@ havoc_stage:
 
           if (temp_len < 2) break;
 
-          if (UR(afl, 2)) {
+          if (rand_below(afl, 2)) {
 
-            u32 pos = UR(afl, temp_len - 1);
+            u32 pos = rand_below(afl, temp_len - 1);
 
-            *(u16 *)(out_buf + pos) += 1 + UR(afl, ARITH_MAX);
+            *(u16 *)(out_buf + pos) += 1 + rand_below(afl, ARITH_MAX);
 
           } else {
 
-            u32 pos = UR(afl, temp_len - 1);
-            u16 num = 1 + UR(afl, ARITH_MAX);
+            u32 pos = rand_below(afl, temp_len - 1);
+            u16 num = 1 + rand_below(afl, ARITH_MAX);
 
             *(u16 *)(out_buf + pos) =
                 SWAP16(SWAP16(*(u16 *)(out_buf + pos)) + num);
@@ -1851,16 +1875,16 @@ havoc_stage:
 
           if (temp_len < 4) break;
 
-          if (UR(afl, 2)) {
+          if (rand_below(afl, 2)) {
 
-            u32 pos = UR(afl, temp_len - 3);
+            u32 pos = rand_below(afl, temp_len - 3);
 
-            *(u32 *)(out_buf + pos) -= 1 + UR(afl, ARITH_MAX);
+            *(u32 *)(out_buf + pos) -= 1 + rand_below(afl, ARITH_MAX);
 
           } else {
 
-            u32 pos = UR(afl, temp_len - 3);
-            u32 num = 1 + UR(afl, ARITH_MAX);
+            u32 pos = rand_below(afl, temp_len - 3);
+            u32 num = 1 + rand_below(afl, ARITH_MAX);
 
             *(u32 *)(out_buf + pos) =
                 SWAP32(SWAP32(*(u32 *)(out_buf + pos)) - num);
@@ -1875,16 +1899,16 @@ havoc_stage:
 
           if (temp_len < 4) break;
 
-          if (UR(afl, 2)) {
+          if (rand_below(afl, 2)) {
 
-            u32 pos = UR(afl, temp_len - 3);
+            u32 pos = rand_below(afl, temp_len - 3);
 
-            *(u32 *)(out_buf + pos) += 1 + UR(afl, ARITH_MAX);
+            *(u32 *)(out_buf + pos) += 1 + rand_below(afl, ARITH_MAX);
 
           } else {
 
-            u32 pos = UR(afl, temp_len - 3);
-            u32 num = 1 + UR(afl, ARITH_MAX);
+            u32 pos = rand_below(afl, temp_len - 3);
+            u32 num = 1 + rand_below(afl, ARITH_MAX);
 
             *(u32 *)(out_buf + pos) =
                 SWAP32(SWAP32(*(u32 *)(out_buf + pos)) + num);
@@ -1899,7 +1923,7 @@ havoc_stage:
              why not. We use XOR with 1-255 to eliminate the
              possibility of a no-op. */
 
-          out_buf[UR(afl, temp_len)] ^= 1 + UR(afl, 255);
+          out_buf[rand_below(afl, temp_len)] ^= 1 + rand_below(afl, 255);
           break;
 
         case 11 ... 12: {
@@ -1916,7 +1940,7 @@ havoc_stage:
 
           del_len = choose_block_len(afl, temp_len - 1);
 
-          del_from = UR(afl, temp_len - del_len + 1);
+          del_from = rand_below(afl, temp_len - del_len + 1);
 
           memmove(out_buf + del_from, out_buf + del_from + del_len,
                   temp_len - del_from - del_len);
@@ -1933,14 +1957,14 @@ havoc_stage:
 
             /* Clone bytes (75%) or insert a block of constant bytes (25%). */
 
-            u8  actually_clone = UR(afl, 4);
+            u8  actually_clone = rand_below(afl, 4);
             u32 clone_from, clone_to, clone_len;
             u8 *new_buf;
 
             if (actually_clone) {
 
               clone_len = choose_block_len(afl, temp_len);
-              clone_from = UR(afl, temp_len - clone_len + 1);
+              clone_from = rand_below(afl, temp_len - clone_len + 1);
 
             } else {
 
@@ -1949,9 +1973,10 @@ havoc_stage:
 
             }
 
-            clone_to = UR(afl, temp_len);
+            clone_to = rand_below(afl, temp_len);
 
-            new_buf = ck_alloc_nozero(temp_len + clone_len);
+            new_buf =
+                ck_maybe_grow(BUF_PARAMS(out_scratch), temp_len + clone_len);
 
             /* Head */
 
@@ -1963,15 +1988,17 @@ havoc_stage:
               memcpy(new_buf + clone_to, out_buf + clone_from, clone_len);
             else
               memset(new_buf + clone_to,
-                     UR(afl, 2) ? UR(afl, 256) : out_buf[UR(afl, temp_len)],
+                     rand_below(afl, 2) ? rand_below(afl, 256)
+                                        : out_buf[rand_below(afl, temp_len)],
                      clone_len);
 
             /* Tail */
             memcpy(new_buf + clone_to + clone_len, out_buf + clone_to,
                    temp_len - clone_to);
 
-            ck_free(out_buf);
+            swap_bufs(BUF_PARAMS(out), BUF_PARAMS(out_scratch));
             out_buf = new_buf;
+            new_buf = NULL;
             temp_len += clone_len;
 
           }
@@ -1989,10 +2016,10 @@ havoc_stage:
 
           copy_len = choose_block_len(afl, temp_len - 1);
 
-          copy_from = UR(afl, temp_len - copy_len + 1);
-          copy_to = UR(afl, temp_len - copy_len + 1);
+          copy_from = rand_below(afl, temp_len - copy_len + 1);
+          copy_to = rand_below(afl, temp_len - copy_len + 1);
 
-          if (UR(afl, 4)) {
+          if (rand_below(afl, 4)) {
 
             if (copy_from != copy_to)
               memmove(out_buf + copy_to, out_buf + copy_from, copy_len);
@@ -2000,7 +2027,8 @@ havoc_stage:
           } else
 
             memset(out_buf + copy_to,
-                   UR(afl, 2) ? UR(afl, 256) : out_buf[UR(afl, temp_len)],
+                   rand_below(afl, 2) ? rand_below(afl, 256)
+                                      : out_buf[rand_below(afl, temp_len)],
                    copy_len);
 
           break;
@@ -2014,18 +2042,18 @@ havoc_stage:
 
           /* Overwrite bytes with an extra. */
 
-          if (!afl->extras_cnt || (afl->a_extras_cnt && UR(afl, 2))) {
+          if (!afl->extras_cnt || (afl->a_extras_cnt && rand_below(afl, 2))) {
 
             /* No user-specified extras or odds in our favor. Let's use an
                auto-detected one. */
 
-            u32 use_extra = UR(afl, afl->a_extras_cnt);
+            u32 use_extra = rand_below(afl, afl->a_extras_cnt);
             u32 extra_len = afl->a_extras[use_extra].len;
             u32 insert_at;
 
             if (extra_len > temp_len) break;
 
-            insert_at = UR(afl, temp_len - extra_len + 1);
+            insert_at = rand_below(afl, temp_len - extra_len + 1);
             memcpy(out_buf + insert_at, afl->a_extras[use_extra].data,
                    extra_len);
 
@@ -2033,13 +2061,13 @@ havoc_stage:
 
             /* No auto extras or odds in our favor. Use the dictionary. */
 
-            u32 use_extra = UR(afl, afl->extras_cnt);
+            u32 use_extra = rand_below(afl, afl->extras_cnt);
             u32 extra_len = afl->extras[use_extra].len;
             u32 insert_at;
 
             if (extra_len > temp_len) break;
 
-            insert_at = UR(afl, temp_len - extra_len + 1);
+            insert_at = rand_below(afl, temp_len - extra_len + 1);
             memcpy(out_buf + insert_at, afl->extras[use_extra].data, extra_len);
 
           }
@@ -2050,20 +2078,21 @@ havoc_stage:
 
         case 16: {
 
-          u32 use_extra, extra_len, insert_at = UR(afl, temp_len + 1);
+          u32 use_extra, extra_len, insert_at = rand_below(afl, temp_len + 1);
           u8 *new_buf;
 
           /* Insert an extra. Do the same dice-rolling stuff as for the
              previous case. */
 
-          if (!afl->extras_cnt || (afl->a_extras_cnt && UR(afl, 2))) {
+          if (!afl->extras_cnt || (afl->a_extras_cnt && rand_below(afl, 2))) {
 
-            use_extra = UR(afl, afl->a_extras_cnt);
+            use_extra = rand_below(afl, afl->a_extras_cnt);
             extra_len = afl->a_extras[use_extra].len;
 
             if (temp_len + extra_len >= MAX_FILE) break;
 
-            new_buf = ck_alloc_nozero(temp_len + extra_len);
+            new_buf =
+                ck_maybe_grow(BUF_PARAMS(out_scratch), temp_len + extra_len);
 
             /* Head */
             memcpy(new_buf, out_buf, insert_at);
@@ -2074,12 +2103,13 @@ havoc_stage:
 
           } else {
 
-            use_extra = UR(afl, afl->extras_cnt);
+            use_extra = rand_below(afl, afl->extras_cnt);
             extra_len = afl->extras[use_extra].len;
 
             if (temp_len + extra_len >= MAX_FILE) break;
 
-            new_buf = ck_alloc_nozero(temp_len + extra_len);
+            new_buf =
+                ck_maybe_grow(BUF_PARAMS(out_scratch), temp_len + extra_len);
 
             /* Head */
             memcpy(new_buf, out_buf, insert_at);
@@ -2093,8 +2123,9 @@ havoc_stage:
           memcpy(new_buf + insert_at + extra_len, out_buf + insert_at,
                  temp_len - insert_at);
 
-          ck_free(out_buf);
+          swap_bufs(BUF_PARAMS(out), BUF_PARAMS(out_scratch));
           out_buf = new_buf;
+          new_buf = NULL;
           temp_len += extra_len;
 
           break;
@@ -2110,7 +2141,7 @@ havoc_stage:
     /* out_buf might have been mangled a bit, so let's restore it to its
        original size and shape. */
 
-    if (temp_len < len) out_buf = ck_realloc(out_buf, len);
+    out_buf = ck_maybe_grow(BUF_PARAMS(out), len);
     temp_len = len;
     memcpy(out_buf, in_buf, len);
 
@@ -2172,7 +2203,6 @@ retry_splicing:
 
     if (in_buf != orig_in) {
 
-      ck_free(in_buf);
       in_buf = orig_in;
       len = afl->queue_cur->len;
 
@@ -2182,7 +2212,7 @@ retry_splicing:
 
     do {
 
-      tid = UR(afl, afl->queued_paths);
+      tid = rand_below(afl, afl->queued_paths);
 
     } while (tid == afl->current_entry);
 
@@ -2214,9 +2244,9 @@ retry_splicing:
 
     fd = open(target->fname, O_RDONLY);
 
-    if (fd < 0) PFATAL("Unable to open '%s'", target->fname);
+    if (unlikely(fd < 0)) PFATAL("Unable to open '%s'", target->fname);
 
-    new_buf = ck_alloc_nozero(target->len);
+    new_buf = ck_maybe_grow(BUF_PARAMS(in_scratch), target->len);
 
     ck_read(fd, new_buf, target->len, target->fname);
 
@@ -2228,25 +2258,20 @@ retry_splicing:
 
     locate_diffs(in_buf, new_buf, MIN(len, target->len), &f_diff, &l_diff);
 
-    if (f_diff < 0 || l_diff < 2 || f_diff == l_diff) {
-
-      ck_free(new_buf);
-      goto retry_splicing;
-
-    }
+    if (f_diff < 0 || l_diff < 2 || f_diff == l_diff) { goto retry_splicing; }
 
     /* Split somewhere between the first and last differing byte. */
 
-    split_at = f_diff + UR(afl, l_diff - f_diff);
+    split_at = f_diff + rand_below(afl, l_diff - f_diff);
 
     /* Do the thing. */
 
     len = target->len;
     memcpy(new_buf, in_buf, split_at);
+    swap_bufs(BUF_PARAMS(in), BUF_PARAMS(in_scratch));
     in_buf = new_buf;
 
-    ck_free(out_buf);
-    out_buf = ck_alloc_nozero(len);
+    out_buf = ck_maybe_grow(BUF_PARAMS(out), len);
     memcpy(out_buf, in_buf, len);
 
     goto custom_mutator_stage;
@@ -2263,7 +2288,7 @@ retry_splicing:
 
 radamsa_stage:
 
-  if (!afl->use_radamsa || !afl->radamsa_mutate_ptr) goto abandon_entry;
+  if (likely(!afl->use_radamsa || !afl->radamsa_mutate_ptr)) goto abandon_entry;
 
   afl->stage_name = "radamsa";
   afl->stage_short = "radamsa";
@@ -2274,12 +2299,14 @@ radamsa_stage:
 
   orig_hit_cnt = afl->queued_paths + afl->unique_crashes;
 
-  /* Read the additional testcase into a new buffer. */
-  u8 *save_buf = ck_alloc_nozero(len);
+  /* Keep a copy of the current out_buf contents in save_buf.
+     We'll reuse in_scratch for it, as it is free at this point.
+     new_buf below is backed by out_scratch in the same way. */
+  u8 *save_buf = ck_maybe_grow(BUF_PARAMS(in_scratch), len);
   memcpy(save_buf, out_buf, len);
 
   u32 max_len = len + choose_block_len(afl, HAVOC_BLK_XL);
-  u8 *new_buf = ck_alloc_nozero(max_len);
+  u8 *new_buf = ck_maybe_grow(BUF_PARAMS(out_scratch), max_len);
   u8 *tmp_buf;
 
   for (afl->stage_cur = 0; afl->stage_cur < afl->stage_max; ++afl->stage_cur) {
@@ -2299,19 +2326,10 @@ radamsa_stage:
 
     }
 
-    if (common_fuzz_stuff(afl, tmp_buf, temp_len)) {
-
-      ck_free(save_buf);
-      ck_free(new_buf);
-      goto abandon_entry;
-
-    }
+    if (common_fuzz_stuff(afl, tmp_buf, temp_len)) { goto abandon_entry; }
 
   }
 
-  ck_free(save_buf);
-  ck_free(new_buf);
-
   new_hit_cnt = afl->queued_paths + afl->unique_crashes;
 
   afl->stage_finds[STAGE_RADAMSA] += new_hit_cnt - orig_hit_cnt;
@@ -2341,10 +2359,6 @@ abandon_entry:
 
   munmap(orig_in, afl->queue_cur->len);
 
-  if (in_buf != orig_in) ck_free(in_buf);
-  ck_free(out_buf);
-  ck_free(eff_map);
-
   return ret_val;
 
 #undef FLIP_BIT
@@ -2391,7 +2405,7 @@ u8 mopt_common_fuzzing(afl_state_t *afl, MOpt_globals_t MOpt_globals) {
        cases. */
 
     if ((afl->queue_cur->was_fuzzed || !afl->queue_cur->favored) &&
-        UR(afl, 100) < SKIP_TO_NEW_PROB)
+        rand_below(afl, 100) < SKIP_TO_NEW_PROB)
       return 1;
 
   } else if (!afl->dumb_mode && !afl->queue_cur->favored &&
@@ -2404,11 +2418,11 @@ u8 mopt_common_fuzzing(afl_state_t *afl, MOpt_globals_t MOpt_globals) {
 
     if (afl->queue_cycle > 1 && !afl->queue_cur->was_fuzzed) {
 
-      if (UR(afl, 100) < SKIP_NFAV_NEW_PROB) return 1;
+      if (rand_below(afl, 100) < SKIP_NFAV_NEW_PROB) return 1;
 
     } else {
 
-      if (UR(afl, 100) < SKIP_NFAV_OLD_PROB) return 1;
+      if (rand_below(afl, 100) < SKIP_NFAV_OLD_PROB) return 1;
 
     }
 
@@ -2443,7 +2457,7 @@ u8 mopt_common_fuzzing(afl_state_t *afl, MOpt_globals_t MOpt_globals) {
      single byte anyway, so it wouldn't give us any performance or memory usage
      benefits. */
 
-  out_buf = ck_alloc_nozero(len);
+  out_buf = ck_maybe_grow(BUF_PARAMS(out), len);
 
   afl->subseq_tmouts = 0;
 
@@ -2612,7 +2626,7 @@ u8 mopt_common_fuzzing(afl_state_t *afl, MOpt_globals_t MOpt_globals) {
 
     if (!afl->dumb_mode && (afl->stage_cur & 7) == 7) {
 
-      u32 cksum = hash32(afl->fsrv.trace_bits, MAP_SIZE, HASH_CONST);
+      u32 cksum = hash32(afl->fsrv.trace_bits, afl->fsrv.map_size, HASH_CONST);
 
       if (afl->stage_cur == afl->stage_max - 1 && cksum == prev_cksum) {
 
@@ -2624,7 +2638,7 @@ u8 mopt_common_fuzzing(afl_state_t *afl, MOpt_globals_t MOpt_globals) {
         ++a_len;
 
         if (a_len >= MIN_AUTO_EXTRA && a_len <= MAX_AUTO_EXTRA)
-          maybe_add_auto(afl, a_collect, a_len);
+          maybe_add_auto((u8 *)afl, a_collect, a_len);
 
       } else if (cksum != prev_cksum) {
 
@@ -2632,7 +2646,7 @@ u8 mopt_common_fuzzing(afl_state_t *afl, MOpt_globals_t MOpt_globals) {
            worthwhile queued up, and collect that if the answer is yes. */
 
         if (a_len >= MIN_AUTO_EXTRA && a_len <= MAX_AUTO_EXTRA)
-          maybe_add_auto(afl, a_collect, a_len);
+          maybe_add_auto((u8 *)afl, a_collect, a_len);
 
         a_len = 0;
         prev_cksum = cksum;
@@ -2733,7 +2747,7 @@ u8 mopt_common_fuzzing(afl_state_t *afl, MOpt_globals_t MOpt_globals) {
   /* Initialize effector map for the next step (see comments below). Always
          flag first and last byte as doing something. */
 
-  eff_map = ck_alloc(EFF_ALEN(len));
+  eff_map = ck_maybe_grow(BUF_PARAMS(eff), EFF_ALEN(len));
   eff_map[0] = 1;
 
   if (EFF_APOS(len - 1) != 0) {
@@ -2772,7 +2786,7 @@ u8 mopt_common_fuzzing(afl_state_t *afl, MOpt_globals_t MOpt_globals) {
          without wasting time on checksums. */
 
       if (!afl->dumb_mode && len >= EFF_MIN_LEN)
-        cksum = hash32(afl->fsrv.trace_bits, MAP_SIZE, HASH_CONST);
+        cksum = hash32(afl->fsrv.trace_bits, afl->fsrv.map_size, HASH_CONST);
       else
         cksum = ~afl->queue_cur->exec_cksum;
 
@@ -3418,7 +3432,7 @@ skip_interest:
          map. */
 
       if ((afl->extras_cnt > MAX_DET_EXTRAS &&
-           UR(afl, afl->extras_cnt) >= MAX_DET_EXTRAS) ||
+           rand_below(afl, afl->extras_cnt) >= MAX_DET_EXTRAS) ||
           afl->extras[j].len > len - i ||
           !memcmp(afl->extras[j].data, out_buf + i, afl->extras[j].len) ||
           !memchr(eff_map + EFF_APOS(i), 1,
@@ -3457,7 +3471,7 @@ skip_interest:
 
   orig_hit_cnt = new_hit_cnt;
 
-  ex_tmp = ck_alloc(len + MAX_DICT_FILE);
+  ex_tmp = ck_maybe_grow(BUF_PARAMS(ex), len + MAX_DICT_FILE);
 
   for (i = 0; i <= len; ++i) {
 
@@ -3480,7 +3494,6 @@ skip_interest:
 
       if (common_fuzz_stuff(afl, ex_tmp, len + afl->extras[j].len)) {
 
-        ck_free(ex_tmp);
         goto abandon_entry;
 
       }
@@ -3494,8 +3507,6 @@ skip_interest:
 
   }                                                  /* for i = 0; i <= len */
 
-  ck_free(ex_tmp);
-
   new_hit_cnt = afl->queued_paths + afl->unique_crashes;
 
   afl->stage_finds[STAGE_EXTRAS_UI] += new_hit_cnt - orig_hit_cnt;
@@ -3584,9 +3595,9 @@ pacemaker_fuzzing:
 
     perf_score = orig_perf;
 
-    snprintf(afl->stage_name_buf64, 64, MOpt_globals.splice_stageformat,
-             splice_cycle);
-    afl->stage_name = afl->stage_name_buf64;
+    snprintf(afl->stage_name_buf, STAGE_BUF_SIZE,
+             MOpt_globals.splice_stageformat, splice_cycle);
+    afl->stage_name = afl->stage_name_buf;
     afl->stage_short = MOpt_globals.splice_stagenameshort;
     afl->stage_max = SPLICE_HAVOC * perf_score / afl->havoc_div / 100;
 
@@ -3605,7 +3616,8 @@ pacemaker_fuzzing:
         afl->orig_hit_cnt_puppet = afl->queued_paths + afl->unique_crashes;
         afl->last_limit_time_start = get_cur_time();
         afl->SPLICE_CYCLES_puppet =
-            (UR(afl, SPLICE_CYCLES_puppet_up - SPLICE_CYCLES_puppet_low + 1) +
+            (rand_below(
+                 afl, SPLICE_CYCLES_puppet_up - SPLICE_CYCLES_puppet_low + 1) +
              SPLICE_CYCLES_puppet_low);
 
       }
@@ -3634,9 +3646,9 @@ pacemaker_fuzzing:
       } else {
 
         perf_score = orig_perf;
-        snprintf(afl->stage_name_buf64, 64, MOpt_globals.splice_stageformat,
-                 splice_cycle);
-        afl->stage_name = afl->stage_name_buf64;
+        snprintf(afl->stage_name_buf, STAGE_BUF_SIZE,
+                 MOpt_globals.splice_stageformat, splice_cycle);
+        afl->stage_name = afl->stage_name_buf;
         afl->stage_short = MOpt_globals.splice_stagenameshort;
         afl->stage_max = SPLICE_HAVOC * perf_score / afl->havoc_div / 100;
 
@@ -3653,7 +3665,7 @@ pacemaker_fuzzing:
       for (afl->stage_cur = 0; afl->stage_cur < afl->stage_max;
            ++afl->stage_cur) {
 
-        u32 use_stacking = 1 << (1 + UR(afl, HAVOC_STACK_POW2));
+        u32 use_stacking = 1 << (1 + rand_below(afl, HAVOC_STACK_POW2));
 
         afl->stage_cur_val = use_stacking;
 
@@ -3669,13 +3681,13 @@ pacemaker_fuzzing:
 
             case 0:
               /* Flip a single bit somewhere. Spooky! */
-              FLIP_BIT(out_buf, UR(afl, temp_len << 3));
+              FLIP_BIT(out_buf, rand_below(afl, temp_len << 3));
               MOpt_globals.cycles_v2[STAGE_FLIP1] += 1;
               break;
 
             case 1:
               if (temp_len < 2) break;
-              temp_len_puppet = UR(afl, (temp_len << 3) - 1);
+              temp_len_puppet = rand_below(afl, (temp_len << 3) - 1);
               FLIP_BIT(out_buf, temp_len_puppet);
               FLIP_BIT(out_buf, temp_len_puppet + 1);
               MOpt_globals.cycles_v2[STAGE_FLIP2] += 1;
@@ -3683,7 +3695,7 @@ pacemaker_fuzzing:
 
             case 2:
               if (temp_len < 2) break;
-              temp_len_puppet = UR(afl, (temp_len << 3) - 3);
+              temp_len_puppet = rand_below(afl, (temp_len << 3) - 3);
               FLIP_BIT(out_buf, temp_len_puppet);
               FLIP_BIT(out_buf, temp_len_puppet + 1);
               FLIP_BIT(out_buf, temp_len_puppet + 2);
@@ -3693,55 +3705,57 @@ pacemaker_fuzzing:
 
             case 3:
               if (temp_len < 4) break;
-              out_buf[UR(afl, temp_len)] ^= 0xFF;
+              out_buf[rand_below(afl, temp_len)] ^= 0xFF;
               MOpt_globals.cycles_v2[STAGE_FLIP8] += 1;
               break;
 
             case 4:
               if (temp_len < 8) break;
-              *(u16 *)(out_buf + UR(afl, temp_len - 1)) ^= 0xFFFF;
+              *(u16 *)(out_buf + rand_below(afl, temp_len - 1)) ^= 0xFFFF;
               MOpt_globals.cycles_v2[STAGE_FLIP16] += 1;
               break;
 
             case 5:
               if (temp_len < 8) break;
-              *(u32 *)(out_buf + UR(afl, temp_len - 3)) ^= 0xFFFFFFFF;
+              *(u32 *)(out_buf + rand_below(afl, temp_len - 3)) ^= 0xFFFFFFFF;
               MOpt_globals.cycles_v2[STAGE_FLIP32] += 1;
               break;
 
             case 6:
-              out_buf[UR(afl, temp_len)] -= 1 + UR(afl, ARITH_MAX);
-              out_buf[UR(afl, temp_len)] += 1 + UR(afl, ARITH_MAX);
+              out_buf[rand_below(afl, temp_len)] -=
+                  1 + rand_below(afl, ARITH_MAX);
+              out_buf[rand_below(afl, temp_len)] +=
+                  1 + rand_below(afl, ARITH_MAX);
               MOpt_globals.cycles_v2[STAGE_ARITH8] += 1;
               break;
 
             case 7:
               /* Randomly subtract from word, random endian. */
               if (temp_len < 8) break;
-              if (UR(afl, 2)) {
+              if (rand_below(afl, 2)) {
 
-                u32 pos = UR(afl, temp_len - 1);
-                *(u16 *)(out_buf + pos) -= 1 + UR(afl, ARITH_MAX);
+                u32 pos = rand_below(afl, temp_len - 1);
+                *(u16 *)(out_buf + pos) -= 1 + rand_below(afl, ARITH_MAX);
 
               } else {
 
-                u32 pos = UR(afl, temp_len - 1);
-                u16 num = 1 + UR(afl, ARITH_MAX);
+                u32 pos = rand_below(afl, temp_len - 1);
+                u16 num = 1 + rand_below(afl, ARITH_MAX);
                 *(u16 *)(out_buf + pos) =
                     SWAP16(SWAP16(*(u16 *)(out_buf + pos)) - num);
 
               }
 
               /* Randomly add to word, random endian. */
-              if (UR(afl, 2)) {
+              if (rand_below(afl, 2)) {
 
-                u32 pos = UR(afl, temp_len - 1);
-                *(u16 *)(out_buf + pos) += 1 + UR(afl, ARITH_MAX);
+                u32 pos = rand_below(afl, temp_len - 1);
+                *(u16 *)(out_buf + pos) += 1 + rand_below(afl, ARITH_MAX);
 
               } else {
 
-                u32 pos = UR(afl, temp_len - 1);
-                u16 num = 1 + UR(afl, ARITH_MAX);
+                u32 pos = rand_below(afl, temp_len - 1);
+                u16 num = 1 + rand_below(afl, ARITH_MAX);
                 *(u16 *)(out_buf + pos) =
                     SWAP16(SWAP16(*(u16 *)(out_buf + pos)) + num);
 
@@ -3753,15 +3767,15 @@ pacemaker_fuzzing:
             case 8:
               /* Randomly subtract from dword, random endian. */
               if (temp_len < 8) break;
-              if (UR(afl, 2)) {
+              if (rand_below(afl, 2)) {
 
-                u32 pos = UR(afl, temp_len - 3);
-                *(u32 *)(out_buf + pos) -= 1 + UR(afl, ARITH_MAX);
+                u32 pos = rand_below(afl, temp_len - 3);
+                *(u32 *)(out_buf + pos) -= 1 + rand_below(afl, ARITH_MAX);
 
               } else {
 
-                u32 pos = UR(afl, temp_len - 3);
-                u32 num = 1 + UR(afl, ARITH_MAX);
+                u32 pos = rand_below(afl, temp_len - 3);
+                u32 num = 1 + rand_below(afl, ARITH_MAX);
                 *(u32 *)(out_buf + pos) =
                     SWAP32(SWAP32(*(u32 *)(out_buf + pos)) - num);
 
@@ -3769,15 +3783,15 @@ pacemaker_fuzzing:
 
               /* Randomly add to dword, random endian. */
               // if (temp_len < 4) break;
-              if (UR(afl, 2)) {
+              if (rand_below(afl, 2)) {
 
-                u32 pos = UR(afl, temp_len - 3);
-                *(u32 *)(out_buf + pos) += 1 + UR(afl, ARITH_MAX);
+                u32 pos = rand_below(afl, temp_len - 3);
+                *(u32 *)(out_buf + pos) += 1 + rand_below(afl, ARITH_MAX);
 
               } else {
 
-                u32 pos = UR(afl, temp_len - 3);
-                u32 num = 1 + UR(afl, ARITH_MAX);
+                u32 pos = rand_below(afl, temp_len - 3);
+                u32 num = 1 + rand_below(afl, ARITH_MAX);
                 *(u32 *)(out_buf + pos) =
                     SWAP32(SWAP32(*(u32 *)(out_buf + pos)) + num);
 
@@ -3789,23 +3803,25 @@ pacemaker_fuzzing:
             case 9:
               /* Set byte to interesting value. */
               if (temp_len < 4) break;
-              out_buf[UR(afl, temp_len)] =
-                  interesting_8[UR(afl, sizeof(interesting_8))];
+              out_buf[rand_below(afl, temp_len)] =
+                  interesting_8[rand_below(afl, sizeof(interesting_8))];
               MOpt_globals.cycles_v2[STAGE_INTEREST8] += 1;
               break;
 
             case 10:
               /* Set word to interesting value, randomly choosing endian. */
               if (temp_len < 8) break;
-              if (UR(afl, 2)) {
+              if (rand_below(afl, 2)) {
 
-                *(u16 *)(out_buf + UR(afl, temp_len - 1)) =
-                    interesting_16[UR(afl, sizeof(interesting_16) >> 1)];
+                *(u16 *)(out_buf + rand_below(afl, temp_len - 1)) =
+                    interesting_16[rand_below(afl,
+                                              sizeof(interesting_16) >> 1)];
 
               } else {
 
-                *(u16 *)(out_buf + UR(afl, temp_len - 1)) = SWAP16(
-                    interesting_16[UR(afl, sizeof(interesting_16) >> 1)]);
+                *(u16 *)(out_buf + rand_below(afl, temp_len - 1)) =
+                    SWAP16(interesting_16[rand_below(
+                        afl, sizeof(interesting_16) >> 1)]);
 
               }
 
@@ -3817,15 +3833,17 @@ pacemaker_fuzzing:
 
               if (temp_len < 8) break;
 
-              if (UR(afl, 2)) {
+              if (rand_below(afl, 2)) {
 
-                *(u32 *)(out_buf + UR(afl, temp_len - 3)) =
-                    interesting_32[UR(afl, sizeof(interesting_32) >> 2)];
+                *(u32 *)(out_buf + rand_below(afl, temp_len - 3)) =
+                    interesting_32[rand_below(afl,
+                                              sizeof(interesting_32) >> 2)];
 
               } else {
 
-                *(u32 *)(out_buf + UR(afl, temp_len - 3)) = SWAP32(
-                    interesting_32[UR(afl, sizeof(interesting_32) >> 2)]);
+                *(u32 *)(out_buf + rand_below(afl, temp_len - 3)) =
+                    SWAP32(interesting_32[rand_below(
+                        afl, sizeof(interesting_32) >> 2)]);
 
               }
 
@@ -3838,7 +3856,7 @@ pacemaker_fuzzing:
                  why not. We use XOR with 1-255 to eliminate the
                  possibility of a no-op. */
 
-              out_buf[UR(afl, temp_len)] ^= 1 + UR(afl, 255);
+              out_buf[rand_below(afl, temp_len)] ^= 1 + rand_below(afl, 255);
               MOpt_globals.cycles_v2[STAGE_RANDOMBYTE] += 1;
               break;
 
@@ -3856,7 +3874,7 @@ pacemaker_fuzzing:
 
               del_len = choose_block_len(afl, temp_len - 1);
 
-              del_from = UR(afl, temp_len - del_len + 1);
+              del_from = rand_below(afl, temp_len - del_len + 1);
 
               memmove(out_buf + del_from, out_buf + del_from + del_len,
                       temp_len - del_from - del_len);
@@ -3874,14 +3892,14 @@ pacemaker_fuzzing:
                 /* Clone bytes (75%) or insert a block of constant bytes (25%).
                  */
 
-                u8  actually_clone = UR(afl, 4);
+                u8  actually_clone = rand_below(afl, 4);
                 u32 clone_from, clone_to, clone_len;
                 u8 *new_buf;
 
                 if (actually_clone) {
 
                   clone_len = choose_block_len(afl, temp_len);
-                  clone_from = UR(afl, temp_len - clone_len + 1);
+                  clone_from = rand_below(afl, temp_len - clone_len + 1);
 
                 } else {
 
@@ -3890,9 +3908,10 @@ pacemaker_fuzzing:
 
                 }
 
-                clone_to = UR(afl, temp_len);
+                clone_to = rand_below(afl, temp_len);
 
-                new_buf = ck_alloc_nozero(temp_len + clone_len);
+                new_buf = ck_maybe_grow(BUF_PARAMS(out_scratch),
+                                        temp_len + clone_len);
 
                 /* Head */
 
@@ -3904,14 +3923,16 @@ pacemaker_fuzzing:
                   memcpy(new_buf + clone_to, out_buf + clone_from, clone_len);
                 else
                   memset(new_buf + clone_to,
-                         UR(afl, 2) ? UR(afl, 256) : out_buf[UR(afl, temp_len)],
+                         rand_below(afl, 2)
+                             ? rand_below(afl, 256)
+                             : out_buf[rand_below(afl, temp_len)],
                          clone_len);
 
                 /* Tail */
                 memcpy(new_buf + clone_to + clone_len, out_buf + clone_to,
                        temp_len - clone_to);
 
-                ck_free(out_buf);
+                swap_bufs(BUF_PARAMS(out), BUF_PARAMS(out_scratch));
                 out_buf = new_buf;
                 temp_len += clone_len;
                 MOpt_globals.cycles_v2[STAGE_Clone75] += 1;
@@ -3931,10 +3952,10 @@ pacemaker_fuzzing:
 
               copy_len = choose_block_len(afl, temp_len - 1);
 
-              copy_from = UR(afl, temp_len - copy_len + 1);
-              copy_to = UR(afl, temp_len - copy_len + 1);
+              copy_from = rand_below(afl, temp_len - copy_len + 1);
+              copy_to = rand_below(afl, temp_len - copy_len + 1);
 
-              if (UR(afl, 4)) {
+              if (rand_below(afl, 4)) {
 
                 if (copy_from != copy_to)
                   memmove(out_buf + copy_to, out_buf + copy_from, copy_len);
@@ -3942,7 +3963,8 @@ pacemaker_fuzzing:
               } else
 
                 memset(out_buf + copy_to,
-                       UR(afl, 2) ? UR(afl, 256) : out_buf[UR(afl, temp_len)],
+                       rand_below(afl, 2) ? rand_below(afl, 256)
+                                          : out_buf[rand_below(afl, temp_len)],
                        copy_len);
               MOpt_globals.cycles_v2[STAGE_OverWrite75] += 1;
               break;
@@ -3963,7 +3985,7 @@ pacemaker_fuzzing:
         /* out_buf might have been mangled a bit, so let's restore it to its
            original size and shape. */
 
-        if (temp_len < len) out_buf = ck_realloc(out_buf, len);
+        out_buf = ck_maybe_grow(BUF_PARAMS(out), len);
         temp_len = len;
         memcpy(out_buf, in_buf, len);
 
@@ -4041,7 +4063,6 @@ pacemaker_fuzzing:
 
         if (in_buf != orig_in) {
 
-          ck_free(in_buf);
           in_buf = orig_in;
           len = afl->queue_cur->len;
 
@@ -4052,7 +4073,7 @@ pacemaker_fuzzing:
 
         do {
 
-          tid = UR(afl, afl->queued_paths);
+          tid = rand_below(afl, afl->queued_paths);
 
         } while (tid == afl->current_entry);
 
@@ -4086,7 +4107,7 @@ pacemaker_fuzzing:
 
         if (fd < 0) PFATAL("Unable to open '%s'", target->fname);
 
-        new_buf = ck_alloc_nozero(target->len);
+        new_buf = ck_maybe_grow(BUF_PARAMS(in_scratch), target->len);
 
         ck_read(fd, new_buf, target->len, target->fname);
 
@@ -4100,22 +4121,21 @@ pacemaker_fuzzing:
 
         if (f_diff < 0 || l_diff < 2 || f_diff == l_diff) {
 
-          ck_free(new_buf);
           goto retry_splicing_puppet;
 
         }
 
         /* Split somewhere between the first and last differing byte. */
 
-        split_at = f_diff + UR(afl, l_diff - f_diff);
+        split_at = f_diff + rand_below(afl, l_diff - f_diff);
 
         /* Do the thing. */
 
         len = target->len;
         memcpy(new_buf, in_buf, split_at);
+        swap_bufs(BUF_PARAMS(in), BUF_PARAMS(in_scratch));
         in_buf = new_buf;
-        ck_free(out_buf);
-        out_buf = ck_alloc_nozero(len);
+        out_buf = ck_maybe_grow(BUF_PARAMS(out), len);
         memcpy(out_buf, in_buf, len);
 
         goto havoc_stage_puppet;
@@ -4131,7 +4151,8 @@ pacemaker_fuzzing:
 
       if (splice_cycle >= afl->SPLICE_CYCLES_puppet)
         afl->SPLICE_CYCLES_puppet =
-            (UR(afl, SPLICE_CYCLES_puppet_up - SPLICE_CYCLES_puppet_low + 1) +
+            (rand_below(
+                 afl, SPLICE_CYCLES_puppet_up - SPLICE_CYCLES_puppet_low + 1) +
              SPLICE_CYCLES_puppet_low);
 
       afl->splicing_with = -1;
@@ -4149,10 +4170,6 @@ pacemaker_fuzzing:
 
       munmap(orig_in, afl->queue_cur->len);
 
-      if (in_buf != orig_in) ck_free(in_buf);
-      ck_free(out_buf);
-      ck_free(eff_map);
-
       if (afl->key_puppet == 1) {
 
         if (unlikely(
@@ -4374,18 +4391,13 @@ u8 fuzz_one(afl_state_t *afl) {
   int key_val_lv = 0;
 
 #ifdef _AFL_DOCUMENT_MUTATIONS
-  if (afl->do_document == 0) {
 
-    char *fn = alloc_printf("%s/mutations", afl->out_dir);
-    if (fn) {
-
-      afl->do_document = mkdir(fn, 0700);  // if it exists we do not care
-      afl->do_document = 1;
-      ck_free(fn);
-
-    } else
+  u8 path_buf[PATH_MAX];
+  if (afl->do_document == 0) {
 
-      PFATAL("malloc()");
+    snprintf(path_buf, PATH_MAX, "%s/mutations", afl->out_dir);
+    afl->do_document = mkdir(path_buf, 0700);  // if it exists we do not care
+    afl->do_document = 1;
 
   } else {
 
@@ -4413,5 +4425,7 @@ u8 fuzz_one(afl_state_t *afl) {
 
   return key_val_lv;
 
+#undef BUF_PARAMS
+
 }
 
diff --git a/src/afl-fuzz-python.c b/src/afl-fuzz-python.c
index 595c1ed0..12c3a09d 100644
--- a/src/afl-fuzz-python.c
+++ b/src/afl-fuzz-python.c
@@ -28,9 +28,93 @@
 /* Python stuff */
 #ifdef USE_PYTHON
 
-int init_py_module(afl_state_t *afl, u8 *module_name) {
+static void *unsupported(afl_state_t *afl, unsigned int seed) {
 
-  if (!module_name) return 1;
+  FATAL("Python Mutator cannot be called twice yet");
+  return NULL;
+
+}
+
+/* Sorry for this macro...
+It just fills in `&py_mutator->something_buf, &py_mutator->something_size`. */
+#define BUF_PARAMS(name)                              \
+  (void **)&((py_mutator_t *)py_mutator)->name##_buf, \
+      &((py_mutator_t *)py_mutator)->name##_size
+
+size_t fuzz_py(void *py_mutator, u8 *buf, size_t buf_size, u8 **out_buf,
+               u8 *add_buf, size_t add_buf_size, size_t max_size) {
+
+  size_t    mutated_size;
+  PyObject *py_args, *py_value;
+  py_args = PyTuple_New(3);
+  py_mutator_t *py = (py_mutator_t *)py_mutator;
+
+  /* buf */
+  py_value = PyByteArray_FromStringAndSize(buf, buf_size);
+  if (!py_value) {
+
+    Py_DECREF(py_args);
+    FATAL("Failed to convert arguments");
+
+  }
+
+  PyTuple_SetItem(py_args, 0, py_value);
+
+  /* add_buf */
+  py_value = PyByteArray_FromStringAndSize(add_buf, add_buf_size);
+  if (!py_value) {
+
+    Py_DECREF(py_args);
+    FATAL("Failed to convert arguments");
+
+  }
+
+  PyTuple_SetItem(py_args, 1, py_value);
+
+  /* max_size */
+#if PY_MAJOR_VERSION >= 3
+  py_value = PyLong_FromLong(max_size);
+#else
+  py_value = PyInt_FromLong(max_size);
+#endif
+  if (!py_value) {
+
+    Py_DECREF(py_args);
+    FATAL("Failed to convert arguments");
+
+  }
+
+  PyTuple_SetItem(py_args, 2, py_value);
+
+  py_value = PyObject_CallObject(py->py_functions[PY_FUNC_FUZZ], py_args);
+
+  Py_DECREF(py_args);
+
+  if (py_value != NULL) {
+
+    mutated_size = PyByteArray_Size(py_value);
+
+    *out_buf = ck_maybe_grow(BUF_PARAMS(fuzz), mutated_size);
+
+    memcpy(*out_buf, PyByteArray_AsString(py_value), mutated_size);
+    Py_DECREF(py_value);
+    return mutated_size;
+
+  } else {
+
+    PyErr_Print();
+    FATAL("python custom fuzz: call failed");
+
+  }
+
+}
+
+static py_mutator_t *init_py_module(afl_state_t *afl, u8 *module_name) {
+
+  if (!module_name) return NULL;
+
+  py_mutator_t *py = calloc(1, sizeof(py_mutator_t));
+  if (!py) PFATAL("Could not allocate memory for python mutator!");
 
   Py_Initialize();
 
@@ -40,17 +124,18 @@ int init_py_module(afl_state_t *afl, u8 *module_name) {
   PyObject *py_name = PyString_FromString(module_name);
 #endif
 
-  afl->py_module = PyImport_Import(py_name);
+  py->py_module = PyImport_Import(py_name);
   Py_DECREF(py_name);
 
-  PyObject * py_module = afl->py_module;
-  PyObject **py_functions = afl->py_functions;
+  PyObject * py_module = py->py_module;
+  PyObject **py_functions = py->py_functions;
 
-  if (afl->py_module != NULL) {
+  if (py_module != NULL) {
 
     u8 py_notrim = 0, py_idx;
-    py_functions[PY_FUNC_INIT] = PyObject_GetAttrString(afl->py_module, "init");
-    py_functions[PY_FUNC_FUZZ] = PyObject_GetAttrString(afl->py_module, "fuzz");
+    /* init, required */
+    py_functions[PY_FUNC_INIT] = PyObject_GetAttrString(py_module, "init");
+    py_functions[PY_FUNC_FUZZ] = PyObject_GetAttrString(py_module, "fuzz");
     py_functions[PY_FUNC_PRE_SAVE] =
         PyObject_GetAttrString(py_module, "pre_save");
     py_functions[PY_FUNC_INIT_TRIM] =
@@ -66,6 +151,7 @@ int init_py_module(afl_state_t *afl, u8 *module_name) {
         PyObject_GetAttrString(py_module, "queue_get");
     py_functions[PY_FUNC_QUEUE_NEW_ENTRY] =
         PyObject_GetAttrString(py_module, "queue_new_entry");
+    py_functions[PY_FUNC_DEINIT] = PyObject_GetAttrString(py_module, "deinit");
 
     for (py_idx = 0; py_idx < PY_FUNC_COUNT; ++py_idx) {
 
@@ -96,7 +182,7 @@ int init_py_module(afl_state_t *afl, u8 *module_name) {
                   "Cannot find/call function with index %d in external "
                   "Python module.\n",
                   py_idx);
-          return 1;
+          return NULL;
 
         }
 
@@ -119,23 +205,27 @@ int init_py_module(afl_state_t *afl, u8 *module_name) {
 
     PyErr_Print();
     fprintf(stderr, "Failed to load \"%s\"\n", module_name);
-    return 1;
+    return NULL;
 
   }
 
-  return 0;
+  return py;
 
 }
 
-void finalize_py_module(afl_state_t *afl) {
+void finalize_py_module(void *py_mutator) {
 
-  if (afl->py_module != NULL) {
+  py_mutator_t *py = (py_mutator_t *)py_mutator;
+
+  if (py->py_module != NULL) {
+
+    deinit_py(py_mutator);
 
     u32 i;
     for (i = 0; i < PY_FUNC_COUNT; ++i)
-      Py_XDECREF(afl->py_functions[i]);
+      Py_XDECREF(py->py_functions[i]);
 
-    Py_DECREF(afl->py_module);
+    Py_DECREF(py->py_module);
 
   }
 
@@ -143,7 +233,8 @@ void finalize_py_module(afl_state_t *afl) {
 
 }
 
-void init_py(afl_state_t *afl, unsigned int seed) {
+static void init_py(afl_state_t *afl, py_mutator_t *py_mutator,
+                    unsigned int seed) {
 
   PyObject *py_args, *py_value;
 
@@ -158,14 +249,14 @@ void init_py(afl_state_t *afl, unsigned int seed) {
   if (!py_value) {
 
     Py_DECREF(py_args);
-    fprintf(stderr, "Cannot convert argument\n");
-    return;
+    FATAL("Cannot convert argument in python init.");
 
   }
 
   PyTuple_SetItem(py_args, 0, py_value);
 
-  py_value = PyObject_CallObject(afl->py_functions[PY_FUNC_INIT], py_args);
+  py_value =
+      PyObject_CallObject(py_mutator->py_functions[PY_FUNC_INIT], py_args);
 
   Py_DECREF(py_args);
 
@@ -173,115 +264,133 @@ void init_py(afl_state_t *afl, unsigned int seed) {
 
     PyErr_Print();
     fprintf(stderr, "Call failed\n");
-    return;
+    FATAL("Custom py mutator INIT failed.");
 
   }
 
 }
 
-size_t fuzz_py(afl_state_t *afl, u8 **buf, size_t buf_size, u8 *add_buf,
-               size_t add_buf_size, size_t max_size) {
+void deinit_py(void *py_mutator) {
 
-  size_t    mutated_size;
   PyObject *py_args, *py_value;
-  py_args = PyTuple_New(3);
 
-  /* buf */
-  py_value = PyByteArray_FromStringAndSize(*buf, buf_size);
-  if (!py_value) {
+  py_args = PyTuple_New(0);
+  py_value = PyObject_CallObject(
+      ((py_mutator_t *)py_mutator)->py_functions[PY_FUNC_DEINIT], py_args);
+  Py_DECREF(py_args);
 
-    Py_DECREF(py_args);
-    FATAL("Failed to convert arguments");
+  if (py_value != NULL) {
+
+    Py_DECREF(py_value);
+
+  } else {
+
+    PyErr_Print();
+    FATAL("Call failed");
 
   }
 
-  PyTuple_SetItem(py_args, 0, py_value);
+}
 
-  /* add_buf */
-  py_value = PyByteArray_FromStringAndSize(add_buf, add_buf_size);
-  if (!py_value) {
+void load_custom_mutator_py(afl_state_t *afl, char *module_name) {
 
-    Py_DECREF(py_args);
-    FATAL("Failed to convert arguments");
+  afl->mutator = ck_alloc(sizeof(struct custom_mutator));
+  afl->mutator->pre_save_buf = NULL;
+  afl->mutator->pre_save_size = 0;
 
-  }
+  afl->mutator->name = module_name;
+  ACTF("Loading Python mutator library from '%s'...", module_name);
 
-  PyTuple_SetItem(py_args, 1, py_value);
+  py_mutator_t *py_mutator;
+  py_mutator = init_py_module(afl, module_name);
+  afl->mutator->data = py_mutator;
+  if (!py_mutator) { FATAL("Failed to load python mutator."); }
 
-  /* max_size */
-#if PY_MAJOR_VERSION >= 3
-  py_value = PyLong_FromLong(max_size);
-#else
-  py_value = PyInt_FromLong(max_size);
-#endif
-  if (!py_value) {
+  PyObject **py_functions = py_mutator->py_functions;
 
-    Py_DECREF(py_args);
-    FATAL("Failed to convert arguments");
+  if (py_functions[PY_FUNC_INIT]) afl->mutator->afl_custom_init = unsupported;
 
-  }
+  if (py_functions[PY_FUNC_DEINIT]) afl->mutator->afl_custom_deinit = deinit_py;
 
-  PyTuple_SetItem(py_args, 2, py_value);
+  /* "afl_custom_fuzz" should not be NULL, but the interface of the Python
+     mutator is quite different from that of the custom mutator. */
+  afl->mutator->afl_custom_fuzz = fuzz_py;
 
-  py_value = PyObject_CallObject(afl->py_functions[PY_FUNC_FUZZ], py_args);
+  if (py_functions[PY_FUNC_PRE_SAVE])
+    afl->mutator->afl_custom_pre_save = pre_save_py;
 
-  Py_DECREF(py_args);
+  if (py_functions[PY_FUNC_INIT_TRIM])
+    afl->mutator->afl_custom_init_trim = init_trim_py;
 
-  if (py_value != NULL) {
+  if (py_functions[PY_FUNC_POST_TRIM])
+    afl->mutator->afl_custom_post_trim = post_trim_py;
 
-    mutated_size = PyByteArray_Size(py_value);
-    if (buf_size < mutated_size) *buf = ck_realloc(*buf, mutated_size);
+  if (py_functions[PY_FUNC_TRIM]) afl->mutator->afl_custom_trim = trim_py;
 
-    memcpy(*buf, PyByteArray_AsString(py_value), mutated_size);
-    Py_DECREF(py_value);
-    return mutated_size;
+  if (py_functions[PY_FUNC_HAVOC_MUTATION])
+    afl->mutator->afl_custom_havoc_mutation = havoc_mutation_py;
 
-  } else {
+  if (py_functions[PY_FUNC_HAVOC_MUTATION_PROBABILITY])
+    afl->mutator->afl_custom_havoc_mutation_probability =
+        havoc_mutation_probability_py;
 
-    PyErr_Print();
-    FATAL("Call failed");
+  if (py_functions[PY_FUNC_QUEUE_GET])
+    afl->mutator->afl_custom_queue_get = queue_get_py;
 
-  }
+  if (py_functions[PY_FUNC_QUEUE_NEW_ENTRY])
+    afl->mutator->afl_custom_queue_new_entry = queue_new_entry_py;
+
+  OKF("Python mutator '%s' installed successfully.", module_name);
+
+  /* Initialize the custom mutator */
+  init_py(afl, py_mutator, rand_below(afl, 0xFFFFFFFF));
 
 }
 
-size_t pre_save_py(afl_state_t *afl, u8 *buf, size_t buf_size, u8 **out_buf) {
+size_t pre_save_py(void *py_mutator, u8 *buf, size_t buf_size, u8 **out_buf) {
+
+  size_t        py_out_buf_size;
+  PyObject *    py_args, *py_value;
+  py_mutator_t *py = (py_mutator_t *)py_mutator;
 
-  size_t    out_buf_size;
-  PyObject *py_args, *py_value;
   py_args = PyTuple_New(1);
   py_value = PyByteArray_FromStringAndSize(buf, buf_size);
   if (!py_value) {
 
     Py_DECREF(py_args);
-    FATAL("Failed to convert arguments");
+    FATAL("Failed to convert arguments in custom pre_save");
 
   }
 
   PyTuple_SetItem(py_args, 0, py_value);
 
-  py_value = PyObject_CallObject(afl->py_functions[PY_FUNC_PRE_SAVE], py_args);
+  py_value = PyObject_CallObject(
+      ((py_mutator_t *)py_mutator)->py_functions[PY_FUNC_PRE_SAVE], py_args);
 
   Py_DECREF(py_args);
 
   if (py_value != NULL) {
 
-    out_buf_size = PyByteArray_Size(py_value);
-    *out_buf = malloc(out_buf_size);
-    memcpy(*out_buf, PyByteArray_AsString(py_value), out_buf_size);
+    py_out_buf_size = PyByteArray_Size(py_value);
+
+    ck_maybe_grow(BUF_PARAMS(pre_save), py_out_buf_size);
+
+    memcpy(py->pre_save_buf, PyByteArray_AsString(py_value), py_out_buf_size);
     Py_DECREF(py_value);
-    return out_buf_size;
+
+    *out_buf = py->pre_save_buf;
+    return py_out_buf_size;
 
   } else {
 
     PyErr_Print();
-    FATAL("Call failed");
+    FATAL("Python custom mutator: pre_save call failed.");
 
   }
 
 }
 
-u32 init_trim_py(afl_state_t *afl, u8 *buf, size_t buf_size) {
+s32 init_trim_py(void *py_mutator, u8 *buf, size_t buf_size) {
 
   PyObject *py_args, *py_value;
 
@@ -296,7 +405,8 @@ u32 init_trim_py(afl_state_t *afl, u8 *buf, size_t buf_size) {
 
   PyTuple_SetItem(py_args, 0, py_value);
 
-  py_value = PyObject_CallObject(afl->py_functions[PY_FUNC_INIT_TRIM], py_args);
+  py_value = PyObject_CallObject(
+      ((py_mutator_t *)py_mutator)->py_functions[PY_FUNC_INIT_TRIM], py_args);
   Py_DECREF(py_args);
 
   if (py_value != NULL) {
@@ -318,7 +428,7 @@ u32 init_trim_py(afl_state_t *afl, u8 *buf, size_t buf_size) {
 
 }
 
-u32 post_trim_py(afl_state_t *afl, u8 success) {
+s32 post_trim_py(void *py_mutator, u8 success) {
 
   PyObject *py_args, *py_value;
 
@@ -334,7 +444,8 @@ u32 post_trim_py(afl_state_t *afl, u8 success) {
 
   PyTuple_SetItem(py_args, 0, py_value);
 
-  py_value = PyObject_CallObject(afl->py_functions[PY_FUNC_POST_TRIM], py_args);
+  py_value = PyObject_CallObject(
+      ((py_mutator_t *)py_mutator)->py_functions[PY_FUNC_POST_TRIM], py_args);
   Py_DECREF(py_args);
 
   if (py_value != NULL) {
@@ -356,19 +467,21 @@ u32 post_trim_py(afl_state_t *afl, u8 success) {
 
 }
 
-void trim_py(afl_state_t *afl, u8 **out_buf, size_t *out_buf_size) {
+size_t trim_py(void *py_mutator, u8 **out_buf) {
 
   PyObject *py_args, *py_value;
+  size_t    ret;
 
   py_args = PyTuple_New(0);
-  py_value = PyObject_CallObject(afl->py_functions[PY_FUNC_TRIM], py_args);
+  py_value = PyObject_CallObject(
+      ((py_mutator_t *)py_mutator)->py_functions[PY_FUNC_TRIM], py_args);
   Py_DECREF(py_args);
 
   if (py_value != NULL) {
 
-    *out_buf_size = PyByteArray_Size(py_value);
-    *out_buf = malloc(*out_buf_size);
-    memcpy(*out_buf, PyByteArray_AsString(py_value), *out_buf_size);
+    ret = PyByteArray_Size(py_value);
+    *out_buf = ck_maybe_grow(BUF_PARAMS(trim), ret);
+    memcpy(*out_buf, PyByteArray_AsString(py_value), ret);
     Py_DECREF(py_value);
 
   } else {
@@ -378,17 +491,19 @@ void trim_py(afl_state_t *afl, u8 **out_buf, size_t *out_buf_size) {
 
   }
 
+  return ret;
+
 }
 
-size_t havoc_mutation_py(afl_state_t *afl, u8 **buf, size_t buf_size,
-                         size_t max_size) {
+size_t havoc_mutation_py(void *py_mutator, u8 *buf, size_t buf_size,
+                         u8 **out_buf, size_t max_size) {
 
   size_t    mutated_size;
   PyObject *py_args, *py_value;
   py_args = PyTuple_New(2);
 
   /* buf */
-  py_value = PyByteArray_FromStringAndSize(*buf, buf_size);
+  py_value = PyByteArray_FromStringAndSize(buf, buf_size);
   if (!py_value) {
 
     Py_DECREF(py_args);
@@ -413,17 +528,28 @@ size_t havoc_mutation_py(afl_state_t *afl, u8 **buf, size_t buf_size,
 
   PyTuple_SetItem(py_args, 1, py_value);
 
-  py_value =
-      PyObject_CallObject(afl->py_functions[PY_FUNC_HAVOC_MUTATION], py_args);
+  py_value = PyObject_CallObject(
+      ((py_mutator_t *)py_mutator)->py_functions[PY_FUNC_HAVOC_MUTATION],
+      py_args);
 
   Py_DECREF(py_args);
 
   if (py_value != NULL) {
 
     mutated_size = PyByteArray_Size(py_value);
-    if (buf_size < mutated_size) *buf = ck_realloc(*buf, mutated_size);
+    if (mutated_size <= buf_size) {
+
+      /* We reuse the input buf here. */
+      *out_buf = buf;
 
-    memcpy(*buf, PyByteArray_AsString(py_value), mutated_size);
+    } else {
+
+      /* A new buf is needed... */
+      *out_buf = ck_maybe_grow(BUF_PARAMS(havoc), mutated_size);
+
+    }
+
+    memcpy(*out_buf, PyByteArray_AsString(py_value), mutated_size);
 
     Py_DECREF(py_value);
     return mutated_size;
@@ -437,13 +563,15 @@ size_t havoc_mutation_py(afl_state_t *afl, u8 **buf, size_t buf_size,
 
 }
 
-u8 havoc_mutation_probability_py(afl_state_t *afl) {
+u8 havoc_mutation_probability_py(void *py_mutator) {
 
   PyObject *py_args, *py_value;
 
   py_args = PyTuple_New(0);
   py_value = PyObject_CallObject(
-      afl->py_functions[PY_FUNC_HAVOC_MUTATION_PROBABILITY], py_args);
+      ((py_mutator_t *)py_mutator)
+          ->py_functions[PY_FUNC_HAVOC_MUTATION_PROBABILITY],
+      py_args);
   Py_DECREF(py_args);
 
   if (py_value != NULL) {
@@ -461,7 +589,7 @@ u8 havoc_mutation_probability_py(afl_state_t *afl) {
 
 }
 
-u8 queue_get_py(afl_state_t *afl, const u8 *filename) {
+u8 queue_get_py(void *py_mutator, const u8 *filename) {
 
   PyObject *py_args, *py_value;
 
@@ -483,7 +611,8 @@ u8 queue_get_py(afl_state_t *afl, const u8 *filename) {
   PyTuple_SetItem(py_args, 0, py_value);
 
   // Call Python function
-  py_value = PyObject_CallObject(afl->py_functions[PY_FUNC_QUEUE_GET], py_args);
+  py_value = PyObject_CallObject(
+      ((py_mutator_t *)py_mutator)->py_functions[PY_FUNC_QUEUE_GET], py_args);
   Py_DECREF(py_args);
 
   if (py_value != NULL) {
@@ -509,7 +638,7 @@ u8 queue_get_py(afl_state_t *afl, const u8 *filename) {
 
 }
 
-void queue_new_entry_py(afl_state_t *afl, const u8 *filename_new_queue,
+void queue_new_entry_py(void *py_mutator, const u8 *filename_new_queue,
                         const u8 *filename_orig_queue) {
 
   PyObject *py_args, *py_value;
@@ -552,8 +681,9 @@ void queue_new_entry_py(afl_state_t *afl, const u8 *filename_new_queue,
   PyTuple_SetItem(py_args, 1, py_value);
 
   // Call
-  py_value =
-      PyObject_CallObject(afl->py_functions[PY_FUNC_QUEUE_NEW_ENTRY], py_args);
+  py_value = PyObject_CallObject(
+      ((py_mutator_t *)py_mutator)->py_functions[PY_FUNC_QUEUE_NEW_ENTRY],
+      py_args);
   Py_DECREF(py_args);
 
   if (py_value == NULL) {
@@ -565,5 +695,7 @@ void queue_new_entry_py(afl_state_t *afl, const u8 *filename_new_queue,
 
 }
 
+#undef BUF_PARAMS
+
 #endif                                                        /* USE_PYTHON */
 
diff --git a/src/afl-fuzz-queue.c b/src/afl-fuzz-queue.c
index cfeab798..346c2639 100644
--- a/src/afl-fuzz-queue.c
+++ b/src/afl-fuzz-queue.c
@@ -23,6 +23,7 @@
  */
 
 #include "afl-fuzz.h"
+#include <limits.h>
 
 /* Mark deterministic checks as done for a particular queue entry. We use the
    .state file to avoid repeating deterministic fuzzing when resuming aborted
@@ -30,18 +31,16 @@
 
 void mark_as_det_done(afl_state_t *afl, struct queue_entry *q) {
 
-  u8 *fn = strrchr(q->fname, '/');
+  u8  fn[PATH_MAX];
   s32 fd;
 
-  fn = alloc_printf("%s/queue/.state/deterministic_done/%s", afl->out_dir,
-                    fn + 1);
+  snprintf(fn, PATH_MAX, "%s/queue/.state/deterministic_done/%s", afl->out_dir,
+           strrchr(q->fname, '/') + 1);
 
   fd = open(fn, O_WRONLY | O_CREAT | O_EXCL, 0600);
   if (fd < 0) PFATAL("Unable to create '%s'", fn);
   close(fd);
 
-  ck_free(fn);
-
   q->passed_det = 1;
 
 }
@@ -51,10 +50,13 @@ void mark_as_det_done(afl_state_t *afl, struct queue_entry *q) {
 
 void mark_as_variable(afl_state_t *afl, struct queue_entry *q) {
 
-  u8 *fn = strrchr(q->fname, '/') + 1, *ldest;
+  u8  fn[PATH_MAX];
+  u8  ldest[PATH_MAX];
+  u8 *fn_name = strrchr(q->fname, '/') + 1;
 
-  ldest = alloc_printf("../../%s", fn);
-  fn = alloc_printf("%s/queue/.state/variable_behavior/%s", afl->out_dir, fn);
+  snprintf(ldest, PATH_MAX, "../../%s", fn_name);
+  snprintf(fn, PATH_MAX, "%s/queue/.state/variable_behavior/%s", afl->out_dir,
+           fn_name);
 
   if (symlink(ldest, fn)) {
 
@@ -64,9 +66,6 @@ void mark_as_variable(afl_state_t *afl, struct queue_entry *q) {
 
   }
 
-  ck_free(ldest);
-  ck_free(fn);
-
   q->var_behavior = 1;
 
 }
@@ -76,14 +75,14 @@ void mark_as_variable(afl_state_t *afl, struct queue_entry *q) {
 
 void mark_as_redundant(afl_state_t *afl, struct queue_entry *q, u8 state) {
 
-  u8 *fn;
+  u8 fn[PATH_MAX];
 
   if (state == q->fs_redundant) return;
 
   q->fs_redundant = state;
 
-  fn = strrchr(q->fname, '/');
-  fn = alloc_printf("%s/queue/.state/redundant_edges/%s", afl->out_dir, fn + 1);
+  snprintf(fn, PATH_MAX, "%s/queue/.state/redundant_edges/%s", afl->out_dir,
+           strrchr(q->fname, '/') + 1);
 
   if (state) {
 
@@ -99,8 +98,6 @@ void mark_as_redundant(afl_state_t *afl, struct queue_entry *q, u8 state) {
 
   }
 
-  ck_free(fn);
-
 }
 
 /* Append new test case to the queue. */
@@ -114,6 +111,7 @@ void add_to_queue(afl_state_t *afl, u8 *fname, u32 len, u8 passed_det) {
   q->depth = afl->cur_depth + 1;
   q->passed_det = passed_det;
   q->n_fuzz = 1;
+  q->trace_mini = NULL;
 
   if (q->depth > afl->max_depth) afl->max_depth = q->depth;
 
@@ -147,7 +145,8 @@ void add_to_queue(afl_state_t *afl, u8 *fname, u32 len, u8 passed_det) {
     /* At the initialization stage, queue_cur is NULL */
     if (afl->queue_cur) fname_orig = afl->queue_cur->fname;
 
-    afl->mutator->afl_custom_queue_new_entry(afl, fname, fname_orig);
+    afl->mutator->afl_custom_queue_new_entry(afl->mutator->data, fname,
+                                             fname_orig);
 
   }
 
@@ -185,35 +184,50 @@ void destroy_queue(afl_state_t *afl) {
 void update_bitmap_score(afl_state_t *afl, struct queue_entry *q) {
 
   u32 i;
-  u64 fav_factor = q->exec_us * q->len;
-  u64 fuzz_p2 = next_p2(q->n_fuzz);
+  u64 fav_factor;
+  u64 fuzz_p2 = next_pow2(q->n_fuzz);
+
+  if (afl->schedule == MMOPT || afl->schedule == RARE ||
+      unlikely(afl->fixed_seed))
+    fav_factor = q->len << 2;
+  else
+    fav_factor = q->exec_us * q->len;
 
   /* For every byte set in afl->fsrv.trace_bits[], see if there is a previous
      winner, and how it compares to us. */
-
-  for (i = 0; i < MAP_SIZE; ++i)
+  for (i = 0; i < afl->fsrv.map_size; ++i)
 
     if (afl->fsrv.trace_bits[i]) {
 
       if (afl->top_rated[i]) {
 
         /* Faster-executing or smaller test cases are favored. */
-        u64 top_rated_fuzz_p2 = next_p2(afl->top_rated[i]->n_fuzz);
-        u64 top_rated_fav_factor =
-            afl->top_rated[i]->exec_us * afl->top_rated[i]->len;
+        u64 top_rated_fav_factor;
+        u64 top_rated_fuzz_p2 = next_pow2(afl->top_rated[i]->n_fuzz);
 
-        if (fuzz_p2 > top_rated_fuzz_p2) {
+        if (afl->schedule == MMOPT || afl->schedule == RARE ||
+            unlikely(afl->fixed_seed))
+          top_rated_fav_factor = afl->top_rated[i]->len << 2;
+        else
+          top_rated_fav_factor =
+              afl->top_rated[i]->exec_us * afl->top_rated[i]->len;
 
+        if (fuzz_p2 > top_rated_fuzz_p2)
           continue;
+        else if (fuzz_p2 == top_rated_fuzz_p2)
+          if (fav_factor > top_rated_fav_factor) continue;
 
-        } else if (fuzz_p2 == top_rated_fuzz_p2) {
+        if (afl->schedule == MMOPT || afl->schedule == RARE ||
+            unlikely(afl->fixed_seed)) {
 
-          if (fav_factor > top_rated_fav_factor) continue;
+          if (fav_factor > afl->top_rated[i]->len << 2) continue;
 
-        }
+        } else {
 
-        if (fav_factor > afl->top_rated[i]->exec_us * afl->top_rated[i]->len)
-          continue;
+          if (fav_factor > afl->top_rated[i]->exec_us * afl->top_rated[i]->len)
+            continue;
+
+        }
 
         /* Looks like we're going to win. Decrease ref count for the
            previous winner, discard its afl->fsrv.trace_bits[] if necessary. */
@@ -234,8 +248,10 @@ void update_bitmap_score(afl_state_t *afl, struct queue_entry *q) {
 
       if (!q->trace_mini) {
 
-        q->trace_mini = ck_alloc(MAP_SIZE >> 3);
-        minimize_bits(q->trace_mini, afl->fsrv.trace_bits);
+        u32 len = (afl->fsrv.map_size >> 3);
+        if (len == 0) len = 1;
+        q->trace_mini = ck_alloc(len);
+        minimize_bits(afl, q->trace_mini, afl->fsrv.trace_bits);
 
       }
 
@@ -254,14 +270,17 @@ void update_bitmap_score(afl_state_t *afl, struct queue_entry *q) {
 void cull_queue(afl_state_t *afl) {
 
   struct queue_entry *q;
-  static u8           temp_v[MAP_SIZE >> 3];
+  u32                 len = (afl->fsrv.map_size >> 3);
   u32                 i;
+  u8                  temp_v[MAP_SIZE >> 3];
+
+  if (len == 0) len = 1;
 
   if (afl->dumb_mode || !afl->score_changed) return;
 
   afl->score_changed = 0;
 
-  memset(temp_v, 255, MAP_SIZE >> 3);
+  memset(temp_v, 255, len);
 
   afl->queued_favored = 0;
   afl->pending_favored = 0;
@@ -278,10 +297,10 @@ void cull_queue(afl_state_t *afl) {
   /* Let's see if anything in the bitmap isn't captured in temp_v.
      If yes, and if it has a afl->top_rated[] contender, let's use it. */
 
-  for (i = 0; i < MAP_SIZE; ++i)
+  for (i = 0; i < afl->fsrv.map_size; ++i)
     if (afl->top_rated[i] && (temp_v[i >> 3] & (1 << (i & 7)))) {
 
-      u32 j = MAP_SIZE >> 3;
+      u32 j = len;
 
       /* Remove all bits belonging to the current entry from temp_v. */
 
@@ -328,7 +347,8 @@ u32 calculate_score(afl_state_t *afl, struct queue_entry *q) {
   // Longer execution time means longer work on the input, the deeper in
   // coverage, the better the fuzzing, right? -mh
 
-  if (afl->schedule != MMOPT) {
+  if (afl->schedule != MMOPT && afl->schedule != RARE &&
+      likely(!afl->fixed_seed)) {
 
     if (q->exec_us * 0.1 > avg_exec_us)
       perf_score = 10;
@@ -438,7 +458,7 @@ u32 calculate_score(afl_state_t *afl, struct queue_entry *q) {
       if (q->fuzz_level < 16)
         factor = ((u32)(1 << q->fuzz_level)) / (fuzz == 0 ? 1 : fuzz);
       else
-        factor = MAX_FACTOR / (fuzz == 0 ? 1 : next_p2(fuzz));
+        factor = MAX_FACTOR / (fuzz == 0 ? 1 : next_pow2(fuzz));
       break;
 
     case LIN: factor = q->fuzz_level / (fuzz == 0 ? 1 : fuzz); break;
@@ -448,8 +468,29 @@ u32 calculate_score(afl_state_t *afl, struct queue_entry *q) {
       break;
 
     case MMOPT:
+      /* -- this was a more complex setup, which is good, but competed with
+         -- rare. the simpler algo however is good when rare is not.
+        // the newer the entry, the higher the pref_score
+        perf_score *= (1 + (double)((double)q->depth /
+        (double)afl->queued_paths));
+        // with special focus on the last 8 entries
+        if (afl->max_depth - q->depth < 8) perf_score *= (1 + ((8 -
+        (afl->max_depth - q->depth)) / 5));
+      */
+      // put focus on the last 5 entries
+      if (afl->max_depth - q->depth < 5) perf_score *= 2;
+
+      break;
+
+    case RARE:
 
-      if (afl->max_depth - q->depth < 5) perf_score *= 1.5;
+      // increase the score for every bitmap byte for which this entry
+      // is the top contender
+      perf_score += (q->tc_ref * 10);
+      // the more often fuzz result paths are equal to this queue entry,
+      // reduce its value
+      perf_score *=
+          (1 - (double)((double)q->n_fuzz / (double)afl->total_execs));
 
       break;
 
diff --git a/src/afl-fuzz-redqueen.c b/src/afl-fuzz-redqueen.c
index c8d54ce2..ba24890b 100644
--- a/src/afl-fuzz-redqueen.c
+++ b/src/afl-fuzz-redqueen.c
@@ -88,7 +88,7 @@ static u8 get_exec_checksum(afl_state_t *afl, u8 *buf, u32 len, u32 *cksum) {
 
   if (unlikely(common_fuzz_stuff(afl, buf, len))) return 1;
 
-  *cksum = hash32(afl->fsrv.trace_bits, MAP_SIZE, HASH_CONST);
+  *cksum = hash32(afl->fsrv.trace_bits, afl->fsrv.map_size, HASH_CONST);
   return 0;
 
 }
@@ -97,7 +97,7 @@ static void rand_replace(afl_state_t *afl, u8 *buf, u32 len) {
 
   u32 i;
   for (i = 0; i < len; ++i)
-    buf[i] = UR(afl, 256);
+    buf[i] = rand_below(afl, 256);
 
 }
 
@@ -337,7 +337,7 @@ static void try_to_add_to_dict(afl_state_t *afl, u64 v, u8 shape) {
 
   }
 
-  maybe_add_auto(afl, (u8 *)&v, shape);
+  maybe_add_auto((u8 *)afl, (u8 *)&v, shape);
 
   u64 rev;
   switch (shape) {
@@ -345,15 +345,15 @@ static void try_to_add_to_dict(afl_state_t *afl, u64 v, u8 shape) {
     case 1: break;
     case 2:
       rev = SWAP16((u16)v);
-      maybe_add_auto(afl, (u8 *)&rev, shape);
+      maybe_add_auto((u8 *)afl, (u8 *)&rev, shape);
       break;
     case 4:
       rev = SWAP32((u32)v);
-      maybe_add_auto(afl, (u8 *)&rev, shape);
+      maybe_add_auto((u8 *)afl, (u8 *)&rev, shape);
       break;
     case 8:
       rev = SWAP64(v);
-      maybe_add_auto(afl, (u8 *)&rev, shape);
+      maybe_add_auto((u8 *)afl, (u8 *)&rev, shape);
       break;
 
   }
@@ -511,8 +511,8 @@ static u8 rtn_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u32 len) {
     if (fails == 8) {
 
       if (afl->pass_stats[key].total == 0) {
-        maybe_add_auto(afl, o->v0, SHAPE_BYTES(h->shape));
-        maybe_add_auto(afl, o->v1, SHAPE_BYTES(h->shape));
+        maybe_add_auto((u8 *)afl, o->v0, SHAPE_BYTES(h->shape));
+        maybe_add_auto((u8 *)afl, o->v1, SHAPE_BYTES(h->shape));
       }
 
     }
@@ -569,9 +569,10 @@ u8 input_to_state_stage(afl_state_t *afl, u8 *orig_buf, u8 *buf, u32 len,
       afl->shm.cmp_map->headers[k].hits = 0;
     
     if (afl->shm.cmp_map->headers[k].type == CMP_TYPE_INS)
-      afl->stage_max += MIN(afl->shm.cmp_map->headers[k].hits, CMP_MAP_H);
+      afl->stage_max += MIN((u32)afl->shm.cmp_map->headers[k].hits, CMP_MAP_H);
     else
-      afl->stage_max += MIN(afl->shm.cmp_map->headers[k].hits, CMP_MAP_RTN_H);
+      afl->stage_max +=
+          MIN((u32)afl->shm.cmp_map->headers[k].hits, CMP_MAP_RTN_H);
 
   }
 
diff --git a/src/afl-fuzz-run.c b/src/afl-fuzz-run.c
index cdec75e8..1ddd7e1a 100644
--- a/src/afl-fuzz-run.c
+++ b/src/afl-fuzz-run.c
@@ -27,78 +27,62 @@
 #include <sys/time.h>
 #include <signal.h>
 
-/* Execute target application, monitoring for timeouts. Return status
-   information. The called program will update afl->fsrv.trace_bits[]. */
-
-void timeout_handle(union sigval timer_data) {
+#include "cmplog.h"
 
-  pid_t child_pid = timer_data.sival_int;
-  if (child_pid > 0) kill(child_pid, SIGKILL);
-
-}
+/* Execute target application, monitoring for timeouts. Return status
+   information. The called program will update fsrv->trace_bits[]. */
 
-u8 run_target(afl_state_t *afl, u32 timeout) {
+u8 run_target(afl_state_t *afl, afl_forkserver_t *fsrv, u32 timeout) {
 
   s32 res;
-  int sret;
-
-  fd_set readfds;
-
-  static struct timeval it;
-  static u32            prev_timed_out = 0;
+  u32 exec_ms;
 
   int status = 0;
   u32 tb4;
 
-  afl->fsrv.child_timed_out = 0;
+  fsrv->child_timed_out = 0;
 
-  /* After this memset, afl->fsrv.trace_bits[] are effectively volatile, so we
+  /* After this memset, fsrv->trace_bits[] are effectively volatile, so we
      must prevent any earlier operations from venturing into that
      territory. */
 
-  memset(afl->fsrv.trace_bits, 0, MAP_SIZE);
+  memset(fsrv->trace_bits, 0, fsrv->map_size);
 
   MEM_BARRIER();
-
+  
   /* we have the fork server (or faux server) up and running, so simply
       tell it to have at it, and then read back PID. */
 
-  if ((res = write(afl->fsrv.fsrv_ctl_fd, &prev_timed_out, 4)) != 4) {
+  if ((res = write(fsrv->fsrv_ctl_fd, &fsrv->prev_timed_out, 4)) != 4) {
 
     if (afl->stop_soon) return 0;
     RPFATAL(res, "Unable to request new process from fork server (OOM?)");
 
   }
 
-  if ((res = read(afl->fsrv.fsrv_st_fd, &afl->fsrv.child_pid, 4)) != 4) {
+  if ((res = read(fsrv->fsrv_st_fd, &fsrv->child_pid, 4)) != 4) {
 
     if (afl->stop_soon) return 0;
     RPFATAL(res, "Unable to request new process from fork server (OOM?)");
 
   }
 
-  if (afl->fsrv.child_pid <= 0) FATAL("Fork server is misbehaving (OOM?)");
+  if (fsrv->child_pid <= 0) FATAL("Fork server is misbehaving (OOM?)");
 
-  /* use select to monitor the forkserver for timeouts. */
+  exec_ms = read_timed(fsrv->fsrv_st_fd, &status, 4, timeout, &afl->stop_soon);
 
-  FD_ZERO(&readfds);
-  FD_SET(afl->fsrv.fsrv_st_fd, &readfds);
-  it.tv_sec = ((timeout) / 1000);
-  it.tv_usec = ((timeout) % 1000) * 1000;
-
-  sret = select(afl->fsrv.fsrv_st_fd + 1, &readfds, NULL, NULL, &it);
-
-  if (sret == 0) {
+  if (exec_ms > timeout) {
 
     /* If there was no response from forkserver after timeout seconds,
     we kill the child. The forkserver should inform us afterwards */
 
-    kill(afl->fsrv.child_pid, SIGKILL);
-    afl->fsrv.child_timed_out = 1;
+    kill(fsrv->child_pid, SIGKILL);
+    fsrv->child_timed_out = 1;
+    if (read(fsrv->fsrv_st_fd, &status, 4) < 4) exec_ms = 0;
 
   }
 
-  if ((res = read(afl->fsrv.fsrv_st_fd, &status, 4)) != 4) {
+  if (!exec_ms) {
 
     if (afl->stop_soon) return 0;
     SAYF("\n" cLRD "[-] " cRST
@@ -121,30 +105,30 @@ u8 run_target(afl_state_t *afl, u32 timeout) {
          "\n\n"
          "If all else fails you can disable the fork server via "
          "AFL_NO_FORKSRV=1.\n",
-         afl->fsrv.mem_limit);
+         fsrv->mem_limit);
     RPFATAL(res, "Unable to communicate with fork server");
 
   }
 
-  if (!WIFSTOPPED(status)) afl->fsrv.child_pid = 0;
+  if (!WIFSTOPPED(status)) fsrv->child_pid = 0;
 
   ++afl->total_execs;
 
-  /* Any subsequent operations on afl->fsrv.trace_bits must not be moved by the
-     compiler below this point. Past this location, afl->fsrv.trace_bits[]
+  /* Any subsequent operations on fsrv->trace_bits must not be moved by the
+     compiler below this point. Past this location, fsrv->trace_bits[]
      behave very normally and do not have to be treated as volatile. */
 
   MEM_BARRIER();
 
-  tb4 = *(u32 *)afl->fsrv.trace_bits;
+  tb4 = *(u32 *)fsrv->trace_bits;
 
 #ifdef WORD_SIZE_64
-  classify_counts((u64 *)afl->fsrv.trace_bits);
+  classify_counts(afl, (u64 *)fsrv->trace_bits);
 #else
-  classify_counts((u32 *)afl->fsrv.trace_bits);
+  classify_counts(afl, (u32 *)fsrv->trace_bits);
 #endif                                                     /* ^WORD_SIZE_64 */
 
-  prev_timed_out = afl->fsrv.child_timed_out;
+  fsrv->prev_timed_out = fsrv->child_timed_out;
 
   /* Report outcome to caller. */
 
@@ -152,7 +136,7 @@ u8 run_target(afl_state_t *afl, u32 timeout) {
 
     afl->kill_signal = WTERMSIG(status);
 
-    if (afl->fsrv.child_timed_out && afl->kill_signal == SIGKILL)
+    if (fsrv->child_timed_out && afl->kill_signal == SIGKILL)
       return FAULT_TMOUT;
 
     return FAULT_CRASH;
@@ -162,7 +146,7 @@ u8 run_target(afl_state_t *afl, u32 timeout) {
   /* A somewhat nasty hack for MSAN, which doesn't support abort_on_error and
      must use a special exit code. */
 
-  if (afl->fsrv.uses_asan && WEXITSTATUS(status) == MSAN_ERROR) {
+  if (fsrv->uses_asan && WEXITSTATUS(status) == MSAN_ERROR) {
 
     afl->kill_signal = 0;
     return FAULT_CRASH;
@@ -185,20 +169,16 @@ void write_to_testcase(afl_state_t *afl, void *mem, u32 len) {
   s32 fd = afl->fsrv.out_fd;
 
 #ifdef _AFL_DOCUMENT_MUTATIONS
-  s32   doc_fd;
-  char *fn = alloc_printf("%s/mutations/%09u:%s", afl->out_dir,
+  s32  doc_fd;
+  char fn[PATH_MAX];
+  snprintf(fn, PATH_MAX, "%s/mutations/%09u:%s", afl->out_dir,
                           afl->document_counter++, describe_op(afl, 0));
-  if (fn != NULL) {
-
-    if ((doc_fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0600)) >= 0) {
-
-      if (write(doc_fd, mem, len) != len)
-        PFATAL("write to mutation file failed: %s", fn);
-      close(doc_fd);
 
-    }
+  if ((doc_fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0600)) >= 0) {
 
-    ck_free(fn);
+    if (write(doc_fd, mem, len) != len)
+      PFATAL("write to mutation file failed: %s", fn);
+    close(doc_fd);
 
   }
 
@@ -223,16 +203,22 @@ void write_to_testcase(afl_state_t *afl, void *mem, u32 len) {
 
     lseek(fd, 0, SEEK_SET);
 
-  if (afl->mutator && afl->mutator->afl_custom_pre_save) {
+  if (unlikely(afl->mutator && afl->mutator->afl_custom_pre_save)) {
+
+    u8 *new_buf = NULL;
+
+    size_t new_size = afl->mutator->afl_custom_pre_save(afl->mutator->data, mem,
+                                                        len, &new_buf);
+
+    if (unlikely(!new_buf))
+      FATAL("Custom_pre_save failed (ret: %lu)", (long unsigned)new_size);
 
-    u8 *   new_data;
-    size_t new_size =
-        afl->mutator->afl_custom_pre_save(afl, mem, len, &new_data);
-    ck_write(fd, new_data, new_size, afl->fsrv.out_file);
-    ck_free(new_data);
+    /* everything as planned. use the new data. */
+    ck_write(fd, new_buf, new_size, afl->fsrv.out_file);
 
   } else {
 
+    /* boring uncustom. */
     ck_write(fd, mem, len, afl->fsrv.out_file);
 
   }
@@ -299,8 +285,6 @@ static void write_with_gap(afl_state_t *afl, void *mem, u32 len, u32 skip_at,
 u8 calibrate_case(afl_state_t *afl, struct queue_entry *q, u8 *use_mem,
                   u32 handicap, u8 from_queue) {
 
-  static u8 first_trace[MAP_SIZE];
-
   u8 fault = 0, new_bits = 0, var_detected = 0,
      first_run = (q->exec_cksum == 0);
 
@@ -326,12 +310,22 @@ u8 calibrate_case(afl_state_t *afl, struct queue_entry *q, u8 *use_mem,
   /* Make sure the forkserver is up before we do anything, and let's not
      count its spin-up time toward binary calibration. */
 
-  if (!afl->fsrv.fsrv_pid) afl_fsrv_start(&afl->fsrv, afl->argv);
-  if (afl->dumb_mode != 1 && !afl->no_forkserver && !afl->cmplog_fsrv_pid &&
-      afl->shm.cmplog_mode)
-    init_cmplog_forkserver(afl);
+  if (!afl->fsrv.fsrv_pid) {
+
+    if (afl->fsrv.cmplog_binary &&
+        afl->fsrv.init_child_func != cmplog_exec_child) {
+
+      FATAL("BUG in afl-fuzz detected. Cmplog mode not set correctly.");
+
+    }
+
+    afl_fsrv_start(&afl->fsrv, afl->argv, &afl->stop_soon,
+                   afl->afl_env.afl_debug_child_output);
+
+  }
 
-  if (q->exec_cksum) memcpy(first_trace, afl->fsrv.trace_bits, MAP_SIZE);
+  if (q->exec_cksum)
+    memcpy(afl->first_trace, afl->fsrv.trace_bits, afl->fsrv.map_size);
 
   start_us = get_cur_time_us();
 
@@ -344,7 +338,7 @@ u8 calibrate_case(afl_state_t *afl, struct queue_entry *q, u8 *use_mem,
 
     write_to_testcase(afl, use_mem, q->len);
 
-    fault = run_target(afl, use_tmout);
+    fault = run_target(afl, &afl->fsrv, use_tmout);
 
     /* afl->stop_soon is set by the handler for Ctrl+C. When it's pressed,
        we want to bail out quickly. */
@@ -352,14 +346,14 @@ u8 calibrate_case(afl_state_t *afl, struct queue_entry *q, u8 *use_mem,
     if (afl->stop_soon || fault != afl->crash_mode) goto abort_calibration;
 
     if (!afl->dumb_mode && !afl->stage_cur &&
-        !count_bytes(afl->fsrv.trace_bits)) {
+        !count_bytes(afl, afl->fsrv.trace_bits)) {
 
       fault = FAULT_NOINST;
       goto abort_calibration;
 
     }
 
-    cksum = hash32(afl->fsrv.trace_bits, MAP_SIZE, HASH_CONST);
+    cksum = hash32(afl->fsrv.trace_bits, afl->fsrv.map_size, HASH_CONST);
 
     if (q->exec_cksum != cksum) {
 
@@ -370,23 +364,21 @@ u8 calibrate_case(afl_state_t *afl, struct queue_entry *q, u8 *use_mem,
 
         u32 i;
 
-        for (i = 0; i < MAP_SIZE; ++i) {
-
-          if (!afl->var_bytes[i] && first_trace[i] != afl->fsrv.trace_bits[i]) {
+        for (i = 0; i < afl->fsrv.map_size; ++i) {
 
+          if (unlikely(!afl->var_bytes[i]) &&
+              unlikely(afl->first_trace[i] != afl->fsrv.trace_bits[i]))
             afl->var_bytes[i] = 1;
-            afl->stage_max = CAL_CYCLES_LONG;
-
-          }
 
         }
 
         var_detected = 1;
+        afl->stage_max = CAL_CYCLES_LONG;
 
       } else {
 
         q->exec_cksum = cksum;
-        memcpy(first_trace, afl->fsrv.trace_bits, MAP_SIZE);
+        memcpy(afl->first_trace, afl->fsrv.trace_bits, afl->fsrv.map_size);
 
       }
 
@@ -403,7 +395,7 @@ u8 calibrate_case(afl_state_t *afl, struct queue_entry *q, u8 *use_mem,
      This is used for fuzzing air time calculations in calculate_score(). */
 
   q->exec_us = (stop_us - start_us) / afl->stage_max;
-  q->bitmap_size = count_bytes(afl->fsrv.trace_bits);
+  q->bitmap_size = count_bytes(afl, afl->fsrv.trace_bits);
   q->handicap = handicap;
   q->cal_failed = 0;
 
@@ -431,7 +423,7 @@ abort_calibration:
 
   if (var_detected) {
 
-    afl->var_byte_count = count_bytes(afl->var_bytes);
+    afl->var_byte_count = count_bytes(afl, afl->var_bytes);
 
     if (!q->var_behavior) {
 
@@ -471,8 +463,6 @@ void sync_fuzzers(afl_state_t *afl) {
 
   while ((sd_ent = readdir(sd))) {
 
-    static u8 stage_tmp[128];
-
     DIR *          qd;
     struct dirent *qd_ent;
     u8 *           qd_path, *qd_synced_path;
@@ -511,13 +501,14 @@ void sync_fuzzers(afl_state_t *afl) {
 
     /* Show stats */
 
-    sprintf(stage_tmp, "sync %u", ++sync_cnt);
-    afl->stage_name = stage_tmp;
+    snprintf(afl->stage_name_buf, STAGE_BUF_SIZE, "sync %u", ++sync_cnt);
+
+    afl->stage_name = afl->stage_name_buf;
     afl->stage_cur = 0;
     afl->stage_max = 0;
 
-    /* For every file queued by this fuzzer, parse ID and see if we have looked
-       at it before; exec a test case if not. */
+    /* For every file queued by this fuzzer, parse ID and see if we have
+       looked at it before; exec a test case if not. */
 
     while ((qd_ent = readdir(qd))) {
 
@@ -564,7 +555,7 @@ void sync_fuzzers(afl_state_t *afl) {
 
         write_to_testcase(afl, mem, st.st_size);
 
-        fault = run_target(afl, afl->fsrv.exec_tmout);
+        fault = run_target(afl, &afl->fsrv, afl->fsrv.exec_tmout);
 
         if (afl->stop_soon) goto close_sync;
 
@@ -608,26 +599,25 @@ u8 trim_case(afl_state_t *afl, struct queue_entry *q, u8 *in_buf) {
   if (afl->mutator && afl->mutator->afl_custom_trim)
     return trim_case_custom(afl, q, in_buf);
 
-  static u8 tmp[64];
-  static u8 clean_trace[MAP_SIZE];
-
   u8  needs_write = 0, fault = 0;
   u32 trim_exec = 0;
   u32 remove_len;
   u32 len_p2;
 
+  u8 val_bufs[2][STRINGIFY_VAL_SIZE_MAX];
+
   /* Although the trimmer will be less useful when variable behavior is
      detected, it will still work to some extent, so we don't check for
      this. */
 
   if (q->len < 5) return 0;
 
-  afl->stage_name = tmp;
+  afl->stage_name = afl->stage_name_buf;
   afl->bytes_trim_in += q->len;
 
   /* Select initial chunk len, starting with large steps. */
 
-  len_p2 = next_p2(q->len);
+  len_p2 = next_pow2(q->len);
 
   remove_len = MAX(len_p2 / TRIM_START_STEPS, TRIM_MIN_BYTES);
 
@@ -638,7 +628,9 @@ u8 trim_case(afl_state_t *afl, struct queue_entry *q, u8 *in_buf) {
 
     u32 remove_pos = remove_len;
 
-    sprintf(tmp, "trim %s/%s", DI(remove_len), DI(remove_len));
+    snprintf(afl->stage_name_buf, STAGE_BUF_SIZE, "trim %s/%s",
+             u_stringify_int(val_bufs[0], remove_len),
+             u_stringify_int(val_bufs[1], remove_len));
 
     afl->stage_cur = 0;
     afl->stage_max = q->len / remove_len;
@@ -650,14 +642,15 @@ u8 trim_case(afl_state_t *afl, struct queue_entry *q, u8 *in_buf) {
 
       write_with_gap(afl, in_buf, q->len, remove_pos, trim_avail);
 
-      fault = run_target(afl, afl->fsrv.exec_tmout);
+      fault = run_target(afl, &afl->fsrv, afl->fsrv.exec_tmout);
       ++afl->trim_execs;
 
       if (afl->stop_soon || fault == FAULT_ERROR) goto abort_trimming;
 
-      /* Note that we don't keep track of crashes or hangs here; maybe TODO? */
+      /* Note that we don't keep track of crashes or hangs here; maybe TODO?
+       */
 
-      cksum = hash32(afl->fsrv.trace_bits, MAP_SIZE, HASH_CONST);
+      cksum = hash32(afl->fsrv.trace_bits, afl->fsrv.map_size, HASH_CONST);
 
       /* If the deletion had no impact on the trace, make it permanent. This
          isn't perfect for variable-path inputs, but we're just making a
@@ -669,7 +662,7 @@ u8 trim_case(afl_state_t *afl, struct queue_entry *q, u8 *in_buf) {
         u32 move_tail = q->len - remove_pos - trim_avail;
 
         q->len -= trim_avail;
-        len_p2 = next_p2(q->len);
+        len_p2 = next_pow2(q->len);
 
         memmove(in_buf + remove_pos, in_buf + remove_pos + trim_avail,
                 move_tail);
@@ -680,7 +673,7 @@ u8 trim_case(afl_state_t *afl, struct queue_entry *q, u8 *in_buf) {
         if (!needs_write) {
 
           needs_write = 1;
-          memcpy(clean_trace, afl->fsrv.trace_bits, MAP_SIZE);
+          memcpy(afl->clean_trace, afl->fsrv.trace_bits, afl->fsrv.map_size);
 
         }
 
@@ -722,7 +715,7 @@ u8 trim_case(afl_state_t *afl, struct queue_entry *q, u8 *in_buf) {
     ck_write(fd, in_buf, q->len, q->fname);
     close(fd);
 
-    memcpy(afl->fsrv.trace_bits, clean_trace, MAP_SIZE);
+    memcpy(afl->fsrv.trace_bits, afl->clean_trace, afl->fsrv.map_size);
     update_bitmap_score(afl, q);
 
   }
@@ -744,14 +737,19 @@ u8 common_fuzz_stuff(afl_state_t *afl, u8 *out_buf, u32 len) {
 
   if (afl->post_handler) {
 
-    out_buf = afl->post_handler(out_buf, &len);
-    if (!out_buf || !len) return 0;
+    u8 *post_buf = NULL;
+
+    size_t post_len =
+        afl->post_handler(afl->post_data, out_buf, len, &post_buf);
+    if (!post_buf || !post_len) return 0;
+    out_buf = post_buf;
+    len = post_len;
 
   }
 
   write_to_testcase(afl, out_buf, len);
 
-  fault = run_target(afl, afl->fsrv.exec_tmout);
+  fault = run_target(afl, &afl->fsrv, afl->fsrv.exec_tmout);
 
   if (afl->stop_soon) return 1;
 
diff --git a/src/afl-fuzz-globals.c b/src/afl-fuzz-state.c
index efffa749..f58345fb 100644
--- a/src/afl-fuzz-globals.c
+++ b/src/afl-fuzz-state.c
@@ -30,10 +30,9 @@ s8  interesting_8[] = {INTERESTING_8};
 s16 interesting_16[] = {INTERESTING_8, INTERESTING_16};
 s32 interesting_32[] = {INTERESTING_8, INTERESTING_16, INTERESTING_32};
 
-char *power_names[POWER_SCHEDULES_NUM] = {"explore", "fast",    "coe",  "lin",
-                                          "quad",    "exploit", "mmopt"};
+char *power_names[POWER_SCHEDULES_NUM] = {
 
-u8 *doc_path = NULL;                    /* gath to documentation dir        */
+    "explore", "fast", "coe", "lin", "quad", "exploit", "mmopt", "rare"};
 
 /* Initialize MOpt "globals" for this afl state */
 
@@ -78,6 +77,10 @@ list_t afl_states = {.element_prealloc_count = 0};
 
 void afl_state_init(afl_state_t *afl) {
 
+  /* thanks to this memset, growing vars like out_buf
+  and out_size are NULL/0 by default. */
+  memset(afl, 0, sizeof(afl_state_t));
+
   afl->w_init = 0.9;
   afl->w_end = 0.3;
   afl->g_max = 5000;
@@ -96,6 +99,10 @@ void afl_state_init(afl_state_t *afl) {
 
   afl->fsrv.use_stdin = 1;
 
+  afl->fsrv.map_size = MAP_SIZE;
+  afl->fsrv.function_opt = (u8 *)afl;
+  afl->fsrv.function_ptr = &maybe_add_auto;
+
   afl->cal_cycles = CAL_CYCLES;
   afl->cal_cycles_long = CAL_CYCLES_LONG;
 
@@ -114,6 +121,29 @@ void afl_state_init(afl_state_t *afl) {
   afl->fsrv.child_pid = -1;
   afl->fsrv.out_dir_fd = -1;
 
+  afl->cmplog_prev_timed_out = 0;
+
+  /* stats file */
+  afl->last_bitmap_cvg = 0;
+  afl->last_stability = 0;
+  afl->last_eps = 0;
+
+  /* plot file saves from last run */
+  afl->plot_prev_qp = 0;
+  afl->plot_prev_pf = 0;
+  afl->plot_prev_pnf = 0;
+  afl->plot_prev_ce = 0;
+  afl->plot_prev_md = 0;
+  afl->plot_prev_qc = 0;
+  afl->plot_prev_uc = 0;
+  afl->plot_prev_uh = 0;
+
+  afl->stats_last_stats_ms = 0;
+  afl->stats_last_plot_ms = 0;
+  afl->stats_last_ms = 0;
+  afl->stats_last_execs = 0;
+  afl->stats_avg_exec = -1;
+
   init_mopt_globals(afl);
 
   list_append(&afl_states, afl);
@@ -254,6 +284,13 @@ void read_afl_environment(afl_state_t *afl, char **envp) {
             afl->afl_env.afl_autoresume =
                 get_afl_env(afl_environment_variables[i]) ? 1 : 0;
 
+          } else if (!strncmp(env, "AFL_CAL_FAST",
+
+                              afl_environment_variable_len)) {
+
+            afl->afl_env.afl_cal_fast =
+                get_afl_env(afl_environment_variables[i]) ? 1 : 0;
+
           } else if (!strncmp(env, "AFL_TMPDIR",
 
                               afl_environment_variable_len)) {
@@ -321,6 +358,17 @@ void read_afl_environment(afl_state_t *afl, char **envp) {
 
 void afl_state_deinit(afl_state_t *afl) {
 
+  if (afl->post_deinit) afl->post_deinit(afl->post_data);
+  if (afl->in_place_resume) ck_free(afl->in_dir);
+  if (afl->sync_id) ck_free(afl->out_dir);
+
+  free(afl->out_buf);
+  free(afl->out_scratch_buf);
+  free(afl->eff_buf);
+  free(afl->in_buf);
+  free(afl->in_scratch_buf);
+  free(afl->ex_buf);
+
   list_remove(&afl_states, afl);
 
 }
diff --git a/src/afl-fuzz-stats.c b/src/afl-fuzz-stats.c
index 63cca14d..2e680dbb 100644
--- a/src/afl-fuzz-stats.c
+++ b/src/afl-fuzz-stats.c
@@ -24,25 +24,27 @@
  */
 
 #include "afl-fuzz.h"
+#include <limits.h>
 
 /* Update stats file for unattended monitoring. */
 
 void write_stats_file(afl_state_t *afl, double bitmap_cvg, double stability,
                       double eps) {
 
-  static double        last_bcvg, last_stab, last_eps;
-  static struct rusage rus;
+  struct rusage rus;
 
-  u8 *  fn = alloc_printf("%s/fuzzer_stats", afl->out_dir);
-  s32   fd;
-  FILE *f;
+  unsigned long long int cur_time = get_cur_time();
+  u8                     fn[PATH_MAX];
+  s32                    fd;
+  FILE *                 f;
+  uint32_t               t_bytes = count_non_255_bytes(afl, afl->virgin_bits);
+
+  snprintf(fn, PATH_MAX, "%s/fuzzer_stats", afl->out_dir);
 
   fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0600);
 
   if (fd < 0) PFATAL("Unable to create '%s'", fn);
 
-  ck_free(fn);
-
   f = fdopen(fd, "w");
 
   if (!f) PFATAL("fdopen() failed");
@@ -52,15 +54,15 @@ void write_stats_file(afl_state_t *afl, double bitmap_cvg, double stability,
 
   if (!bitmap_cvg && !stability && !eps) {
 
-    bitmap_cvg = last_bcvg;
-    stability = last_stab;
-    eps = last_eps;
+    bitmap_cvg = afl->last_bitmap_cvg;
+    stability = afl->last_stability;
+    eps = afl->last_eps;
 
   } else {
 
-    last_bcvg = bitmap_cvg;
-    last_stab = stability;
-    last_eps = eps;
+    afl->last_bitmap_cvg = bitmap_cvg;
+    afl->last_stability = stability;
+    afl->last_eps = eps;
 
   }
 
@@ -70,8 +72,10 @@ void write_stats_file(afl_state_t *afl, double bitmap_cvg, double stability,
       f,
       "start_time        : %llu\n"
       "last_update       : %llu\n"
+      "run_time          : %llu\n"
       "fuzzer_pid        : %d\n"
       "cycles_done       : %llu\n"
+      "cycles_wo_finds   : %llu\n"
       "execs_done        : %llu\n"
       "execs_per_sec     : %0.02f\n"
       //          "real_execs_per_sec: %0.02f\n"  // damn the name is too long
@@ -93,17 +97,20 @@ void write_stats_file(afl_state_t *afl, double bitmap_cvg, double stability,
       "last_hang         : %llu\n"
       "execs_since_crash : %llu\n"
       "exec_timeout      : %u\n"
-      "slowest_exec_ms   : %llu\n"
+      "slowest_exec_ms   : %u\n"
       "peak_rss_mb       : %lu\n"
+      "edges_found       : %u\n"
+      "var_byte_count    : %u\n"
       "afl_banner        : %s\n"
       "afl_version       : " VERSION
       "\n"
       "target_mode       : %s%s%s%s%s%s%s%s\n"
       "command_line      : %s\n",
-      afl->start_time / 1000, get_cur_time() / 1000, getpid(),
-      afl->queue_cycle ? (afl->queue_cycle - 1) : 0, afl->total_execs,
-      /*eps,*/ afl->total_execs /
-          ((double)(get_cur_time() - afl->start_time) / 1000),
+      afl->start_time / 1000, cur_time / 1000,
+      (cur_time - afl->start_time) / 1000, getpid(),
+      afl->queue_cycle ? (afl->queue_cycle - 1) : 0, afl->cycles_wo_finds,
+      afl->total_execs, /* rate uses the same timestamp as run_time above */
+      afl->total_execs / ((double)(cur_time - afl->start_time) / 1000),
       afl->queued_paths, afl->queued_favored, afl->queued_discovered,
       afl->queued_imported, afl->max_depth, afl->current_entry,
       afl->pending_favored, afl->pending_not_fuzzed, afl->queued_variable,
@@ -116,12 +123,13 @@ void write_stats_file(afl_state_t *afl, double bitmap_cvg, double stability,
 #else
       (unsigned long int)(rus.ru_maxrss >> 10),
 #endif
-      afl->use_banner, afl->unicorn_mode ? "unicorn" : "",
-      afl->qemu_mode ? "qemu " : "", afl->dumb_mode ? " dumb " : "",
-      afl->no_forkserver ? "no_fsrv " : "", afl->crash_mode ? "crash " : "",
+      t_bytes, afl->var_byte_count, afl->use_banner,
+      afl->unicorn_mode ? "unicorn" : "", afl->fsrv.qemu_mode ? "qemu " : "",
+      afl->dumb_mode ? " dumb " : "", afl->no_forkserver ? "no_fsrv " : "",
+      afl->crash_mode ? "crash " : "",
       afl->persistent_mode ? "persistent " : "",
       afl->deferred_mode ? "deferred " : "",
-      (afl->unicorn_mode || afl->qemu_mode || afl->dumb_mode ||
+      (afl->unicorn_mode || afl->fsrv.qemu_mode || afl->dumb_mode ||
        afl->no_forkserver || afl->crash_mode || afl->persistent_mode ||
        afl->deferred_mode)
           ? ""
@@ -137,23 +145,24 @@ void write_stats_file(afl_state_t *afl, double bitmap_cvg, double stability,
 
 void maybe_update_plot_file(afl_state_t *afl, double bitmap_cvg, double eps) {
 
-  static u32 prev_qp, prev_pf, prev_pnf, prev_ce, prev_md;
-  static u64 prev_qc, prev_uc, prev_uh;
-
-  if (prev_qp == afl->queued_paths && prev_pf == afl->pending_favored &&
-      prev_pnf == afl->pending_not_fuzzed && prev_ce == afl->current_entry &&
-      prev_qc == afl->queue_cycle && prev_uc == afl->unique_crashes &&
-      prev_uh == afl->unique_hangs && prev_md == afl->max_depth)
+  if (afl->plot_prev_qp == afl->queued_paths &&
+      afl->plot_prev_pf == afl->pending_favored &&
+      afl->plot_prev_pnf == afl->pending_not_fuzzed &&
+      afl->plot_prev_ce == afl->current_entry &&
+      afl->plot_prev_qc == afl->queue_cycle &&
+      afl->plot_prev_uc == afl->unique_crashes &&
+      afl->plot_prev_uh == afl->unique_hangs &&
+      afl->plot_prev_md == afl->max_depth)
     return;
 
-  prev_qp = afl->queued_paths;
-  prev_pf = afl->pending_favored;
-  prev_pnf = afl->pending_not_fuzzed;
-  prev_ce = afl->current_entry;
-  prev_qc = afl->queue_cycle;
-  prev_uc = afl->unique_crashes;
-  prev_uh = afl->unique_hangs;
-  prev_md = afl->max_depth;
+  afl->plot_prev_qp = afl->queued_paths;
+  afl->plot_prev_pf = afl->pending_favored;
+  afl->plot_prev_pnf = afl->pending_not_fuzzed;
+  afl->plot_prev_ce = afl->current_entry;
+  afl->plot_prev_qc = afl->queue_cycle;
+  afl->plot_prev_uc = afl->unique_crashes;
+  afl->plot_prev_uh = afl->unique_hangs;
+  afl->plot_prev_md = afl->max_depth;
 
   /* Fields in the file:
 
@@ -192,21 +201,47 @@ static void check_term_size(afl_state_t *afl) {
 
 void show_stats(afl_state_t *afl) {
 
-  static u64    last_stats_ms, last_plot_ms, last_ms, last_execs;
-  static double avg_exec;
-  double        t_byte_ratio, stab_ratio;
+  double t_byte_ratio, stab_ratio;
 
   u64 cur_ms;
   u32 t_bytes, t_bits;
 
   u32 banner_len, banner_pad;
   u8  tmp[256];
+  u8  time_tmp[64];
+
+  u8 val_buf[8][STRINGIFY_VAL_SIZE_MAX];
+#define IB(i) (val_buf[(i)])
 
   cur_ms = get_cur_time();
 
+  if (afl->most_time_key) {
+
+    if (afl->most_time * 1000 < cur_ms - afl->start_time) {
+
+      afl->most_time_key = 2;
+      afl->stop_soon = 2;
+
+    }
+
+  }
+
+  if (afl->most_execs_key == 1) {
+
+    if (afl->most_execs <= afl->total_execs) {
+
+      afl->most_execs_key = 2;
+      afl->stop_soon = 2;
+
+    }
+
+  }
+
   /* If not enough time has passed since last UI update, bail out. */
 
-  if (cur_ms - last_ms < 1000 / UI_TARGET_HZ && !afl->force_ui_update) return;
+  if (cur_ms - afl->stats_last_ms < 1000 / UI_TARGET_HZ &&
+      !afl->force_ui_update)
+    return;
 
   /* Check if we're past the 10 minute mark. */
 
@@ -214,49 +249,51 @@ void show_stats(afl_state_t *afl) {
 
   /* Calculate smoothed exec speed stats. */
 
-  if (!last_execs) {
+  if (!afl->stats_last_execs) {
 
-    avg_exec = ((double)afl->total_execs) * 1000 / (cur_ms - afl->start_time);
+    afl->stats_avg_exec =
+        ((double)afl->total_execs) * 1000 / (cur_ms - afl->start_time);
 
   } else {
 
-    double cur_avg =
-        ((double)(afl->total_execs - last_execs)) * 1000 / (cur_ms - last_ms);
+    double cur_avg = ((double)(afl->total_execs - afl->stats_last_execs)) *
+                     1000 / (cur_ms - afl->stats_last_ms);
 
     /* If there is a dramatic (5x+) jump in speed, reset the indicator
        more quickly. */
 
-    if (cur_avg * 5 < avg_exec || cur_avg / 5 > avg_exec) avg_exec = cur_avg;
+    if (cur_avg * 5 < afl->stats_avg_exec || cur_avg / 5 > afl->stats_avg_exec)
+      afl->stats_avg_exec = cur_avg;
 
-    avg_exec = avg_exec * (1.0 - 1.0 / AVG_SMOOTHING) +
-               cur_avg * (1.0 / AVG_SMOOTHING);
+    afl->stats_avg_exec = afl->stats_avg_exec * (1.0 - 1.0 / AVG_SMOOTHING) +
+                          cur_avg * (1.0 / AVG_SMOOTHING);
 
   }
 
-  last_ms = cur_ms;
-  last_execs = afl->total_execs;
+  afl->stats_last_ms = cur_ms;
+  afl->stats_last_execs = afl->total_execs;
 
   /* Tell the callers when to contact us (as measured in execs). */
 
-  afl->stats_update_freq = avg_exec / (UI_TARGET_HZ * 10);
+  afl->stats_update_freq = afl->stats_avg_exec / (UI_TARGET_HZ * 10);
   if (!afl->stats_update_freq) afl->stats_update_freq = 1;
 
   /* Do some bitmap stats. */
 
-  t_bytes = count_non_255_bytes(afl->virgin_bits);
-  t_byte_ratio = ((double)t_bytes * 100) / MAP_SIZE;
+  t_bytes = count_non_255_bytes(afl, afl->virgin_bits);
+  t_byte_ratio = ((double)t_bytes * 100) / afl->fsrv.map_size;
 
-  if (t_bytes)
-    stab_ratio = 100 - ((double)afl->var_byte_count) * 100 / t_bytes;
+  if (likely(t_bytes) && unlikely(afl->var_byte_count))
+    stab_ratio = 100 - (((double)afl->var_byte_count * 100) / t_bytes);
   else
     stab_ratio = 100;
 
   /* Roughly every minute, update fuzzer stats and save auto tokens. */
 
-  if (cur_ms - last_stats_ms > STATS_UPDATE_SEC * 1000) {
+  if (cur_ms - afl->stats_last_stats_ms > STATS_UPDATE_SEC * 1000) {
 
-    last_stats_ms = cur_ms;
-    write_stats_file(afl, t_byte_ratio, stab_ratio, avg_exec);
+    afl->stats_last_stats_ms = cur_ms;
+    write_stats_file(afl, t_byte_ratio, stab_ratio, afl->stats_avg_exec);
     save_auto(afl);
     write_bitmap(afl);
 
@@ -264,10 +301,10 @@ void show_stats(afl_state_t *afl) {
 
   /* Every now and then, write plot data. */
 
-  if (cur_ms - last_plot_ms > PLOT_UPDATE_SEC * 1000) {
+  if (cur_ms - afl->stats_last_plot_ms > PLOT_UPDATE_SEC * 1000) {
 
-    last_plot_ms = cur_ms;
-    maybe_update_plot_file(afl, t_byte_ratio, avg_exec);
+    afl->stats_last_plot_ms = cur_ms;
+    maybe_update_plot_file(afl, t_byte_ratio, afl->stats_avg_exec);
 
   }
 
@@ -290,7 +327,7 @@ void show_stats(afl_state_t *afl) {
 
   /* Compute some mildly useful bitmap stats. */
 
-  t_bits = (MAP_SIZE << 3) - count_bits(afl->virgin_bits);
+  t_bits = (afl->fsrv.map_size << 3) - count_bits(afl, afl->virgin_bits);
 
   /* Now, for the visuals... */
 
@@ -384,9 +421,10 @@ void show_stats(afl_state_t *afl) {
 
   }
 
+  u_stringify_time_diff(time_tmp, cur_ms, afl->start_time);
   SAYF(bV bSTOP "        run time : " cRST "%-33s " bSTG bV bSTOP
                 "  cycles done : %s%-5s " bSTG              bV "\n",
-       DTD(cur_ms, afl->start_time), tmp, DI(afl->queue_cycle - 1));
+       time_tmp, tmp, u_stringify_int(IB(0), afl->queue_cycle - 1));
 
   /* We want to warn people about not seeing new paths after a full cycle,
      except when resuming fuzzing or running in non-instrumented mode. */
@@ -395,8 +433,8 @@ void show_stats(afl_state_t *afl) {
       (afl->last_path_time || afl->resuming_fuzz || afl->queue_cycle == 1 ||
        afl->in_bitmap || afl->crash_mode)) {
 
-    SAYF(bV bSTOP "   last new path : " cRST "%-33s ",
-         DTD(cur_ms, afl->last_path_time));
+    u_stringify_time_diff(time_tmp, cur_ms, afl->last_path_time);
+    SAYF(bV bSTOP "   last new path : " cRST "%-33s ", time_tmp);
 
   } else {
 
@@ -413,25 +451,26 @@ void show_stats(afl_state_t *afl) {
   }
 
   SAYF(bSTG bV bSTOP "  total paths : " cRST "%-5s " bSTG bV "\n",
-       DI(afl->queued_paths));
+       u_stringify_int(IB(0), afl->queued_paths));
 
   /* Highlight crashes in red if found, denote going over the KEEP_UNIQUE_CRASH
      limit with a '+' appended to the count. */
 
-  sprintf(tmp, "%s%s", DI(afl->unique_crashes),
+  sprintf(tmp, "%s%s", u_stringify_int(IB(0), afl->unique_crashes),
           (afl->unique_crashes >= KEEP_UNIQUE_CRASH) ? "+" : "");
 
+  u_stringify_time_diff(time_tmp, cur_ms, afl->last_crash_time);
   SAYF(bV bSTOP " last uniq crash : " cRST "%-33s " bSTG bV bSTOP
                 " uniq crashes : %s%-6s" bSTG               bV "\n",
-       DTD(cur_ms, afl->last_crash_time), afl->unique_crashes ? cLRD : cRST,
-       tmp);
+       time_tmp, afl->unique_crashes ? cLRD : cRST, tmp);
 
-  sprintf(tmp, "%s%s", DI(afl->unique_hangs),
+  sprintf(tmp, "%s%s", u_stringify_int(IB(0), afl->unique_hangs),
           (afl->unique_hangs >= KEEP_UNIQUE_HANG) ? "+" : "");
 
+  u_stringify_time_diff(time_tmp, cur_ms, afl->last_hang_time);
   SAYF(bV bSTOP "  last uniq hang : " cRST "%-33s " bSTG bV bSTOP
                 "   uniq hangs : " cRST "%-6s" bSTG         bV "\n",
-       DTD(cur_ms, afl->last_hang_time), tmp);
+       time_tmp, tmp);
 
   SAYF(bVR bH bSTOP            cCYA
        " cycle progress " bSTG bH10 bH5 bH2 bH2 bHB bH bSTOP cCYA
@@ -441,21 +480,22 @@ void show_stats(afl_state_t *afl) {
      together, but then cram them into a fixed-width field - so we need to
      put them in a temporary buffer first. */
 
-  sprintf(tmp, "%s%s%u (%0.01f%%)", DI(afl->current_entry),
+  sprintf(tmp, "%s%s%u (%0.01f%%)", u_stringify_int(IB(0), afl->current_entry),
           afl->queue_cur->favored ? "." : "*", afl->queue_cur->fuzz_level,
           ((double)afl->current_entry * 100) / afl->queued_paths);
 
   SAYF(bV bSTOP "  now processing : " cRST "%-16s " bSTG bV bSTOP, tmp);
 
   sprintf(tmp, "%0.02f%% / %0.02f%%",
-          ((double)afl->queue_cur->bitmap_size) * 100 / MAP_SIZE, t_byte_ratio);
+          ((double)afl->queue_cur->bitmap_size) * 100 / afl->fsrv.map_size,
+          t_byte_ratio);
 
   SAYF("    map density : %s%-21s" bSTG bV "\n",
        t_byte_ratio > 70 ? cLRD
                          : ((t_bytes < 200 && !afl->dumb_mode) ? cPIN : cRST),
        tmp);
 
-  sprintf(tmp, "%s (%0.02f%%)", DI(afl->cur_skipped_paths),
+  sprintf(tmp, "%s (%0.02f%%)", u_stringify_int(IB(0), afl->cur_skipped_paths),
           ((double)afl->cur_skipped_paths * 100) / afl->queued_paths);
 
   SAYF(bV bSTOP " paths timed out : " cRST "%-16s " bSTG bV, tmp);
@@ -468,7 +508,7 @@ void show_stats(afl_state_t *afl) {
        " stage progress " bSTG bH10 bH5 bH2 bH2 bX bH bSTOP cCYA
        " findings in depth " bSTG bH10 bH5 bH2 bH2 bVL "\n");
 
-  sprintf(tmp, "%s (%0.02f%%)", DI(afl->queued_favored),
+  sprintf(tmp, "%s (%0.02f%%)", u_stringify_int(IB(0), afl->queued_favored),
           ((double)afl->queued_favored) * 100 / afl->queued_paths);
 
   /* Yeah... it's still going on... halp? */
@@ -479,58 +519,61 @@ void show_stats(afl_state_t *afl) {
 
   if (!afl->stage_max) {
 
-    sprintf(tmp, "%s/-", DI(afl->stage_cur));
+    sprintf(tmp, "%s/-", u_stringify_int(IB(0), afl->stage_cur));
 
   } else {
 
-    sprintf(tmp, "%s/%s (%0.02f%%)", DI(afl->stage_cur), DI(afl->stage_max),
+    sprintf(tmp, "%s/%s (%0.02f%%)", u_stringify_int(IB(0), afl->stage_cur),
+            u_stringify_int(IB(1), afl->stage_max),
             ((double)afl->stage_cur) * 100 / afl->stage_max);
 
   }
 
   SAYF(bV bSTOP " stage execs : " cRST "%-21s" bSTG bV bSTOP, tmp);
 
-  sprintf(tmp, "%s (%0.02f%%)", DI(afl->queued_with_cov),
+  sprintf(tmp, "%s (%0.02f%%)", u_stringify_int(IB(0), afl->queued_with_cov),
           ((double)afl->queued_with_cov) * 100 / afl->queued_paths);
 
   SAYF("  new edges on : " cRST "%-22s" bSTG bV "\n", tmp);
 
-  sprintf(tmp, "%s (%s%s unique)", DI(afl->total_crashes),
-          DI(afl->unique_crashes),
+  sprintf(tmp, "%s (%s%s unique)", u_stringify_int(IB(0), afl->total_crashes),
+          u_stringify_int(IB(1), afl->unique_crashes),
           (afl->unique_crashes >= KEEP_UNIQUE_CRASH) ? "+" : "");
 
   if (afl->crash_mode) {
 
     SAYF(bV bSTOP " total execs : " cRST "%-20s " bSTG bV bSTOP
                   "   new crashes : %s%-22s" bSTG         bV "\n",
-         DI(afl->total_execs), afl->unique_crashes ? cLRD : cRST, tmp);
+         u_stringify_int(IB(0), afl->total_execs),
+         afl->unique_crashes ? cLRD : cRST, tmp);
 
   } else {
 
     SAYF(bV bSTOP " total execs : " cRST "%-20s " bSTG bV bSTOP
                   " total crashes : %s%-22s" bSTG         bV "\n",
-         DI(afl->total_execs), afl->unique_crashes ? cLRD : cRST, tmp);
+         u_stringify_int(IB(0), afl->total_execs),
+         afl->unique_crashes ? cLRD : cRST, tmp);
 
   }
 
   /* Show a warning about slow execution. */
 
-  if (avg_exec < 100) {
+  if (afl->stats_avg_exec < 100) {
 
-    sprintf(tmp, "%s/sec (%s)", DF(avg_exec),
-            avg_exec < 20 ? "zzzz..." : "slow!");
+    sprintf(tmp, "%s/sec (%s)", u_stringify_float(IB(0), afl->stats_avg_exec),
+            afl->stats_avg_exec < 20 ? "zzzz..." : "slow!");
 
     SAYF(bV bSTOP "  exec speed : " cLRD "%-20s ", tmp);
 
   } else {
 
-    sprintf(tmp, "%s/sec", DF(avg_exec));
+    sprintf(tmp, "%s/sec", u_stringify_float(IB(0), afl->stats_avg_exec));
     SAYF(bV bSTOP "  exec speed : " cRST "%-20s ", tmp);
 
   }
 
-  sprintf(tmp, "%s (%s%s unique)", DI(afl->total_tmouts),
-          DI(afl->unique_tmouts),
+  sprintf(tmp, "%s (%s%s unique)", u_stringify_int(IB(0), afl->total_tmouts),
+          u_stringify_int(IB(1), afl->unique_tmouts),
           (afl->unique_hangs >= KEEP_UNIQUE_HANG) ? "+" : "");
 
   SAYF(bSTG bV bSTOP "  total tmouts : " cRST "%-22s" bSTG bV "\n", tmp);
@@ -547,70 +590,81 @@ void show_stats(afl_state_t *afl) {
 
   } else {
 
-    sprintf(
-        tmp, "%s/%s, %s/%s, %s/%s", DI(afl->stage_finds[STAGE_FLIP1]),
-        DI(afl->stage_cycles[STAGE_FLIP1]), DI(afl->stage_finds[STAGE_FLIP2]),
-        DI(afl->stage_cycles[STAGE_FLIP2]), DI(afl->stage_finds[STAGE_FLIP4]),
-        DI(afl->stage_cycles[STAGE_FLIP4]));
+    sprintf(tmp, "%s/%s, %s/%s, %s/%s",
+            u_stringify_int(IB(0), afl->stage_finds[STAGE_FLIP1]),
+            u_stringify_int(IB(1), afl->stage_cycles[STAGE_FLIP1]),
+            u_stringify_int(IB(2), afl->stage_finds[STAGE_FLIP2]),
+            u_stringify_int(IB(3), afl->stage_cycles[STAGE_FLIP2]),
+            u_stringify_int(IB(4), afl->stage_finds[STAGE_FLIP4]),
+            u_stringify_int(IB(5), afl->stage_cycles[STAGE_FLIP4]));
 
   }
 
   SAYF(bV bSTOP "   bit flips : " cRST "%-36s " bSTG bV bSTOP
                 "    levels : " cRST "%-10s" bSTG       bV "\n",
-       tmp, DI(afl->max_depth));
+       tmp, u_stringify_int(IB(0), afl->max_depth));
 
   if (!afl->skip_deterministic)
-    sprintf(
-        tmp, "%s/%s, %s/%s, %s/%s", DI(afl->stage_finds[STAGE_FLIP8]),
-        DI(afl->stage_cycles[STAGE_FLIP8]), DI(afl->stage_finds[STAGE_FLIP16]),
-        DI(afl->stage_cycles[STAGE_FLIP16]), DI(afl->stage_finds[STAGE_FLIP32]),
-        DI(afl->stage_cycles[STAGE_FLIP32]));
+    sprintf(tmp, "%s/%s, %s/%s, %s/%s",
+            u_stringify_int(IB(0), afl->stage_finds[STAGE_FLIP8]),
+            u_stringify_int(IB(1), afl->stage_cycles[STAGE_FLIP8]),
+            u_stringify_int(IB(2), afl->stage_finds[STAGE_FLIP16]),
+            u_stringify_int(IB(3), afl->stage_cycles[STAGE_FLIP16]),
+            u_stringify_int(IB(4), afl->stage_finds[STAGE_FLIP32]),
+            u_stringify_int(IB(5), afl->stage_cycles[STAGE_FLIP32]));
 
   SAYF(bV bSTOP "  byte flips : " cRST "%-36s " bSTG bV bSTOP
                 "   pending : " cRST "%-10s" bSTG       bV "\n",
-       tmp, DI(afl->pending_not_fuzzed));
+       tmp, u_stringify_int(IB(0), afl->pending_not_fuzzed));
 
   if (!afl->skip_deterministic)
-    sprintf(tmp, "%s/%s, %s/%s, %s/%s", DI(afl->stage_finds[STAGE_ARITH8]),
-            DI(afl->stage_cycles[STAGE_ARITH8]),
-            DI(afl->stage_finds[STAGE_ARITH16]),
-            DI(afl->stage_cycles[STAGE_ARITH16]),
-            DI(afl->stage_finds[STAGE_ARITH32]),
-            DI(afl->stage_cycles[STAGE_ARITH32]));
+    sprintf(tmp, "%s/%s, %s/%s, %s/%s",
+            u_stringify_int(IB(0), afl->stage_finds[STAGE_ARITH8]),
+            u_stringify_int(IB(1), afl->stage_cycles[STAGE_ARITH8]),
+            u_stringify_int(IB(2), afl->stage_finds[STAGE_ARITH16]),
+            u_stringify_int(IB(3), afl->stage_cycles[STAGE_ARITH16]),
+            u_stringify_int(IB(4), afl->stage_finds[STAGE_ARITH32]),
+            u_stringify_int(IB(5), afl->stage_cycles[STAGE_ARITH32]));
 
   SAYF(bV bSTOP " arithmetics : " cRST "%-36s " bSTG bV bSTOP
                 "  pend fav : " cRST "%-10s" bSTG       bV "\n",
-       tmp, DI(afl->pending_favored));
+       tmp, u_stringify_int(IB(0), afl->pending_favored));
 
   if (!afl->skip_deterministic)
-    sprintf(tmp, "%s/%s, %s/%s, %s/%s", DI(afl->stage_finds[STAGE_INTEREST8]),
-            DI(afl->stage_cycles[STAGE_INTEREST8]),
-            DI(afl->stage_finds[STAGE_INTEREST16]),
-            DI(afl->stage_cycles[STAGE_INTEREST16]),
-            DI(afl->stage_finds[STAGE_INTEREST32]),
-            DI(afl->stage_cycles[STAGE_INTEREST32]));
+    sprintf(tmp, "%s/%s, %s/%s, %s/%s",
+            u_stringify_int(IB(0), afl->stage_finds[STAGE_INTEREST8]),
+            u_stringify_int(IB(1), afl->stage_cycles[STAGE_INTEREST8]),
+            u_stringify_int(IB(2), afl->stage_finds[STAGE_INTEREST16]),
+            u_stringify_int(IB(3), afl->stage_cycles[STAGE_INTEREST16]),
+            u_stringify_int(IB(4), afl->stage_finds[STAGE_INTEREST32]),
+            u_stringify_int(IB(5), afl->stage_cycles[STAGE_INTEREST32]));
 
   SAYF(bV bSTOP "  known ints : " cRST "%-36s " bSTG bV bSTOP
                 " own finds : " cRST "%-10s" bSTG       bV "\n",
-       tmp, DI(afl->queued_discovered));
+       tmp, u_stringify_int(IB(0), afl->queued_discovered));
 
   if (!afl->skip_deterministic)
-    sprintf(tmp, "%s/%s, %s/%s, %s/%s", DI(afl->stage_finds[STAGE_EXTRAS_UO]),
-            DI(afl->stage_cycles[STAGE_EXTRAS_UO]),
-            DI(afl->stage_finds[STAGE_EXTRAS_UI]),
-            DI(afl->stage_cycles[STAGE_EXTRAS_UI]),
-            DI(afl->stage_finds[STAGE_EXTRAS_AO]),
-            DI(afl->stage_cycles[STAGE_EXTRAS_AO]));
+    sprintf(tmp, "%s/%s, %s/%s, %s/%s",
+            u_stringify_int(IB(0), afl->stage_finds[STAGE_EXTRAS_UO]),
+            u_stringify_int(IB(1), afl->stage_cycles[STAGE_EXTRAS_UO]),
+            u_stringify_int(IB(2), afl->stage_finds[STAGE_EXTRAS_UI]),
+            u_stringify_int(IB(3), afl->stage_cycles[STAGE_EXTRAS_UI]),
+            u_stringify_int(IB(4), afl->stage_finds[STAGE_EXTRAS_AO]),
+            u_stringify_int(IB(5), afl->stage_cycles[STAGE_EXTRAS_AO]));
 
   SAYF(bV bSTOP "  dictionary : " cRST "%-36s " bSTG bV bSTOP
                 "  imported : " cRST "%-10s" bSTG       bV "\n",
-       tmp, afl->sync_id ? DI(afl->queued_imported) : (u8 *)"n/a");
-
-  sprintf(
-      tmp, "%s/%s, %s/%s, %s/%s", DI(afl->stage_finds[STAGE_HAVOC]),
-      DI(afl->stage_cycles[STAGE_HAVOC]), DI(afl->stage_finds[STAGE_SPLICE]),
-      DI(afl->stage_cycles[STAGE_SPLICE]), DI(afl->stage_finds[STAGE_RADAMSA]),
-      DI(afl->stage_cycles[STAGE_RADAMSA]));
+       tmp,
+       afl->sync_id ? u_stringify_int(IB(0), afl->queued_imported)
+                    : (u8 *)"n/a");
+
+  sprintf(tmp, "%s/%s, %s/%s, %s/%s",
+          u_stringify_int(IB(0), afl->stage_finds[STAGE_HAVOC]),
+          u_stringify_int(IB(1), afl->stage_cycles[STAGE_HAVOC]),
+          u_stringify_int(IB(2), afl->stage_finds[STAGE_SPLICE]),
+          u_stringify_int(IB(3), afl->stage_cycles[STAGE_SPLICE]),
+          u_stringify_int(IB(4), afl->stage_finds[STAGE_RADAMSA]),
+          u_stringify_int(IB(5), afl->stage_cycles[STAGE_RADAMSA]));
 
   SAYF(bV bSTOP "   havoc/rad : " cRST "%-36s " bSTG bV bSTOP, tmp);
 
@@ -631,23 +685,25 @@ void show_stats(afl_state_t *afl) {
   if (afl->shm.cmplog_mode) {
 
     sprintf(tmp, "%s/%s, %s/%s, %s/%s, %s/%s",
-            DI(afl->stage_finds[STAGE_PYTHON]),
-            DI(afl->stage_cycles[STAGE_PYTHON]),
-            DI(afl->stage_finds[STAGE_CUSTOM_MUTATOR]),
-            DI(afl->stage_cycles[STAGE_CUSTOM_MUTATOR]),
-            DI(afl->stage_finds[STAGE_COLORIZATION]),
-            DI(afl->stage_cycles[STAGE_COLORIZATION]),
-            DI(afl->stage_finds[STAGE_ITS]), DI(afl->stage_cycles[STAGE_ITS]));
+            u_stringify_int(IB(0), afl->stage_finds[STAGE_PYTHON]),
+            u_stringify_int(IB(1), afl->stage_cycles[STAGE_PYTHON]),
+            u_stringify_int(IB(2), afl->stage_finds[STAGE_CUSTOM_MUTATOR]),
+            u_stringify_int(IB(3), afl->stage_cycles[STAGE_CUSTOM_MUTATOR]),
+            u_stringify_int(IB(4), afl->stage_finds[STAGE_COLORIZATION]),
+            u_stringify_int(IB(5), afl->stage_cycles[STAGE_COLORIZATION]),
+            u_stringify_int(IB(6), afl->stage_finds[STAGE_ITS]),
+            u_stringify_int(IB(7), afl->stage_cycles[STAGE_ITS]));
 
     SAYF(bV bSTOP "   custom/rq : " cRST "%-36s " bSTG bVR bH20 bH2 bH bRB "\n",
          tmp);
 
   } else {
 
-    sprintf(tmp, "%s/%s, %s/%s", DI(afl->stage_finds[STAGE_PYTHON]),
-            DI(afl->stage_cycles[STAGE_PYTHON]),
-            DI(afl->stage_finds[STAGE_CUSTOM_MUTATOR]),
-            DI(afl->stage_cycles[STAGE_CUSTOM_MUTATOR]));
+    sprintf(tmp, "%s/%s, %s/%s",
+            u_stringify_int(IB(0), afl->stage_finds[STAGE_PYTHON]),
+            u_stringify_int(IB(1), afl->stage_cycles[STAGE_PYTHON]),
+            u_stringify_int(IB(2), afl->stage_finds[STAGE_CUSTOM_MUTATOR]),
+            u_stringify_int(IB(3), afl->stage_cycles[STAGE_CUSTOM_MUTATOR]));
 
     SAYF(bV bSTOP "   py/custom : " cRST "%-36s " bSTG bVR bH20 bH2 bH bRB "\n",
          tmp);
@@ -663,7 +719,7 @@ void show_stats(afl_state_t *afl) {
     sprintf(tmp, "%0.02f%%/%s, ",
             ((double)(afl->bytes_trim_in - afl->bytes_trim_out)) * 100 /
                 afl->bytes_trim_in,
-            DI(afl->trim_execs));
+            u_stringify_int(IB(0), afl->trim_execs));
 
   }
 
@@ -688,8 +744,9 @@ void show_stats(afl_state_t *afl) {
 
   if (afl->mutator) {
 
-    sprintf(tmp, "%s/%s", DI(afl->stage_finds[STAGE_CUSTOM_MUTATOR]),
-            DI(afl->stage_cycles[STAGE_CUSTOM_MUTATOR]));
+    sprintf(tmp, "%s/%s",
+            u_stringify_int(IB(0), afl->stage_finds[STAGE_CUSTOM_MUTATOR]),
+            u_stringify_int(IB(1), afl->stage_cycles[STAGE_CUSTOM_MUTATOR]));
     SAYF(bV bSTOP " custom mut. : " cRST "%-36s " bSTG bV RESET_G1, tmp);
 
   } else {
@@ -702,6 +759,8 @@ void show_stats(afl_state_t *afl) {
 
   if (afl->cpu_core_count) {
 
+    char *spacing = SP10, snap[24] = " " cLGN "snapshot" cRST " ";
+
     double cur_runnable = get_runnable_processes();
     u32    cur_utilization = cur_runnable * 100 / afl->cpu_core_count;
 
@@ -716,23 +775,25 @@ void show_stats(afl_state_t *afl) {
 
     if (!afl->no_cpu_meter_red && cur_utilization >= 150) cpu_color = cLRD;
 
+    if (afl->fsrv.snapshot) spacing = snap;
+
 #ifdef HAVE_AFFINITY
 
     if (afl->cpu_aff >= 0) {
 
-      SAYF(SP10 cGRA "[cpu%03u:%s%3u%%" cGRA "]\r" cRST, MIN(afl->cpu_aff, 999),
-           cpu_color, MIN(cur_utilization, 999));
+      SAYF("%s" cGRA "[cpu%03u:%s%3u%%" cGRA "]\r" cRST, spacing,
+           MIN(afl->cpu_aff, 999), cpu_color, MIN(cur_utilization, 999));
 
     } else {
 
-      SAYF(SP10 cGRA "   [cpu:%s%3u%%" cGRA "]\r" cRST, cpu_color,
+      SAYF("%s" cGRA "   [cpu:%s%3u%%" cGRA "]\r" cRST, spacing, cpu_color,
            MIN(cur_utilization, 999));
 
     }
 
 #else
 
-    SAYF(SP10 cGRA "   [cpu:%s%3u%%" cGRA "]\r" cRST, cpu_color,
+    SAYF("%s" cGRA "   [cpu:%s%3u%%" cGRA "]\r" cRST, spacing, cpu_color,
          MIN(cur_utilization, 999));
 
 #endif                                                    /* ^HAVE_AFFINITY */
@@ -744,6 +805,8 @@ void show_stats(afl_state_t *afl) {
   /* Last line */
   SAYF(SET_G1 "\n" bSTG bLB bH30 bH20 bH2 bRB bSTOP cRST RESET_G1);
 
+#undef IB
+
   /* Hallelujah! */
 
   fflush(0);
@@ -762,6 +825,9 @@ void show_init_stats(afl_state_t *afl) {
   u64                 avg_us = 0;
   u32                 max_len = 0;
 
+  u8 val_bufs[4][STRINGIFY_VAL_SIZE_MAX];
+#define IB(i) val_bufs[(i)], sizeof(val_bufs[(i)])
+
   if (afl->total_cal_cycles) avg_us = afl->total_cal_us / afl->total_cal_cycles;
 
   while (q) {
@@ -780,7 +846,7 @@ void show_init_stats(afl_state_t *afl) {
 
   SAYF("\n");
 
-  if (avg_us > ((afl->qemu_mode || afl->unicorn_mode) ? 50000 : 10000))
+  if (avg_us > ((afl->fsrv.qemu_mode || afl->unicorn_mode) ? 50000 : 10000))
     WARNF(cLRD "The target binary is pretty slow! See %s/perf_tips.md.",
           doc_path);
 
@@ -797,10 +863,10 @@ void show_init_stats(afl_state_t *afl) {
 
     if (max_len > 50 * 1024)
       WARNF(cLRD "Some test cases are huge (%s) - see %s/perf_tips.md!",
-            DMS(max_len), doc_path);
+            stringify_mem_size(IB(0), max_len), doc_path);
     else if (max_len > 10 * 1024)
-      WARNF("Some test cases are big (%s) - see %s/perf_tips.md.", DMS(max_len),
-            doc_path);
+      WARNF("Some test cases are big (%s) - see %s/perf_tips.md.",
+            stringify_mem_size(IB(0), max_len), doc_path);
 
     if (afl->useless_at_start && !afl->in_bitmap)
       WARNF(cLRD "Some test cases look useless. Consider using a smaller set.");
@@ -824,7 +890,8 @@ void show_init_stats(afl_state_t *afl) {
       max_bits,
       ((double)afl->total_bitmap_size) /
           (afl->total_bitmap_entries ? afl->total_bitmap_entries : 1),
-      DI(min_us), DI(max_us), DI(avg_us));
+      stringify_int(IB(0), min_us), stringify_int(IB(1), max_us),
+      stringify_int(IB(2), avg_us));
 
   if (!afl->timeout_given) {
 
@@ -868,6 +935,7 @@ void show_init_stats(afl_state_t *afl) {
     afl->hang_tmout = MIN(EXEC_TIMEOUT, afl->fsrv.exec_tmout * 2 + 100);
 
   OKF("All set and ready to roll!");
+#undef IB
 
 }
 
diff --git a/src/afl-fuzz.c b/src/afl-fuzz.c
index b89bccb4..2320be5a 100644
--- a/src/afl-fuzz.c
+++ b/src/afl-fuzz.c
@@ -24,8 +24,7 @@
  */
 
 #include "afl-fuzz.h"
-
-u8 be_quiet = 0;
+#include "cmplog.h"
 
 static u8 *get_libradamsa_path(u8 *own_loc) {
 
@@ -96,8 +95,8 @@ static void usage(afl_state_t *afl, u8 *argv0, int more_help) {
       "Execution control settings:\n"
       "  -p schedule   - power schedules recompute a seed's performance "
       "score.\n"
-      "                  <explore (default), fast, coe, lin, quad, exploit, "
-      "mmopt>\n"
+      "                  <explore(default), fast, coe, lin, quad, exploit, "
+      "mmopt, rare>\n"
       "                  see docs/power_schedules.md\n"
       "  -f file       - location read by the fuzzed program (stdin)\n"
       "  -t msec       - timeout for each run (auto-scaled, 50-%d ms)\n"
@@ -130,12 +129,11 @@ static void usage(afl_state_t *afl, u8 *argv0, int more_help) {
 
       "Testing settings:\n"
       "  -s seed       - use a fixed seed for the RNG\n"
-      "  -V seconds    - fuzz for a maximum total time of seconds then "
-      "terminate\n"
-      "  -E execs      - fuzz for a maximum number of total executions then "
+      "  -V seconds    - fuzz for a specific time then terminate\n"
+      "  -E execs      - fuzz for a approx. no of total executions then "
       "terminate\n"
-      "  Note: -V/-E are not precise, they are checked after a queue entry "
-      "is done\n  which can be many minutes/execs later\n\n"
+      "                  Note: not precise and can have several more "
+      "executions.\n\n"
 
       "Other stuff:\n"
       "  -T text       - text banner to show on the screen\n"
@@ -145,7 +143,7 @@ static void usage(afl_state_t *afl, u8 *argv0, int more_help) {
       "  -B bitmap.txt - mutate a specific test case, use the out/fuzz_bitmap "
       "file\n"
       "  -C            - crash exploration mode (the peruvian rabbit thing)\n"
-      "  -e ext        - File extension for the temporarily generated test "
+      "  -e ext        - file extension for the temporarily generated test "
       "case\n\n",
       argv0, EXEC_TIMEOUT, MEM_LIMIT);
 
@@ -167,6 +165,7 @@ static void usage(afl_state_t *afl, u8 *argv0, int more_help) {
       "AFL_FORCE_UI: force showing the status screen (for virtual consoles)\n"
       "AFL_NO_CPU_RED: avoid red color for showing very high cpu usage\n"
       "AFL_SKIP_CPUFREQ: do not warn about variable cpu clocking\n"
+      "AFL_NO_SNAPSHOT: do not use the snapshot feature (if the snapshot lkm is loaded)\n"
       "AFL_NO_FORKSRV: run target via execve instead of using the forkserver\n"
       "AFL_NO_ARITH: skip arithmetic mutations in deterministic stage\n"
       "AFL_SHUFFLE_QUEUE: reorder the input queue randomly on startup\n"
@@ -199,6 +198,8 @@ static void usage(afl_state_t *afl, u8 *argv0, int more_help) {
 #ifdef USE_PYTHON
   SAYF("Compiled with %s module support, see docs/custom_mutator.md\n",
        (char *)PYTHON_VERSION);
+#else
+  SAYF("Compiled without python module support\n");
 #endif
 
   SAYF("For additional help please consult %s/README.md\n\n", doc_path);
@@ -212,6 +213,8 @@ static void usage(afl_state_t *afl, u8 *argv0, int more_help) {
 
 static int stricmp(char const *a, char const *b) {
 
+  if (!a || !b) FATAL("Null reference");
+
   for (;; ++a, ++b) {
 
     int d;
@@ -230,8 +233,7 @@ int main(int argc, char **argv_orig, char **envp) {
   u64    prev_queued = 0;
   u32    sync_interval_cnt = 0, seek_to, show_help = 0;
   u8 *   extras_dir = 0;
-  u8     mem_limit_given = 0;
-  u8     exit_1 = !!get_afl_env("AFL_BENCH_JUST_ONE");
+  u8     mem_limit_given = 0, exit_1 = 0;
   char **use_argv;
 
   struct timeval  tv;
@@ -245,12 +247,14 @@ int main(int argc, char **argv_orig, char **envp) {
   afl_state_init(afl);
   afl_fsrv_init(&afl->fsrv);
 
+  if (get_afl_env("AFL_DEBUG")) afl->debug = 1;
   read_afl_environment(afl, envp);
+  exit_1 = !!afl->afl_env.afl_bench_just_one;
 
   SAYF(cCYA "afl-fuzz" VERSION cRST
             " based on afl by Michal Zalewski and a big online community\n");
 
-  doc_path = access(DOC_PATH, F_OK) ? (u8 *)"docs" : doc_path;
+  doc_path = access(DOC_PATH, F_OK) != 0 ? (u8 *)"docs" : (u8 *)DOC_PATH;
 
   gettimeofday(&tv, &tz);
   afl->init_seed = tv.tv_sec ^ tv.tv_usec ^ getpid();
@@ -304,6 +308,10 @@ int main(int argc, char **argv_orig, char **envp) {
 
           afl->schedule = MMOPT;
 
+        } else if (!stricmp(optarg, "rare")) {
+
+          afl->schedule = RARE;
+
         } else if (!stricmp(optarg, "explore") || !stricmp(optarg, "default") ||
 
                    !stricmp(optarg, "normal") || !stricmp(optarg, "afl")) {
@@ -493,8 +501,8 @@ int main(int argc, char **argv_orig, char **envp) {
 
       case 'Q':                                                /* QEMU mode */
 
-        if (afl->qemu_mode) FATAL("Multiple -Q options not supported");
-        afl->qemu_mode = 1;
+        if (afl->fsrv.qemu_mode) FATAL("Multiple -Q options not supported");
+        afl->fsrv.qemu_mode = 1;
 
         if (!mem_limit_given) afl->fsrv.mem_limit = MEM_LIMIT_QEMU;
 
@@ -519,7 +527,7 @@ int main(int argc, char **argv_orig, char **envp) {
       case 'W':                                           /* Wine+QEMU mode */
 
         if (afl->use_wine) FATAL("Multiple -W options not supported");
-        afl->qemu_mode = 1;
+        afl->fsrv.qemu_mode = 1;
         afl->use_wine = 1;
 
         if (!mem_limit_given) afl->fsrv.mem_limit = 0;
@@ -682,13 +690,14 @@ int main(int argc, char **argv_orig, char **envp) {
   OKF("MOpt Mutator from github.com/puppet-meteor/MOpt-AFL");
 
   if (afl->sync_id && afl->force_deterministic &&
-      getenv("AFL_CUSTOM_MUTATOR_ONLY"))
+      afl->afl_env.afl_custom_mutator_only)
     WARNF(
         "Using -M master with the AFL_CUSTOM_MUTATOR_ONLY mutator options will "
         "result in no deterministic mutations being done!");
 
   if (afl->fixed_seed) OKF("Running with fixed seed: %u", (u32)afl->init_seed);
   srandom((u32)afl->init_seed);
+  srand((u32)afl->init_seed);  // in case it is a different implementation
 
   if (afl->use_radamsa) {
 
@@ -719,6 +728,16 @@ int main(int argc, char **argv_orig, char **envp) {
 
   }
 
+#if defined(__SANITIZE_ADDRESS__)
+  if (afl->fsrv.mem_limit) {
+
+    WARNF("in the ASAN build we disable all memory limits");
+    afl->fsrv.mem_limit = 0;
+
+  }
+
+#endif
+
   setup_signal_handlers();
   check_asan_opts();
 
@@ -732,7 +751,7 @@ int main(int argc, char **argv_orig, char **envp) {
   if (afl->dumb_mode) {
 
     if (afl->crash_mode) FATAL("-C and -n are mutually exclusive");
-    if (afl->qemu_mode) FATAL("-Q and -n are mutually exclusive");
+    if (afl->fsrv.qemu_mode) FATAL("-Q and -n are mutually exclusive");
     if (afl->unicorn_mode) FATAL("-U and -n are mutually exclusive");
 
   }
@@ -760,8 +779,9 @@ int main(int argc, char **argv_orig, char **envp) {
     case LIN: OKF("Using linear power schedule (LIN)"); break;
     case QUAD: OKF("Using quadratic power schedule (QUAD)"); break;
     case MMOPT: OKF("Using modified MOpt power schedule (MMOPT)"); break;
+    case RARE: OKF("Using rare edge focus power schedule (RARE)"); break;
     case EXPLORE:
-      OKF("Using exploration-based constant power schedule (EXPLORE)");
+      OKF("Using exploration-based constant power schedule (EXPLORE, default)");
       break;
     default: FATAL("Unknown power schedule"); break;
 
@@ -799,7 +819,7 @@ int main(int argc, char **argv_orig, char **envp) {
 
   if (afl->afl_env.afl_preload) {
 
-    if (afl->qemu_mode) {
+    if (afl->fsrv.qemu_mode) {
 
       u8 *qemu_preload = getenv("QEMU_SET_ENV");
       u8 *afl_preload = getenv("AFL_PRELOAD");
@@ -845,7 +865,7 @@ int main(int argc, char **argv_orig, char **envp) {
   check_if_tty(afl);
   if (afl->afl_env.afl_force_ui) afl->not_on_tty = 0;
 
-  if (get_afl_env("AFL_CAL_FAST")) {
+  if (afl->afl_env.afl_cal_fast) {
 
     /* Use less calibration cycles, for slow applications */
     afl->cal_cycles = 3;
@@ -853,8 +873,6 @@ int main(int argc, char **argv_orig, char **envp) {
 
   }
 
-  if (get_afl_env("AFL_DEBUG")) afl->debug = 1;
-
   if (afl->afl_env.afl_custom_mutator_only) {
 
     /* This ensures we don't proceed to havoc/splice */
@@ -902,21 +920,21 @@ int main(int argc, char **argv_orig, char **envp) {
   if ((afl->tmp_dir = afl->afl_env.afl_tmpdir) != NULL &&
       !afl->in_place_resume) {
 
-    char tmpfile[afl->file_extension ? strlen(afl->tmp_dir) + 1 + 10 + 1 +
-                                           strlen(afl->file_extension) + 1
-                                     : strlen(afl->tmp_dir) + 1 + 10 + 1];
+    char tmpfile[PATH_MAX];
+
     if (afl->file_extension) {
 
-      sprintf(tmpfile, "%s/.cur_input.%s", afl->tmp_dir, afl->file_extension);
+      snprintf(tmpfile, PATH_MAX, "%s/.cur_input.%s", afl->tmp_dir,
+               afl->file_extension);
 
     } else {
 
-      sprintf(tmpfile, "%s/.cur_input", afl->tmp_dir);
+      snprintf(tmpfile, PATH_MAX, "%s/.cur_input", afl->tmp_dir);
 
     }
 
-    if (access(tmpfile, F_OK) !=
-        -1)  // there is still a race condition here, but well ...
+    /* there is still a race condition here, but well ... */
+    if (access(tmpfile, F_OK) != -1)
       FATAL(
           "AFL_TMPDIR already has an existing temporary input file: %s - if "
           "this is not from another instance, then just remove the file.",
@@ -968,7 +986,7 @@ int main(int argc, char **argv_orig, char **envp) {
 
     if (afl->unicorn_mode)
       FATAL("CmpLog and Unicorn mode are not compatible at the moment, sorry");
-    if (!afl->qemu_mode) check_binary(afl, afl->cmplog_binary);
+    if (!afl->fsrv.qemu_mode) check_binary(afl, afl->cmplog_binary);
 
   }
 
@@ -976,7 +994,7 @@ int main(int argc, char **argv_orig, char **envp) {
 
   afl->start_time = get_cur_time();
 
-  if (afl->qemu_mode) {
+  if (afl->fsrv.qemu_mode) {
 
     if (afl->use_wine)
       use_argv = get_wine_argv(argv[0], &afl->fsrv.target_path, argc - optind,
@@ -992,6 +1010,21 @@ int main(int argc, char **argv_orig, char **envp) {
   }
 
   afl->argv = use_argv;
+
+  if (afl->cmplog_binary) {
+
+    ACTF("Spawning cmplog forkserver");
+    afl_fsrv_init_dup(&afl->cmplog_fsrv, &afl->fsrv);
+    // TODO: this is semi-nice
+    afl->cmplog_fsrv.trace_bits = afl->fsrv.trace_bits;
+    afl->cmplog_fsrv.qemu_mode = afl->fsrv.qemu_mode;
+    afl->cmplog_fsrv.cmplog_binary = afl->cmplog_binary;
+    afl->cmplog_fsrv.init_child_func = cmplog_exec_child;
+    afl_fsrv_start(&afl->cmplog_fsrv, afl->argv, &afl->stop_soon,
+                   afl->afl_env.afl_debug_child_output);
+
+  }
+
   perform_dry_run(afl);
 
   cull_queue(afl);
@@ -1040,9 +1073,9 @@ int main(int argc, char **argv_orig, char **envp) {
 
       }
 
-      show_stats(afl);
+      // show_stats(afl);
 
-      if (afl->not_on_tty) {
+      if (unlikely(afl->not_on_tty)) {
 
         ACTF("Entering queue cycle %llu.", afl->queue_cycle);
         fflush(stdout);
@@ -1086,64 +1119,15 @@ int main(int argc, char **argv_orig, char **envp) {
     afl->queue_cur = afl->queue_cur->next;
     ++afl->current_entry;
 
-    if (afl->most_time_key == 1) {
-
-      u64 cur_ms_lv = get_cur_time();
-      if (afl->most_time * 1000 < cur_ms_lv - afl->start_time) {
-
-        afl->most_time_key = 2;
-        afl->stop_soon = 2;
-        break;
-
-      }
-
-    }
-
-    if (afl->most_execs_key == 1) {
-
-      if (afl->most_execs <= afl->total_execs) {
-
-        afl->most_execs_key = 2;
-        afl->stop_soon = 2;
-        break;
-
-      }
-
-    }
-
-  }
-
-  if (afl->queue_cur) show_stats(afl);
-
-  /*
-   * ATTENTION - the following 10 lines were copied from a PR to Google's afl
-   * repository - and slightly fixed.
-   * These lines have nothing to do with the purpose of original PR though.
-   * Looks like when an exit condition was completed (AFL_BENCH_JUST_ONE,
-   * AFL_EXIT_WHEN_DONE or AFL_BENCH_UNTIL_CRASH) the child and forkserver
-   * where not killed?
-   */
-  /* if we stopped programmatically, we kill the forkserver and the current
-     runner. if we stopped manually, this is done by the signal handler */
-  if (afl->stop_soon == 2) {
-
-    if (afl->fsrv.child_pid > 0) kill(afl->fsrv.child_pid, SIGKILL);
-    if (afl->fsrv.fsrv_pid > 0) kill(afl->fsrv.fsrv_pid, SIGKILL);
-    if (afl->cmplog_child_pid > 0) kill(afl->cmplog_child_pid, SIGKILL);
-    if (afl->cmplog_fsrv_pid > 0) kill(afl->cmplog_fsrv_pid, SIGKILL);
-    /* Now that we've killed the forkserver, we wait for it to be able to get
-     * rusage stats. */
-    if (waitpid(afl->fsrv.fsrv_pid, NULL, 0) <= 0) { WARNF("error waitpid\n"); }
-
   }
 
   write_bitmap(afl);
-  write_stats_file(afl, 0, 0, 0);
   maybe_update_plot_file(afl, 0, 0);
   save_auto(afl);
 
 stop_fuzzing:
 
+  write_stats_file(afl, 0, 0, 0);
   afl->force_ui_update = 1;  // ensure the screen is reprinted
   show_stats(afl);           // print the screen one last time
 
@@ -1177,6 +1161,7 @@ stop_fuzzing:
   ck_free(afl->fsrv.target_path);
   ck_free(afl->fsrv.out_file);
   ck_free(afl->sync_id);
+  afl_state_deinit(afl);
   free(afl);                                                 /* not tracked */
 
   argv_cpy_free(argv);
diff --git a/src/afl-gcc.c b/src/afl-gcc.c
index b0153b49..32cd36cb 100644
--- a/src/afl-gcc.c
+++ b/src/afl-gcc.c
@@ -142,12 +142,12 @@ static void edit_params(u32 argc, char **argv) {
     if (!strcmp(name, "afl-clang++")) {
 
       u8 *alt_cxx = getenv("AFL_CXX");
-      cc_params[0] = alt_cxx ? alt_cxx : (u8 *)"clang++";
+      cc_params[0] = alt_cxx && *alt_cxx ? alt_cxx : (u8 *)"clang++";
 
     } else {
 
       u8 *alt_cc = getenv("AFL_CC");
-      cc_params[0] = alt_cc ? alt_cc : (u8 *)"clang";
+      cc_params[0] = alt_cc && *alt_cc ? alt_cc : (u8 *)"clang";
 
     }
 
@@ -187,17 +187,17 @@ static void edit_params(u32 argc, char **argv) {
     if (!strcmp(name, "afl-g++")) {
 
       u8 *alt_cxx = getenv("AFL_CXX");
-      cc_params[0] = alt_cxx ? alt_cxx : (u8 *)"g++";
+      cc_params[0] = alt_cxx && *alt_cxx ? alt_cxx : (u8 *)"g++";
 
     } else if (!strcmp(name, "afl-gcj")) {
 
       u8 *alt_cc = getenv("AFL_GCJ");
-      cc_params[0] = alt_cc ? alt_cc : (u8 *)"gcj";
+      cc_params[0] = alt_cc && *alt_cc ? alt_cc : (u8 *)"gcj";
 
     } else {
 
       u8 *alt_cc = getenv("AFL_CC");
-      cc_params[0] = alt_cc ? alt_cc : (u8 *)"gcc";
+      cc_params[0] = alt_cc && *alt_cc ? alt_cc : (u8 *)"gcc";
 
     }
 
diff --git a/src/afl-gotcpu.c b/src/afl-gotcpu.c
index 70ed4dbc..6c2fa147 100644
--- a/src/afl-gotcpu.c
+++ b/src/afl-gotcpu.c
@@ -90,7 +90,7 @@ static u64 get_cpu_usage_us(void) {
 
 static u32 measure_preemption(u32 target_ms) {
 
-  static volatile u32 v1, v2;
+  volatile u32 v1, v2 = 0;
 
   u64 st_t, en_t, st_c, en_c, real_delta, slice_delta;
   s32 loop_repeats = 0;
diff --git a/src/afl-sharedmem.c b/src/afl-sharedmem.c
index 7bdf8d03..9db84e77 100644
--- a/src/afl-sharedmem.c
+++ b/src/afl-sharedmem.c
@@ -95,7 +95,7 @@ void afl_shm_deinit(sharedmem_t *shm) {
 
 /* At exit, remove all leftover maps */
 
-void afl_shm_atexit() {
+void afl_shm_atexit(void) {
 
   LIST_FOREACH(&shm_list, sharedmem_t, { afl_shm_deinit(el); });
 
diff --git a/src/afl-showmap.c b/src/afl-showmap.c
index 0051bbec..c84fa36c 100644
--- a/src/afl-showmap.c
+++ b/src/afl-showmap.c
@@ -59,13 +59,10 @@
 #include <sys/types.h>
 #include <sys/resource.h>
 
-u8 be_quiet;
-
 char *stdin_file;                      /* stdin file                        */
 
 u8 *in_dir,                            /* input folder                      */
-    *doc_path,                         /* Path to docs                      */
-        *at_file = NULL;               /* Substitution string for @@        */
+    *at_file = NULL;              /* Substitution string for @@             */
 
 static u8 *in_data;                    /* Input data                        */
 
@@ -84,8 +81,6 @@ u8 quiet_mode,                         /* Hide non-essential messages?      */
 static volatile u8 stop_soon,          /* Ctrl-C pressed?                   */
     child_crashed;                     /* Child crashed?                    */
 
-static u8 qemu_mode;
-
 /* Classify tuple counts. Instead of mapping to individual bits, as in
    afl-fuzz.c, we map to more user-friendly numbers between 1 and 8. */
 
@@ -157,7 +152,8 @@ static u32 write_results_to_file(afl_forkserver_t *fsrv, u8 *outfile) {
 
   if (!strncmp(outfile, "/dev/", 5)) {
 
-    fd = open(outfile, O_WRONLY, 0600);
+    fd = open(outfile, O_WRONLY);
+
     if (fd < 0) PFATAL("Unable to open '%s'", fsrv->out_file);
 
   } else if (!strcmp(outfile, "-")) {
@@ -224,26 +220,6 @@ static u32 write_results(afl_forkserver_t *fsrv) {
 
 }
 
-/* Write output file. */
-
-static s32 write_to_file(u8 *path, u8 *mem, u32 len) {
-
-  s32 ret;
-
-  unlink(path);                                            /* Ignore errors */
-
-  ret = open(path, O_RDWR | O_CREAT | O_EXCL, 0600);
-
-  if (ret < 0) PFATAL("Unable to create '%s'", path);
-
-  ck_write(ret, mem, len, path);
-
-  lseek(ret, 0, SEEK_SET);
-
-  return ret;
-
-}
-
 /* Write modified data to file for testing. If use_stdin is clear, the old file
    is unlinked and a new one is created. Otherwise, out_fd is rewound and
    truncated. */
@@ -263,9 +239,8 @@ static void write_to_testcase(afl_forkserver_t *fsrv, void *mem, u32 len) {
 static u8 run_target_forkserver(afl_forkserver_t *fsrv, char **argv, u8 *mem,
                                 u32 len) {
 
-  static struct itimerval it;
-  static u32              prev_timed_out = 0;
-  int                     status = 0;
+  struct itimerval it;
+  int              status = 0;
 
   memset(fsrv->trace_bits, 0, MAP_SIZE);
   MEM_BARRIER();
@@ -277,7 +252,7 @@ static u8 run_target_forkserver(afl_forkserver_t *fsrv, char **argv, u8 *mem,
   /* we have the fork server up and running, so simply
      tell it to have at it, and then read back PID. */
 
-  if ((res = write(fsrv->fsrv_ctl_fd, &prev_timed_out, 4)) != 4) {
+  if ((res = write(fsrv->fsrv_ctl_fd, &fsrv->prev_timed_out, 4)) != 4) {
 
     if (stop_soon) return 0;
     RPFATAL(res, "Unable to request new process from fork server (OOM?)");
@@ -505,7 +480,7 @@ static void handle_stop_sig(int sig) {
 
 /* Do basic preparations - persistent fds, filenames, etc. */
 
-static void set_up_environment(void) {
+static void set_up_environment(afl_forkserver_t *fsrv) {
 
   setenv("ASAN_OPTIONS",
          "abort_on_error=1:"
@@ -522,7 +497,7 @@ static void set_up_environment(void) {
 
   if (get_afl_env("AFL_PRELOAD")) {
 
-    if (qemu_mode) {
+    if (fsrv->qemu_mode) {
 
       u8 *qemu_preload = getenv("QEMU_SET_ENV");
       u8 *afl_preload = getenv("AFL_PRELOAD");
@@ -579,11 +554,6 @@ static void setup_signal_handlers(void) {
   sigaction(SIGINT, &sa, NULL);
   sigaction(SIGTERM, &sa, NULL);
 
-  /* Exec timeout notifications. */
-
-  sa.sa_handler = handle_timeout;
-  sigaction(SIGALRM, &sa, NULL);
-
 }
 
 /* Show banner. */
@@ -826,10 +796,10 @@ int main(int argc, char **argv_orig, char **envp) {
 
       case 'Q':
 
-        if (qemu_mode) FATAL("Multiple -Q options not supported");
+        if (fsrv->qemu_mode) FATAL("Multiple -Q options not supported");
         if (!mem_limit_given) fsrv->mem_limit = MEM_LIMIT_QEMU;
 
-        qemu_mode = 1;
+        fsrv->qemu_mode = 1;
         break;
 
       case 'U':
@@ -843,7 +813,7 @@ int main(int argc, char **argv_orig, char **envp) {
       case 'W':                                           /* Wine+QEMU mode */
 
         if (use_wine) FATAL("Multiple -W options not supported");
-        qemu_mode = 1;
+        fsrv->qemu_mode = 1;
         use_wine = 1;
 
         if (!mem_limit_given) fsrv->mem_limit = 0;
@@ -888,7 +858,7 @@ int main(int argc, char **argv_orig, char **envp) {
   fsrv->trace_bits = afl_shm_init(&shm, MAP_SIZE, 0);
   setup_signal_handlers();
 
-  set_up_environment();
+  set_up_environment(fsrv);
 
   find_binary(fsrv, argv[optind]);
 
@@ -913,7 +883,7 @@ int main(int argc, char **argv_orig, char **envp) {
   for (i = optind; i < argc; i++)
     if (strcmp(argv[i], "@@") == 0) arg_offset = i;
 
-  if (qemu_mode) {
+  if (fsrv->qemu_mode) {
 
     if (use_wine)
       use_argv = get_wine_argv(argv[0], &fsrv->target_path, argc - optind,
@@ -979,7 +949,8 @@ int main(int argc, char **argv_orig, char **envp) {
 
     }
 
-    afl_fsrv_start(fsrv, use_argv);
+    afl_fsrv_start(fsrv, use_argv, &stop_soon,
+                   get_afl_env("AFL_DEBUG_CHILD_OUTPUT") ? 1 : 0);
 
     while (done == 0 && (dir_ent = readdir(dir_in))) {
 
diff --git a/src/afl-tmin.c b/src/afl-tmin.c
index 17e9af5a..3be6b2c0 100644
--- a/src/afl-tmin.c
+++ b/src/afl-tmin.c
@@ -61,8 +61,7 @@
 static u8 *mask_bitmap;                /* Mask for trace bits (-B)          */
 
 u8 *in_file,                           /* Minimizer input test case         */
-    *output_file,                      /* Minimizer output file             */
-    *doc_path;                         /* Path to docs                      */
+    *output_file;                      /* Minimizer output file             */
 
 static u8 *in_data;                    /* Input data for trimming           */
 
@@ -77,13 +76,10 @@ u8 crash_mode,                         /* Crash-centric mode?               */
     hang_mode,                         /* Minimize as long as it hangs      */
     exit_crash,                        /* Treat non-zero exit as crash?     */
     edges_only,                        /* Ignore hit counts?                */
-    exact_mode,                        /* Require path match for crashes?   */
-    be_quiet;
+    exact_mode;                        /* Require path match for crashes?   */
 
 static volatile u8 stop_soon;          /* Ctrl-C pressed?                   */
 
-static u8 qemu_mode;
-
 /*
  * forkserver section
  */
@@ -247,166 +243,20 @@ static void write_to_testcase(afl_forkserver_t *fsrv, void *mem, u32 len) {
 
 }
 
-/* Handle timeout signal. */
-/*
-static void handle_timeout(int sig) {
-
-  if (child_pid > 0) {
-
-  child_timed_out = 1;
-    kill(child_pid, SIGKILL);
-
-  } else if (child_pid == -1 && forksrv_pid > 0) {
-
-    child_timed_out = 1;
-    kill(forksrv_pid, SIGKILL);
-
-  }
-
-}
-
-*/
-
-/* start the app and it's forkserver */
-/*
-static void init_forkserver(char **argv) {
-
-  static struct itimerval it;
-  int st_pipe[2], ctl_pipe[2];
-  int status = 0;
-  s32 rlen;
-
-  ACTF("Spinning up the fork server...");
-  if (pipe(st_pipe) || pipe(ctl_pipe)) PFATAL("pipe() failed");
-
-  forksrv_pid = fork();
-
-  if (forksrv_pid < 0) PFATAL("fork() failed");
-
-  if (!forksrv_pid) {
-
-    struct rlimit r;
-
-    if (dup2(use_stdin ? out_fd : dev_null_fd, 0) < 0 ||
-        dup2(dev_null_fd, 1) < 0 ||
-        dup2(dev_null_fd, 2) < 0) {
-
-      *(u32*)trace_bits = EXEC_FAIL_SIG;
-      PFATAL("dup2() failed");
-
-    }
-
-    close(dev_null_fd);
-    close(out_fd);
-
-    setsid();
-
-    if (mem_limit) {
-
-      r.rlim_max = r.rlim_cur = ((rlim_t)mem_limit) << 20;
-
-#ifdef RLIMIT_AS
-
-      setrlimit(RLIMIT_AS, &r); // Ignore errors
-
-#else
-
-      setrlimit(RLIMIT_DATA, &r); // Ignore errors
-
-#endif // ^RLIMIT_AS
-
-    }
-
-    r.rlim_max = r.rlim_cur = 0;
-    setrlimit(RLIMIT_CORE, &r); // Ignore errors
-
-    // Set up control and status pipes, close the unneeded original fds.
-
-    if (dup2(ctl_pipe[0], FORKSRV_FD) < 0) PFATAL("dup2() failed");
-    if (dup2(st_pipe[1], FORKSRV_FD + 1) < 0) PFATAL("dup2() failed");
-
-    close(ctl_pipe[0]);
-    close(ctl_pipe[1]);
-    close(st_pipe[0]);
-    close(st_pipe[1]);
-
-    execv(fsrv->target_path, argv);
-
-    *(u32*)trace_bits = EXEC_FAIL_SIG;
-    exit(0);
-
-  }
-
-  // Close the unneeded endpoints.
-
-  close(ctl_pipe[0]);
-  close(st_pipe[1]);
-
-  fsrv_ctl_fd = ctl_pipe[1];
-  fsrv_st_fd  = st_pipe[0];
-
-  // Configure timeout, wait for child, cancel timeout.
-
-  if (exec_tmout) {
-
-    child_timed_out = 0;
-    it.it_value.tv_sec = (exec_tmout * FORK_WAIT_MULT / 1000);
-    it.it_value.tv_usec = ((exec_tmout * FORK_WAIT_MULT) % 1000) * 1000;
-
-  }
-
-  setitimer(ITIMER_REAL, &it, NULL);
-
-  rlen = read(fsrv_st_fd, &status, 4);
-
-  it.it_value.tv_sec = 0;
-  it.it_value.tv_usec = 0;
-  setitimer(ITIMER_REAL, &it, NULL);
-
-  // If we have a four-byte "hello" message from the server, we're all set.
-  // Otherwise, try to figure out what went wrong.
-
-  if (rlen == 4) {
-
-    ACTF("All right - fork server is up.");
-    return;
-
-  }
-
-  if (waitpid(forksrv_pid, &status, 0) <= 0)
-    PFATAL("waitpid() failed");
-
-  u8 child_crashed;
-
-  if (WIFSIGNALED(status))
-    child_crashed = 1;
-
-  if (child_timed_out)
-    SAYF(cLRD "\n+++ Program timed off +++\n" cRST);
-  else if (stop_soon)
-    SAYF(cLRD "\n+++ Program aborted by user +++\n" cRST);
-  else if (child_crashed)
-    SAYF(cLRD "\n+++ Program killed by signal %u +++\n" cRST, WTERMSIG(status));
-
-}
-
-*/
-
 /* Execute target application. Returns 0 if the changes are a dud, or
    1 if they should be kept. */
 
 static u8 run_target(afl_forkserver_t *fsrv, char **argv, u8 *mem, u32 len,
                      u8 first_run) {
 
-  static struct itimerval it;
-  static u32              prev_timed_out = 0;
-  int                     status = 0;
+  struct itimerval it;
+  int              status = 0;
 
   u32 cksum;
 
   fsrv->child_timed_out = 0;
 
-  memset(fsrv->trace_bits, 0, MAP_SIZE);
+  memset(fsrv->trace_bits, 0, fsrv->map_size);
   MEM_BARRIER();
 
   write_to_testcase(fsrv, mem, len);
@@ -416,7 +266,7 @@ static u8 run_target(afl_forkserver_t *fsrv, char **argv, u8 *mem, u32 len,
   /* we have the fork server up and running, so simply
      tell it to have at it, and then read back PID. */
 
-  if ((res = write(fsrv->fsrv_ctl_fd, &prev_timed_out, 4)) != 4) {
+  if ((res = write(fsrv->fsrv_ctl_fd, &fsrv->prev_timed_out, 4)) != 4) {
 
     if (stop_soon) return 0;
     RPFATAL(res, "Unable to request new process from fork server (OOM?)");
@@ -541,7 +391,7 @@ static u8 run_target(afl_forkserver_t *fsrv, char **argv, u8 *mem, u32 len,
 
   }
 
-  cksum = hash32(fsrv->trace_bits, MAP_SIZE, HASH_CONST);
+  cksum = hash32(fsrv->trace_bits, fsrv->map_size, HASH_CONST);
 
   if (first_run) orig_cksum = cksum;
 
@@ -552,17 +402,6 @@ static u8 run_target(afl_forkserver_t *fsrv, char **argv, u8 *mem, u32 len,
 
 }
 
-/* Find first power of two greater or equal to val. */
-
-static u32 next_p2(u32 val) {
-
-  u32 ret = 1;
-  while (val > ret)
-    ret <<= 1;
-  return ret;
-
-}
-
 /* Actually minimize! */
 
 static void minimize(afl_forkserver_t *fsrv, char **argv) {
@@ -580,7 +419,7 @@ static void minimize(afl_forkserver_t *fsrv, char **argv) {
    * BLOCK NORMALIZATION *
    ***********************/
 
-  set_len = next_p2(in_len / TMIN_SET_STEPS);
+  set_len = next_pow2(in_len / TMIN_SET_STEPS);
   set_pos = 0;
 
   if (set_len < TMIN_SET_MIN_SIZE) set_len = TMIN_SET_MIN_SIZE;
@@ -630,7 +469,7 @@ next_pass:
    * BLOCK DELETION *
    ******************/
 
-  del_len = next_p2(in_len / TRIM_START_STEPS);
+  del_len = next_pow2(in_len / TRIM_START_STEPS);
   stage_o_len = in_len;
 
   ACTF(cBRI "Stage #1: " cRST "Removing blocks of data...");
@@ -905,7 +744,7 @@ static void set_up_environment(afl_forkserver_t *fsrv) {
 
   if (get_afl_env("AFL_PRELOAD")) {
 
-    if (qemu_mode) {
+    if (fsrv->qemu_mode) {
 
       u8 *qemu_preload = getenv("QEMU_SET_ENV");
       u8 *afl_preload = getenv("AFL_PRELOAD");
@@ -962,11 +801,6 @@ static void setup_signal_handlers(void) {
   sigaction(SIGINT, &sa, NULL);
   sigaction(SIGTERM, &sa, NULL);
 
-  /* Exec timeout notifications. */
-
-  sa.sa_handler = handle_timeout;
-  sigaction(SIGALRM, &sa, NULL);
-
 }
 
 /* Display usage hints. */
@@ -1193,10 +1027,10 @@ int main(int argc, char **argv_orig, char **envp) {
 
       case 'Q':
 
-        if (qemu_mode) FATAL("Multiple -Q options not supported");
+        if (fsrv->qemu_mode) FATAL("Multiple -Q options not supported");
         if (!mem_limit_given) fsrv->mem_limit = MEM_LIMIT_QEMU;
 
-        qemu_mode = 1;
+        fsrv->qemu_mode = 1;
         break;
 
       case 'U':
@@ -1210,7 +1044,7 @@ int main(int argc, char **argv_orig, char **envp) {
       case 'W':                                           /* Wine+QEMU mode */
 
         if (use_wine) FATAL("Multiple -W options not supported");
-        qemu_mode = 1;
+        fsrv->qemu_mode = 1;
         use_wine = 1;
 
         if (!mem_limit_given) fsrv->mem_limit = 0;
@@ -1271,7 +1105,7 @@ int main(int argc, char **argv_orig, char **envp) {
   find_binary(fsrv, argv[optind]);
   detect_file_args(argv + optind, fsrv->out_file, &fsrv->use_stdin);
 
-  if (qemu_mode) {
+  if (fsrv->qemu_mode) {
 
     if (use_wine)
       use_argv = get_wine_argv(argv[0], &fsrv->target_path, argc - optind,
@@ -1297,7 +1131,8 @@ int main(int argc, char **argv_orig, char **envp) {
 
   read_initial_file();
 
-  afl_fsrv_start(fsrv, use_argv);
+  afl_fsrv_start(fsrv, use_argv, &stop_soon,
+                 get_afl_env("AFL_DEBUG_CHILD_OUTPUT") ? 1 : 0);
 
   ACTF("Performing dry run (mem limit = %llu MB, timeout = %u ms%s)...",
        fsrv->mem_limit, fsrv->exec_tmout, edges_only ? ", edges only" : "");
diff --git a/src/third_party/libradamsa/Makefile b/src/third_party/libradamsa/GNUmakefile
index c5a78ead..c5a78ead 100644
--- a/src/third_party/libradamsa/Makefile
+++ b/src/third_party/libradamsa/GNUmakefile
diff --git a/src/third_party/libradamsa/libradamsa.c b/src/third_party/libradamsa/libradamsa.c
index f3677fa7..27cf91bc 100644
--- a/src/third_party/libradamsa/libradamsa.c
+++ b/src/third_party/libradamsa/libradamsa.c
@@ -2177,7 +2177,7 @@ static uint llen(word *ptr) {
    return len;
 }
 
-static void set_signal_handler() {
+static void set_signal_handler(void) {
    struct sigaction sa;
    sa.sa_handler = signal_handler;
    sigemptyset(&sa.sa_mask);
@@ -2312,7 +2312,7 @@ static word prim_set(word wptr, hval pos, word val) {
    return (word) new;
 }
 
-static void setdown() {
+static void setdown(void) {
    tcsetattr(0, TCSANOW, &tsettings); /* return stdio settings */
 }
 
@@ -30773,7 +30773,7 @@ int secondary(int nargs, char **argv) {
    return 127;
 }
 
-void radamsa_init() {
+void radamsa_init(void) {
    int nobjs=0, nwords=0;
    hp = (byte *) &heap; /* builtin heap */
    state = IFALSE;
@@ -30815,7 +30815,7 @@ size_t copy_list(uint8_t *ptr, word lispval, size_t max) {
       lispval = G(lispval, 2);              // list   = cdr(list)
    }
    if (lispval != INULL && max == 0) {
-      printf("ERROR: lisp return value was not a proper list. Trailing %lu\n", lispval);
+      printf("ERROR: lisp return value was not a proper list. Trailing %lu\n", (unsigned long)lispval);
    }
    return n;
 }
diff --git a/src/third_party/libradamsa/radamsa.h b/src/third_party/libradamsa/radamsa.h
index d54fa2ec..33cccde4 100644
--- a/src/third_party/libradamsa/radamsa.h
+++ b/src/third_party/libradamsa/radamsa.h
@@ -1,15 +1,13 @@
 #include <inttypes.h>
 #include <stddef.h>
 
-extern void radamsa_init();
+extern void radamsa_init(void);
 
-extern size_t radamsa(uint8_t *ptr, size_t len, 
-                      uint8_t *target, size_t max, 
+extern size_t radamsa(uint8_t *ptr, size_t len,
+                      uint8_t *target, size_t max,
                       unsigned int seed);
 
-extern size_t radamsa_inplace(uint8_t *ptr, 
-                              size_t len, 
-                              size_t max, 
+extern size_t radamsa_inplace(uint8_t *ptr,
+                              size_t len,
+                              size_t max,
                               unsigned int seed);
-
-
diff --git a/test-instr.c b/test-instr.c
index 579577a4..84ac0036 100644
--- a/test-instr.c
+++ b/test-instr.c
@@ -1,18 +1,13 @@
 /*
    american fuzzy lop++ - a trivial program to test the build
    --------------------------------------------------------
-
    Originally written by Michal Zalewski
-
    Copyright 2014 Google Inc. All rights reserved.
    Copyright 2019-2020 AFLplusplus Project. All rights reserved.
-
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at:
-
      http://www.apache.org/licenses/LICENSE-2.0
-
  */
 
 #include <stdio.h>
diff --git a/test/checkcommit.sh b/test/checkcommit.sh
new file mode 100755
index 00000000..27d08d36
--- /dev/null
+++ b/test/checkcommit.sh
@@ -0,0 +1,48 @@
+#!/bin/sh
+#
+# Profile afl-fuzz at a given commit: check out the commit, build with -pg
+# appended to CFLAGS/LDFLAGS, run afl-fuzz with AFL_BENCH_JUST_ONE=1 and store
+# the start/stop timestamps, fuzzer_stats and gprof output in "<commit-id>.out".
+#
+CMDLINE="/prg/tests/normal/tiff-4.0.4/tools/thumbnail @@ /dev/null"
+INDIR="/prg/tests/normal/tiff-4.0.4/in-small"
+
+# Show usage when no commit ID is given or when there are too many arguments.
+test -z "$1" -o -n "$4" && {
+  echo "Syntax: $0 commit-id <indir> \"<cmdline>\""
+  echo
+  echo "Switches to the defined commit ID, compiles with profiling and runs"
+  echo "afl-fuzz on a defined target and input directory, saving timing,"
+  echo "fuzzer_stats and profiling output to \"<commit-id>.out\""
+  echo "Honors CFLAGS and LDFLAGS"
+  echo
+  echo "Defaults:"
+  echo "  indir: \"$INDIR\""
+  echo "  cmdline: \"$CMDLINE\""
+  exit 1
+}
+
+C=$1
+test -n "$2" && INDIR=$2
+test -n "$3" && CMDLINE=$3
+
+git checkout "$C" || { echo "CHECKOUT FAIL $C" > "$C.out" ; exit 1 ; }
+export AFL_BENCH_JUST_ONE=1
+test -z "$CFLAGS" && CFLAGS="-O3 -funroll-loops"
+export CFLAGS="$CFLAGS -pg"
+export LDFLAGS="$LDFLAGS -pg"
+make >/dev/null 2>&1 || echo ERROR: BUILD FAILURE
+test -x ./afl-fuzz || { echo "BUILD FAIL $C" > "$C.out" ; make clean ; exit 1 ; }
+
+START=`date +%s`
+echo $START > "$C.out"
+# $CMDLINE is deliberately unquoted so it splits into the target's argv.
+time nice -n -20 ./afl-fuzz -i "$INDIR" -s 123 -o out-profile -- $CMDLINE 2>> "$C.out"
+STOP=`date +%s`
+echo $STOP >> "$C.out"
+echo RUNTIME: `expr $STOP - $START` >> "$C.out"
+cat out-profile/fuzzer_stats >> "$C.out"
+gprof ./afl-fuzz gmon.out >> "$C.out"
+
+make clean >/dev/null 2>&1
+rm -rf out-profile gmon.out
diff --git a/test/test-compcov.c b/test/test-compcov.c
index 89611bfb..f1743265 100644
--- a/test/test-compcov.c
+++ b/test/test-compcov.c
@@ -3,30 +3,47 @@
 #include <unistd.h>
 #include <string.h>
 
+char global_cmpval[] = "GLOBALVARIABLE";
+
 int main(int argc, char **argv) {
+
   char *input = argv[1], *buf, buffer[20];
+  char  cmpval[] = "LOCALVARIABLE";
+  char  shortval[4] = "abc";
 
   if (argc < 2) {
+
     ssize_t ret = read(0, buffer, sizeof(buffer) - 1);
     buffer[ret] = 0;
     input = buffer;
+
   }
-  
+
   if (strcmp(input, "LIBTOKENCAP") == 0)
     printf("your string was libtokencap\n");
   else if (strcmp(input, "BUGMENOT") == 0)
     printf("your string was bugmenot\n");
   else if (strcmp(input, "BUFFEROVERFLOW") == 0) {
+
     buf = malloc(16);
     strcpy(buf, "TEST");
     strcat(buf, input);
     printf("This will only crash with libdislocator: %s\n", buf);
     return 0;
-  } else if (*(unsigned int*)input == 0xabadcafe)
+
+  } else if (*(unsigned int *)input == 0xabadcafe)
+
     printf("GG you eat cmp tokens for breakfast!\n");
+  else if (memcmp(cmpval, input, 8) == 0)
+    printf("local var memcmp works!\n");
+  else if (memcmp(shortval, input, 4) == 0)
+    printf("short local var memcmp works!\n");
+  else if (memcmp(global_cmpval, input, sizeof(global_cmpval)) == 0)
+    printf("global var memcmp works!\n");
   else
     printf("I do not know your string\n");
 
   return 0;
 
 }
+
diff --git a/test/test-custom-mutator.c b/test/test-custom-mutator.c
new file mode 100644
index 00000000..f868550c
--- /dev/null
+++ b/test/test-custom-mutator.c
@@ -0,0 +1,25 @@
+/**
+ * Reference:
+ * https://github.com/bruce30262/libprotobuf-mutator_fuzzing_learning/blob/master/4_libprotobuf_aflpp_custom_mutator/vuln.c
+ *
+ * Crashing test target for the custom mutator tests: reads up to 100 bytes
+ * from stdin and aborts when the byte at offset 6 is 'A'.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+int main(int argc, char *argv[]) {
+
+  /* Zero-filled so that a short (or failed) read never leaves str[6]
+     indeterminate; inputs shorter than 7 bytes then never trigger abort(). */
+  char str[100] = {0};
+  read(0, str, 100);
+  if (str[6] == 'A') { abort(); }
+  return 0;
+
+}
+
diff --git a/test/test-unsigaction.c b/test/test-unsigaction.c
index 1a5e4b26..8c6c7f41 100644
--- a/test/test-unsigaction.c
+++ b/test/test-unsigaction.c
@@ -1,25 +1,31 @@
-#include <signal.h> /* sigemptyset(), sigaction(), kill(), SIGUSR1 */
-#include <stdlib.h> /* exit() */
-#include <unistd.h> /* getpid() */
-#include <errno.h> /* errno */
-#include <stdio.h> /* fprintf() */
-
-static void mysig_handler(int sig)
-{
-	exit(2);
+#include <signal.h>          /* sigemptyset(), sigaction(), kill(), SIGUSR1 */
+#include <stdlib.h>                                               /* exit() */
+#include <unistd.h>                                             /* getpid() */
+#include <errno.h>                                                 /* errno */
+#include <stdio.h>                                             /* fprintf() */
+
+static void mysig_handler(int sig) {
+
+  exit(2);
+
 }
 
-int main()
-{
-	/* setup sig handler */
-	struct sigaction sa;
-       	sa.sa_handler = mysig_handler;
-	sigemptyset(&sa.sa_mask);
-        sa.sa_flags = 0;	
-	if (sigaction(SIGCHLD, &sa, NULL)) {
-		fprintf(stderr, "could not set signal handler %d, aborted\n", errno);
-		exit(1);
-	}
-	kill(getpid(), SIGCHLD);
-	return 0;
+int main() {
+
+  /* setup sig handler */
+  struct sigaction sa;
+  sa.sa_handler = mysig_handler;
+  sigemptyset(&sa.sa_mask);
+  sa.sa_flags = 0;
+  if (sigaction(SIGCHLD, &sa, NULL)) {
+
+    fprintf(stderr, "could not set signal handler %d, aborted\n", errno);
+    exit(1);
+
+  }
+
+  kill(getpid(), SIGCHLD);
+  return 0;
+
 }
+
diff --git a/test/test.sh b/test/test.sh
index 49dfb1a9..4295d36b 100755
--- a/test/test.sh
+++ b/test/test.sh
@@ -3,10 +3,10 @@
 #
 # Ensure we have: test, type, diff, grep -qE
 #
-test -z "" 2> /dev/null || { echo Error: test command not found ; exit 1 ; }
+test -z "" 2>/dev/null || { echo Error: test command not found ; exit 1 ; }
 GREP=`type grep > /dev/null 2>&1 && echo OK`
 test "$GREP" = OK || { echo Error: grep command not found ; exit 1 ; }
-echo foobar | grep -qE 'asd|oob' 2> /dev/null || { echo Error: grep command does not support -q and/or -E option ; exit 1 ; }
+echo foobar | grep -qE 'asd|oob' 2>/dev/null || { echo Error: grep command does not support -q and/or -E option ; exit 1 ; }
 echo 1 > test.1
 echo 1 > test.2
 OK=OK
@@ -73,7 +73,7 @@ export ASAN_OPTIONS=detect_leaks=0:allocator_may_return_null=1:abort_on_error=1:
 # on OpenBSD we need to work with llvm from /usr/local/bin
 test -e /usr/local/bin/opt && {
   export PATH=/usr/local/bin:${PATH}
-} 
+}
 # on MacOS X we prefer afl-clang over afl-gcc, because
 # afl-gcc does not work there
 test `uname -s` = 'Darwin' -o `uname -s` = 'FreeBSD' && {
@@ -142,11 +142,11 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" -o "$SYS" = "i86pc
       CODE=1
     }
     rm -f test-compcov.harden
-  } || { 
+  } || {
     $ECHO "$RED[!] ${AFL_GCC} hardened mode compilation failed"
     CODE=1
   }
-  # now we want to be sure that afl-fuzz is working  
+  # now we want to be sure that afl-fuzz is working
   # make sure core_pattern is set to core on linux
   (test "$(uname -s)" = "Linux" && test "$(sysctl kernel.core_pattern)" != "kernel.core_pattern = core" && {
     $ECHO "$YELLOW[-] we should not run afl-fuzz with enabled core dumps. Run 'sudo sh afl-system-config'.$RESET"
@@ -163,7 +163,7 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" -o "$SYS" = "i86pc
     {
       ../afl-fuzz -V10 -m ${MEM_LIMIT} -i in -o out -- ./test-instr.plain >>errors 2>&1
     } >>errors 2>&1
-    test -n "$( ls out/queue/id:000002* 2> /dev/null )" && {
+    test -n "$( ls out/queue/id:000002* 2>/dev/null )" && {
       $ECHO "$GREEN[+] afl-fuzz is working correctly with ${AFL_GCC}"
     } || {
       echo CUT------------------------------------------------------------------CUT
@@ -185,7 +185,7 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" -o "$SYS" = "i86pc
     esac
     rm -f in2/in*
     export AFL_QUIET=1
-    if type bash >/dev/null ; then {
+    if command -v bash >/dev/null ; then {
       AFL_PATH=`pwd`/.. ../afl-cmin.bash -m ${MEM_LIMIT} -i in -o in2 -- ./test-instr.plain >/dev/null
       CNT=`ls in2/* 2>/dev/null | wc -l`
       case "$CNT" in
@@ -200,7 +200,7 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" -o "$SYS" = "i86pc
     }
     fi
     ../afl-tmin -m ${MEM_LIMIT} -i in/in2 -o in2/in2 -- ./test-instr.plain > /dev/null 2>&1
-    SIZE=`ls -l in2/in2 2> /dev/null | awk '{print$5}'`
+    SIZE=`ls -l in2/in2 2>/dev/null | awk '{print$5}'`
     test "$SIZE" = 1 && $ECHO "$GREEN[+] afl-tmin correctly minimized the testcase"
     test "$SIZE" = 1 || {
        $ECHO "$RED[!] afl-tmin did incorrectly minimize the testcase to $SIZE"
@@ -210,20 +210,20 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" -o "$SYS" = "i86pc
     unset AFL_QUIET
   }
   rm -f test-instr.plain
- } || { 
+ } || {
   $ECHO "$YELLOW[-] afl is not compiled, cannot test"
   INCOMPLETE=1
  }
-} || { 
+} || {
  $ECHO "$YELLOW[-] not an intel platform, cannot test afl-gcc"
-} 
+}
 
 $ECHO "$BLUE[*] Testing: llvm_mode, afl-showmap, afl-fuzz, afl-cmin and afl-tmin"
 test -e ../afl-clang-fast -a -e ../split-switches-pass.so && {
   # on FreeBSD need to set AFL_CC
   test `uname -s` = 'FreeBSD' && {
     if type clang >/dev/null; then
-      export AFL_CC=`type clang | awk '{print $NF}'`
+      export AFL_CC=`command -v clang`
     else
       export AFL_CC=`$LLVM_CONFIG --bindir`/clang
     fi
@@ -248,7 +248,7 @@ test -e ../afl-clang-fast -a -e ../split-switches-pass.so && {
           CODE=1
         }
       }
-    } || { 
+    } || {
       $ECHO "$RED[!] llvm_mode instrumentation failed"
       CODE=1
     }
@@ -265,11 +265,11 @@ test -e ../afl-clang-fast -a -e ../split-switches-pass.so && {
       CODE=1
     }
     rm -f test-compcov.harden
-  } || { 
+  } || {
     $ECHO "$RED[!] llvm_mode hardened mode compilation failed"
     CODE=1
   }
-  # now we want to be sure that afl-fuzz is working  
+  # now we want to be sure that afl-fuzz is working
   (test "$(uname -s)" = "Linux" && test "$(sysctl kernel.core_pattern)" != "kernel.core_pattern = core" && {
     $ECHO "$YELLOW[-] we should not run afl-fuzz with enabled core dumps. Run 'sudo sh afl-system-config'.$RESET"
     true
@@ -286,7 +286,7 @@ test -e ../afl-clang-fast -a -e ../split-switches-pass.so && {
     {
       ../afl-fuzz -V10 -m ${MEM_LIMIT} -i in -o out -- ./test-instr.plain >>errors 2>&1
     } >>errors 2>&1
-    test -n "$( ls out/queue/id:000002* 2> /dev/null )" && {
+    test -n "$( ls out/queue/id:000002* 2>/dev/null )" && {
       $ECHO "$GREEN[+] afl-fuzz is working correctly with llvm_mode"
     } || {
       echo CUT------------------------------------------------------------------CUT
@@ -324,7 +324,7 @@ test -e ../afl-clang-fast -a -e ../split-switches-pass.so && {
       }
       fi
       ../afl-tmin -m ${MEM_LIMIT} -i in/in2 -o in2/in2 -- ./test-instr.plain > /dev/null 2>&1
-      SIZE=`ls -l in2/in2 2> /dev/null | awk '{print$5}'`
+      SIZE=`ls -l in2/in2 2>/dev/null | awk '{print$5}'`
       test "$SIZE" = 1 && $ECHO "$GREEN[+] afl-tmin correctly minimized the testcase"
       test "$SIZE" = 1 || {
          $ECHO "$RED[!] afl-tmin did incorrectly minimize the testcase to $SIZE"
@@ -337,13 +337,13 @@ test -e ../afl-clang-fast -a -e ../split-switches-pass.so && {
   rm -f test-instr.plain
 
   # now for the special llvm_mode things
-  AFL_LLVM_INSTRIM=1 AFL_LLVM_INSTRIM_LOOPHEAD=1 ../afl-clang-fast -o test-instr.instrim ../test-instr.c > /dev/null 2> test.out
+  AFL_LLVM_INSTRIM=1 AFL_LLVM_INSTRIM_LOOPHEAD=1 ../afl-clang-fast -o test-instr.instrim ../test-instr.c > /dev/null 2>test.out
   test -e test-instr.instrim && {
     TUPLES=`echo 0|../afl-showmap -m ${MEM_LIMIT} -o /dev/null -- ./test-instr.instrim 2>&1 | grep Captur | awk '{print$3}'`
     test "$TUPLES" -gt 2 -a "$TUPLES" -lt 5 && {
-      $ECHO "$GREEN[+] llvm_mode Instrim reported $TUPLES instrumented locations which is fine"
+      $ECHO "$GREEN[+] llvm_mode InsTrim reported $TUPLES instrumented locations which is fine"
     } || {
-      $ECHO "$RED[!] llvm_mode Instrim instrumentation produces weird numbers: $TUPLES"
+      $ECHO "$RED[!] llvm_mode InsTrim instrumentation produces weird numbers: $TUPLES"
       CODE=1
     }
     rm -f test-instr.instrim test.out
@@ -351,9 +351,9 @@ test -e ../afl-clang-fast -a -e ../split-switches-pass.so && {
     $ECHO "$RED[!] llvm_mode InsTrim compilation failed"
     CODE=1
   }
-  AFL_DEBUG=1 AFL_LLVM_LAF_SPLIT_SWITCHES=1 AFL_LLVM_LAF_TRANSFORM_COMPARES=1 AFL_LLVM_LAF_SPLIT_COMPARES=1 ../afl-clang-fast -o test-compcov.compcov test-compcov.c > /dev/null 2> test.out
+  AFL_DEBUG=1 AFL_LLVM_LAF_SPLIT_SWITCHES=1 AFL_LLVM_LAF_TRANSFORM_COMPARES=1 AFL_LLVM_LAF_SPLIT_COMPARES=1 ../afl-clang-fast -o test-compcov.compcov test-compcov.c > test.out 2>&1
   test -e test-compcov.compcov && {
-    grep -Eq " [3-9][0-9] location" test.out && {
+    grep -Eq " [ 12][0-9][0-9] location| [3-9][0-9] location" test.out && {
       $ECHO "$GREEN[+] llvm_mode laf-intel/compcov feature works correctly"
     } || {
       $ECHO "$RED[!] llvm_mode laf-intel/compcov feature failed"
@@ -373,7 +373,7 @@ test -e ../afl-clang-fast -a -e ../split-switches-pass.so && {
       $ECHO "$RED[!] llvm_mode whitelist feature failed"
       CODE=1
     }
-  } || { 
+  } || {
     $ECHO "$RED[!] llvm_mode whitelist feature compilation failed"
     CODE=1
   }
@@ -401,7 +401,7 @@ test -e ../afl-clang-lto -a -e ../afl-llvm-lto-instrumentation.so && {
   # on FreeBSD need to set AFL_CC
   test `uname -s` = 'FreeBSD' && {
     if type clang >/dev/null; then
-      export AFL_CC=`type clang | awk '{print $NF}'`
+      export AFL_CC=`command -v clang`
     else
       export AFL_CC=`$LLVM_CONFIG --bindir`/clang
     fi
@@ -426,7 +426,7 @@ test -e ../afl-clang-lto -a -e ../afl-llvm-lto-instrumentation.so && {
           CODE=1
         }
       }
-    } || { 
+    } || {
       $ECHO "$RED[!] llvm_mode LTO instrumentation failed"
       CODE=1
     }
@@ -447,7 +447,7 @@ test -e ../afl-clang-lto -a -e ../afl-llvm-lto-instrumentation.so && {
 #      $ECHO "$RED[!] llvm_mode LTO whitelist feature failed"
 #      CODE=1
 #    }
-#  } || { 
+#  } || {
 #    $ECHO "$RED[!] llvm_mode LTO whitelist feature compilation failed"
 #    CODE=1
 #  }
@@ -471,8 +471,9 @@ test -e ../afl-clang-lto -a -e ../afl-llvm-lto-instrumentation.so && {
 }
 
 $ECHO "$BLUE[*] Testing: gcc_plugin"
-export AFL_CC=`type gcc | awk '{print $NF}'`
 test -e ../afl-gcc-fast -a -e ../afl-gcc-rt.o && {
+  SAVE_AFL_CC=${AFL_CC}
+  export AFL_CC=`command -v gcc`
   ../afl-gcc-fast -o test-instr.plain.gccpi ../test-instr.c > /dev/null 2>&1
   AFL_HARDEN=1 ../afl-gcc-fast -o test-compcov.harden.gccpi test-compcov.c > /dev/null 2>&1
   test -e test-instr.plain.gccpi && {
@@ -483,7 +484,7 @@ test -e ../afl-gcc-fast -a -e ../afl-gcc-rt.o && {
       diff test-instr.plain.0 test-instr.plain.1 > /dev/null 2>&1 && {
         $ECHO "$RED[!] gcc_plugin instrumentation should be different on different input but is not"
         CODE=1
-      } || { 
+      } || {
         $ECHO "$GREEN[+] gcc_plugin instrumentation present and working correctly"
         TUPLES=`echo 0|../afl-showmap -m ${MEM_LIMIT} -o /dev/null -- ./test-instr.plain.gccpi 2>&1 | grep Captur | awk '{print$3}'`
         test "$TUPLES" -gt 3 -a "$TUPLES" -lt 7 && {
@@ -516,7 +517,7 @@ test -e ../afl-gcc-fast -a -e ../afl-gcc-rt.o && {
     $ECHO "$RED[!] gcc_plugin hardened mode compilation failed"
     CODE=1
   }
-  # now we want to be sure that afl-fuzz is working  
+  # now we want to be sure that afl-fuzz is working
   (test "$(uname -s)" = "Linux" && test "$(sysctl kernel.core_pattern)" != "kernel.core_pattern = core" && {
     $ECHO "$YELLOW[-] we should not run afl-fuzz with enabled core dumps. Run 'sudo sh afl-system-config'.$RESET"
     true
@@ -533,7 +534,7 @@ test -e ../afl-gcc-fast -a -e ../afl-gcc-rt.o && {
     {
       ../afl-fuzz -V10 -m ${MEM_LIMIT} -i in -o out -- ./test-instr.plain.gccpi >>errors 2>&1
     } >>errors 2>&1
-    test -n "$( ls out/queue/id:000002* 2> /dev/null )" && {
+    test -n "$( ls out/queue/id:000002* 2>/dev/null )" && {
       $ECHO "$GREEN[+] afl-fuzz is working correctly with gcc_plugin"
     } || {
       echo CUT------------------------------------------------------------------CUT
@@ -552,11 +553,11 @@ test -e ../afl-gcc-fast -a -e ../afl-gcc-rt.o && {
   test -e test-compcov && {
     echo 1 | ../afl-showmap -m ${MEM_LIMIT} -o - -r -- ./test-compcov 2>&1 | grep -q "Captured 1 tuples" && {
       $ECHO "$GREEN[+] gcc_plugin whitelist feature works correctly"
-    } || { 
+    } || {
       $ECHO "$RED[!] gcc_plugin whitelist feature failed"
       CODE=1
     }
-  } || { 
+  } || {
     $ECHO "$RED[!] gcc_plugin whitelist feature compilation failed"
     CODE=1
   }
@@ -574,18 +575,21 @@ test -e ../afl-gcc-fast -a -e ../afl-gcc-rt.o && {
     CODE=1
   }
   rm -f test-persistent
+  export AFL_CC=${SAVE_AFL_CC}
 } || {
   $ECHO "$YELLOW[-] gcc_plugin not compiled, cannot test"
   INCOMPLETE=1
 }
 
+test -z "$AFL_CC" && unset AFL_CC
+
 $ECHO "$BLUE[*] Testing: shared library extensions"
-cc -o test-compcov test-compcov.c > /dev/null 2>&1
+cc $CFLAGS -o test-compcov test-compcov.c > /dev/null 2>&1
 test -e ../libtokencap.so && {
   AFL_TOKEN_FILE=token.out LD_PRELOAD=../libtokencap.so DYLD_INSERT_LIBRARIES=../libtokencap.so DYLD_FORCE_FLAT_NAMESPACE=1 ./test-compcov foobar > /dev/null 2>&1
   grep -q BUGMENOT token.out > /dev/null 2>&1 && {
     $ECHO "$GREEN[+] libtokencap did successfully capture tokens"
-  } || { 
+  } || {
     $ECHO "$RED[!] libtokencap did not capture tokens"
     CODE=1
   }
@@ -598,13 +602,13 @@ test -e ../libdislocator.so && {
   {
     ulimit -c 1
     # DYLD_INSERT_LIBRARIES and DYLD_FORCE_FLAT_NAMESPACE is used on Darwin/MacOSX
-    LD_PRELOAD=../libdislocator.so DYLD_INSERT_LIBRARIES=../libdislocator.so DYLD_FORCE_FLAT_NAMESPACE=1 ./test-compcov BUFFEROVERFLOW > test.out 2> /dev/null
+    LD_PRELOAD=../libdislocator.so DYLD_INSERT_LIBRARIES=../libdislocator.so DYLD_FORCE_FLAT_NAMESPACE=1 ./test-compcov BUFFEROVERFLOW > test.out 2>/dev/null
   } > /dev/null 2>&1
   grep -q BUFFEROVERFLOW test.out > /dev/null 2>&1 && {
     $ECHO "$RED[!] libdislocator did not detect the memory corruption"
     CODE=1
   } || {
-    $ECHO "$GREEN[+] libdislocator did successfully detect the memory corruption" 
+    $ECHO "$GREEN[+] libdislocator did successfully detect the memory corruption"
   }
   rm -f test.out core test-compcov.core core.test-compcov
 } || {
@@ -614,10 +618,9 @@ test -e ../libdislocator.so && {
 rm -f test-compcov
 test -e ../libradamsa.so && {
   # on FreeBSD need to set AFL_CC
-
   test `uname -s` = 'FreeBSD' && {
     if type clang >/dev/null; then
-      export AFL_CC=`type clang | awk '{print $NF}'`
+      export AFL_CC=`command -v clang`
     else
       export AFL_CC=`$LLVM_CONFIG --bindir`/clang
     fi
@@ -632,7 +635,7 @@ test -e ../libradamsa.so && {
     {
       ../afl-fuzz -RR -V10 -m ${MEM_LIMIT} -i in -o out -- ./test-instr.plain
     } >>errors 2>&1
-    test -n "$( ls out/queue/id:000001* 2> /dev/null )" && {
+    test -n "$( ls out/queue/id:000001* 2>/dev/null )" && {
       $ECHO "$GREEN[+] libradamsa performs good - and very slow - mutations"
     } || {
       echo CUT------------------------------------------------------------------CUT
@@ -651,6 +654,16 @@ test -e ../libradamsa.so && {
   INCOMPLETE=1
 }
 
+test -z "$AFL_CC" && {
+  if type gcc >/dev/null; then
+    export AFL_CC=gcc
+  else
+    if type clang >/dev/null; then
+      export AFL_CC=clang
+    fi
+  fi
+}
+
 $ECHO "$BLUE[*] Testing: qemu_mode"
 test -e ../afl-qemu-trace && {
   gcc -pie -fPIE -o test-instr ../test-instr.c
@@ -663,7 +676,7 @@ test -e ../afl-qemu-trace && {
       {
         ../afl-fuzz -m ${MEM_LIMIT} -V10 -Q -i in -o out -- ./test-instr >>errors 2>&1
       } >>errors 2>&1
-      test -n "$( ls out/queue/id:000002* 2> /dev/null )" && {
+      test -n "$( ls out/queue/id:000002* 2>/dev/null )" && {
         $ECHO "$GREEN[+] afl-fuzz is working correctly with qemu_mode"
         RUNTIME=`grep execs_done out/fuzzer_stats | awk '{print$3}'`
       } || {
@@ -678,13 +691,13 @@ test -e ../afl-qemu-trace && {
       $ECHO "$GREY[*] running afl-fuzz for qemu_mode AFL_ENTRYPOINT, this will take approx 6 seconds"
       {
         {
-          export AFL_ENTRYPOINT=`expr 0x4$(nm test-instr | grep "T main" | awk '{print $1}' | sed 's/^.......//')`
-          $ECHO AFL_ENTRYPOINT=$AFL_ENTRYPOINT - $(m test-instr | grep "T main") - $(file ./test-instr)
+          export AFL_ENTRYPOINT=`expr 0x4$(nm test-instr | grep "T main" | awk '{print $1}' | sed 's/^.......//' )`
+          $ECHO AFL_ENTRYPOINT=$AFL_ENTRYPOINT - $(nm test-instr | grep "T main") - $(file ./test-instr)
           ../afl-fuzz -m ${MEM_LIMIT} -V2 -Q -i in -o out -- ./test-instr
           unset AFL_ENTRYPOINT
         } >>errors 2>&1
       } >>errors 2>&1
-      test -n "$( ls out/queue/id:000001* 2> /dev/null )" && {
+      test -n "$( ls out/queue/id:000001* 2>/dev/null )" && {
         $ECHO "$GREEN[+] afl-fuzz is working correctly with qemu_mode AFL_ENTRYPOINT"
         RUNTIME=`grep execs_done out/fuzzer_stats | awk '{print$3}'`
       } || {
@@ -700,13 +713,13 @@ test -e ../afl-qemu-trace && {
         test -e ../libcompcov.so && {
           $ECHO "$GREY[*] running afl-fuzz for qemu_mode compcov, this will take approx 10 seconds"
           {
-            export AFL_PRELOAD=../libcompcov.so 
+            export AFL_PRELOAD=../libcompcov.so
             export AFL_COMPCOV_LEVEL=2
             ../afl-fuzz -m ${MEM_LIMIT} -V10 -Q -i in -o out -- ./test-compcov >>errors 2>&1
             unset AFL_PRELOAD
             unset AFL_COMPCOV_LEVEL
           } >>errors 2>&1
-          test -n "$( ls out/queue/id:000001* 2> /dev/null )" && {
+          test -n "$( ls out/queue/id:000001* 2>/dev/null )" && {
             $ECHO "$GREEN[+] afl-fuzz is working correctly with qemu_mode compcov"
           } || {
             echo CUT------------------------------------------------------------------CUT
@@ -720,21 +733,41 @@ test -e ../afl-qemu-trace && {
           INCOMPLETE=1
         }
         rm -f errors
-      } || { 
+      } || {
        $ECHO "$YELLOW[-] not an intel or arm platform, cannot test qemu_mode compcov"
       }
       
       test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" -o "$SYS" = "i86pc" -o "$SYS" = "aarch64" -o ! "${SYS%%arm*}" && {
+        $ECHO "$GREY[*] running afl-fuzz for qemu_mode cmplog, this will take approx 10 seconds"
+        {
+          ../afl-fuzz -m none -V10 -Q -c 0 -i in -o out -- ./test-compcov >>errors 2>&1
+        } >>errors 2>&1
+        test -n "$( ls out/queue/id:000001* 2>/dev/null )" && {
+          $ECHO "$GREEN[+] afl-fuzz is working correctly with qemu_mode cmplog"
+        } || {
+          echo CUT------------------------------------------------------------------CUT
+          cat errors
+          echo CUT------------------------------------------------------------------CUT
+          $ECHO "$RED[!] afl-fuzz is not working correctly with qemu_mode cmplog"
+          CODE=1
+        }
+        rm -f errors
+      } || {
+       $ECHO "$YELLOW[-] not an intel or arm platform, cannot test qemu_mode cmplog"
+      }
+
+      test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" -o "$SYS" = "i86pc" -o "$SYS" = "aarch64" -o ! "${SYS%%arm*}" && {
         $ECHO "$GREY[*] running afl-fuzz for persistent qemu_mode, this will take approx 10 seconds"
         {
-          export AFL_QEMU_PERSISTENT_ADDR=`expr 0x4$(nm test-instr | grep "T main" | awk '{print $1}' | sed 's/^.......//')`
+          export AFL_QEMU_PERSISTENT_ADDR=`expr 0x4$(nm test-instr | grep "T main" | awk '{print $1}' | sed 's/^.......//' )`
           export AFL_QEMU_PERSISTENT_GPR=1
           $ECHO "Info: AFL_QEMU_PERSISTENT_ADDR=$AFL_QEMU_PERSISTENT_ADDR <= $(nm test-instr | grep "T main" | awk '{print $1}')"
+          env|grep AFL_|sort
           file test-instr
           ../afl-fuzz -m ${MEM_LIMIT} -V10 -Q -i in -o out -- ./test-instr
           unset AFL_QEMU_PERSISTENT_ADDR
         } >>errors 2>&1
-        test -n "$( ls out/queue/id:000002* 2> /dev/null )" && {
+        test -n "$( ls out/queue/id:000002* 2>/dev/null )" && {
           $ECHO "$GREEN[+] afl-fuzz is working correctly with persistent qemu_mode"
           RUNTIMEP=`grep execs_done out/fuzzer_stats | awk '{print$3}'`
           test -n "$RUNTIME" -a -n "$RUNTIMEP" && {
@@ -756,9 +789,9 @@ test -e ../afl-qemu-trace && {
           exit 1
         }
         rm -rf in out errors
-      } || { 
+      } || {
        $ECHO "$YELLOW[-] not an intel or arm platform, cannot test persistent qemu_mode"
-      } 
+      }
 
       test -e ../qemu_mode/unsigaction/unsigaction32.so && {
         ${AFL_CC} -o test-unsigaction32 -m32 test-unsigaction.c >> errors 2>&1 && {
@@ -823,7 +856,7 @@ test -e ../afl-qemu-trace && {
     $ECHO "$RED[!] gcc compilation of test targets failed - what is going on??"
     CODE=1
   }
-  
+
   rm -f test-instr test-compcov
 } || {
   $ECHO "$YELLOW[-] qemu_mode is not compiled, cannot test"
@@ -835,12 +868,12 @@ test -d ../unicorn_mode/unicornafl && {
   test -e ../unicorn_mode/samples/simple/simple_target.bin -a -e ../unicorn_mode/samples/compcov_x64/compcov_target.bin && {
     {
       # travis workaround
-      PY=`type python | awk '{print $NF}'`
+      PY=`command -v python`
       test "$PY" = "/opt/pyenv/shims/python" -a -x /usr/bin/python && PY=/usr/bin/python
       mkdir -p in
       echo 0 > in/in
       $ECHO "$GREY[*] Using python binary $PY"
-      if ! $PY -c 'import unicornafl' 2> /dev/null ; then
+      if ! $PY -c 'import unicornafl' 2>/dev/null ; then
         $ECHO "$YELLOW[-] we cannot test unicorn_mode because it is not present"
         INCOMPLETE=1
       else
@@ -849,7 +882,7 @@ test -d ../unicorn_mode/unicornafl && {
         {
           ../afl-fuzz -m ${MEM_LIMIT} -V25 -U -i in -o out -d -- "$PY" ../unicorn_mode/samples/simple/simple_test_harness.py @@ >>errors 2>&1
         } >>errors 2>&1
-        test -n "$( ls out/queue/id:000002* 2> /dev/null )" && {
+        test -n "$( ls out/queue/id:000002* 2>/dev/null )" && {
           $ECHO "$GREEN[+] afl-fuzz is working correctly with unicorn_mode"
         } || {
           echo CUT------------------------------------------------------------------CUT
@@ -869,7 +902,7 @@ test -d ../unicorn_mode/unicornafl && {
           ../afl-fuzz -m ${MEM_LIMIT} -V35 -U -i in -o out -d -- "$PY" ../unicorn_mode/samples/compcov_x64/compcov_test_harness.py @@ >>errors 2>&1
           unset AFL_COMPCOV_LEVEL
         } >>errors 2>&1
-        test -n "$( ls out/queue/id:000001* 2> /dev/null )" && {
+        test -n "$( ls out/queue/id:000001* 2>/dev/null )" && {
           $ECHO "$GREEN[+] afl-fuzz is working correctly with unicorn_mode compcov"
         } || {
           echo CUT------------------------------------------------------------------CUT
@@ -886,12 +919,99 @@ test -d ../unicorn_mode/unicornafl && {
     $ECHO "$RED[!] missing sample binaries in unicorn_mode/samples/ - what is going on??"
     CODE=1
   }
-  
+
 } || {
   $ECHO "$YELLOW[-] unicorn_mode is not compiled, cannot test"
   INCOMPLETE=1
 }
 
+$ECHO "$BLUE[*] Testing: custom mutator"
+test "1" = "`../afl-fuzz | grep -i 'without python' >/dev/null; echo $?`" && {
+  test `uname -s` = 'Darwin' && {
+    CUSTOM_MUTATOR_PATH=$( realpath ../examples/custom_mutators )
+  } || {
+    CUSTOM_MUTATOR_PATH=$( readlink -f ../examples/custom_mutators )
+  }
+  test -e test-custom-mutator.c -a -e ${CUSTOM_MUTATOR_PATH}/example.c -a -e ${CUSTOM_MUTATOR_PATH}/example.py && {
+    unset AFL_CC
+    # Compile the vulnerable program
+    ../afl-clang-fast -o test-custom-mutator test-custom-mutator.c > /dev/null 2>&1
+    # Compile the custom mutator
+    make -C ../examples/custom_mutators libexamplemutator.so > /dev/null 2>&1
+    test -e test-custom-mutator -a -e ${CUSTOM_MUTATOR_PATH}/libexamplemutator.so && {
+      # Create input directory
+      mkdir -p in
+      echo "00000" > in/in
+
+      # Run afl-fuzz w/ the C mutator
+      $ECHO "$GREY[*] running afl-fuzz for the C mutator, this will take approx 10 seconds"
+      {
+        AFL_CUSTOM_MUTATOR_LIBRARY=${CUSTOM_MUTATOR_PATH}/libexamplemutator.so ../afl-fuzz -V10 -m ${MEM_LIMIT} -i in -o out -- ./test-custom-mutator >>errors 2>&1
+      } >>errors 2>&1
+
+      # Check results
+      test -n "$( ls out/crashes/id:000000* 2>/dev/null )" && {  # TODO: update here
+        $ECHO "$GREEN[+] afl-fuzz is working correctly with the C mutator"
+      } || {
+        echo CUT------------------------------------------------------------------CUT
+        cat errors
+        echo CUT------------------------------------------------------------------CUT
+        $ECHO "$RED[!] afl-fuzz is not working correctly with the C mutator"
+        CODE=1
+      }
+
+      # Clean
+      rm -rf out errors
+
+      # Run afl-fuzz w/ the Python mutator
+      $ECHO "$GREY[*] running afl-fuzz for the Python mutator, this will take approx 10 seconds"
+      {
+        export PYTHONPATH=${CUSTOM_MUTATOR_PATH}
+        export AFL_PYTHON_MODULE=example
+        ../afl-fuzz -V10 -m ${MEM_LIMIT} -i in -o out -- ./test-custom-mutator >>errors 2>&1
+        unset PYTHONPATH
+        unset AFL_PYTHON_MODULE
+      } >>errors 2>&1
+
+      # Check results
+      test -n "$( ls out/crashes/id:000000* 2>/dev/null )" && {  # TODO: update here
+        $ECHO "$GREEN[+] afl-fuzz is working correctly with the Python mutator"
+      } || {
+        echo CUT------------------------------------------------------------------CUT
+        cat errors
+        echo CUT------------------------------------------------------------------CUT
+        $ECHO "$RED[!] afl-fuzz is not working correctly with the Python mutator"
+        CODE=1
+      }
+
+      # Clean
+      rm -rf in out errors
+      rm -rf ${CUSTOM_MUTATOR_PATH}/__pycache__/
+    } || {
+      ls .
+      ls ${CUSTOM_MUTATOR_PATH}
+      $ECHO "$RED[!] cannot compile the test program or the custom mutator"
+      CODE=1
+    }
+
+    #test "$CODE" = 1 && { $ECHO "$YELLOW[!] custom mutator tests currently will not fail travis" ; CODE=0 ; }
+
+    make -C ../examples/custom_mutators clean > /dev/null 2>&1
+    rm -f test-custom-mutator
+  } || {
+    $ECHO "$YELLOW[-] no custom mutators in $CUSTOM_MUTATOR_PATH, cannot test"
+    INCOMPLETE=1
+  }
+  unset CUSTOM_MUTATOR_PATH
+} || {
+  $ECHO "$YELLOW[-] no python support in afl-fuzz, cannot test"
+  INCOMPLETE=1
+}
+
+$ECHO "$BLUE[*] Execution cmocka Unit-Tests $GREY"
+unset AFL_CC
+make -C .. unit || CODE=1  # record a unit-test failure in CODE (was: ran "$CODE" as a command)
+
 $ECHO "$GREY[*] all test cases completed.$RESET"
 test "$INCOMPLETE" = "0" && $ECHO "$GREEN[+] all test cases executed"
 test "$INCOMPLETE" = "1" && $ECHO "$YELLOW[-] not all test cases were executed"
diff --git a/test/unittests/unit_list.c b/test/unittests/unit_list.c
new file mode 100644
index 00000000..11d3227c
--- /dev/null
+++ b/test/unittests/unit_list.c
@@ -0,0 +1,128 @@
+#include <stdarg.h>
+#include <stddef.h>
+#include <setjmp.h>
+#include <assert.h>
+#include <cmocka.h>
+/* cmocka < 1.0 didn't support these features we need */
+#ifndef assert_ptr_equal
+#define assert_ptr_equal(a, b) \
+    _assert_int_equal(cast_ptr_to_largest_integral_type(a), \
+                      cast_ptr_to_largest_integral_type(b), \
+                      __FILE__, __LINE__)
+#define CMUnitTest UnitTest
+#define cmocka_unit_test unit_test
+#define cmocka_run_group_tests(t, setup, teardown) run_tests(t)
+#endif
+
+
+extern void mock_assert(const int result, const char* const expression,
+                        const char * const file, const int line);
+#undef assert
+#define assert(expression) \
+    mock_assert((int)(expression), #expression, __FILE__, __LINE__);
+
+#include "list.h"
+
+/* remap exit -> assert, then use cmocka's mock_assert
+    (compile with `--wrap=exit`) */
+extern void exit(int status);
+extern void __real_exit(int status);
+void __wrap_exit(int status) {
+    assert(0);
+}
+
+/* ignore all printfs */
+extern int printf(const char *format, ...);
+extern int __real_printf(const char *format, ...);
+int __wrap_printf(const char *format, ...) {
+    return 1;
+}
+
+list_t testlist;
+
+static void test_contains(void **state) {
+
+    u32 one = 1;
+    u32 two = 2;
+
+    list_append(&testlist, &one);
+    assert_true(list_contains(&testlist, &one));
+    assert_false(list_contains(&testlist, &two));
+    list_remove(&testlist, &one);
+    assert_false(list_contains(&testlist, &one));
+}
+
+static void test_foreach(void **state) {
+
+    u32 one = 1;
+    u32 two = 2;
+    u32 result = 0;
+
+    list_append(&testlist, &one);
+    list_append(&testlist, &two);
+    list_append(&testlist, &one);
+
+    /* The list is for pointers, so int doesn't work as type directly */
+    LIST_FOREACH(&testlist, u32, {
+        result += *el;
+    });
+
+    assert_int_equal(result, 4);
+
+}
+
+static void test_long_list(void **state) {
+
+    u32 result1 = 0;
+    u32 result2 = 0;
+    u32 i;
+
+    u32 vals[100];
+
+    for (i = 0; i < 100; i++) {
+        vals[i] = i;
+    }
+
+    LIST_FOREACH_CLEAR(&testlist, void, {});
+    for (i = 0; i < 100; i++) {
+        list_append(&testlist, &vals[i]);
+    }
+    LIST_FOREACH(&testlist, u32, {
+        result1 += *el;
+    });
+    //printf("removing %d\n", vals[50]);
+    list_remove(&testlist, &vals[50]);
+
+    LIST_FOREACH(&testlist, u32, {
+        // printf("var: %d\n", *el);
+        result2 += *el;
+    });
+    assert_int_not_equal(result1, result2);
+    assert_int_equal(result1, result2 + 50);
+
+    result1 = 0;
+    LIST_FOREACH_CLEAR(&testlist, u32, {
+        result1 += *el;
+    });
+    assert_int_equal(result1, result2);
+
+    result1 = 0;
+    LIST_FOREACH(&testlist, u32, {
+        result1 += *el;
+    });
+    assert_int_equal(result1, 0);
+
+}
+
+int main(int argc, char **argv) {
+
+	const struct CMUnitTest tests[] = {
+        cmocka_unit_test(test_contains),
+        cmocka_unit_test(test_foreach),
+        cmocka_unit_test(test_long_list),
+	};
+
+    //return cmocka_run_group_tests (tests, setup, teardown);
+    return cmocka_run_group_tests (tests, NULL, NULL);
+
+}
diff --git a/test/unittests/unit_maybe_alloc.c b/test/unittests/unit_maybe_alloc.c
new file mode 100644
index 00000000..d9c037a0
--- /dev/null
+++ b/test/unittests/unit_maybe_alloc.c
@@ -0,0 +1,158 @@
+#include <stdarg.h>
+#include <stddef.h>
+#include <setjmp.h>
+#include <assert.h>
+#include <cmocka.h>
+/* cmocka < 1.0 didn't support these features we need */
+#ifndef assert_ptr_equal
+#define assert_ptr_equal(a, b) \
+    _assert_int_equal(cast_ptr_to_largest_integral_type(a), \
+                      cast_ptr_to_largest_integral_type(b), \
+                      __FILE__, __LINE__)
+#define CMUnitTest UnitTest
+#define cmocka_unit_test unit_test
+#define cmocka_run_group_tests(t, setup, teardown) run_tests(t)
+#endif
+
+
+extern void mock_assert(const int result, const char* const expression,
+                        const char * const file, const int line);
+#undef assert
+#define assert(expression) \
+    mock_assert((int)(expression), #expression, __FILE__, __LINE__);
+#include "alloc-inl.h"
+
+/* remap exit -> assert, then use cmocka's mock_assert
+    (compile with `--wrap=exit`) */
+extern void exit(int status);
+extern void __real_exit(int status);
+void __wrap_exit(int status) {
+    assert(0);
+}
+
+/* ignore all printfs */
+extern int printf(const char *format, ...);
+extern int __real_printf(const char *format, ...);
+int __wrap_printf(const char *format, ...) {
+    return 1;
+}
+
+#define BUF_PARAMS (void **)&buf, &size
+
+/*
+static int setup(void **state) {
+
+    return 0;
+
+}
+*/
+
+static void test_null_allocs(void **state) {
+
+    void *buf = NULL;
+    size_t size = 0;
+    void *ptr = ck_maybe_grow(BUF_PARAMS, 100);
+    assert_true(buf == ptr);
+    assert_true(size >= 100);
+    ck_free(ptr);
+
+}
+
+static void test_nonpow2_size(void **state) {
+
+    char *buf = ck_alloc(150);
+    size_t size = 150;
+    buf[140] = '5';
+    char *ptr = ck_maybe_grow(BUF_PARAMS, 160);
+    assert_ptr_equal(buf, ptr);
+    assert_true(size >= 160);
+    assert_true(buf[140] == '5');
+    ck_free(ptr);
+
+}
+
+static void test_zero_size(void **state) {
+
+    char *buf = NULL;
+    size_t size = 0;
+    assert_non_null(maybe_grow(BUF_PARAMS, 0));
+    free(buf);
+    buf = NULL;
+    size = 0;
+
+    char *ptr = ck_maybe_grow(BUF_PARAMS, 100);
+    assert_non_null(ptr);
+    assert_ptr_equal(buf, ptr);
+    assert_true(size >= 100);
+
+    expect_assert_failure(ck_maybe_grow(BUF_PARAMS, 0));
+
+    ck_free(ptr);
+
+}
+
+static void test_unchanged_size(void **state) {
+
+    void *buf = ck_alloc(100);
+    size_t size = 100;
+    void *buf_before = buf;
+    void *buf_after = ck_maybe_grow(BUF_PARAMS, 100);
+    assert_ptr_equal(buf, buf_after);
+    assert_ptr_equal(buf_after, buf_before);
+    ck_free(buf);
+
+}
+
+static void test_grow_multiple(void **state) {
+    /* Grow one buffer 100 -> 1000 -> 10000 bytes; bytes written earlier must survive. */
+    char *buf = NULL;
+    size_t size = 0;
+
+    char *ptr = ck_maybe_grow(BUF_PARAMS, 100);
+    assert_ptr_equal(ptr, buf);
+    assert_true(size >= 100);
+    assert_int_equal(size, next_pow2(size));
+    buf[50] = '5';
+
+    ptr = (char *)ck_maybe_grow(BUF_PARAMS, 1000);
+    assert_ptr_equal(ptr, buf);
+    assert_true(size >= 1000); /* must cover the size just requested, not the previous one */
+    assert_int_equal(size, next_pow2(size));
+    buf[500] = '5';
+
+    ptr = (char *)ck_maybe_grow(BUF_PARAMS, 10000);
+    assert_ptr_equal(ptr, buf);
+    assert_true(size >= 10000);
+    assert_int_equal(size, next_pow2(size));
+    buf[5000] = '5';
+
+    assert_int_equal(buf[50], '5');
+    assert_int_equal(buf[500], '5');
+    assert_int_equal(buf[5000], '5');
+
+    ck_free(buf);
+
+}
+
+/*
+static int teardown(void **state) {
+
+    return 0;
+
+}
+*/
+
+int main(int argc, char **argv) {
+
+	const struct CMUnitTest tests[] = {
+		cmocka_unit_test(test_null_allocs),
+		cmocka_unit_test(test_nonpow2_size),
+		cmocka_unit_test(test_zero_size),
+        cmocka_unit_test(test_unchanged_size),
+        cmocka_unit_test(test_grow_multiple),
+	};
+
+    //return cmocka_run_group_tests (tests, setup, teardown);
+    return cmocka_run_group_tests (tests, NULL, NULL);
+
+}
diff --git a/test/unittests/unit_preallocable.c b/test/unittests/unit_preallocable.c
new file mode 100644
index 00000000..8cd36165
--- /dev/null
+++ b/test/unittests/unit_preallocable.c
@@ -0,0 +1,111 @@
+#include <stdarg.h>
+#include <stddef.h>
+#include <setjmp.h>
+#include <assert.h>
+#include <cmocka.h>
+/* cmocka < 1.0 didn't support these features we need */
+#ifndef assert_ptr_equal
+#define assert_ptr_equal(a, b) \
+    _assert_int_equal(cast_ptr_to_largest_integral_type(a), \
+                      cast_ptr_to_largest_integral_type(b), \
+                      __FILE__, __LINE__)
+#define CMUnitTest UnitTest
+#define cmocka_unit_test unit_test
+#define cmocka_run_group_tests(t, setup, teardown) run_tests(t)
+#endif
+
+
+extern void mock_assert(const int result, const char* const expression,
+                        const char * const file, const int line);
+#undef assert
+#define assert(expression) \
+    mock_assert((int)(expression), #expression, __FILE__, __LINE__);
+
+#include "afl-prealloc.h"
+
+/* remap exit -> assert, then use cmocka's mock_assert
+    (compile with `--wrap=exit`) */
+extern void exit(int status);
+extern void __real_exit(int status);
+void __wrap_exit(int status) {
+    assert(0);
+}
+
+/* ignore all printfs */
+extern int printf(const char *format, ...);
+extern int __real_printf(const char *format, ...);
+int __wrap_printf(const char *format, ...) {
+    return 1;
+}
+
+typedef struct prealloc_me
+{
+    PREALLOCABLE;
+
+    u8 *content[128];
+
+} prealloc_me_t;
+
+#define PREALLOCED_BUF_SIZE (64)
+prealloc_me_t prealloc_me_buf[PREALLOCED_BUF_SIZE];
+size_t prealloc_me_size = 0;
+
+static void test_alloc_free(void **state) {
+
+    prealloc_me_t *prealloced = NULL;
+    PRE_ALLOC(prealloced, prealloc_me_buf, PREALLOCED_BUF_SIZE, prealloc_me_size);
+    assert_non_null(prealloced);
+    PRE_FREE(prealloced, prealloc_me_size);
+
+}
+
+static void test_prealloc_overflow(void **state) {
+
+    u32 i = 0;
+    prealloc_me_t *prealloced[PREALLOCED_BUF_SIZE + 10];
+
+    for (i = 0; i < PREALLOCED_BUF_SIZE + 10; i++) {
+
+        PRE_ALLOC(prealloced[i], prealloc_me_buf, PREALLOCED_BUF_SIZE, prealloc_me_size);
+        assert_non_null(prealloced[i]);
+
+    }
+    assert_int_equal(prealloced[0]->pre_status,  PRE_STATUS_USED);
+    assert_int_equal(prealloced[PREALLOCED_BUF_SIZE]->pre_status,  PRE_STATUS_MALLOC);
+
+    PRE_FREE(prealloced[20], prealloc_me_size);
+    PRE_ALLOC(prealloced[20], prealloc_me_buf, PREALLOCED_BUF_SIZE, prealloc_me_size);
+    assert_non_null(prealloced[20]);
+    assert_int_equal(prealloced[20]->pre_status,  PRE_STATUS_USED);
+
+    PRE_FREE(prealloced[PREALLOCED_BUF_SIZE], prealloc_me_size);
+    PRE_FREE(prealloced[0], prealloc_me_size);
+    PRE_ALLOC(prealloced[PREALLOCED_BUF_SIZE], prealloc_me_buf, PREALLOCED_BUF_SIZE, prealloc_me_size);
+    assert_non_null(prealloced[PREALLOCED_BUF_SIZE]);
+    /* there should be space now! */
+    assert_int_equal(prealloced[PREALLOCED_BUF_SIZE]->pre_status,  PRE_STATUS_USED);
+
+    PRE_ALLOC(prealloced[0], prealloc_me_buf, PREALLOCED_BUF_SIZE, prealloc_me_size);
+    assert_non_null(prealloced[0]);
+    /* no more space */
+    assert_int_equal(prealloced[0]->pre_status,  PRE_STATUS_MALLOC);
+
+    for (i = 0; i < PREALLOCED_BUF_SIZE + 10; i++) {
+
+        PRE_FREE(prealloced[i], prealloc_me_size);
+
+    }
+
+}
+
+int main(int argc, char **argv) {
+
+	const struct CMUnitTest tests[] = {
+		cmocka_unit_test(test_alloc_free),
+		cmocka_unit_test(test_prealloc_overflow),
+	};
+
+    //return cmocka_run_group_tests (tests, setup, teardown);
+    return cmocka_run_group_tests (tests, NULL, NULL);
+
+}
diff --git a/unicorn_mode/build_unicorn_support.sh b/unicorn_mode/build_unicorn_support.sh
index 7b82055c..df2a7f6d 100755..100644
--- a/unicorn_mode/build_unicorn_support.sh
+++ b/unicorn_mode/build_unicorn_support.sh
@@ -4,7 +4,7 @@
 # ------------------------------------------------
 #
 # Originally written by Nathan Voss <njvoss99@gmail.com>
-# 
+#
 # Adapted from code by Andrew Griffiths <agriffiths@google.com> and
 #                      Michal Zalewski
 #
@@ -24,7 +24,7 @@
 #
 # This script downloads, patches, and builds a version of Unicorn with
 # minor tweaks to allow Unicorn-emulated binaries to be run under
-# afl-fuzz. 
+# afl-fuzz.
 #
 # The modifications reside in patches/*. The standalone Unicorn library
 # will be written to /usr/lib/libunicornafl.so, and the Python bindings
@@ -46,7 +46,7 @@ if [ ! "$PLT" = "Linux" ] && [ ! "$PLT" = "Darwin" ] && [ ! "$PLT" = "FreeBSD" ]
 
   echo "[-] Error: Unicorn instrumentation is unsupported on $PLT."
   exit 1
-  
+
 fi
 
 if [ ! -f "../config.h" ]; then
@@ -75,6 +75,7 @@ fi
 if [ "$PLT" = "Darwin" ]; then
   CORES=`sysctl -n hw.ncpu`
   TARCMD=tar
+  PYTHONBIN=python3
 fi
 
 if [ "$PLT" = "FreeBSD" ]; then
@@ -90,26 +91,40 @@ if [ "$PLT" = "NetBSD" ] || [ "$PLT" = "OpenBSD" ]; then
   TARCMD=gtar
 fi
 
+PREREQ_NOTFOUND=
 for i in $PYTHONBIN automake autoconf git $MAKECMD $TARCMD; do
 
-  T=`which "$i" 2>/dev/null`
+  T=`command -v "$i" 2>/dev/null`
 
   if [ "$T" = "" ]; then
 
     echo "[-] Error: '$i' not found. Run 'sudo apt-get install $i' or similar."
-    exit 1
+    PREREQ_NOTFOUND=1
 
   fi
 
 done
 
-if ! which $EASY_INSTALL > /dev/null; then
+if ! type $EASY_INSTALL > /dev/null; then
+
+  # no easy_install executable: search the library directory of $PYTHONBIN (where its os.py lives) for easy_install.py
+  EASY_INSTALL_FOUND=0
+  MYPYTHONPATH=`$PYTHONBIN -v </dev/null 2>&1 >/dev/null | sed -n -e '/^# \/.*\/os.py/{ s/.*matches //; s/os.py$//; p}'`
+  for PATHCANDIDATE in \
+        "dist-packages/" \
+        "site-packages/"
+  do
+    if [ -e "${MYPYTHONPATH}/${PATHCANDIDATE}/easy_install.py" ] ; then
+
+      EASY_INSTALL_FOUND=1
+      break
 
-  # work around for unusual installs
-  if [ '!' -e /usr/lib/python2.7/dist-packages/easy_install.py ] && [ '!' -e /usr/local/lib/python2.7/dist-packages/easy_install.py ] && [ '!' -e /usr/pkg/lib/python2.7/dist-packages/easy_install.py ]; then
+    fi
+  done
+  if [ "$EASY_INSTALL_FOUND" = "0" ]; then  # compare explicitly: [ '!' 0 ] and [ '!' 1 ] are both false
 
     echo "[-] Error: Python setup-tools not found. Run 'sudo apt-get install python-setuptools'."
-    exit 1
+    PREREQ_NOTFOUND=1
 
   fi
 
@@ -118,10 +133,14 @@ fi
 if echo "$CC" | grep -qF /afl-; then
 
   echo "[-] Error: do not use afl-gcc or afl-clang to compile this tool."
-  exit 1
+  PREREQ_NOTFOUND=1
 
 fi
 
+if [ "$PREREQ_NOTFOUND" = "1" ]; then
+  exit 1
+fi
+
 echo "[+] All checks passed!"
 
 echo "[*] Making sure unicornafl is checked out"
@@ -176,11 +195,11 @@ cd ../samples/simple || exit 1
 
 # Run afl-showmap on the sample application. If anything comes out then it must have worked!
 unset AFL_INST_RATIO
-echo 0 | ../../../afl-showmap -U -m none -q -o .test-instr0 -- $PYTHONBIN simple_test_harness.py ./sample_inputs/sample1.bin || exit 1
+echo 0 | ../../../afl-showmap -U -m none -t 2000 -q -o .test-instr0 -- $PYTHONBIN simple_test_harness.py ./sample_inputs/sample1.bin || exit 1
 
 if [ -s .test-instr0 ]
 then
-  
+
   echo "[+] Instrumentation tests passed. "
   echo '[+] Make sure to adapt older scripts to `import unicornafl` and use `uc.afl_forkserver_start`'
   echo '    or `uc.afl_fuzz` to kick off fuzzing.'
diff --git a/unicorn_mode/samples/c/Makefile b/unicorn_mode/samples/c/Makefile
index fe100490..cb491e10 100644
--- a/unicorn_mode/samples/c/Makefile
+++ b/unicorn_mode/samples/c/Makefile
@@ -1,8 +1,10 @@
 # UnicornAFL Usage
 # Original Unicorn Example Makefile by Nguyen Anh Quynh <aquynh@gmail.com>, 2015
 # Adapted for AFL++ by domenukk <domenukk@gmail.com>, 2020
-
-UNAME_S := $(shell uname -s)
+.POSIX:
+UNAME_S =$(shell uname -s)# GNU make
+UNAME_S:sh=uname -s       # BSD make
+_UNIQ=_QINU_
 
 LIBDIR = ../../unicornafl
 BIN_EXT =
@@ -14,29 +16,32 @@ V ?= 0
 CFLAGS += -Wall -Werror -I../../unicornafl/include
 
 LDFLAGS += -L$(LIBDIR) -lpthread -lm
-ifeq ($(UNAME_S), Linux)
-LDFLAGS += -lrt
-endif
 
-ifneq ($(CROSS),)
-CC = $(CROSS)gcc
-endif
+# LRT is -lrt on Linux and empty elsewhere (the whole $(_UNIQ)$(UNAME_S) word is dropped).
+_LRT = $(_UNIQ)$(UNAME_S:Linux=)
+__LRT = $(_LRT:$(_UNIQ)=-lrt)
+LRT = $(__LRT:$(_UNIQ)$(UNAME_S)=)
+LDFLAGS += $(LRT)
+
+_CC = $(_UNIQ)$(CROSS)
+__CC = $(_CC:$(_UNIQ)=$(CC))
+MYCC = $(__CC:$(_UNIQ)$(CROSS)=$(CROSS)gcc)
 
 .PHONY: all clean
 
 all: harness
 
-clean:		
+clean:
 	rm -rf *.o harness harness-debug
 
 harness.o: harness.c ../../unicornafl/include/unicorn/*.h
-	${CC} ${CFLAGS} -O3 -c $<
+	${MYCC} ${CFLAGS} -O3 -c harness.c
 
 harness-debug.o: harness.c ../../unicornafl/include/unicorn/*.h
-	${CC} ${CFLAGS} -g -c $< -o $@
+	${MYCC} ${CFLAGS} -g -c harness.c -o $@
 
 harness: harness.o
-	${CC} -L${LIBDIR} $< ../../unicornafl/libunicornafl.a $(LDFLAGS) -o $@
+	${MYCC} -L${LIBDIR} harness.o ../../unicornafl/libunicornafl.a $(LDFLAGS) -o $@
 
 debug: harness-debug.o
-	${CC} -L${LIBDIR} $< ../../unicornafl/libunicornafl.a $(LDFLAGS) -o harness-debug
+	${MYCC} -L${LIBDIR} harness-debug.o ../../unicornafl/libunicornafl.a $(LDFLAGS) -o harness-debug
diff --git a/unicorn_mode/samples/persistent/Makefile b/unicorn_mode/samples/persistent/Makefile
index fe100490..cb491e10 100644
--- a/unicorn_mode/samples/persistent/Makefile
+++ b/unicorn_mode/samples/persistent/Makefile
@@ -1,8 +1,10 @@
 # UnicornAFL Usage
 # Original Unicorn Example Makefile by Nguyen Anh Quynh <aquynh@gmail.com>, 2015
 # Adapted for AFL++ by domenukk <domenukk@gmail.com>, 2020
-
-UNAME_S := $(shell uname -s)
+.POSIX:
+UNAME_S =$(shell uname -s)# GNU make
+UNAME_S:sh=uname -s       # BSD make
+_UNIQ=_QINU_
 
 LIBDIR = ../../unicornafl
 BIN_EXT =
@@ -14,29 +16,32 @@ V ?= 0
 CFLAGS += -Wall -Werror -I../../unicornafl/include
 
 LDFLAGS += -L$(LIBDIR) -lpthread -lm
-ifeq ($(UNAME_S), Linux)
-LDFLAGS += -lrt
-endif
 
-ifneq ($(CROSS),)
-CC = $(CROSS)gcc
-endif
+# LRT is -lrt on Linux and empty elsewhere (the whole $(_UNIQ)$(UNAME_S) word is dropped).
+_LRT = $(_UNIQ)$(UNAME_S:Linux=)
+__LRT = $(_LRT:$(_UNIQ)=-lrt)
+LRT = $(__LRT:$(_UNIQ)$(UNAME_S)=)
+LDFLAGS += $(LRT)
+
+_CC = $(_UNIQ)$(CROSS)
+__CC = $(_CC:$(_UNIQ)=$(CC))
+MYCC = $(__CC:$(_UNIQ)$(CROSS)=$(CROSS)gcc)
 
 .PHONY: all clean
 
 all: harness
 
-clean:		
+clean:
 	rm -rf *.o harness harness-debug
 
 harness.o: harness.c ../../unicornafl/include/unicorn/*.h
-	${CC} ${CFLAGS} -O3 -c $<
+	${MYCC} ${CFLAGS} -O3 -c harness.c
 
 harness-debug.o: harness.c ../../unicornafl/include/unicorn/*.h
-	${CC} ${CFLAGS} -g -c $< -o $@
+	${MYCC} ${CFLAGS} -g -c harness.c -o $@
 
 harness: harness.o
-	${CC} -L${LIBDIR} $< ../../unicornafl/libunicornafl.a $(LDFLAGS) -o $@
+	${MYCC} -L${LIBDIR} harness.o ../../unicornafl/libunicornafl.a $(LDFLAGS) -o $@
 
 debug: harness-debug.o
-	${CC} -L${LIBDIR} $< ../../unicornafl/libunicornafl.a $(LDFLAGS) -o harness-debug
+	${MYCC} -L${LIBDIR} harness-debug.o ../../unicornafl/libunicornafl.a $(LDFLAGS) -o harness-debug