about summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-x.custom-format.py63
-rw-r--r--.github/workflows/build_aflplusplus_docker.yaml27
-rw-r--r--.github/workflows/rust_custom_mutator.yml30
-rw-r--r--.gitignore1
-rw-r--r--Dockerfile10
-rw-r--r--GNUmakefile29
-rw-r--r--GNUmakefile.llvm11
-rw-r--r--README.md162
-rw-r--r--TODO.md14
-rwxr-xr-xafl-cmin24
-rwxr-xr-xafl-cmin.bash1
-rwxr-xr-xafl-plot11
-rwxr-xr-xafl-system-config20
-rw-r--r--custom_mutators/honggfuzz/common.h0
-rw-r--r--custom_mutators/honggfuzz/honggfuzz.h9
-rw-r--r--custom_mutators/honggfuzz/input.h6
l---------custom_mutators/honggfuzz/libhfcommon1
-rw-r--r--custom_mutators/honggfuzz/libhfcommon/common.h3
l---------custom_mutators/honggfuzz/libhfcommon/log.h (renamed from custom_mutators/honggfuzz/log.h)0
l---------custom_mutators/honggfuzz/libhfcommon/util.h (renamed from custom_mutators/honggfuzz/util.h)0
-rw-r--r--custom_mutators/honggfuzz/mangle.c1174
-rw-r--r--custom_mutators/rust/.gitignore10
-rw-r--r--custom_mutators/rust/Cargo.toml8
-rw-r--r--custom_mutators/rust/README.md11
-rw-r--r--custom_mutators/rust/custom_mutator-sys/Cargo.toml12
-rw-r--r--custom_mutators/rust/custom_mutator-sys/build.rs42
-rw-r--r--custom_mutators/rust/custom_mutator-sys/src/lib.rs5
-rw-r--r--custom_mutators/rust/custom_mutator-sys/wrapper.h4
-rw-r--r--custom_mutators/rust/custom_mutator/Cargo.toml13
-rw-r--r--custom_mutators/rust/custom_mutator/src/lib.rs740
-rw-r--r--custom_mutators/rust/example/Cargo.toml15
-rw-r--r--custom_mutators/rust/example/src/example_mutator.rs50
-rw-r--r--custom_mutators/rust/example_lain/Cargo.toml16
-rw-r--r--custom_mutators/rust/example_lain/rust-toolchain1
-rw-r--r--custom_mutators/rust/example_lain/src/lain_mutator.rs61
-rw-r--r--docs/Changelog.md113
-rw-r--r--docs/custom_mutators.md5
-rw-r--r--docs/docs.md122
-rw-r--r--docs/env_variables.md36
-rw-r--r--docs/ideas.md36
-rw-r--r--docs/rpc_statsd.md8
-rw-r--r--dynamic_list.txt37
-rw-r--r--include/afl-fuzz.h13
-rw-r--r--include/common.h10
-rw-r--r--include/config.h46
-rw-r--r--include/coverage-32.h2
-rw-r--r--include/coverage-64.h4
-rw-r--r--include/envs.h13
-rw-r--r--instrumentation/LLVMInsTrim.so.cc33
-rw-r--r--instrumentation/README.cmplog.md10
-rw-r--r--instrumentation/README.ctx.md22
-rw-r--r--instrumentation/README.gcc_plugin.md61
-rw-r--r--instrumentation/README.instrument_list.md4
-rw-r--r--instrumentation/README.lto.md31
-rw-r--r--instrumentation/README.neverzero.md2
-rw-r--r--instrumentation/README.ngram.md4
-rw-r--r--instrumentation/README.out_of_line.md8
-rw-r--r--instrumentation/README.persistent_mode.md26
-rw-r--r--instrumentation/SanitizerCoverageLTO.so.cc33
-rw-r--r--instrumentation/SanitizerCoveragePCGUARD.so.cc246
-rw-r--r--instrumentation/afl-compiler-rt.o.c368
-rw-r--r--instrumentation/afl-llvm-common.cc29
-rw-r--r--instrumentation/afl-llvm-dict2file.so.cc26
-rw-r--r--instrumentation/afl-llvm-lto-instrumentation.so.cc30
-rw-r--r--instrumentation/afl-llvm-pass.so.cc191
-rw-r--r--instrumentation/cmplog-instructions-pass.cc181
-rw-r--r--instrumentation/compare-transform-pass.so.cc62
-rw-r--r--instrumentation/llvm-alternative-coverage.h (renamed from instrumentation/llvm-ngram-coverage.h)3
-rw-r--r--instrumentation/split-compares-pass.so.cc57
-rw-r--r--qemu_mode/QEMUAFL_VERSION2
-rw-r--r--qemu_mode/README.md29
-rwxr-xr-xqemu_mode/build_qemu_support.sh109
-rw-r--r--qemu_mode/libcompcov/libcompcov.so.c17
-rw-r--r--qemu_mode/libqasan/README.md23
-rw-r--r--qemu_mode/libqasan/dlmalloc.c3191
-rw-r--r--qemu_mode/libqasan/hooks.c86
-rw-r--r--qemu_mode/libqasan/libqasan.c2
-rw-r--r--qemu_mode/libqasan/malloc.c80
-rw-r--r--qemu_mode/libqasan/uninstrument.c2
m---------qemu_mode/qemuafl0
-rw-r--r--src/afl-analyze.c61
-rw-r--r--src/afl-as.c2
-rw-r--r--src/afl-cc.c290
-rw-r--r--src/afl-common.c568
-rw-r--r--src/afl-forkserver.c124
-rw-r--r--src/afl-fuzz-bitmap.c11
-rw-r--r--src/afl-fuzz-extras.c151
-rw-r--r--src/afl-fuzz-init.c120
-rw-r--r--src/afl-fuzz-mutators.c2
-rw-r--r--src/afl-fuzz-one.c21
-rw-r--r--src/afl-fuzz-queue.c69
-rw-r--r--src/afl-fuzz-redqueen.c291
-rw-r--r--src/afl-fuzz-run.c20
-rw-r--r--src/afl-fuzz-state.c9
-rw-r--r--src/afl-fuzz-stats.c165
-rw-r--r--src/afl-fuzz-statsd.c63
-rw-r--r--src/afl-fuzz.c389
-rw-r--r--src/afl-sharedmem.c12
-rw-r--r--src/afl-showmap.c114
-rw-r--r--src/afl-tmin.c63
-rw-r--r--test-instr.c4
-rwxr-xr-xtest/test-basic.sh6
-rwxr-xr-xtest/test-custom-mutators.sh12
-rw-r--r--test/test-dlopen.c23
-rwxr-xr-xtest/test-libextensions.sh10
-rwxr-xr-xtest/test-llvm.sh46
-rwxr-xr-xtest/test-qemu-mode.sh19
-rwxr-xr-xtest/test-unicorn-mode.sh2
-rw-r--r--test/travis/bionic/Dockerfile45
-rw-r--r--test/travis/focal/Dockerfile45
-rw-r--r--test/travis/trusty/Dockerfile49
-rw-r--r--test/travis/xenial/Dockerfile46
-rw-r--r--unicorn_mode/README.md55
-rw-r--r--unicorn_mode/UNICORNAFL_VERSION2
-rwxr-xr-xunicorn_mode/build_unicorn_support.sh6
-rw-r--r--unicorn_mode/helper_scripts/ida_context_loader.py197
-rw-r--r--unicorn_mode/helper_scripts/unicorn_dumper_gdb.py108
-rw-r--r--unicorn_mode/helper_scripts/unicorn_dumper_ida.py207
-rw-r--r--unicorn_mode/helper_scripts/unicorn_dumper_lldb.py241
-rw-r--r--unicorn_mode/helper_scripts/unicorn_dumper_pwndbg.py143
-rw-r--r--unicorn_mode/samples/c/COMPILE.md2
-rw-r--r--unicorn_mode/samples/compcov_x64/compcov_test_harness.py76
-rw-r--r--unicorn_mode/samples/simple/simple_test_harness.py79
-rw-r--r--unicorn_mode/samples/simple/simple_test_harness_alt.py100
-rwxr-xr-x[-rw-r--r--]unicorn_mode/samples/speedtest/get_offsets.py0
-rw-r--r--unicorn_mode/samples/speedtest/python/harness.py6
m---------unicorn_mode/unicornafl0
-rw-r--r--utils/afl_network_proxy/GNUmakefile3
-rw-r--r--utils/afl_network_proxy/afl-network-server.c33
-rw-r--r--utils/afl_untracer/afl-untracer.c10
-rw-r--r--utils/afl_untracer/ida_get_patchpoints.py17
-rw-r--r--utils/aflpp_driver/aflpp_driver.c16
-rwxr-xr-xutils/crash_triage/triage_crashes.sh9
-rw-r--r--utils/custom_mutators/XmlMutatorMin.py110
-rw-r--r--utils/custom_mutators/common.py12
-rw-r--r--utils/custom_mutators/example.py13
-rw-r--r--utils/custom_mutators/simple-chunk-replace.py16
-rw-r--r--utils/custom_mutators/wrapper_afl_min.py13
-rw-r--r--utils/libdislocator/libdislocator.so.c12
-rw-r--r--utils/persistent_mode/persistent_demo_new.c2
140 files changed, 7555 insertions, 4782 deletions
diff --git a/.custom-format.py b/.custom-format.py
index fad74a69..346e4b07 100755
--- a/.custom-format.py
+++ b/.custom-format.py
@@ -33,13 +33,13 @@ if CLANG_FORMAT_BIN is None:
         o, _ = p.communicate()
         o = str(o, "utf-8")
         o = re.sub(r".*ersion ", "", o)
-        #o = o[len("clang-format version "):].strip()
-        o = o[:o.find(".")]
+        # o = o[len("clang-format version "):].strip()
+        o = o[: o.find(".")]
         o = int(o)
     except:
-        print ("clang-format-11 is needed. Aborted.")
+        print("clang-format-11 is needed. Aborted.")
         exit(1)
-    #if o < 7:
+    # if o < 7:
     #    if subprocess.call(['which', 'clang-format-7'], stdout=subprocess.PIPE) == 0:
     #        CLANG_FORMAT_BIN = 'clang-format-7'
     #    elif subprocess.call(['which', 'clang-format-8'], stdout=subprocess.PIPE) == 0:
@@ -52,8 +52,8 @@ if CLANG_FORMAT_BIN is None:
     #        print ("clang-format 7 or above is needed. Aborted.")
     #        exit(1)
     else:
-        CLANG_FORMAT_BIN = 'clang-format-11'
-            
+        CLANG_FORMAT_BIN = "clang-format-11"
+
 COLUMN_LIMIT = 80
 for line in fmt.split("\n"):
     line = line.split(":")
@@ -69,26 +69,47 @@ def custom_format(filename):
     in_define = False
     last_line = None
     out = ""
-    
+
     for line in src.split("\n"):
         if line.lstrip().startswith("#"):
-            if line[line.find("#")+1:].lstrip().startswith("define"):
+            if line[line.find("#") + 1 :].lstrip().startswith("define"):
                 in_define = True
-        
-        if "/*" in line and not line.strip().startswith("/*") and line.endswith("*/") and len(line) < (COLUMN_LIMIT-2):
+
+        if (
+            "/*" in line
+            and not line.strip().startswith("/*")
+            and line.endswith("*/")
+            and len(line) < (COLUMN_LIMIT - 2)
+        ):
             cmt_start = line.rfind("/*")
-            line = line[:cmt_start] + " " * (COLUMN_LIMIT-2 - len(line)) + line[cmt_start:]
+            line = (
+                line[:cmt_start]
+                + " " * (COLUMN_LIMIT - 2 - len(line))
+                + line[cmt_start:]
+            )
 
         define_padding = 0
         if last_line is not None and in_define and last_line.endswith("\\"):
             last_line = last_line[:-1]
-            define_padding = max(0, len(last_line[last_line.rfind("\n")+1:]))
+            define_padding = max(0, len(last_line[last_line.rfind("\n") + 1 :]))
 
-        if last_line is not None and last_line.strip().endswith("{") and line.strip() != "":
+        if (
+            last_line is not None
+            and last_line.strip().endswith("{")
+            and line.strip() != ""
+        ):
             line = (" " * define_padding + "\\" if in_define else "") + "\n" + line
-        elif last_line is not None and last_line.strip().startswith("}") and line.strip() != "":
+        elif (
+            last_line is not None
+            and last_line.strip().startswith("}")
+            and line.strip() != ""
+        ):
             line = (" " * define_padding + "\\" if in_define else "") + "\n" + line
-        elif line.strip().startswith("}") and last_line is not None and last_line.strip() != "":
+        elif (
+            line.strip().startswith("}")
+            and last_line is not None
+            and last_line.strip() != ""
+        ):
             line = (" " * define_padding + "\\" if in_define else "") + "\n" + line
 
         if not line.endswith("\\"):
@@ -97,14 +118,15 @@ def custom_format(filename):
         out += line + "\n"
         last_line = line
 
-    return (out)
+    return out
+
 
 args = sys.argv[1:]
 if len(args) == 0:
-    print ("Usage: ./format.py [-i] <filename>")
-    print ()
-    print (" The -i option, if specified, let the script to modify in-place")
-    print (" the source files. By default the results are written to stdout.")
+    print("Usage: ./format.py [-i] <filename>")
+    print()
+    print(" The -i option, if specified, let the script to modify in-place")
+    print(" the source files. By default the results are written to stdout.")
     print()
     exit(1)
 
@@ -120,4 +142,3 @@ for filename in args:
             f.write(code)
     else:
         print(code)
-
diff --git a/.github/workflows/build_aflplusplus_docker.yaml b/.github/workflows/build_aflplusplus_docker.yaml
new file mode 100644
index 00000000..be8d795d
--- /dev/null
+++ b/.github/workflows/build_aflplusplus_docker.yaml
@@ -0,0 +1,27 @@
+name: Publish Docker Images
+on:
+  push:
+    branches: [ stable ]
+    paths:
+    - Dockerfile
+  pull_request:
+    branches: [ stable ]
+    paths:
+    - Dockerfile
+jobs:
+  push_to_registry:
+    name: Push Docker images to Dockerhub
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@master
+    - name: Login to Dockerhub
+      uses: docker/login-action@v1
+      with:
+        username: ${{ secrets.DOCKER_USERNAME }}
+        password: ${{ secrets.DOCKER_TOKEN }}
+    - name: Publish aflpp to Registry
+      uses: docker/build-push-action@v2
+      with:
+        context: .
+        push: true
+        tags: aflplusplus/aflplusplus:latest
diff --git a/.github/workflows/rust_custom_mutator.yml b/.github/workflows/rust_custom_mutator.yml
new file mode 100644
index 00000000..de2b184a
--- /dev/null
+++ b/.github/workflows/rust_custom_mutator.yml
@@ -0,0 +1,30 @@
+name: Rust Custom Mutators
+
+on:
+  push:
+    branches: [ stable, dev ]
+  pull_request:
+    branches: [ stable, dev ]
+
+jobs:
+  test:
+    name: Test Rust Custom Mutator Support
+    runs-on: '${{ matrix.os }}'
+    defaults:
+      run:
+        working-directory: custom_mutators/rust
+    strategy:
+      matrix:
+        os: [ubuntu-20.04]
+    steps:
+      - uses: actions/checkout@v2
+      - name: Install Rust Toolchain
+        uses: actions-rs/toolchain@v1
+        with:
+          toolchain: stable
+      - name: Check Code Compiles
+        run: cargo check
+      - name: Run General Tests
+        run: cargo test
+      - name: Run Tests for afl_internals feature flag
+        run: cd custom_mutator && cargo test --features=afl_internals
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index fa820833..3f440730 100644
--- a/.gitignore
+++ b/.gitignore
@@ -65,7 +65,6 @@ qemu_mode/qemu-*
 qemu_mode/qemuafl
 unicorn_mode/samples/*/\.test-*
 unicorn_mode/samples/*/output/
-unicorn_mode/unicornafl
 test/unittests/unit_maybe_alloc
 test/unittests/unit_preallocable
 test/unittests/unit_list
diff --git a/Dockerfile b/Dockerfile
index 8779fee5..1cb00d5d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -11,6 +11,8 @@ LABEL "about"="AFLplusplus docker image"
 
 ARG DEBIAN_FRONTEND=noninteractive
 
+env NO_ARCH_OPT 1
+
 RUN apt-get update && \
     apt-get -y install --no-install-suggests --no-install-recommends \
     automake \
@@ -48,16 +50,16 @@ RUN update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-10 0
 
 ENV LLVM_CONFIG=llvm-config-12
 ENV AFL_SKIP_CPUFREQ=1
+ENV AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1
 
-RUN git clone https://github.com/vanhauser-thc/afl-cov /afl-cov
+RUN git clone --depth=1 https://github.com/vanhauser-thc/afl-cov /afl-cov
 RUN cd /afl-cov && make install && cd ..
 
 COPY . /AFLplusplus
 WORKDIR /AFLplusplus
 
-RUN export REAL_CXX=g++-10 && export CC=gcc-10 && \
-    export CXX=g++-10 && make clean && \
-    make distrib CFLAGS="-O3 -funroll-loops -D_FORTIFY_SOURCE=2" && make install && make clean
+RUN export CC=gcc-10 && export CXX=g++-10 && make clean && \
+    make distrib && make install && make clean
 
 RUN echo 'alias joe="jupp --wordwrap"' >> ~/.bashrc
 RUN echo 'export PS1="[afl++]$PS1"' >> ~/.bashrc
diff --git a/GNUmakefile b/GNUmakefile
index b0ab1ab0..ac8fe796 100644
--- a/GNUmakefile
+++ b/GNUmakefile
@@ -24,7 +24,7 @@ BIN_PATH    = $(PREFIX)/bin
 HELPER_PATH = $(PREFIX)/lib/afl
 DOC_PATH    = $(PREFIX)/share/doc/afl
 MISC_PATH   = $(PREFIX)/share/afl
-MAN_PATH    = $(PREFIX)/man/man8
+MAN_PATH    = $(PREFIX)/share/man/man8
 
 PROGNAME    = afl
 VERSION     = $(shell grep '^$(HASH)define VERSION ' ../config.h | cut -d '"' -f2)
@@ -57,8 +57,6 @@ ifdef MSAN_BUILD
   override LDFLAGS += -fsanitize=memory
 endif
 
-
-
 ifeq "$(findstring android, $(shell $(CC) --version 2>/dev/null))" ""
 ifeq "$(shell echo 'int main() {return 0; }' | $(CC) $(CFLAGS) -Werror -x c - -flto=full -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
 	CFLAGS_FLTO ?= -flto=full
@@ -77,17 +75,17 @@ ifeq "$(shell echo 'int main() {return 0; }' | $(CC) -fno-move-loop-invariants -
 	SPECIAL_PERFORMANCE += -fno-move-loop-invariants -fdisable-tree-cunrolli
 endif
 
-ifeq "$(shell echo 'int main() {return 0; }' | $(CC) $(CFLAGS) -Werror -x c - -march=native -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
-  ifndef SOURCE_DATE_EPOCH
-    HAVE_MARCHNATIVE = 1
-    CFLAGS_OPT += -march=native
-  endif
-endif
+#ifeq "$(shell echo 'int main() {return 0; }' | $(CC) $(CFLAGS) -Werror -x c - -march=native -o .test 2>/dev/null && echo 1 || echo 0 ; rm -f .test )" "1"
+#  ifndef SOURCE_DATE_EPOCH
+#    HAVE_MARCHNATIVE = 1
+#    CFLAGS_OPT += -march=native
+#  endif
+#endif
 
 ifneq "$(shell uname)" "Darwin"
-  ifeq "$(HAVE_MARCHNATIVE)" "1"
-    SPECIAL_PERFORMANCE += -march=native
-  endif
+  #ifeq "$(HAVE_MARCHNATIVE)" "1"
+  #  SPECIAL_PERFORMANCE += -march=native
+  #endif
  # OS X does not like _FORTIFY_SOURCE=2
   ifndef DEBUG
     CFLAGS_OPT += -D_FORTIFY_SOURCE=2
@@ -519,7 +517,7 @@ code-format:
 ifndef AFL_NO_X86
 test_build: afl-cc afl-gcc afl-as afl-showmap
 	@echo "[*] Testing the CC wrapper afl-cc and its instrumentation output..."
-	@unset AFL_MAP_SIZE AFL_USE_UBSAN AFL_USE_CFISAN AFL_USE_ASAN AFL_USE_MSAN AFL_CC; ASAN_OPTIONS=detect_leaks=0 AFL_INST_RATIO=100 AFL_PATH=. ./afl-cc test-instr.c -o test-instr 2>&1 || (echo "Oops, afl-cc failed"; exit 1 )
+	@unset AFL_MAP_SIZE AFL_USE_UBSAN AFL_USE_CFISAN AFL_USE_ASAN AFL_USE_MSAN; ASAN_OPTIONS=detect_leaks=0 AFL_INST_RATIO=100 AFL_PATH=. ./afl-cc test-instr.c -o test-instr 2>&1 || (echo "Oops, afl-cc failed"; exit 1 )
 	ASAN_OPTIONS=detect_leaks=0 ./afl-showmap -m none -q -o .test-instr0 ./test-instr < /dev/null
 	echo 1 | ASAN_OPTIONS=detect_leaks=0 ./afl-showmap -m none -q -o .test-instr1 ./test-instr
 	@rm -f test-instr
@@ -560,6 +558,7 @@ clean:
 	-$(MAKE) -f GNUmakefile.gcc_plugin clean
 	$(MAKE) -C utils/libdislocator clean
 	$(MAKE) -C utils/libtokencap clean
+	$(MAKE) -C utils/aflpp_driver clean
 	$(MAKE) -C utils/afl_network_proxy clean
 	$(MAKE) -C utils/socket_fuzzing clean
 	$(MAKE) -C utils/argv_fuzzing clean
@@ -578,7 +577,11 @@ endif
 deepclean:	clean
 	rm -rf unicorn_mode/unicornafl
 	rm -rf qemu_mode/qemuafl
+ifeq "$(IN_REPO)" "1"
 # NEVER EVER ACTIVATE THAT!!!!! git reset --hard >/dev/null 2>&1 || true
+	git checkout unicorn_mode/unicornafl
+	git checkout qemu_mode/qemuafl
+endif
 
 .PHONY: distrib
 distrib: all
diff --git a/GNUmakefile.llvm b/GNUmakefile.llvm
index a9092579..111a847d 100644
--- a/GNUmakefile.llvm
+++ b/GNUmakefile.llvm
@@ -43,7 +43,8 @@ endif
 LLVMVER  = $(shell $(LLVM_CONFIG) --version 2>/dev/null | sed 's/git//' | sed 's/svn//' )
 LLVM_MAJOR = $(shell $(LLVM_CONFIG) --version 2>/dev/null | sed 's/\..*//' )
 LLVM_MINOR = $(shell $(LLVM_CONFIG) --version 2>/dev/null | sed 's/.*\.//' | sed 's/git//' | sed 's/svn//' | sed 's/ .*//' )
-LLVM_UNSUPPORTED = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^3\.[0-3]|^19' && echo 1 || echo 0 )
+LLVM_UNSUPPORTED = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^3\.[0-3]|^[0-2]\.' && echo 1 || echo 0 )
+LLVM_TOO_NEW = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^1[3-9]' && echo 1 || echo 0 )
 LLVM_NEW_API = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^1[0-9]' && echo 1 || echo 0 )
 LLVM_10_OK = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^1[1-9]|^10\.[1-9]|^10\.0.[1-9]' && echo 1 || echo 0 )
 LLVM_HAVE_LTO = $(shell $(LLVM_CONFIG) --version 2>/dev/null | egrep -q '^1[1-9]' && echo 1 || echo 0 )
@@ -58,7 +59,11 @@ ifeq "$(LLVMVER)" ""
 endif
 
 ifeq "$(LLVM_UNSUPPORTED)" "1"
-  $(warning llvm_mode only supports llvm versions 3.4 up to 12)
+  $(error llvm_mode only supports llvm from version 3.4 onwards)
+endif
+
+ifeq "$(LLVM_TOO_NEW)" "1"
+  $(warning you are using an in-development llvm version - this might break llvm_mode!)
 endif
 
 LLVM_TOO_OLD=1
@@ -502,6 +507,8 @@ install: all
 	@echo .SH LICENSE >> ./$@
 	@echo Apache License Version 2.0, January 2004 >> ./$@
 	@ln -sf afl-cc.8 ./afl-c++.8
+	@ln -sf afl-cc.8 ./afl-clang-fast.8
+	@ln -sf afl-cc.8 ./afl-clang-fast++.8
 ifneq "$(AFL_CLANG_FLTO)" ""
 ifeq "$(LLVM_LTO)" "1"
 	@ln -sf afl-cc.8 ./afl-clang-lto.8
diff --git a/README.md b/README.md
index d1ae05d3..084971f3 100644
--- a/README.md
+++ b/README.md
@@ -2,11 +2,9 @@
 
   <img align="right" src="https://raw.githubusercontent.com/andreafioraldi/AFLplusplus-website/master/static/logo_256x256.png" alt="AFL++ Logo">
 
-  ![Travis State](https://api.travis-ci.com/AFLplusplus/AFLplusplus.svg?branch=stable)
+  Release Version: [3.12c](https://github.com/AFLplusplus/AFLplusplus/releases)
 
-  Release Version: [3.00c](https://github.com/AFLplusplus/AFLplusplus/releases)
-
-  Github Version: 3.01a
+  Github Version: 3.13a
 
   Repository: [https://github.com/AFLplusplus/AFLplusplus](https://github.com/AFLplusplus/AFLplusplus)
 
@@ -23,11 +21,18 @@
   mutations, more and better instrumentation, custom module support, etc.
 
   If you want to use afl++ for your academic work, check the [papers page](https://aflplus.plus/papers/)
-  on the website.
+  on the website. To cite our work, look at the [Cite](#cite) section.
+  For comparisons use the fuzzbench `aflplusplus` setup, or use `afl-clang-fast`
+  with `AFL_LLVM_CMPLOG=1`.
+
+## Major changes in afl++ 3.00 + 3.10
 
-## Major changes in afl++ 3.0
+With afl++ 3.10 we introduced the following changes from previous behaviours:
+  * The '+' feature of the '-t' option now means to auto-calculate the timeout
+    with the value given being the maximum timeout. The original meaning of
+    "skipping timeouts instead of abort" is now inherent to the -t option.
 
-With afl++ 3.0 we introduced changes that break some previous afl and afl++
+With afl++ 3.00 we introduced changes that break some previous afl and afl++
 behaviours and defaults:
 
   * There are no llvm_mode and gcc_plugin subdirectories anymore and there is
@@ -170,7 +175,13 @@ If you want to build afl++ yourself you have many options.
 The easiest choice is to build and install everything:
 
 ```shell
-sudo apt install build-essential python3-dev automake flex bison libglib2.0-dev libpixman-1-dev clang python3-setuptools clang llvm llvm-dev libstdc++-dev
+sudo apt-get update
+sudo apt-get install -y build-essential python3-dev automake git flex bison libglib2.0-dev libpixman-1-dev python3-setuptools
+# try to install llvm 11 and install the distro default if that fails
+sudo apt-get install -y lld-11 llvm-11 llvm-11-dev clang-11 || sudo apt-get install -y lld llvm llvm-dev clang 
+sudo apt-get install -y gcc-$(gcc --version|head -n1|sed 's/.* //'|sed 's/\..*//')-plugin-dev libstdc++-$(gcc --version|head -n1|sed 's/.* //'|sed 's/\..*//')-dev
+git clone https://github.com/AFLplusplus/AFLplusplus
+cd AFLplusplus
 make distrib
 sudo make install
 ```
@@ -221,7 +232,7 @@ These build options exist:
 * AFL_NO_X86 - if compiling on non-intel/amd platforms
 * LLVM_CONFIG - if your distro doesn't use the standard name for llvm-config (e.g. Debian)
 
-e.g.: make ASAN_BUILD=1
+e.g.: `make ASAN_BUILD=1`
 
 ## Good examples and writeups
 
@@ -233,6 +244,7 @@ Here are some good writeups to show how to effectively use AFL++:
  * [https://securitylab.github.com/research/fuzzing-software-2](https://securitylab.github.com/research/fuzzing-software-2)
  * [https://securitylab.github.com/research/fuzzing-sockets-FTP](https://securitylab.github.com/research/fuzzing-sockets-FTP)
  * [https://securitylab.github.com/research/fuzzing-sockets-FreeRDP](https://securitylab.github.com/research/fuzzing-sockets-FreeRDP)
+ * [https://securitylab.github.com/research/fuzzing-apache-1](https://securitylab.github.com/research/fuzzing-apache-1)
 
 If you are interested in fuzzing structured data (where you define what the
 structure is), these links have you covered:
@@ -298,7 +310,7 @@ Clickable README links for the chosen compiler:
   * [LTO mode - afl-clang-lto](instrumentation/README.lto.md)
   * [LLVM mode - afl-clang-fast](instrumentation/README.llvm.md)
   * [GCC_PLUGIN mode - afl-gcc-fast](instrumentation/README.gcc_plugin.md)
-  * GCC/CLANG mode (afl-gcc/afl-clang) have no README as they have no own features
+  * GCC/CLANG modes (afl-gcc/afl-clang) have no README as they have no features of their own
 
 You can select the mode for the afl-cc compiler by:
   1. use a symlink to afl-cc: afl-gcc, afl-g++, afl-clang, afl-clang++,
@@ -393,10 +405,19 @@ How to do this is described below.
 
 Then build the target. (Usually with `make`)
 
-**NOTE**: sometimes configure and build systems are fickle and do not like
-stderr output (and think this means a test failure) - which is something
-afl++ like to do to show statistics. It is recommended to disable them via
-`export AFL_QUIET=1`.
+**NOTES**
+
+1. sometimes configure and build systems are fickle and do not like
+   stderr output (and think this means a test failure) - which is something
+   afl++ likes to do to show statistics. It is recommended to disable them via
+   `export AFL_QUIET=1`.
+
+2. sometimes configure and build systems error on warnings - these should be
+   disabled (e.g. `--disable-werror` for some configure scripts).
+
+3. in case the configure/build system complains about afl++'s compiler and
+   aborts then set `export AFL_NOOPT=1` which will then just behave like the
+   real compiler. This option has to be unset again before building the target!
 
 ##### configure
 
@@ -478,8 +499,9 @@ default.
 #### c) Minimizing all corpus files
 
 The shorter the input files that still traverse the same path
-within the target, the better the fuzzing will be. This is done with `afl-tmin`
-however it is a long process as this has to be done for every file:
+within the target, the better the fuzzing will be. This minimization
+is done with `afl-tmin` however it is a long process as this has to
+be done for every file:
 
 ```
 mkdir input
@@ -530,12 +552,10 @@ If you need to stop and re-start the fuzzing, use the same command line options
 mutation mode!) and switch the input directory with a dash (`-`):
 `afl-fuzz -i - -o output -- bin/target -d @@`
 
-Note that afl-fuzz enforces memory limits to prevent the system to run out
-of memory. By default this is 50MB for a process. If this is too little for
-the target (which you can usually see by afl-fuzz bailing with the message
-that it could not connect to the forkserver), then you can increase this
-with the `-m` option, the value is in MB. To disable any memory limits
-(beware!) set `-m none` - which is usually required for ASAN compiled targets.
+Memory limits are not enforced by afl-fuzz by default and the system may run
+out of memory. You can decrease the memory with the `-m` option, the value is
+in MB. If this is too small for the target, you can usually see this by
+afl-fuzz bailing with the message that it could not connect to the forkserver.
 
 Adding a dictionary is helpful. See the directory [dictionaries/](dictionaries/) if
 something is already included for your data format, and tell afl-fuzz to load
@@ -548,7 +568,9 @@ afl-fuzz has a variety of options that help to workaround target quirks like
 specific locations for the input file (`-f`), not performing deterministic
 fuzzing (`-d`) and many more. Check out `afl-fuzz -h`.
 
-afl-fuzz never stops fuzzing. To terminate afl++ simply press Control-C.
+By default afl-fuzz never stops fuzzing. To terminate afl++ simply press Control-C
+or send it a SIGINT signal. You can also limit the number of executions or the
+approximate runtime in seconds with command line options.
 
 When you start afl-fuzz you will see a user interface that shows what the status
 is:
@@ -691,7 +713,7 @@ Note that there are also a lot of tools out there that help fuzzing with afl++
 (some might be deprecated or unsupported):
 
 Speeding up fuzzing:
- * [libfiowrapper](https://github.com/marekzmyslowski/libfiowrapper) - if you cannot use stdin or in-memory fuzzing, this emulates file reading, recommended.
+ * [libfiowrapper](https://github.com/marekzmyslowski/libfiowrapper) - if the function you want to fuzz requires loading a file, this allows using the shared memory testcase feature :-) - recommended.
 
 Minimization of test cases:
  * [afl-pytmin](https://github.com/ilsani/afl-pytmin) - a wrapper for afl-tmin that tries to speed up the process of minimization of a single test case by using many CPU cores.
@@ -722,11 +744,57 @@ Crash processing
  * [AFLize](https://github.com/d33tah/aflize) - a tool that automatically generates builds of debian packages suitable for AFL.
  * [afl-fid](https://github.com/FoRTE-Research/afl-fid) - a set of tools for working with input data.
 
+## CI Fuzzing
+
+Some notes on CI Fuzzing - this fuzzing is different to normal fuzzing
+campaigns as these runs are much shorter.
+
+1. Always:
+  * LTO has a much longer compile time which is at odds with short fuzzing runs -
+    hence use afl-clang-fast instead.
+  * If you compile with CMPLOG then you can save fuzzing time and reuse that
+    compiled target for both the -c option and the main fuzz target.
+    This will impact the speed by ~15% though.
+  * `AFL_FAST_CAL` - Enable fast calibration, this halves the time the saturated
+     corpus needs to be loaded.
+  * `AFL_CMPLOG_ONLY_NEW` - only perform cmplog on newly found paths, not the
+    initial corpus as this very likely has been done for them already.
+  * Keep the generated corpus, use afl-cmin and reuse it every time!
+
+2. Additionally randomize the afl++ compilation options, e.g.
+  * 40% for `AFL_LLVM_CMPLOG`
+  * 10% for `AFL_LLVM_LAF_ALL`
+
+3. Also randomize the afl-fuzz runtime options, e.g.
+  * 60% for `AFL_DISABLE_TRIM`
+  * 50% use a dictionary generated by `AFL_LLVM_DICT2FILE`
+  * 50% use MOpt (`-L 0`)
+  * 40% for `AFL_EXPAND_HAVOC_NOW`
+  * 30% for old queue processing (`-Z`)
+  * for CMPLOG targets, 60% for `-l 2`, 40% for `-l 3`
+
+4. Do *not* run any `-M` modes, just running `-S` modes is better for CI fuzzing.
+   `-M` enables deterministic fuzzing, old queue handling etc. which is good for
+   a fuzzing campaign but not good for short CI runs.
+
+An example of how this can look can be seen in afl++'s setup in Google's [oss-fuzz](https://github.com/google/oss-fuzz/blob/4bb61df7905c6005000f5766e966e6fe30ab4559/infra/base-images/base-builder/compile_afl#L69).
+
 ## Fuzzing binary-only targets
 
 When source code is *NOT* available, afl++ offers various support for fast,
 on-the-fly instrumentation of black-box binaries. 
 
+If you do not have to use Unicorn the following setup is recommended:
+  * run 1 afl-fuzz -Q instance with CMPLOG (`-c 0` + `AFL_COMPCOV_LEVEL=2`)
+  * run 1 afl-fuzz -Q instance with QASAN  (`AFL_USE_QASAN=1`)
+  * run 1 afl-fuzz -Q instance with LAF (`AFL_PRELOAD=libcompcov.so` + `AFL_COMPCOV_LEVEL=2`)
+
+Then run as many instances as you have cores left with either -Q mode or - better -
+use a binary rewriter like afl-dyninst, retrowrite, zipr, fibre, etc.
+
+For Qemu mode, check out the persistent mode and snapshot features, they give
+a huge speed improvement!  
+
 ### QEMU
 
 For linux programs and its libraries this is accomplished with a version of
@@ -737,7 +805,8 @@ feature by doing:
 cd qemu_mode
 ./build_qemu_support.sh
 ```
-For additional instructions and caveats, see [qemu_mode/README.md](qemu_mode/README.md).
+For additional instructions and caveats, see [qemu_mode/README.md](qemu_mode/README.md) -
+check out the snapshot feature! :-)
 If possible you should use the persistent mode, see [qemu_mode/README.persistent.md](qemu_mode/README.persistent.md).
 The mode is approximately 2-5x slower than compile-time instrumentation, and is
 less conducive to parallelization.
@@ -745,6 +814,8 @@ less conducive to parallelization.
 If [afl-dyninst](https://github.com/vanhauser-thc/afl-dyninst) works for
 your binary, then you can use afl-fuzz normally and it will have twice
 the speed compared to qemu_mode (but slower than persistent mode).
+Note that several other binary rewriters exist, all with their advantages and
+caveats.
 
 ### Unicorn
 
@@ -798,34 +869,6 @@ All these methods are extremely promising in experimental settings, but tend
 to suffer from reliability and performance problems in practical uses - and
 currently do not offer a viable alternative to "dumb" fuzzing techniques.
 
-## CI Fuzzing
-
-Some notes on CI Fuzzing - this fuzzing is different to normal fuzzing
-campaigns as these are much shorter runnings.
-
-1. Always:
-  * LTO has a much longer compile time which is diametrical to short fuzzing - 
-    hence use afl-clang-fast instead
-  * `AFL_FAST_CAL` - Enable fast calibration, halfs the time the saturated
-     corpus is loaded
-  * `AFL_CMPLOG_ONLY_NEW` - only perform cmplog on new found paths, not the
-    initial corpus as it has been done there already
-  * Keep the generated corpus, use afl-cmin and reuse it everytime!
-
-2. Additionally randomize the afl++ compilation options, e.g.
-  * 40% for `AFL_LLVM_CMPLOG`
-  * 10% for `AFL_LLVM_LAF_ALL`
-
-3. Also randomize the afl-fuzz runtime options, e.g.
-  * 60% for `AFL_DISABLE_TRIM`
-  * 50% use a dictionary generated by `AFL_LLVM_DICT2FILE`
-  * 50% use MOpt (`-L 0`)
-  * 40% for `AFL_EXPAND_HAVOC_NOW`
-  * 30% for old queue processing (`-Z`)
-  * for CMPLOG targets, 60% for `-l 2`, 40% for `-l 3`
-
-4. Do *not* run any `-M` modes, just running `-S` modes are better for CI fuzzing.
-
 ## Background: The afl-fuzz approach
 
 American Fuzzy Lop is a brute-force fuzzer coupled with an exceedingly simple
@@ -1141,6 +1184,7 @@ without feedback, bug reports, or patches from:
   Josephine Calliotte                   Konrad Welc
   Thomas Rooijakkers                    David Carlier
   Ruben ten Hove                        Joey Jiao
+  fuzzah
 ```
 
 Thank you!
@@ -1148,8 +1192,18 @@ Thank you!
 
 ## Cite
 
+If you use AFLplusplus to compare to your work, please use either `afl-clang-lto`
+or `afl-clang-fast` with `AFL_LLVM_CMPLOG=1` for building targets and
+`afl-fuzz` with the command line option `-l 2` for fuzzing.
+The most effective setup is the `aflplusplus` default configuration on Google's [fuzzbench](https://github.com/google/fuzzbench/tree/master/fuzzers/aflplusplus).
+
 If you use AFLplusplus in scientific work, consider citing [our paper](https://www.usenix.org/conference/woot20/presentation/fioraldi) presented at WOOT'20:
-```
+
++ Andrea Fioraldi, Dominik Maier, Heiko Eißfeldt, and Marc Heuse. “AFL++: Combining incremental steps of fuzzing research”. In 14th USENIX Workshop on Offensive Technologies (WOOT 20). USENIX Association, Aug. 2020.
+
+Bibtex:
+
+```bibtex
 @inproceedings {AFLplusplus-Woot20,
 	author = {Andrea Fioraldi and Dominik Maier and Heiko Ei{\ss}feldt and Marc Heuse},
 	title = {{AFL++}: Combining Incremental Steps of Fuzzing Research},
diff --git a/TODO.md b/TODO.md
index 890a481a..b8a091ff 100644
--- a/TODO.md
+++ b/TODO.md
@@ -2,20 +2,21 @@
 
 ## Roadmap 3.00+
 
- - AFL_MAP_SIZE for qemu_mode and unicorn_mode
+ - Update afl->pending_not_fuzzed for MOpt
  - CPU affinity for many cores? There seems to be an issue > 96 cores
  - afl-plot to support multiple plot_data
  - afl_custom_fuzz_splice_optin()
+ - afl_custom_splice()
  - intel-pt tracer
+ - better autodetection of shifting runtime timeout values
+ - cmplog: use colorization input for havoc?
+ - cmplog: too many tainted bytes, directly add to dict and skip?
+
 
 ## Further down the road
 
 afl-fuzz:
  - setting min_len/max_len/start_offset/end_offset limits for mutation output
- - add __sanitizer_cov_trace_cmp* support via shmem
-
-llvm_mode:
- - add __sanitizer_cov_trace_cmp* support
 
 qemu_mode:
  - non colliding instrumentation
@@ -26,9 +27,12 @@ qemu_mode:
  - add/implement AFL_QEMU_INST_LIBLIST and AFL_QEMU_NOINST_PROGRAM
  - add/implement AFL_QEMU_INST_REGIONS as a list of _START/_END addresses
 
+
 ## Ideas
 
  - LTO/sancov: write current edge to prev_loc and use that information when
    using cmplog or __sanitizer_cov_trace_cmp*. maybe we can deduct by follow
    up edge numbers that both following cmp paths have been found and then
    disable working on this edge id -> cmplog_intelligence branch
+ - use cmplog colorization taint result for havoc locations?
+
diff --git a/afl-cmin b/afl-cmin
index 31d7ddad..a1fc6f21 100755
--- a/afl-cmin
+++ b/afl-cmin
@@ -243,7 +243,7 @@ BEGIN {
   if (!stdin_file) {
     found_atat = 0
     for (prog_args_ind in prog_args) {
-      if ("@@" == prog_args[prog_args_ind]) {
+      if (match(prog_args[prog_args_ind], "@@") != 0) {
         found_atat = 1
         break
       }
@@ -287,6 +287,10 @@ BEGIN {
     exit 1
   }
 
+  if (0 == system( "test -d "in_dir"/default" )) {
+    in_dir = in_dir "/default"
+  }
+
   if (0 == system( "test -d "in_dir"/queue" )) {
     in_dir = in_dir "/queue"
   }
@@ -342,8 +346,10 @@ BEGIN {
   } else {
     stat_format = "-f '%z %N'" # *BSD, MacOS
   }
-  cmdline = "cd "in_dir" && find . \\( ! -name . -a -type d -prune \\) -o -type f -exec stat "stat_format" \\{\\} \\; | sort -k1n -k2r"
-  cmdline = "ls "in_dir" | (cd "in_dir" && xargs stat "stat_format" 2>/dev/null) | sort -k1n -k2r"
+  cmdline = "(cd "in_dir" && find . \\( ! -name . -a -type d -prune \\) -o -type f -exec stat "stat_format" \\{\\} + | sort -k1n -k2r)"
+  #cmdline = "ls "in_dir" | (cd "in_dir" && xargs stat "stat_format" 2>/dev/null) | sort -k1n -k2r"
+  #cmdline = "(cd "in_dir" && stat "stat_format" *) | sort -k1n -k2r"
+  #cmdline = "(cd "in_dir" && ls | xargs stat "stat_format" ) | sort -k1n -k2r"
   while (cmdline | getline) {
     sub(/^[0-9]+ (\.\/)?/,"",$0)
     infilesSmallToBig[i++] = $0
@@ -354,12 +360,12 @@ BEGIN {
   
   # Make sure that we're not dealing with a directory.
 
-  if (0 == system("test -d "in_dir"/"first_file)) {
+  if (0 == system("test -d ""\""in_dir"/"first_file"\"")) {
     print "[-] Error: The input directory is empty or contains subdirectories - please fix." > "/dev/stderr"
     exit 1
   }
 
-  if (0 == system("ln "in_dir"/"first_file" "trace_dir"/.link_test")) {
+  if (0 == system("ln \""in_dir"/"first_file"\" "trace_dir"/.link_test")) {
     cp_tool = "ln"
   } else {
     cp_tool = "cp"
@@ -374,7 +380,7 @@ BEGIN {
     if (!stdin_file) {
       system( "AFL_CMIN_ALLOW_ANY=1 "AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -- \""target_bin"\" "prog_args_string" <\""in_dir"/"first_file"\"")
     } else {
-      system("cp "in_dir"/"first_file" "stdin_file)
+      system("cp \""in_dir"/"first_file"\" "stdin_file)
       system( "AFL_CMIN_ALLOW_ANY=1 "AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"/.run_test\" -Z "extra_par" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" </dev/null")
     }
 
@@ -411,8 +417,8 @@ BEGIN {
     retval = system( AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"\" -Z "extra_par" -i \""in_dir"\" -- \""target_bin"\" "prog_args_string)
   } else {
     print "    Processing "in_count" files (forkserver mode)..."
-#    print AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"\" -Z "extra_par" -i \""in_dir"\" -- \""target_bin"\" "prog_args_string" </dev/null"
-    retval = system( AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"\" -Z "extra_par" -i \""in_dir"\" -- \""target_bin"\" "prog_args_string" </dev/null")
+#    print AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"\" -Z "extra_par" -i \""in_dir"\" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" </dev/null"
+    retval = system( AFL_CMIN_CRASHES_ONLY"\""showmap"\" -m "mem_limit" -t "timeout" -o \""trace_dir"\" -Z "extra_par" -i \""in_dir"\" -A \""stdin_file"\" -- \""target_bin"\" "prog_args_string" </dev/null")
   }
 
   if (retval && !AFL_CMIN_CRASHES_ONLY) {
@@ -496,7 +502,7 @@ BEGIN {
 
     # copy file unless already done
     if (! (fn in file_already_copied)) {
-      system(cp_tool" "in_dir"/"fn" "out_dir"/"fn)
+      system(cp_tool" \""in_dir"/"fn"\" \""out_dir"/"fn"\"")
       file_already_copied[fn] = ""
       ++out_count
       #printf "tuple nr %d (%d cnt=%d) -> %s\n",tcnt,key,key_count[key],fn > trace_dir"/.log"
diff --git a/afl-cmin.bash b/afl-cmin.bash
index dae21939..5b2c3894 100755
--- a/afl-cmin.bash
+++ b/afl-cmin.bash
@@ -223,6 +223,7 @@ if [ ! -d "$IN_DIR" ]; then
   exit 1
 fi
 
+test -d "$IN_DIR/default" && IN_DIR="$IN_DIR/default"
 test -d "$IN_DIR/queue" && IN_DIR="$IN_DIR/queue"
 
 find "$OUT_DIR" -name 'id[:_]*' -maxdepth 1 -exec rm -- {} \; 2>/dev/null
diff --git a/afl-plot b/afl-plot
index 0faed0ec..ba100d3e 100755
--- a/afl-plot
+++ b/afl-plot
@@ -99,7 +99,7 @@ if [ ! -d "$outputdir" ]; then
 
 fi
 
-rm -f "$outputdir/high_freq.png" "$outputdir/low_freq.png" "$outputdir/exec_speed.png"
+rm -f "$outputdir/high_freq.png" "$outputdir/low_freq.png" "$outputdir/exec_speed.png" "$outputdir/edges.png"
 mv -f "$outputdir/index.html" "$outputdir/index.html.orig" 2>/dev/null
 
 echo "[*] Generating plots..."
@@ -152,6 +152,12 @@ set ytics auto
 plot '$inputdir/plot_data' using 1:11 with filledcurve x1 title '' linecolor rgb '#0090ff' fillstyle transparent solid 0.2 noborder, \\
      '$inputdir/plot_data' using 1:11 with lines title '    execs/sec' linecolor rgb '#0090ff' linewidth 3 smooth bezier;
 
+set terminal png truecolor enhanced size 1000,300 butt
+set output '$outputdir/edges.png'
+
+set ytics auto
+plot '$inputdir/plot_data' using 1:13 with lines title '        edges' linecolor rgb '#0090ff' linewidth 3
+
 _EOF_
 
 ) | gnuplot 
@@ -172,6 +178,7 @@ cat >"$outputdir/index.html" <<_EOF_
 <tr><td><b>Generated on:</b></td><td>`date`</td></tr>
 </table>
 <p>
+<img src="edges.png" width=1000 height=300>
 <img src="high_freq.png" width=1000 height=300><p>
 <img src="low_freq.png" width=1000 height=200><p>
 <img src="exec_speed.png" width=1000 height=200>
@@ -183,7 +190,7 @@ _EOF_
 # sensitive, this seems like a reasonable trade-off.
 
 chmod 755 "$outputdir"
-chmod 644 "$outputdir/high_freq.png" "$outputdir/low_freq.png" "$outputdir/exec_speed.png" "$outputdir/index.html"
+chmod 644 "$outputdir/high_freq.png" "$outputdir/low_freq.png" "$outputdir/exec_speed.png" "$outputdir/edges.png" "$outputdir/index.html"
 
 echo "[+] All done - enjoy your charts!"
 
diff --git a/afl-system-config b/afl-system-config
index d5e5ceae..ae37a062 100755
--- a/afl-system-config
+++ b/afl-system-config
@@ -39,7 +39,7 @@ if [ "$PLATFORM" = "Linux" ] ; then
   echo Settings applied.
   dmesg | egrep -q 'nospectre_v2|spectre_v2=off' || {
     echo It is recommended to boot the kernel with lots of security off - if you are running a machine that is in a secured network - so set this:
-    echo '  /etc/default/grub:GRUB_CMDLINE_LINUX_DEFAULT="ibpb=off ibrs=off kpti=off l1tf=off mds=off mitigations=off no_stf_barrier noibpb noibrs nopcid nopti nospec_store_bypass_disable nospectre_v1 nospectre_v2 pcid=off pti=off spec_store_bypass_disable=off spectre_v2=off stf_barrier=off"'
+    echo '  /etc/default/grub:GRUB_CMDLINE_LINUX_DEFAULT="ibpb=off ibrs=off kpti=0 l1tf=off mds=off mitigations=off no_stf_barrier noibpb noibrs nopcid nopti nospec_store_bypass_disable nospectre_v1 nospectre_v2 pcid=off pti=off spec_store_bypass_disable=off spectre_v2=off stf_barrier=off srbds=off noexec=off noexec32=off tsx=on tsx_async_abort=off arm64.nopauth audit=0 hardened_usercopy=off ssbd=force-off"'
   }
   DONE=1
 fi
@@ -49,6 +49,12 @@ if [ "$PLATFORM" = "FreeBSD" ] ; then
   sysctl kern.elf64.aslr.enable=0
 } > /dev/null
   echo Settings applied.
+  cat <<EOF
+In order to suppress core file generation during fuzzing it is recommended to set
+me:\\
+	:coredumpsize=0:
+in the ~/.login_conf file for the user used for fuzzing.
+EOF
   echo It is recommended to boot the kernel with lots of security off - if you are running a machine that is in a secured network - so set this:
   echo '  sysctl hw.ibrs_disable=1'
   echo 'Setting kern.pmap.pg_ps_enabled=0 into /boot/loader.conf might be helpful too.'
@@ -60,8 +66,14 @@ if [ "$PLATFORM" = "OpenBSD" ] ; then
   DONE=1
 fi
 if [ "$PLATFORM" = "DragonFly" ] ; then
-  echo
-  echo 'System security features cannot be disabled on DragonFly.'
+  #/sbin/sysctl kern.corefile=/dev/null
+  #echo Settings applied.
+  cat <<EOF
+In order to suppress core file generation during fuzzing it is recommended to set
+me:\\
+	:coredumpsize=0:
+in the ~/.login_conf file for the user used for fuzzing.
+EOF
   DONE=1
 fi
 if [ "$PLATFORM" = "NetBSD" ] ; then
@@ -88,7 +100,7 @@ fi
 if [ "$PLATFORM" = "Haiku" ] ; then
   SETTINGS=~/config/settings/system/debug_server/settings
   [ -r ${SETTINGS} ] && grep -qE "default_action\s+kill" ${SETTINGS} && { echo "Nothing to do"; } || { \
-    echo We change the debug_server default_action from user to silenty kill; \
+    echo We change the debug_server default_action from user to silently kill; \
     [ ! -r ${SETTINGS} ] && echo "default_action kill" >${SETTINGS} || { mv ${SETTINGS} s.tmp; sed -e "s/default_action\s\s*user/default_action kill/" s.tmp > ${SETTINGS}; rm s.tmp; }; \
     echo Settings applied.; \
   }
diff --git a/custom_mutators/honggfuzz/common.h b/custom_mutators/honggfuzz/common.h
deleted file mode 100644
index e69de29b..00000000
--- a/custom_mutators/honggfuzz/common.h
+++ /dev/null
diff --git a/custom_mutators/honggfuzz/honggfuzz.h b/custom_mutators/honggfuzz/honggfuzz.h
index 9d07fdf4..c80cdd87 100644
--- a/custom_mutators/honggfuzz/honggfuzz.h
+++ b/custom_mutators/honggfuzz/honggfuzz.h
@@ -39,7 +39,7 @@
 #include "libhfcommon/util.h"
 
 #define PROG_NAME    "honggfuzz"
-#define PROG_VERSION "2.3"
+#define PROG_VERSION "2.4"
 
 /* Name of the template which will be replaced with the proper name of the file */
 #define _HF_FILE_PLACEHOLDER "___FILE___"
@@ -208,6 +208,7 @@ typedef struct {
         const char* crashDir;
         const char* covDirNew;
         bool        saveUnique;
+        bool        saveSmaller;
         size_t      dynfileqMaxSz;
         size_t      dynfileqCnt;
         dynfile_t*  dynfileqCurrent;
@@ -279,9 +280,9 @@ typedef struct {
         cmpfeedback_t*  cmpFeedbackMap;
         int             cmpFeedbackFd;
         bool            cmpFeedback;
-        const char*     blacklistFile;
-        uint64_t*       blacklist;
-        size_t          blacklistCnt;
+        const char*     blocklistFile;
+        uint64_t*       blocklist;
+        size_t          blocklistCnt;
         bool            skipFeedbackOnTimeout;
         uint64_t        maxCov[4];
         dynFileMethod_t dynFileMethod;
diff --git a/custom_mutators/honggfuzz/input.h b/custom_mutators/honggfuzz/input.h
index 7b0c55ae..09712f54 100644
--- a/custom_mutators/honggfuzz/input.h
+++ b/custom_mutators/honggfuzz/input.h
@@ -77,11 +77,11 @@ static inline uint64_t util_rndGet(uint64_t min, uint64_t max) {
 }
 static inline uint64_t util_rnd64() { return rand_below(afl_struct, 1 << 30); }
 
-static inline size_t input_getRandomInputAsBuf(run_t *run, const uint8_t **buf) {
-  *buf = queue_input;
+static inline const uint8_t* input_getRandomInputAsBuf(run_t* run, size_t* len) {
+  *len = queue_input_size;
   run->dynfile->data = queue_input;
   run->dynfile->size = queue_input_size;
-  return queue_input_size;
+  return queue_input;
 }
 static inline void input_setSize(run_t* run, size_t sz) {
   run->dynfile->size = sz;
diff --git a/custom_mutators/honggfuzz/libhfcommon b/custom_mutators/honggfuzz/libhfcommon
deleted file mode 120000
index 945c9b46..00000000
--- a/custom_mutators/honggfuzz/libhfcommon
+++ /dev/null
@@ -1 +0,0 @@
-.
\ No newline at end of file
diff --git a/custom_mutators/honggfuzz/libhfcommon/common.h b/custom_mutators/honggfuzz/libhfcommon/common.h
new file mode 100644
index 00000000..c8cf1329
--- /dev/null
+++ b/custom_mutators/honggfuzz/libhfcommon/common.h
@@ -0,0 +1,3 @@
+#ifndef LOG_E
+  #define LOG_E LOG_F
+#endif
diff --git a/custom_mutators/honggfuzz/log.h b/custom_mutators/honggfuzz/libhfcommon/log.h
index 51e19654..51e19654 120000
--- a/custom_mutators/honggfuzz/log.h
+++ b/custom_mutators/honggfuzz/libhfcommon/log.h
diff --git a/custom_mutators/honggfuzz/util.h b/custom_mutators/honggfuzz/libhfcommon/util.h
index 51e19654..51e19654 120000
--- a/custom_mutators/honggfuzz/util.h
+++ b/custom_mutators/honggfuzz/libhfcommon/util.h
diff --git a/custom_mutators/honggfuzz/mangle.c b/custom_mutators/honggfuzz/mangle.c
index 9c3d1ed4..637d428d 100644
--- a/custom_mutators/honggfuzz/mangle.c
+++ b/custom_mutators/honggfuzz/mangle.c
@@ -39,252 +39,208 @@
 #include "libhfcommon/log.h"
 #include "libhfcommon/util.h"
 
-static inline size_t mangle_LenLeft(run_t *run, size_t off) {
-
-  if (off >= run->dynfile->size) {
-
-    LOG_F("Offset is too large: off:%zu >= len:%zu", off, run->dynfile->size);
-
-  }
-
-  return (run->dynfile->size - off - 1);
-
+static inline size_t mangle_LenLeft(run_t* run, size_t off) {
+    if (off >= run->dynfile->size) {
+        LOG_F("Offset is too large: off:%zu >= len:%zu", off, run->dynfile->size);
+    }
+    return (run->dynfile->size - off - 1);
 }
 
-/* Get a random value <1:max>, but prefer smaller ones - up to 4KiB */
+/*
+ * Get a random value <1:max>, but prefer smaller ones
+ * Based on an idea by https://twitter.com/gamozolabs
+ */
 static inline size_t mangle_getLen(size_t max) {
+    if (max > _HF_INPUT_MAX_SIZE) {
+        LOG_F("max (%zu) > _HF_INPUT_MAX_SIZE (%zu)", max, (size_t)_HF_INPUT_MAX_SIZE);
+    }
+    if (max == 0) {
+        LOG_F("max == 0");
+    }
+    if (max == 1) {
+        return 1;
+    }
 
-  if (max > _HF_INPUT_MAX_SIZE) {
-
-    LOG_F("max (%zu) > _HF_INPUT_MAX_SIZE (%zu)", max,
-          (size_t)_HF_INPUT_MAX_SIZE);
-
-  }
-
-  if (max == 0) { LOG_F("max == 0"); }
-  if (max == 1) { return 1; }
-
-  /* Give 50% chance the the uniform distribution */
-  switch (util_rndGet(0, 9)) {
-
-    case 0:
-      return (size_t)util_rndGet(1, HF_MIN(16, max));
-    case 1:
-      return (size_t)util_rndGet(1, HF_MIN(64, max));
-    case 2:
-      return (size_t)util_rndGet(1, HF_MIN(256, max));
-    case 3:
-      return (size_t)util_rndGet(1, HF_MIN(1024, max));
-    case 4:
-      return (size_t)util_rndGet(1, HF_MIN(4096, max));
-    default:
-      break;
-
-  }
-
-  return (size_t)util_rndGet(1, max);
+    /* Give 50% chance the the uniform distribution */
+    if (util_rnd64() & 1) {
+        return (size_t)util_rndGet(1, max);
+    }
 
+    /* effectively exprand() */
+    return (size_t)util_rndGet(1, util_rndGet(1, max));
 }
 
 /* Prefer smaller values here, so use mangle_getLen() */
-static inline size_t mangle_getOffSet(run_t *run) {
-
-  return mangle_getLen(run->dynfile->size) - 1;
-
+static inline size_t mangle_getOffSet(run_t* run) {
+    return mangle_getLen(run->dynfile->size) - 1;
 }
 
 /* Offset which can be equal to the file size */
-static inline size_t mangle_getOffSetPlus1(run_t *run) {
-
-  size_t reqlen = HF_MIN(run->dynfile->size + 1, _HF_INPUT_MAX_SIZE);
-  return mangle_getLen(reqlen) - 1;
-
+static inline size_t mangle_getOffSetPlus1(run_t* run) {
+    size_t reqlen = HF_MIN(run->dynfile->size + 1, _HF_INPUT_MAX_SIZE);
+    return mangle_getLen(reqlen) - 1;
 }
 
-static inline void mangle_Move(run_t *run, size_t off_from, size_t off_to,
-                               size_t len) {
-
-  if (off_from >= run->dynfile->size) { return; }
-  if (off_to >= run->dynfile->size) { return; }
-  if (off_from == off_to) { return; }
-
-  size_t len_from = run->dynfile->size - off_from;
-  len = HF_MIN(len, len_from);
+static inline void mangle_Move(run_t* run, size_t off_from, size_t off_to, size_t len) {
+    if (off_from >= run->dynfile->size) {
+        return;
+    }
+    if (off_to >= run->dynfile->size) {
+        return;
+    }
+    if (off_from == off_to) {
+        return;
+    }
 
-  size_t len_to = run->dynfile->size - off_to;
-  len = HF_MIN(len, len_to);
+    size_t len_from = run->dynfile->size - off_from;
+    len             = HF_MIN(len, len_from);
 
-  memmove(&run->dynfile->data[off_to], &run->dynfile->data[off_from], len);
+    size_t len_to = run->dynfile->size - off_to;
+    len           = HF_MIN(len, len_to);
 
+    memmove(&run->dynfile->data[off_to], &run->dynfile->data[off_from], len);
 }
 
-static inline void mangle_Overwrite(run_t *run, size_t off, const uint8_t *src,
-                                    size_t len, bool printable) {
-
-  if (len == 0) { return; }
-  size_t maxToCopy = run->dynfile->size - off;
-  if (len > maxToCopy) { len = maxToCopy; }
-
-  memmove(&run->dynfile->data[off], src, len);
-  if (printable) { util_turnToPrintable(&run->dynfile->data[off], len); }
+static inline void mangle_Overwrite(
+    run_t* run, size_t off, const uint8_t* src, size_t len, bool printable) {
+    if (len == 0) {
+        return;
+    }
+    size_t maxToCopy = run->dynfile->size - off;
+    if (len > maxToCopy) {
+        len = maxToCopy;
+    }
 
+    memmove(&run->dynfile->data[off], src, len);
+    if (printable) {
+        util_turnToPrintable(&run->dynfile->data[off], len);
+    }
 }
 
-static inline size_t mangle_Inflate(run_t *run, size_t off, size_t len,
-                                    bool printable) {
-
-  if (run->dynfile->size >= run->global->mutate.maxInputSz) { return 0; }
-  if (len > (run->global->mutate.maxInputSz - run->dynfile->size)) {
-
-    len = run->global->mutate.maxInputSz - run->dynfile->size;
-
-  }
-
-  input_setSize(run, run->dynfile->size + len);
-  mangle_Move(run, off, off + len, run->dynfile->size);
-  if (printable) { memset(&run->dynfile->data[off], ' ', len); }
+static inline size_t mangle_Inflate(run_t* run, size_t off, size_t len, bool printable) {
+    if (run->dynfile->size >= run->global->mutate.maxInputSz) {
+        return 0;
+    }
+    if (len > (run->global->mutate.maxInputSz - run->dynfile->size)) {
+        len = run->global->mutate.maxInputSz - run->dynfile->size;
+    }
 
-  return len;
+    input_setSize(run, run->dynfile->size + len);
+    mangle_Move(run, off, off + len, run->dynfile->size);
+    if (printable) {
+        memset(&run->dynfile->data[off], ' ', len);
+    }
 
+    return len;
 }
 
-static inline void mangle_Insert(run_t *run, size_t off, const uint8_t *val,
-                                 size_t len, bool printable) {
-
-  len = mangle_Inflate(run, off, len, printable);
-  mangle_Overwrite(run, off, val, len, printable);
-
+static inline void mangle_Insert(
+    run_t* run, size_t off, const uint8_t* val, size_t len, bool printable) {
+    len = mangle_Inflate(run, off, len, printable);
+    mangle_Overwrite(run, off, val, len, printable);
 }
 
-static inline void mangle_UseValue(run_t *run, const uint8_t *val, size_t len,
-                                   bool printable) {
-
-  if (util_rnd64() % 2) {
-
-    mangle_Insert(run, mangle_getOffSetPlus1(run), val, len, printable);
-
-  } else {
-
-    mangle_Overwrite(run, mangle_getOffSet(run), val, len, printable);
-
-  }
-
+static inline void mangle_UseValue(run_t* run, const uint8_t* val, size_t len, bool printable) {
+    if (util_rnd64() & 1) {
+        mangle_Overwrite(run, mangle_getOffSet(run), val, len, printable);
+    } else {
+        mangle_Insert(run, mangle_getOffSetPlus1(run), val, len, printable);
+    }
 }
 
-static void mangle_MemSwap(run_t *run, bool printable HF_ATTR_UNUSED) {
-
-  size_t off1 = mangle_getOffSet(run);
-  size_t maxlen1 = run->dynfile->size - off1;
-
-  size_t off2 = mangle_getOffSet(run);
-  size_t maxlen2 = run->dynfile->size - off2;
-
-  size_t   len = mangle_getLen(HF_MIN(maxlen1, maxlen2));
-  uint8_t *tmpbuf = (uint8_t *)util_Malloc(len);
-  defer {
-
-    free(tmpbuf);
-
-  };
-
-  memcpy(tmpbuf, &run->dynfile->data[off1], len);
-  memmove(&run->dynfile->data[off1], &run->dynfile->data[off2], len);
-  memcpy(&run->dynfile->data[off2], tmpbuf, len);
-
+static inline void mangle_UseValueAt(
+    run_t* run, size_t off, const uint8_t* val, size_t len, bool printable) {
+    if (util_rnd64() & 1) {
+        mangle_Overwrite(run, off, val, len, printable);
+    } else {
+        mangle_Insert(run, off, val, len, printable);
+    }
 }
 
-static void mangle_MemCopy(run_t *run, bool printable HF_ATTR_UNUSED) {
-
-  size_t off = mangle_getOffSet(run);
-  size_t len = mangle_getLen(run->dynfile->size - off);
-
-  /* Use a temp buf, as Insert/Inflate can change source bytes */
-  uint8_t *tmpbuf = (uint8_t *)util_Malloc(len);
-  defer {
+static void mangle_MemSwap(run_t* run, bool printable HF_ATTR_UNUSED) {
+    /* No big deal if those two are overlapping */
+    size_t off1    = mangle_getOffSet(run);
+    size_t maxlen1 = run->dynfile->size - off1;
+    size_t off2    = mangle_getOffSet(run);
+    size_t maxlen2 = run->dynfile->size - off2;
+    size_t len     = mangle_getLen(HF_MIN(maxlen1, maxlen2));
 
-    free(tmpbuf);
-
-  };
-
-  memcpy(tmpbuf, &run->dynfile->data[off], len);
-
-  mangle_UseValue(run, tmpbuf, len, printable);
+    if (off1 == off2) {
+        return;
+    }
 
+    for (size_t i = 0; i < (len / 2); i++) {
+        /*
+         * First - from the head, next from the tail. Don't worry about layout of the overlapping
+         * part - there's no good solution to that, and it can be left somewhat scrambled,
+         * while still preserving the entropy
+         */
+        const uint8_t tmp1                       = run->dynfile->data[off2 + i];
+        run->dynfile->data[off2 + i]             = run->dynfile->data[off1 + i];
+        run->dynfile->data[off1 + i]             = tmp1;
+        const uint8_t tmp2                       = run->dynfile->data[off2 + (len - 1) - i];
+        run->dynfile->data[off2 + (len - 1) - i] = run->dynfile->data[off1 + (len - 1) - i];
+        run->dynfile->data[off1 + (len - 1) - i] = tmp2;
+    }
 }
 
-static void mangle_Bytes(run_t *run, bool printable) {
-
-  uint16_t buf;
-  if (printable) {
-
-    util_rndBufPrintable((uint8_t *)&buf, sizeof(buf));
-
-  } else {
+static void mangle_MemCopy(run_t* run, bool printable HF_ATTR_UNUSED) {
+    size_t off = mangle_getOffSet(run);
+    size_t len = mangle_getLen(run->dynfile->size - off);
 
-    buf = util_rnd64();
-
-  }
-
-  /* Overwrite with random 1-2-byte values */
-  size_t toCopy = util_rndGet(1, 2);
-  mangle_UseValue(run, (const uint8_t *)&buf, toCopy, printable);
+    /* Use a temp buf, as Insert/Inflate can change source bytes */
+    uint8_t* tmpbuf = (uint8_t*)util_Malloc(len);
+    defer {
+        free(tmpbuf);
+    };
+    memmove(tmpbuf, &run->dynfile->data[off], len);
 
+    mangle_UseValue(run, tmpbuf, len, printable);
 }
 
-static void mangle_ByteRepeatOverwrite(run_t *run, bool printable) {
-
-  size_t off = mangle_getOffSet(run);
-  size_t destOff = off + 1;
-  size_t maxSz = run->dynfile->size - destOff;
-
-  /* No space to repeat */
-  if (!maxSz) {
-
-    mangle_Bytes(run, printable);
-    return;
-
-  }
-
-  size_t len = mangle_getLen(maxSz);
-  memset(&run->dynfile->data[destOff], run->dynfile->data[off], len);
+static void mangle_Bytes(run_t* run, bool printable) {
+    uint16_t buf;
+    if (printable) {
+        util_rndBufPrintable((uint8_t*)&buf, sizeof(buf));
+    } else {
+        buf = util_rnd64();
+    }
 
+    /* Overwrite with random 1-2-byte values */
+    size_t toCopy = util_rndGet(1, 2);
+    mangle_UseValue(run, (const uint8_t*)&buf, toCopy, printable);
 }
 
-static void mangle_ByteRepeatInsert(run_t *run, bool printable) {
-
-  size_t off = mangle_getOffSet(run);
-  size_t destOff = off + 1;
-  size_t maxSz = run->dynfile->size - destOff;
+static void mangle_ByteRepeat(run_t* run, bool printable) {
+    size_t off     = mangle_getOffSet(run);
+    size_t destOff = off + 1;
+    size_t maxSz   = run->dynfile->size - destOff;
 
-  /* No space to repeat */
-  if (!maxSz) {
-
-    mangle_Bytes(run, printable);
-    return;
-
-  }
-
-  size_t len = mangle_getLen(maxSz);
-  len = mangle_Inflate(run, destOff, len, printable);
-  memset(&run->dynfile->data[destOff], run->dynfile->data[off], len);
+    /* No space to repeat */
+    if (!maxSz) {
+        mangle_Bytes(run, printable);
+        return;
+    }
 
+    size_t len = mangle_getLen(maxSz);
+    if (util_rnd64() & 0x1) {
+        len = mangle_Inflate(run, destOff, len, printable);
+    }
+    memset(&run->dynfile->data[destOff], run->dynfile->data[off], len);
 }
 
-static void mangle_Bit(run_t *run, bool printable) {
-
-  size_t off = mangle_getOffSet(run);
-  run->dynfile->data[off] ^= (uint8_t)(1U << util_rndGet(0, 7));
-  if (printable) { util_turnToPrintable(&(run->dynfile->data[off]), 1); }
-
+static void mangle_Bit(run_t* run, bool printable) {
+    size_t off = mangle_getOffSet(run);
+    run->dynfile->data[off] ^= (uint8_t)(1U << util_rndGet(0, 7));
+    if (printable) {
+        util_turnToPrintable(&(run->dynfile->data[off]), 1);
+    }
 }
 
 static const struct {
-
-  const uint8_t val[8];
-  const size_t  size;
-
+    const uint8_t val[8];
+    const size_t  size;
 } mangleMagicVals[] = {
-
     /* 1B - No endianness */
     {"\x00\x00\x00\x00\x00\x00\x00\x00", 1},
     {"\x01\x00\x00\x00\x00\x00\x00\x00", 1},
@@ -516,522 +472,436 @@ static const struct {
     {"\x00\x00\x00\x00\x00\x00\x00\x80", 8},
     {"\x01\x00\x00\x00\x00\x00\x00\x80", 8},
     {"\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF", 8},
-
 };
 
-static void mangle_Magic(run_t *run, bool printable) {
-
-  uint64_t choice = util_rndGet(0, ARRAYSIZE(mangleMagicVals) - 1);
-  mangle_UseValue(run, mangleMagicVals[choice].val,
-                  mangleMagicVals[choice].size, printable);
-
-}
-
-static void mangle_StaticDict(run_t *run, bool printable) {
-
-  if (run->global->mutate.dictionaryCnt == 0) {
-
-    mangle_Bytes(run, printable);
-    return;
-
-  }
-
-  uint64_t choice = util_rndGet(0, run->global->mutate.dictionaryCnt - 1);
-  mangle_UseValue(run, run->global->mutate.dictionary[choice].val,
-                  run->global->mutate.dictionary[choice].len, printable);
-
+static void mangle_Magic(run_t* run, bool printable) {
+    uint64_t choice = util_rndGet(0, ARRAYSIZE(mangleMagicVals) - 1);
+    mangle_UseValue(run, mangleMagicVals[choice].val, mangleMagicVals[choice].size, printable);
 }
 
-static inline const uint8_t *mangle_FeedbackDict(run_t *run, size_t *len) {
-
-  if (!run->global->feedback.cmpFeedback) { return NULL; }
-  cmpfeedback_t *cmpf = run->global->feedback.cmpFeedbackMap;
-  uint32_t       cnt = ATOMIC_GET(cmpf->cnt);
-  if (cnt == 0) { return NULL; }
-  if (cnt > ARRAYSIZE(cmpf->valArr)) { cnt = ARRAYSIZE(cmpf->valArr); }
-  uint32_t choice = util_rndGet(0, cnt - 1);
-  *len = (size_t)ATOMIC_GET(cmpf->valArr[choice].len);
-  if (*len == 0) { return NULL; }
-  return cmpf->valArr[choice].val;
-
+static void mangle_StaticDict(run_t* run, bool printable) {
+    if (run->global->mutate.dictionaryCnt == 0) {
+        mangle_Bytes(run, printable);
+        return;
+    }
+    uint64_t choice = util_rndGet(0, run->global->mutate.dictionaryCnt - 1);
+    mangle_UseValue(run, run->global->mutate.dictionary[choice].val,
+        run->global->mutate.dictionary[choice].len, printable);
 }
 
-static void mangle_ConstFeedbackDict(run_t *run, bool printable) {
-
-  size_t         len;
-  const uint8_t *val = mangle_FeedbackDict(run, &len);
-  if (val == NULL) {
-
-    mangle_Bytes(run, printable);
-    return;
-
-  }
-
-  mangle_UseValue(run, val, len, printable);
-
+static inline const uint8_t* mangle_FeedbackDict(run_t* run, size_t* len) {
+    if (!run->global->feedback.cmpFeedback) {
+        return NULL;
+    }
+    cmpfeedback_t* cmpf = run->global->feedback.cmpFeedbackMap;
+    uint32_t       cnt  = ATOMIC_GET(cmpf->cnt);
+    if (cnt == 0) {
+        return NULL;
+    }
+    if (cnt > ARRAYSIZE(cmpf->valArr)) {
+        cnt = ARRAYSIZE(cmpf->valArr);
+    }
+    uint32_t choice = util_rndGet(0, cnt - 1);
+    *len            = (size_t)ATOMIC_GET(cmpf->valArr[choice].len);
+    if (*len == 0) {
+        return NULL;
+    }
+    return cmpf->valArr[choice].val;
 }
 
-static void mangle_MemSet(run_t *run, bool printable) {
-
-  size_t off = mangle_getOffSet(run);
-  size_t len = mangle_getLen(run->dynfile->size - off);
-  int    val =
-      printable ? (int)util_rndPrintable() : (int)util_rndGet(0, UINT8_MAX);
-
-  memset(&run->dynfile->data[off], val, len);
-
+static void mangle_ConstFeedbackDict(run_t* run, bool printable) {
+    size_t         len;
+    const uint8_t* val = mangle_FeedbackDict(run, &len);
+    if (val == NULL) {
+        mangle_Bytes(run, printable);
+        return;
+    }
+    mangle_UseValue(run, val, len, printable);
 }
 
-static void mangle_RandomOverwrite(run_t *run, bool printable) {
-
-  size_t off = mangle_getOffSet(run);
-  size_t len = mangle_getLen(run->dynfile->size - off);
-  if (printable) {
-
-    util_rndBufPrintable(&run->dynfile->data[off], len);
-
-  } else {
-
-    util_rndBuf(&run->dynfile->data[off], len);
+static void mangle_MemSet(run_t* run, bool printable) {
+    size_t off = mangle_getOffSet(run);
+    size_t len = mangle_getLen(run->dynfile->size - off);
+    int    val = printable ? (int)util_rndPrintable() : (int)util_rndGet(0, UINT8_MAX);
 
-  }
+    if (util_rnd64() & 1) {
+        len = mangle_Inflate(run, off, len, printable);
+    }
 
+    memset(&run->dynfile->data[off], val, len);
 }
 
-static void mangle_RandomInsert(run_t *run, bool printable) {
-
-  size_t off = mangle_getOffSet(run);
-  size_t len = mangle_getLen(run->dynfile->size - off);
-
-  len = mangle_Inflate(run, off, len, printable);
-
-  if (printable) {
+static void mangle_MemClr(run_t* run, bool printable) {
+    size_t off = mangle_getOffSet(run);
+    size_t len = mangle_getLen(run->dynfile->size - off);
+    int    val = printable ? ' ' : 0;
 
-    util_rndBufPrintable(&run->dynfile->data[off], len);
-
-  } else {
-
-    util_rndBuf(&run->dynfile->data[off], len);
-
-  }
+    if (util_rnd64() & 1) {
+        len = mangle_Inflate(run, off, len, printable);
+    }
 
+    memset(&run->dynfile->data[off], val, len);
 }
 
-static inline void mangle_AddSubWithRange(run_t *run, size_t off, size_t varLen,
-                                          uint64_t range, bool printable) {
-
-  int64_t delta = (int64_t)util_rndGet(0, range * 2) - (int64_t)range;
-
-  switch (varLen) {
-
-    case 1: {
-
-      run->dynfile->data[off] += delta;
-      break;
+static void mangle_RandomBuf(run_t* run, bool printable) {
+    size_t off = mangle_getOffSet(run);
+    size_t len = mangle_getLen(run->dynfile->size - off);
 
+    if (util_rnd64() & 1) {
+        len = mangle_Inflate(run, off, len, printable);
     }
 
-    case 2: {
-
-      int16_t val;
-      memcpy(&val, &run->dynfile->data[off], sizeof(val));
-      if (util_rnd64() & 0x1) {
-
-        val += delta;
-
-      } else {
-
-        /* Foreign endianess */
-        val = __builtin_bswap16(val);
-        val += delta;
-        val = __builtin_bswap16(val);
-
-      }
-
-      mangle_Overwrite(run, off, (uint8_t *)&val, varLen, printable);
-      break;
-
+    if (printable) {
+        util_rndBufPrintable(&run->dynfile->data[off], len);
+    } else {
+        util_rndBuf(&run->dynfile->data[off], len);
     }
+}
 
-    case 4: {
-
-      int32_t val;
-      memcpy(&val, &run->dynfile->data[off], sizeof(val));
-      if (util_rnd64() & 0x1) {
-
-        val += delta;
-
-      } else {
-
-        /* Foreign endianess */
-        val = __builtin_bswap32(val);
-        val += delta;
-        val = __builtin_bswap32(val);
-
-      }
-
-      mangle_Overwrite(run, off, (uint8_t *)&val, varLen, printable);
-      break;
-
+static inline void mangle_AddSubWithRange(
+    run_t* run, size_t off, size_t varLen, uint64_t range, bool printable) {
+    int64_t delta = (int64_t)util_rndGet(0, range * 2) - (int64_t)range;
+
+    switch (varLen) {
+        case 1: {
+            run->dynfile->data[off] += delta;
+            break;
+        }
+        case 2: {
+            int16_t val;
+            memcpy(&val, &run->dynfile->data[off], sizeof(val));
+            if (util_rnd64() & 0x1) {
+                val += delta;
+            } else {
+                /* Foreign endianness */
+                val = __builtin_bswap16(val);
+                val += delta;
+                val = __builtin_bswap16(val);
+            }
+            mangle_Overwrite(run, off, (uint8_t*)&val, varLen, printable);
+            break;
+        }
+        case 4: {
+            int32_t val;
+            memcpy(&val, &run->dynfile->data[off], sizeof(val));
+            if (util_rnd64() & 0x1) {
+                val += delta;
+            } else {
+                /* Foreign endianness */
+                val = __builtin_bswap32(val);
+                val += delta;
+                val = __builtin_bswap32(val);
+            }
+            mangle_Overwrite(run, off, (uint8_t*)&val, varLen, printable);
+            break;
+        }
+        case 8: {
+            int64_t val;
+            memcpy(&val, &run->dynfile->data[off], sizeof(val));
+            if (util_rnd64() & 0x1) {
+                val += delta;
+            } else {
+                /* Foreign endianness */
+                val = __builtin_bswap64(val);
+                val += delta;
+                val = __builtin_bswap64(val);
+            }
+            mangle_Overwrite(run, off, (uint8_t*)&val, varLen, printable);
+            break;
+        }
+        default: {
+            LOG_F("Unknown variable length size: %zu", varLen);
+        }
     }
+}
 
-    case 8: {
-
-      int64_t val;
-      memcpy(&val, &run->dynfile->data[off], sizeof(val));
-      if (util_rnd64() & 0x1) {
-
-        val += delta;
-
-      } else {
-
-        /* Foreign endianess */
-        val = __builtin_bswap64(val);
-        val += delta;
-        val = __builtin_bswap64(val);
-
-      }
-
-      mangle_Overwrite(run, off, (uint8_t *)&val, varLen, printable);
-      break;
+static void mangle_AddSub(run_t* run, bool printable) {
+    size_t off = mangle_getOffSet(run);
 
+    /* 1,2,4,8 */
+    size_t varLen = 1U << util_rndGet(0, 3);
+    if ((run->dynfile->size - off) < varLen) {
+        varLen = 1;
     }
 
-    default: {
-
-      LOG_F("Unknown variable length size: %zu", varLen);
-
+    uint64_t range;
+    switch (varLen) {
+        case 1:
+            range = 16;
+            break;
+        case 2:
+            range = 4096;
+            break;
+        case 4:
+            range = 1048576;
+            break;
+        case 8:
+            range = 268435456;
+            break;
+        default:
+            LOG_F("Invalid operand size: %zu", varLen);
     }
 
-  }
-
-}
-
-static void mangle_AddSub(run_t *run, bool printable) {
-
-  size_t off = mangle_getOffSet(run);
-
-  /* 1,2,4,8 */
-  size_t varLen = 1U << util_rndGet(0, 3);
-  if ((run->dynfile->size - off) < varLen) { varLen = 1; }
-
-  uint64_t range;
-  switch (varLen) {
-
-    case 1:
-      range = 16;
-      break;
-    case 2:
-      range = 4096;
-      break;
-    case 4:
-      range = 1048576;
-      break;
-    case 8:
-      range = 268435456;
-      break;
-    default:
-      LOG_F("Invalid operand size: %zu", varLen);
-
-  }
-
-  mangle_AddSubWithRange(run, off, varLen, range, printable);
-
-}
-
-static void mangle_IncByte(run_t *run, bool printable) {
-
-  size_t off = mangle_getOffSet(run);
-  if (printable) {
-
-    run->dynfile->data[off] = (run->dynfile->data[off] - 32 + 1) % 95 + 32;
-
-  } else {
-
-    run->dynfile->data[off] += (uint8_t)1UL;
-
-  }
-
+    mangle_AddSubWithRange(run, off, varLen, range, printable);
 }
 
-static void mangle_DecByte(run_t *run, bool printable) {
-
-  size_t off = mangle_getOffSet(run);
-  if (printable) {
-
-    run->dynfile->data[off] = (run->dynfile->data[off] - 32 + 94) % 95 + 32;
-
-  } else {
-
-    run->dynfile->data[off] -= (uint8_t)1UL;
-
-  }
-
-}
-
-static void mangle_NegByte(run_t *run, bool printable) {
-
-  size_t off = mangle_getOffSet(run);
-  if (printable) {
-
-    run->dynfile->data[off] = 94 - (run->dynfile->data[off] - 32) + 32;
-
-  } else {
-
-    run->dynfile->data[off] = ~(run->dynfile->data[off]);
-
-  }
-
+static void mangle_IncByte(run_t* run, bool printable) {
+    size_t off = mangle_getOffSet(run);
+    if (printable) {
+        run->dynfile->data[off] = (run->dynfile->data[off] - 32 + 1) % 95 + 32;
+    } else {
+        run->dynfile->data[off] += (uint8_t)1UL;
+    }
 }
 
-static void mangle_Expand(run_t *run, bool printable) {
-
-  size_t off = mangle_getOffSet(run);
-  size_t len;
-  if (util_rnd64() % 16) {
-
-    len = mangle_getLen(HF_MIN(16, run->global->mutate.maxInputSz - off));
-
-  } else {
-
-    len = mangle_getLen(run->global->mutate.maxInputSz - off);
-
-  }
-
-  mangle_Inflate(run, off, len, printable);
-
+static void mangle_DecByte(run_t* run, bool printable) {
+    size_t off = mangle_getOffSet(run);
+    if (printable) {
+        run->dynfile->data[off] = (run->dynfile->data[off] - 32 + 94) % 95 + 32;
+    } else {
+        run->dynfile->data[off] -= (uint8_t)1UL;
+    }
 }
 
-static void mangle_Shrink(run_t *run, bool printable HF_ATTR_UNUSED) {
-
-  if (run->dynfile->size <= 2U) { return; }
-
-  size_t off_start = mangle_getOffSet(run);
-  size_t len = mangle_LenLeft(run, off_start);
-  if (len == 0) { return; }
-  if (util_rnd64() % 16) {
-
-    len = mangle_getLen(HF_MIN(16, len));
-
-  } else {
-
-    len = mangle_getLen(len);
-
-  }
-
-  size_t off_end = off_start + len;
-  size_t len_to_move = run->dynfile->size - off_end;
-
-  mangle_Move(run, off_end, off_start, len_to_move);
-  input_setSize(run, run->dynfile->size - len);
-
+static void mangle_NegByte(run_t* run, bool printable) {
+    size_t off = mangle_getOffSet(run);
+    if (printable) {
+        run->dynfile->data[off] = 94 - (run->dynfile->data[off] - 32) + 32;
+    } else {
+        run->dynfile->data[off] = ~(run->dynfile->data[off]);
+    }
 }
 
-static void mangle_ASCIINum(run_t *run, bool printable) {
-
-  size_t len = util_rndGet(2, 8);
-
-  char buf[20];
-  snprintf(buf, sizeof(buf), "%-19" PRId64, (int64_t)util_rnd64());
-
-  mangle_UseValue(run, (const uint8_t *)buf, len, printable);
+static void mangle_Expand(run_t* run, bool printable) {
+    size_t off = mangle_getOffSet(run);
+    size_t len;
+    if (util_rnd64() % 16) {
+        len = mangle_getLen(HF_MIN(16, run->global->mutate.maxInputSz - off));
+    } else {
+        len = mangle_getLen(run->global->mutate.maxInputSz - off);
+    }
 
+    mangle_Inflate(run, off, len, printable);
 }
 
-static void mangle_ASCIINumChange(run_t *run, bool printable) {
-
-  size_t off = mangle_getOffSet(run);
-
-  /* Find a digit */
-  for (; off < run->dynfile->size; off++) {
-
-    if (isdigit(run->dynfile->data[off])) { break; }
-
-  }
-
-  if (off == run->dynfile->size) {
-
-    mangle_Bytes(run, printable);
-    return;
-
-  }
-
-  size_t len = HF_MIN(20, run->dynfile->size - off);
-  char   numbuf[21] = {};
-  strncpy(numbuf, (const char *)&run->dynfile->data[off], len);
-  uint64_t val = (uint64_t)strtoull(numbuf, NULL, 10);
-
-  switch (util_rndGet(0, 5)) {
-
-    case 0:
-      val += util_rndGet(1, 256);
-      break;
-    case 1:
-      val -= util_rndGet(1, 256);
-      break;
-    case 2:
-      val *= util_rndGet(1, 256);
-      break;
-    case 3:
-      val /= util_rndGet(1, 256);
-      break;
-    case 4:
-      val = ~(val);
-      break;
-    case 5:
-      val = util_rnd64();
-      break;
-    default:
-      LOG_F("Invalid choice");
-
-  };
+static void mangle_Shrink(run_t* run, bool printable HF_ATTR_UNUSED) {
+    if (run->dynfile->size <= 2U) {
+        return;
+    }
 
-  len = HF_MIN((size_t)snprintf(numbuf, sizeof(numbuf), "%" PRIu64, val), len);
-  mangle_Overwrite(run, off, (const uint8_t *)numbuf, len, printable);
+    size_t off_start = mangle_getOffSet(run);
+    size_t len       = mangle_LenLeft(run, off_start);
+    if (len == 0) {
+        return;
+    }
+    if (util_rnd64() % 16) {
+        len = mangle_getLen(HF_MIN(16, len));
+    } else {
+        len = mangle_getLen(len);
+    }
+    size_t off_end     = off_start + len;
+    size_t len_to_move = run->dynfile->size - off_end;
 
+    mangle_Move(run, off_end, off_start, len_to_move);
+    input_setSize(run, run->dynfile->size - len);
 }
+static void mangle_ASCIINum(run_t* run, bool printable) {
+    size_t len = util_rndGet(2, 8);
 
-static void mangle_Splice(run_t *run, bool printable) {
-
-  const uint8_t *buf;
-  size_t         sz = input_getRandomInputAsBuf(run, &buf);
-  if (!sz) {
-
-    mangle_Bytes(run, printable);
-    return;
-
-  }
-
-  size_t remoteOff = mangle_getLen(sz) - 1;
-  size_t len = mangle_getLen(sz - remoteOff);
-  mangle_UseValue(run, &buf[remoteOff], len, printable);
+    char buf[20];
+    snprintf(buf, sizeof(buf), "%-19" PRId64, (int64_t)util_rnd64());
 
+    mangle_UseValue(run, (const uint8_t*)buf, len, printable);
 }
 
-static void mangle_Resize(run_t *run, bool printable) {
-
-  ssize_t oldsz = run->dynfile->size;
-  ssize_t newsz = 0;
-
-  uint64_t choice = util_rndGet(0, 32);
-  switch (choice) {
+static void mangle_ASCIINumChange(run_t* run, bool printable) {
+    size_t off = mangle_getOffSet(run);
 
-    case 0:                                     /* Set new size arbitrarily */
-      newsz = (ssize_t)util_rndGet(1, run->global->mutate.maxInputSz);
-      break;
-    case 1 ... 4:                         /* Increase size by a small value */
-      newsz = oldsz + (ssize_t)util_rndGet(0, 8);
-      break;
-    case 5:                              /* Increase size by a larger value */
-      newsz = oldsz + (ssize_t)util_rndGet(9, 128);
-      break;
-    case 6 ... 9:                         /* Decrease size by a small value */
-      newsz = oldsz - (ssize_t)util_rndGet(0, 8);
-      break;
-    case 10:                             /* Decrease size by a larger value */
-      newsz = oldsz - (ssize_t)util_rndGet(9, 128);
-      break;
-    case 11 ... 32:                                           /* Do nothing */
-      newsz = oldsz;
-      break;
-    default:
-      LOG_F("Illegal value from util_rndGet: %" PRIu64, choice);
-      break;
-
-  }
-
-  if (newsz < 1) { newsz = 1; }
-  if (newsz > (ssize_t)run->global->mutate.maxInputSz) {
-
-    newsz = run->global->mutate.maxInputSz;
+    /* Find a digit */
+    for (; off < run->dynfile->size; off++) {
+        if (isdigit(run->dynfile->data[off])) {
+            break;
+        }
+    }
+    size_t left = run->dynfile->size - off;
+    if (left == 0) {
+        return;
+    }
 
-  }
+    size_t   len = 0;
+    uint64_t val = 0;
+    /* 20 is the maximum length of a string representing a 64-bit unsigned value */
+    for (len = 0; (len < 20) && (len < left); len++) {
+        char c = run->dynfile->data[off + len];
+        if (!isdigit(c)) {
+            break;
+        }
+        val *= 10;
+        val += (c - '0');
+    }
 
-  input_setSize(run, (size_t)newsz);
-  if (newsz > oldsz) {
+    switch (util_rndGet(0, 7)) {
+        case 0:
+            val++;
+            break;
+        case 1:
+            val--;
+            break;
+        case 2:
+            val *= 2;
+            break;
+        case 3:
+            val /= 2;
+            break;
+        case 4:
+            val = util_rnd64();
+            break;
+        case 5:
+            val += util_rndGet(1, 256);
+            break;
+        case 6:
+            val -= util_rndGet(1, 256);
+            break;
+        case 7:
+            val = ~(val);
+            break;
+        default:
+            LOG_F("Invalid choice");
+    };
+
+    char buf[20];
+    snprintf(buf, sizeof(buf), "%-19" PRIu64, val);
+
+    mangle_UseValueAt(run, off, (const uint8_t*)buf, len, printable);
+}
+
+static void mangle_Splice(run_t* run, bool printable) {
+    if (run->global->feedback.dynFileMethod == _HF_DYNFILE_NONE) {
+        mangle_Bytes(run, printable);
+        return;
+    }
 
-    if (printable) { memset(&run->dynfile->data[oldsz], ' ', newsz - oldsz); }
+    size_t         sz  = 0;
+    const uint8_t* buf = input_getRandomInputAsBuf(run, &sz);
+    if (!buf) {
+        LOG_E("input_getRandomInputAsBuf() returned no input");
+        mangle_Bytes(run, printable);
+        return;
+    }
+    if (!sz) {
+        mangle_Bytes(run, printable);
+        return;
+    }
 
-  }
+    size_t remoteOff = mangle_getLen(sz) - 1;
+    size_t len       = mangle_getLen(sz - remoteOff);
+    mangle_UseValue(run, &buf[remoteOff], len, printable);
+}
+
+static void mangle_Resize(run_t* run, bool printable) {
+    ssize_t oldsz = run->dynfile->size;
+    ssize_t newsz = 0;
+
+    uint64_t choice = util_rndGet(0, 32);
+    switch (choice) {
+        case 0: /* Set new size arbitrarily */
+            newsz = (ssize_t)util_rndGet(1, run->global->mutate.maxInputSz);
+            break;
+        case 1 ... 4: /* Increase size by a small value */
+            newsz = oldsz + (ssize_t)util_rndGet(0, 8);
+            break;
+        case 5: /* Increase size by a larger value */
+            newsz = oldsz + (ssize_t)util_rndGet(9, 128);
+            break;
+        case 6 ... 9: /* Decrease size by a small value */
+            newsz = oldsz - (ssize_t)util_rndGet(0, 8);
+            break;
+        case 10: /* Decrease size by a larger value */
+            newsz = oldsz - (ssize_t)util_rndGet(9, 128);
+            break;
+        case 11 ... 32: /* Do nothing */
+            newsz = oldsz;
+            break;
+        default:
+            LOG_F("Illegal value from util_rndGet: %" PRIu64, choice);
+            break;
+    }
+    if (newsz < 1) {
+        newsz = 1;
+    }
+    if (newsz > (ssize_t)run->global->mutate.maxInputSz) {
+        newsz = run->global->mutate.maxInputSz;
+    }
 
+    input_setSize(run, (size_t)newsz);
+    if (newsz > oldsz) {
+        if (printable) {
+            memset(&run->dynfile->data[oldsz], ' ', newsz - oldsz);
+        }
+    }
 }
 
-void mangle_mangleContent(run_t *run, int speed_factor) {
-
-  static void (*const mangleFuncs[])(run_t * run, bool printable) = {
-
-      /* Every *Insert or Expand expands file, so add more Shrink's */
-      mangle_Shrink,
-      mangle_Shrink,
-      mangle_Shrink,
-      mangle_Shrink,
-      mangle_Expand,
-      mangle_Bit,
-      mangle_IncByte,
-      mangle_DecByte,
-      mangle_NegByte,
-      mangle_AddSub,
-      mangle_MemSet,
-      mangle_MemSwap,
-      mangle_MemCopy,
-      mangle_Bytes,
-      mangle_ASCIINum,
-      mangle_ASCIINumChange,
-      mangle_ByteRepeatOverwrite,
-      mangle_ByteRepeatInsert,
-      mangle_Magic,
-      mangle_StaticDict,
-      mangle_ConstFeedbackDict,
-      mangle_RandomOverwrite,
-      mangle_RandomInsert,
-      mangle_Splice,
-
-  };
-
-  if (run->mutationsPerRun == 0U) { return; }
-  if (run->dynfile->size == 0U) {
-
-    mangle_Resize(run, /* printable= */ run->global->cfg.only_printable);
-
-  }
-
-  uint64_t changesCnt;
-
-  if (speed_factor < 5) {
-
-    changesCnt = util_rndGet(1, run->global->mutate.mutationsPerRun);
-
-  } else if (speed_factor < 10) {
-
-    changesCnt = run->global->mutate.mutationsPerRun;
-
-  } else {
-
-    changesCnt = HF_MIN(speed_factor, 12);
-    changesCnt = HF_MAX(changesCnt, run->global->mutate.mutationsPerRun);
-
-  }
-
-  /* If last coverage acquisition was more than 5 secs ago, use splicing more
-   * frequently */
-  if ((time(NULL) - ATOMIC_GET(run->global->timing.lastCovUpdate)) > 5) {
-
-    if (util_rnd64() % 2) {
-
-      mangle_Splice(run, run->global->cfg.only_printable);
-
+void mangle_mangleContent(run_t* run, int speed_factor) {
+    static void (*const mangleFuncs[])(run_t * run, bool printable) = {
+        mangle_Shrink,
+        mangle_Expand,
+        mangle_Bit,
+        mangle_IncByte,
+        mangle_DecByte,
+        mangle_NegByte,
+        mangle_AddSub,
+        mangle_MemSet,
+        mangle_MemClr,
+        mangle_MemSwap,
+        mangle_MemCopy,
+        mangle_Bytes,
+        mangle_ASCIINum,
+        mangle_ASCIINumChange,
+        mangle_ByteRepeat,
+        mangle_Magic,
+        mangle_StaticDict,
+        mangle_ConstFeedbackDict,
+        mangle_RandomBuf,
+        mangle_Splice,
+    };
+
+    if (run->mutationsPerRun == 0U) {
+        return;
+    }
+    if (run->dynfile->size == 0U) {
+        mangle_Resize(run, /* printable= */ run->global->cfg.only_printable);
     }
 
-  }
-
-  for (uint64_t x = 0; x < changesCnt; x++) {
+    uint64_t changesCnt = run->global->mutate.mutationsPerRun;
 
-    uint64_t choice = util_rndGet(0, ARRAYSIZE(mangleFuncs) - 1);
-    mangleFuncs[choice](run, /* printable= */ run->global->cfg.only_printable);
+    if (speed_factor < 5) {
+        changesCnt = util_rndGet(1, run->global->mutate.mutationsPerRun);
+    } else if (speed_factor < 10) {
+        changesCnt = run->global->mutate.mutationsPerRun;
+    } else {
+        changesCnt = HF_MIN(speed_factor, 10);
+        changesCnt = HF_MAX(changesCnt, (run->global->mutate.mutationsPerRun * 5));
+    }
 
-  }
+    /* If last coverage acquisition was more than 5 secs ago, use splicing more frequently */
+    if ((time(NULL) - ATOMIC_GET(run->global->timing.lastCovUpdate)) > 5) {
+        if (util_rnd64() & 0x1) {
+            mangle_Splice(run, run->global->cfg.only_printable);
+        }
+    }
 
-  wmb();
+    for (uint64_t x = 0; x < changesCnt; x++) {
+        if (run->global->feedback.cmpFeedback && (util_rnd64() & 0x1)) {
+            /*
+             * mangle_ConstFeedbackDict() is quite powerful if the dynamic feedback dictionary
+             * exists. If so, give it 50% chance of being used among all mangling functions.
+             */
+            mangle_ConstFeedbackDict(run, /* printable= */ run->global->cfg.only_printable);
+        } else {
+            uint64_t choice = util_rndGet(0, ARRAYSIZE(mangleFuncs) - 1);
+            mangleFuncs[choice](run, /* printable= */ run->global->cfg.only_printable);
+        }
+    }
 
+    wmb();
 }
-
diff --git a/custom_mutators/rust/.gitignore b/custom_mutators/rust/.gitignore
new file mode 100644
index 00000000..088ba6ba
--- /dev/null
+++ b/custom_mutators/rust/.gitignore
@@ -0,0 +1,10 @@
+# Generated by Cargo
+# will have compiled files and executables
+/target/
+
+# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
+# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
+Cargo.lock
+
+# These are backup files generated by rustfmt
+**/*.rs.bk
diff --git a/custom_mutators/rust/Cargo.toml b/custom_mutators/rust/Cargo.toml
new file mode 100644
index 00000000..e36d24b5
--- /dev/null
+++ b/custom_mutators/rust/Cargo.toml
@@ -0,0 +1,8 @@
+[workspace]
+members = [
+    "custom_mutator-sys",
+    "custom_mutator",
+    "example",
+    # Lain needs a nightly toolchain
+    # "example_lain",
+]
\ No newline at end of file
diff --git a/custom_mutators/rust/README.md b/custom_mutators/rust/README.md
new file mode 100644
index 00000000..e2cc38b4
--- /dev/null
+++ b/custom_mutators/rust/README.md
@@ -0,0 +1,11 @@
+# Rust Custom Mutators
+
+Bindings to create custom mutators in Rust.
+
+These bindings are documented with rustdoc. To view the documentation run
+```cargo doc -p custom_mutator --open```.
+
+A minimal example can be found in `example`. Build it using `cargo build --example example_mutator`. 
+
+An example using [lain](https://github.com/microsoft/lain) for structured fuzzing can be found in `example_lain`.
+Since lain requires a nightly rust toolchain, you need to set one up before you can play with it.
diff --git a/custom_mutators/rust/custom_mutator-sys/Cargo.toml b/custom_mutators/rust/custom_mutator-sys/Cargo.toml
new file mode 100644
index 00000000..104f7df0
--- /dev/null
+++ b/custom_mutators/rust/custom_mutator-sys/Cargo.toml
@@ -0,0 +1,12 @@
+[package]
+name = "custom_mutator-sys"
+version = "0.1.0"
+authors = ["Julius Hohnerlein <julihoh@users.noreply.github.com>"]
+edition = "2018"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+
+[build-dependencies]
+bindgen = "0.56"
diff --git a/custom_mutators/rust/custom_mutator-sys/build.rs b/custom_mutators/rust/custom_mutator-sys/build.rs
new file mode 100644
index 00000000..3c88a90d
--- /dev/null
+++ b/custom_mutators/rust/custom_mutator-sys/build.rs
@@ -0,0 +1,42 @@
+extern crate bindgen;
+
+use std::env;
+use std::path::PathBuf;
+
+// this code is largely taken straight from the handbook: https://github.com/fitzgen/bindgen-tutorial-bzip2-sys
+fn main() {
+    // Tell cargo to invalidate the built crate whenever the wrapper changes
+    println!("cargo:rerun-if-changed=wrapper.h");
+
+    // The bindgen::Builder is the main entry point
+    // to bindgen, and lets you build up options for
+    // the resulting bindings.
+    let bindings = bindgen::Builder::default()
+        // The input header we would like to generate
+        // bindings for.
+        .header("wrapper.h")
+        .whitelist_type("afl_state_t")
+        .blacklist_type(r"u\d+")
+        .opaque_type(r"_.*")
+        .opaque_type("FILE")
+        .opaque_type("in_addr(_t)?")
+        .opaque_type("in_port(_t)?")
+        .opaque_type("sa_family(_t)?")
+        .opaque_type("sockaddr_in(_t)?")
+        .opaque_type("time_t")
+        .rustfmt_bindings(true)
+        .size_t_is_usize(true)
+        // Tell cargo to invalidate the built crate whenever any of the
+        // included header files changed.
+        .parse_callbacks(Box::new(bindgen::CargoCallbacks))
+        // Finish the builder and generate the bindings.
+        .generate()
+        // Unwrap the Result and panic on failure.
+        .expect("Unable to generate bindings");
+
+    // Write the bindings to the $OUT_DIR/bindings.rs file.
+    let out_path = PathBuf::from(env::var("OUT_DIR").unwrap());
+    bindings
+        .write_to_file(out_path.join("bindings.rs"))
+        .expect("Couldn't write bindings!");
+}
diff --git a/custom_mutators/rust/custom_mutator-sys/src/lib.rs b/custom_mutators/rust/custom_mutator-sys/src/lib.rs
new file mode 100644
index 00000000..a38a13a8
--- /dev/null
+++ b/custom_mutators/rust/custom_mutator-sys/src/lib.rs
@@ -0,0 +1,5 @@
+#![allow(non_upper_case_globals)]
+#![allow(non_camel_case_types)]
+#![allow(non_snake_case)]
+
+include!(concat!(env!("OUT_DIR"), "/bindings.rs"));
diff --git a/custom_mutators/rust/custom_mutator-sys/wrapper.h b/custom_mutators/rust/custom_mutator-sys/wrapper.h
new file mode 100644
index 00000000..81cdb90f
--- /dev/null
+++ b/custom_mutators/rust/custom_mutator-sys/wrapper.h
@@ -0,0 +1,4 @@
+#include "../../../include/afl-fuzz.h"
+#include "../../../include/common.h"
+#include "../../../include/config.h"
+#include "../../../include/debug.h"
diff --git a/custom_mutators/rust/custom_mutator/Cargo.toml b/custom_mutators/rust/custom_mutator/Cargo.toml
new file mode 100644
index 00000000..2d3cdbfa
--- /dev/null
+++ b/custom_mutators/rust/custom_mutator/Cargo.toml
@@ -0,0 +1,13 @@
+[package]
+name = "custom_mutator"
+version = "0.1.0"
+authors = ["Julius Hohnerlein <julihoh@users.noreply.github.com>"]
+edition = "2018"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[features]
+afl_internals = ["custom_mutator-sys"]
+
+[dependencies]
+custom_mutator-sys = { path = "../custom_mutator-sys", optional=true }
diff --git a/custom_mutators/rust/custom_mutator/src/lib.rs b/custom_mutators/rust/custom_mutator/src/lib.rs
new file mode 100644
index 00000000..9444e4d1
--- /dev/null
+++ b/custom_mutators/rust/custom_mutator/src/lib.rs
@@ -0,0 +1,740 @@
+#![cfg(unix)]
+//! Somewhat safe and somewhat ergonomic bindings for creating [AFL++](https://github.com/AFLplusplus/AFLplusplus) [custom mutators](https://github.com/AFLplusplus/AFLplusplus/blob/stable/docs/custom_mutators.md) in Rust.
+//!
+//! # Usage
+//! AFL++ custom mutators are expected to be dynamic libraries which expose a set of symbols.
+//! Check out [`CustomMutator`] to see which functions of the API are supported.
+//! Then use [`export_mutator`] to export the correct symbols for your mutator.
+//! In order to use the mutator, your crate needs to be a library crate and have a `crate-type` of `cdylib`.
+//! Putting
+//! ```toml
+//! [lib]
+//! crate-type = ["cdylib"]
+//! ```
+//! into your `Cargo.toml` should do the trick.
+//! The resulting shared library can be found in `target/(debug|release)/libyour_crate_name.so` (on Linux).
+//! # Example
+//! See [`export_mutator`] for an example.
+//!
+//! # On `panic`s
+//! This binding is panic-safe in that it will prevent panics from unwinding into AFL++. Any panic will `abort` at the boundary between the custom mutator and AFL++.
+//!
+//! # Access to AFL++ internals
+//! This crate has an optional feature "afl_internals", which gives access to AFL++'s internal state.
+//! The state is passed to [`CustomMutator::init`], when the feature is activated.
+//!
+//! _This is completely unsafe and uses automatically generated types extracted from the AFL++ source._
+use std::{fmt::Debug, path::Path};
+
+#[cfg(feature = "afl_internals")]
+#[doc(hidden)]
+pub use custom_mutator_sys::afl_state;
+
+/// Low-level mutator interface which the FFI wrappers in [`wrappers`] call directly.
+///
+/// Every [`CustomMutator`] implements this trait through a blanket impl, so mutators are
+/// normally written against [`CustomMutator`] instead of this trait.
+#[allow(unused_variables)]
+#[doc(hidden)]
+pub trait RawCustomMutator {
+    /// Creates the mutator from the AFL++ state and the seed passed to `afl_custom_init`.
+    #[cfg(feature = "afl_internals")]
+    fn init(afl: &'static afl_state, seed: u32) -> Self
+    where
+        Self: Sized;
+    /// Creates the mutator from the seed passed to `afl_custom_init`.
+    #[cfg(not(feature = "afl_internals"))]
+    fn init(seed: u32) -> Self
+    where
+        Self: Sized;
+
+    /// Produces a mutated test case from `buffer` and the optional additional buffer `add_buff`.
+    ///
+    /// The returned slice may borrow from `buffer` or from `self`. Returning `None` makes the
+    /// wrapper report a zero-length output to AFL++.
+    /// `max_size` is passed through from AFL++ unchanged; the wrapper does not enforce it.
+    fn fuzz<'b, 's: 'b>(
+        &'s mut self,
+        buffer: &'b mut [u8],
+        add_buff: Option<&[u8]>,
+        max_size: usize,
+    ) -> Option<&'b [u8]>;
+
+    /// Value returned to AFL++ from `afl_custom_fuzz_count` for the input `buffer`. Defaults to 1.
+    fn fuzz_count(&mut self, buffer: &[u8]) -> u32 {
+        1
+    }
+
+    /// Receives the path of a new queue entry and, if AFL++ passed one, the path of the
+    /// original queue entry. Does nothing by default.
+    fn queue_new_entry(&mut self, filename_new_queue: &Path, _filename_orig_queue: Option<&Path>) {}
+
+    /// Receives the path of a queue entry; the result is returned to AFL++ as a `u8`.
+    /// Defaults to `true`.
+    fn queue_get(&mut self, filename: &Path) -> bool {
+        true
+    }
+
+    /// Description of the mutator. Defaults to the type name of the mutator, truncated to at
+    /// most `max_description` bytes. `None` is reported to AFL++ as a null pointer.
+    fn describe(&mut self, max_description: usize) -> Option<&str> {
+        Some(default_mutator_describe::<Self>(max_description))
+    }
+
+    /// Introspection string handed to AFL++. Defaults to `None`, which is reported as a null pointer.
+    fn introspection(&mut self) -> Option<&str> {
+        None
+    }
+
+    // Parts of the custom mutator C API which are not exposed by this trait (yet):
+    /*fn post_process(&self, buffer: &[u8], unsigned char **out_buf)-> usize;
+    int afl_custom_init_trim(&self, buffer: &[u8]);
+    size_t afl_custom_trim(&self, unsigned char **out_buf);
+    int afl_custom_post_trim(&self, unsigned char success);
+    size_t afl_custom_havoc_mutation(&self, buffer: &[u8], unsigned char **out_buf, size_t max_size);
+    unsigned char afl_custom_havoc_mutation_probability(&self);*/
+}
+
+/// Wrappers for the custom mutator which provide the bridging between the C API and CustomMutator.
+/// These wrappers are not intended to be used directly, rather export_mutator will use them to publish the custom mutator C API.
+#[doc(hidden)]
+pub mod wrappers {
+    #[cfg(feature = "afl_internals")]
+    use custom_mutator_sys::afl_state;
+
+    use std::{
+        any::Any,
+        convert::TryInto,
+        ffi::{c_void, CStr, OsStr},
+        mem::ManuallyDrop,
+        os::{raw::c_char, unix::ffi::OsStrExt},
+        panic::catch_unwind,
+        path::Path,
+        process::abort,
+        ptr::null,
+        slice,
+    };
+
+    use crate::RawCustomMutator;
+
+    /// A structure to be used as the data pointer for our custom mutator. It owns the mutator
+    /// itself plus the buffers backing the C strings which are handed out to AFL++.
+    /// Also has some convenience functions for FFI conversions (from and to ptr) and tries to make misuse hard (see [`FFIContext::from`]).
+    struct FFIContext<M: RawCustomMutator> {
+        mutator: M,
+        /// buffer for storing the description returned by [`RawCustomMutator::describe`] as a CString
+        description_buffer: Vec<u8>,
+        /// buffer for storing the introspection returned by [`RawCustomMutator::introspection`] as a CString
+        introspection_buffer: Vec<u8>,
+    }
+
+    impl<M: RawCustomMutator> FFIContext<M> {
+        /// Reconstructs the context from the data pointer which AFL++ passes to every callback.
+        ///
+        /// Panics if `ptr` is null. The box is wrapped in [`ManuallyDrop`], so the context is
+        /// not freed when the returned value goes out of scope.
+        /// The caller has to make sure that `ptr` was created by [`FFIContext::into_ptr`] for the same `M`.
+        fn from(ptr: *mut c_void) -> ManuallyDrop<Box<Self>> {
+            assert!(!ptr.is_null());
+            ManuallyDrop::new(unsafe { Box::from_raw(ptr as *mut Self) })
+        }
+
+        /// Gives up ownership of the boxed context and returns it as an untyped pointer.
+        fn into_ptr(self: Box<Self>) -> *const c_void {
+            Box::into_raw(self) as *const c_void
+        }
+
+        /// Initializes the mutator via [`RawCustomMutator::init`] and boxes it together with empty string buffers.
+        #[cfg(feature = "afl_internals")]
+        fn new(afl: &'static afl_state, seed: u32) -> Box<Self> {
+            Box::new(Self {
+                mutator: M::init(afl, seed),
+                description_buffer: Vec::new(),
+                introspection_buffer: Vec::new(),
+            })
+        }
+        /// Initializes the mutator via [`RawCustomMutator::init`] and boxes it together with empty string buffers.
+        #[cfg(not(feature = "afl_internals"))]
+        fn new(seed: u32) -> Box<Self> {
+            Box::new(Self {
+                mutator: M::init(seed),
+                description_buffer: Vec::new(),
+                introspection_buffer: Vec::new(),
+            })
+        }
+    }
+
+    /// Reports a panic caught at the FFI boundary and aborts the process.
+    ///
+    /// `method` names the entry point in which the panic was caught. The panic payload is
+    /// printed if it is a `String` or a `&str`, otherwise a placeholder is printed.
+    fn panic_handler(method: &str, panic_info: Box<dyn Any + Send + 'static>) -> ! {
+        let cause: &str = if let Some(message) = panic_info.downcast_ref::<String>() {
+            message.as_str()
+        } else if let Some(message) = panic_info.downcast_ref::<&str>() {
+            *message
+        } else {
+            "<cause unknown>"
+        };
+        eprintln!("A panic occurred at {}: {}", method, cause);
+        abort()
+    }
+
+    /// Internal function used in the macro
+    ///
+    /// Creates the mutator and returns the boxed context as the data pointer for AFL++.
+    /// Aborts the process if the mutator's `init` panics.
+    #[cfg(not(feature = "afl_internals"))]
+    pub fn afl_custom_init_<M: RawCustomMutator>(seed: u32) -> *const c_void {
+        match catch_unwind(|| FFIContext::<M>::new(seed).into_ptr()) {
+            Ok(ret) => ret,
+            Err(err) => panic_handler("afl_custom_init", err),
+        }
+    }
+
+    /// Internal function used in the macro
+    ///
+    /// Creates the mutator and returns the boxed context as the data pointer for AFL++.
+    /// Aborts the process if `afl` is `None` (i.e. NULL was passed) or if the mutator's `init` panics.
+    #[cfg(feature = "afl_internals")]
+    pub fn afl_custom_init_<M: RawCustomMutator>(
+        afl: Option<&'static afl_state>,
+        seed: u32,
+    ) -> *const c_void {
+        match catch_unwind(|| {
+            let afl = afl.expect("mutator func called with NULL afl");
+            FFIContext::<M>::new(afl, seed).into_ptr()
+        }) {
+            Ok(ret) => ret,
+            Err(err) => panic_handler("afl_custom_init", err),
+        }
+    }
+
+    /// Internal function used in the macro
+    ///
+    /// Wraps the raw buffers in slices, calls [`RawCustomMutator::fuzz`] and stores the pointer
+    /// of the returned slice in `*out_buf`. Returns the length of that slice, or 0 (with
+    /// `*out_buf` set to `buf`) if the mutator returned `None`.
+    /// Aborts the process if `data`, `buf` or `out_buf` is null or if the mutator panics.
+    ///
+    /// # Safety
+    /// `data` must be a pointer returned by `afl_custom_init_` for the same `M`.
+    /// `buf` must be valid for reads and writes of `buf_size` bytes, `add_buf` (if not null)
+    /// must be valid for reads of `add_buf_size` bytes and `out_buf` must be valid for writing a pointer.
+    pub unsafe fn afl_custom_fuzz_<M: RawCustomMutator>(
+        data: *mut c_void,
+        buf: *mut u8,
+        buf_size: usize,
+        out_buf: *mut *const u8,
+        add_buf: *mut u8,
+        add_buf_size: usize,
+        max_size: usize,
+    ) -> usize {
+        match catch_unwind(|| {
+            let mut context = FFIContext::<M>::from(data);
+            if buf.is_null() {
+                panic!("null buf passed to afl_custom_fuzz")
+            }
+            if out_buf.is_null() {
+                panic!("null out_buf passed to afl_custom_fuzz")
+            }
+            let buff_slice = slice::from_raw_parts_mut(buf, buf_size);
+            // a null additional buffer is passed to the mutator as `None`
+            let add_buff_slice = if add_buf.is_null() {
+                None
+            } else {
+                Some(slice::from_raw_parts(add_buf, add_buf_size))
+            };
+            match context
+                .mutator
+                .fuzz(buff_slice, add_buff_slice, max_size.try_into().unwrap())
+            {
+                Some(buffer) => {
+                    // NOTE(review): the length of the returned buffer is not checked against `max_size`
+                    *out_buf = buffer.as_ptr();
+                    buffer.len().try_into().unwrap()
+                }
+                None => {
+                    // return the input buffer with 0-length to let AFL skip this mutation attempt
+                    *out_buf = buf;
+                    0
+                }
+            }
+        }) {
+            Ok(ret) => ret,
+            Err(err) => panic_handler("afl_custom_fuzz", err),
+        }
+    }
+
+    /// Internal function used in the macro
+    ///
+    /// Wraps the raw input buffer in a slice and returns the result of [`RawCustomMutator::fuzz_count`].
+    /// Aborts the process if `data` or `buf` is null or if the mutator panics.
+    ///
+    /// # Safety
+    /// `data` must be a pointer returned by `afl_custom_init_` for the same `M` and
+    /// `buf` must be valid for reads of `buf_size` bytes.
+    pub unsafe fn afl_custom_fuzz_count_<M: RawCustomMutator>(
+        data: *mut c_void,
+        buf: *const u8,
+        buf_size: usize,
+    ) -> u32 {
+        match catch_unwind(|| {
+            let mut context = FFIContext::<M>::from(data);
+            if buf.is_null() {
+                panic!("null buf passed to afl_custom_fuzz_count")
+            }
+            let buf_slice = slice::from_raw_parts(buf, buf_size);
+            context.mutator.fuzz_count(buf_slice)
+        }) {
+            Ok(ret) => ret,
+            Err(err) => panic_handler("afl_custom_fuzz_count", err),
+        }
+    }
+
+    /// Internal function used in the macro
+    ///
+    /// Converts the C strings into [`Path`]s and forwards them to [`RawCustomMutator::queue_new_entry`].
+    /// `filename_orig_queue` may be null, in which case the mutator receives `None`.
+    /// Aborts the process if `data` or `filename_new_queue` is null or if the mutator panics.
+    ///
+    /// NOTE(review): although this function is not marked `unsafe`, `data` has to be a pointer
+    /// returned by `afl_custom_init_` and the filenames have to be NUL-terminated strings.
+    pub fn afl_custom_queue_new_entry_<M: RawCustomMutator>(
+        data: *mut c_void,
+        filename_new_queue: *const c_char,
+        filename_orig_queue: *const c_char,
+    ) {
+        match catch_unwind(|| {
+            let mut context = FFIContext::<M>::from(data);
+            if filename_new_queue.is_null() {
+                panic!("received null filename_new_queue in afl_custom_queue_new_entry");
+            }
+            // the paths are built from the raw bytes, so they do not have to be valid UTF-8
+            let filename_new_queue = Path::new(OsStr::from_bytes(
+                unsafe { CStr::from_ptr(filename_new_queue) }.to_bytes(),
+            ));
+            let filename_orig_queue = if !filename_orig_queue.is_null() {
+                Some(Path::new(OsStr::from_bytes(
+                    unsafe { CStr::from_ptr(filename_orig_queue) }.to_bytes(),
+                )))
+            } else {
+                None
+            };
+            context
+                .mutator
+                .queue_new_entry(filename_new_queue, filename_orig_queue);
+        }) {
+            Ok(ret) => ret,
+            Err(err) => panic_handler("afl_custom_queue_new_entry", err),
+        }
+    }
+
+    /// Internal function used in the macro
+    ///
+    /// Frees the context (and with it the mutator) which was created by `afl_custom_init_`.
+    /// Aborts the process if `data` is null or if dropping the mutator panics.
+    ///
+    /// # Safety
+    /// `data` must be a pointer returned by `afl_custom_init_` for the same `M` and must not be used afterwards.
+    pub unsafe fn afl_custom_deinit_<M: RawCustomMutator>(data: *mut c_void) {
+        match catch_unwind(|| {
+            // drop the context
+            // (taking the box out of the `ManuallyDrop` wrapper makes it get dropped as a temporary)
+            ManuallyDrop::into_inner(FFIContext::<M>::from(data));
+        }) {
+            Ok(ret) => ret,
+            Err(err) => panic_handler("afl_custom_deinit", err),
+        }
+    }
+
+    /// Internal function used in the macro
+    ///
+    /// Copies the string returned by [`RawCustomMutator::introspection`] into the context's
+    /// introspection buffer, appends a NUL byte and returns a pointer into that buffer.
+    /// Returns a null pointer if the mutator returned `None`.
+    /// The buffer is overwritten by the next call, so the pointer should not be kept beyond that.
+    /// Aborts the process if `data` is null, if the mutator panics or if the string contains a NUL byte.
+    pub fn afl_custom_introspection_<M: RawCustomMutator>(data: *mut c_void) -> *const c_char {
+        match catch_unwind(|| {
+            let context = &mut *FFIContext::<M>::from(data);
+            if let Some(res) = context.mutator.introspection() {
+                let buf = &mut context.introspection_buffer;
+                buf.clear();
+                buf.extend_from_slice(res.as_bytes());
+                buf.push(0);
+                // unwrapping here, as the error case should be extremely rare
+                // (it only occurs if the string itself contains a NUL byte)
+                CStr::from_bytes_with_nul(&buf).unwrap().as_ptr()
+            } else {
+                null()
+            }
+        }) {
+            Ok(ret) => ret,
+            Err(err) => panic_handler("afl_custom_introspection", err),
+        }
+    }
+
+    /// Internal function used in the macro
+    ///
+    /// Copies the string returned by [`RawCustomMutator::describe`] into the context's
+    /// description buffer, appends a NUL byte and returns a pointer into that buffer.
+    /// Returns a null pointer if the mutator returned `None`.
+    /// The buffer is overwritten by the next call, so the pointer should not be kept beyond that.
+    /// Aborts the process if `data` is null, if the mutator panics or if the string contains a NUL byte.
+    pub fn afl_custom_describe_<M: RawCustomMutator>(
+        data: *mut c_void,
+        max_description_len: usize,
+    ) -> *const c_char {
+        match catch_unwind(|| {
+            let context = &mut *FFIContext::<M>::from(data);
+            if let Some(res) = context.mutator.describe(max_description_len) {
+                // NOTE(review): the length of `res` is not checked against `max_description_len` here
+                let buf = &mut context.description_buffer;
+                buf.clear();
+                buf.extend_from_slice(res.as_bytes());
+                buf.push(0);
+                // unwrapping here, as the error case should be extremely rare
+                // (it only occurs if the string itself contains a NUL byte)
+                CStr::from_bytes_with_nul(&buf).unwrap().as_ptr()
+            } else {
+                null()
+            }
+        }) {
+            Ok(ret) => ret,
+            Err(err) => panic_handler("afl_custom_describe", err),
+        }
+    }
+
+    /// Internal function used in the macro
+    ///
+    /// Converts `filename` into a [`Path`] and returns the result of
+    /// [`RawCustomMutator::queue_get`] as a `u8` (1 for `true`, 0 for `false`).
+    /// Aborts the process if `data` or `filename` is null or if the mutator panics.
+    ///
+    /// NOTE(review): although this function is not marked `unsafe`, `data` has to be a pointer
+    /// returned by `afl_custom_init_` and `filename` has to be a NUL-terminated string.
+    pub fn afl_custom_queue_get_<M: RawCustomMutator>(
+        data: *mut c_void,
+        filename: *const c_char,
+    ) -> u8 {
+        match catch_unwind(|| {
+            let mut context = FFIContext::<M>::from(data);
+            assert!(!filename.is_null());
+
+            context.mutator.queue_get(Path::new(OsStr::from_bytes(
+                unsafe { CStr::from_ptr(filename) }.to_bytes(),
+            ))) as u8
+        }) {
+            Ok(ret) => ret,
+            Err(err) => panic_handler("afl_custom_queue_get", err),
+        }
+    }
+}
+
+/// Defines the `afl_custom_init` symbol for [`export_mutator`]. Not meant to be used directly.
+///
+/// Which of the two variants exists is decided by the `afl_internals` feature of *this* crate.
+/// A `#[cfg(feature = ...)]` inside the expansion of `export_mutator` would be evaluated
+/// against the features of the crate invoking the macro instead, and could therefore pick a
+/// variant which does not match the signature of `wrappers::afl_custom_init_`.
+#[cfg(feature = "afl_internals")]
+#[doc(hidden)]
+#[macro_export]
+macro_rules! _define_afl_custom_init {
+    ($mutator_type:ty) => {
+        #[no_mangle]
+        pub extern "C" fn afl_custom_init(
+            afl: ::std::option::Option<&'static $crate::afl_state>,
+            seed: ::std::os::raw::c_uint,
+        ) -> *const ::std::os::raw::c_void {
+            $crate::wrappers::afl_custom_init_::<$mutator_type>(afl, seed as u32)
+        }
+    };
+}
+
+/// Defines the `afl_custom_init` symbol for [`export_mutator`]. Not meant to be used directly.
+///
+/// This is the variant used when the `afl_internals` feature of this crate is disabled;
+/// the AFL++ state pointer is ignored.
+#[cfg(not(feature = "afl_internals"))]
+#[doc(hidden)]
+#[macro_export]
+macro_rules! _define_afl_custom_init {
+    ($mutator_type:ty) => {
+        #[no_mangle]
+        pub extern "C" fn afl_custom_init(
+            _afl: *const ::std::os::raw::c_void,
+            seed: ::std::os::raw::c_uint,
+        ) -> *const ::std::os::raw::c_void {
+            $crate::wrappers::afl_custom_init_::<$mutator_type>(seed as u32)
+        }
+    };
+}
+
+/// exports the given Mutator as a custom mutator as the C interface that AFL++ expects.
+/// It is not possible to call this macro multiple times, because it would define the custom mutator symbols multiple times.
+///
+/// Every exported function forwards to the function of the same name (with a trailing `_`) in
+/// [`wrappers`], instantiated for the given mutator type.
+/// # Example
+/// ```
+/// # #[macro_use] extern crate custom_mutator;
+/// # #[cfg(feature = "afl_internals")]
+/// # use custom_mutator::afl_state;
+/// # use custom_mutator::CustomMutator;
+/// struct MyMutator;
+/// impl CustomMutator for MyMutator {
+///     /// ...
+/// #  type Error = ();
+/// #  #[cfg(feature = "afl_internals")]
+/// #  fn init(_afl_state: &afl_state, _seed: u32) -> Result<Self,()> {unimplemented!()}
+/// #  #[cfg(not(feature = "afl_internals"))]
+/// #  fn init(_seed: u32) -> Result<Self, Self::Error> {unimplemented!()}
+/// #  fn fuzz<'b,'s:'b>(&'s mut self, _buffer: &'b mut [u8], _add_buff: Option<&[u8]>, _max_size: usize) -> Result<Option<&'b [u8]>, Self::Error> {unimplemented!()}
+/// }
+/// export_mutator!(MyMutator);
+/// ```
+#[macro_export]
+macro_rules! export_mutator {
+    ($mutator_type:ty) => {
+        // the signature of `afl_custom_init` depends on the features of the `custom_mutator`
+        // crate, so the decision is delegated to a macro which is cfg-gated at its definition
+        $crate::_define_afl_custom_init!($mutator_type);
+
+        #[no_mangle]
+        pub extern "C" fn afl_custom_fuzz_count(
+            data: *mut ::std::os::raw::c_void,
+            buf: *const u8,
+            buf_size: usize,
+        ) -> u32 {
+            unsafe {
+                $crate::wrappers::afl_custom_fuzz_count_::<$mutator_type>(data, buf, buf_size)
+            }
+        }
+
+        #[no_mangle]
+        pub extern "C" fn afl_custom_fuzz(
+            data: *mut ::std::os::raw::c_void,
+            buf: *mut u8,
+            buf_size: usize,
+            out_buf: *mut *const u8,
+            add_buf: *mut u8,
+            add_buf_size: usize,
+            max_size: usize,
+        ) -> usize {
+            unsafe {
+                $crate::wrappers::afl_custom_fuzz_::<$mutator_type>(
+                    data,
+                    buf,
+                    buf_size,
+                    out_buf,
+                    add_buf,
+                    add_buf_size,
+                    max_size,
+                )
+            }
+        }
+
+        #[no_mangle]
+        pub extern "C" fn afl_custom_queue_new_entry(
+            data: *mut ::std::os::raw::c_void,
+            filename_new_queue: *const ::std::os::raw::c_char,
+            filename_orig_queue: *const ::std::os::raw::c_char,
+        ) {
+            $crate::wrappers::afl_custom_queue_new_entry_::<$mutator_type>(
+                data,
+                filename_new_queue,
+                filename_orig_queue,
+            )
+        }
+
+        #[no_mangle]
+        pub extern "C" fn afl_custom_queue_get(
+            data: *mut ::std::os::raw::c_void,
+            filename: *const ::std::os::raw::c_char,
+        ) -> u8 {
+            $crate::wrappers::afl_custom_queue_get_::<$mutator_type>(data, filename)
+        }
+
+        #[no_mangle]
+        pub extern "C" fn afl_custom_introspection(
+            data: *mut ::std::os::raw::c_void,
+        ) -> *const ::std::os::raw::c_char {
+            $crate::wrappers::afl_custom_introspection_::<$mutator_type>(data)
+        }
+
+        #[no_mangle]
+        pub extern "C" fn afl_custom_describe(
+            data: *mut ::std::os::raw::c_void,
+            max_description_len: usize,
+        ) -> *const ::std::os::raw::c_char {
+            $crate::wrappers::afl_custom_describe_::<$mutator_type>(data, max_description_len)
+        }
+
+        #[no_mangle]
+        pub extern "C" fn afl_custom_deinit(data: *mut ::std::os::raw::c_void) {
+            unsafe { $crate::wrappers::afl_custom_deinit_::<$mutator_type>(data) }
+        }
+    };
+}
+
+#[cfg(test)]
+/// this sanity test is supposed to just find out whether an empty mutator being exported by the macro compiles
+/// (none of the methods is ever called, so their bodies are left unimplemented)
+mod sanity_test {
+    #[cfg(feature = "afl_internals")]
+    use super::afl_state;
+
+    use super::{export_mutator, RawCustomMutator};
+
+    /// Mutator without any state which implements only the required methods.
+    struct ExampleMutator;
+
+    impl RawCustomMutator for ExampleMutator {
+        #[cfg(feature = "afl_internals")]
+        fn init(_afl: &afl_state, _seed: u32) -> Self {
+            unimplemented!()
+        }
+
+        #[cfg(not(feature = "afl_internals"))]
+        fn init(_seed: u32) -> Self {
+            unimplemented!()
+        }
+
+        fn fuzz<'b, 's: 'b>(
+            &'s mut self,
+            _buffer: &'b mut [u8],
+            _add_buff: Option<&[u8]>,
+            _max_size: usize,
+        ) -> Option<&'b [u8]> {
+            unimplemented!()
+        }
+    }
+
+    // the actual test: the expansion of the macro has to compile
+    export_mutator!(ExampleMutator);
+}
+
+#[allow(unused_variables)]
+/// A custom mutator.
+/// [`CustomMutator::handle_error`] will be called in case any method returns an [`Result::Err`].
+///
+/// Only [`CustomMutator::init`] and [`CustomMutator::fuzz`] have to be implemented, all other
+/// methods have default implementations.
+pub trait CustomMutator {
+    /// The error type. All methods must return the same error type.
+    type Error: Debug;
+
+    /// The method which handles errors.
+    /// By default, this method will log the error to stderr if the environment variable "`AFL_CUSTOM_MUTATOR_DEBUG`" is set and non-empty.
+    /// After logging the error, execution will continue on a best-effort basis.
+    ///
+    /// This default behaviour can be customized by implementing this method.
+    fn handle_error(err: Self::Error) {
+        if std::env::var("AFL_CUSTOM_MUTATOR_DEBUG")
+            .map(|v| !v.is_empty())
+            .unwrap_or(false)
+        {
+            eprintln!("Error in custom mutator: {:?}", err)
+        }
+    }
+
+    /// Creates the mutator from the AFL++ state and the seed passed by AFL++.
+    /// An error is passed to [`CustomMutator::handle_error`] and then turned into a panic,
+    /// which aborts the process.
+    #[cfg(feature = "afl_internals")]
+    fn init(afl: &'static afl_state, seed: u32) -> Result<Self, Self::Error>
+    where
+        Self: Sized;
+
+    /// Creates the mutator from the seed passed by AFL++.
+    /// An error is passed to [`CustomMutator::handle_error`] and then turned into a panic,
+    /// which aborts the process.
+    #[cfg(not(feature = "afl_internals"))]
+    fn init(seed: u32) -> Result<Self, Self::Error>
+    where
+        Self: Sized;
+
+    /// The value to report to AFL++ from `afl_custom_fuzz_count` for the input `buffer`.
+    /// Defaults to 1; in case of an error 0 is reported.
+    fn fuzz_count(&mut self, buffer: &[u8]) -> Result<u32, Self::Error> {
+        Ok(1)
+    }
+
+    /// Produces a mutated test case from `buffer` and the optional additional buffer `add_buff`.
+    ///
+    /// The returned slice may borrow from `buffer` (mutation in place) or from `self`
+    /// (mutation into a buffer owned by the mutator).
+    /// `Ok(None)` and errors are reported to AFL++ as a zero-length output, which lets AFL++
+    /// skip this mutation attempt.
+    /// `max_size` is passed through from AFL++ unchanged; it is not enforced on the result.
+    fn fuzz<'b, 's: 'b>(
+        &'s mut self,
+        buffer: &'b mut [u8],
+        add_buff: Option<&[u8]>,
+        max_size: usize,
+    ) -> Result<Option<&'b [u8]>, Self::Error>;
+
+    /// Receives the path of a new queue entry and, if AFL++ passed one, the path of the
+    /// original queue entry. Does nothing by default.
+    fn queue_new_entry(
+        &mut self,
+        filename_new_queue: &Path,
+        filename_orig_queue: Option<&Path>,
+    ) -> Result<(), Self::Error> {
+        Ok(())
+    }
+
+    /// Receives the path of a queue entry; the result is reported to AFL++.
+    /// Defaults to `true`; in case of an error `false` is reported.
+    fn queue_get(&mut self, filename: &Path) -> Result<bool, Self::Error> {
+        Ok(true)
+    }
+
+    /// Description of the mutator. Defaults to the type name of the mutator, truncated to at
+    /// most `max_description` bytes. `Ok(None)` and errors are reported to AFL++ as a null pointer.
+    fn describe(&mut self, max_description: usize) -> Result<Option<&str>, Self::Error> {
+        Ok(Some(default_mutator_describe::<Self>(max_description)))
+    }
+
+    /// Introspection string for AFL++. Defaults to `None`, which (like an error) is reported
+    /// to AFL++ as a null pointer.
+    fn introspection(&mut self) -> Result<Option<&str>, Self::Error> {
+        Ok(None)
+    }
+}
+
+/// Adapter which makes every [`CustomMutator`] usable by the FFI wrappers.
+///
+/// Each method calls its fallible counterpart. An error is handed to
+/// [`CustomMutator::handle_error`] and replaced by a fallback value; `init` has no
+/// fallback and panics instead.
+impl<M> RawCustomMutator for M
+where
+    M: CustomMutator,
+    M::Error: Debug,
+{
+    #[cfg(feature = "afl_internals")]
+    fn init(afl: &'static afl_state, seed: u32) -> Self
+    where
+        Self: Sized,
+    {
+        // without a mutator there is nothing to continue with
+        Self::init(afl, seed).unwrap_or_else(|err| {
+            Self::handle_error(err);
+            panic!("Error in afl_custom_init")
+        })
+    }
+
+    #[cfg(not(feature = "afl_internals"))]
+    fn init(seed: u32) -> Self
+    where
+        Self: Sized,
+    {
+        // without a mutator there is nothing to continue with
+        Self::init(seed).unwrap_or_else(|err| {
+            Self::handle_error(err);
+            panic!("Error in afl_custom_init")
+        })
+    }
+
+    fn fuzz_count(&mut self, buffer: &[u8]) -> u32 {
+        self.fuzz_count(buffer).unwrap_or_else(|err| {
+            Self::handle_error(err);
+            0
+        })
+    }
+
+    fn fuzz<'b, 's: 'b>(
+        &'s mut self,
+        buffer: &'b mut [u8],
+        add_buff: Option<&[u8]>,
+        max_size: usize,
+    ) -> Option<&'b [u8]> {
+        self.fuzz(buffer, add_buff, max_size).unwrap_or_else(|err| {
+            Self::handle_error(err);
+            None
+        })
+    }
+
+    fn queue_new_entry(&mut self, filename_new_queue: &Path, filename_orig_queue: Option<&Path>) {
+        if let Err(err) = self.queue_new_entry(filename_new_queue, filename_orig_queue) {
+            Self::handle_error(err);
+        }
+    }
+
+    fn queue_get(&mut self, filename: &Path) -> bool {
+        self.queue_get(filename).unwrap_or_else(|err| {
+            Self::handle_error(err);
+            false
+        })
+    }
+
+    fn describe(&mut self, max_description: usize) -> Option<&str> {
+        self.describe(max_description).unwrap_or_else(|err| {
+            Self::handle_error(err);
+            None
+        })
+    }
+
+    fn introspection(&mut self) -> Option<&str> {
+        self.introspection().unwrap_or_else(|err| {
+            Self::handle_error(err);
+            None
+        })
+    }
+}
+
+/// Builds the default description for a mutator of type `T`: the type name of `T`,
+/// shortened to at most `max_len` bytes. Used by the default `describe` implementations.
+fn default_mutator_describe<T: ?Sized>(max_len: usize) -> &'static str {
+    let type_name: &'static str = std::any::type_name::<T>();
+    truncate_str_unicode_safe(type_name, max_len)
+}
+
+// Only built without "afl_internals", because `init` is implemented in its seed-only form.
+#[cfg(all(test, not(feature = "afl_internals")))]
+mod default_mutator_describe {
+    /// Mutator which relies on the default implementation of `describe`.
+    struct MyMutator;
+    use super::CustomMutator;
+    impl CustomMutator for MyMutator {
+        type Error = ();
+
+        fn init(_: u32) -> Result<Self, Self::Error> {
+            Ok(Self)
+        }
+
+        fn fuzz<'b, 's: 'b>(
+            &'s mut self,
+            _: &'b mut [u8],
+            _: Option<&[u8]>,
+            _: usize,
+        ) -> Result<Option<&'b [u8]>, Self::Error> {
+            unimplemented!()
+        }
+    }
+
+    /// The default description is the full type path of the mutator, as long as it fits into the limit.
+    #[test]
+    fn test_default_describe() {
+        assert_eq!(
+            MyMutator::init(0).unwrap().describe(64).unwrap().unwrap(),
+            "custom_mutator::default_mutator_describe::MyMutator"
+        );
+    }
+}
+
+/// Shortens `s` to at most `max_len` bytes without cutting a UTF-8 encoded character in half.
+/// A character which does not fit completely is dropped.
+fn truncate_str_unicode_safe(s: &str, max_len: usize) -> &str {
+    if max_len >= s.len() {
+        return s;
+    }
+    // walk back to the closest character boundary; index 0 always is one, so the loop terminates
+    let mut end = max_len;
+    while !s.is_char_boundary(end) {
+        end -= 1;
+    }
+    &s[..end]
+}
+
+#[cfg(test)]
+mod truncate_test {
+    use super::truncate_str_unicode_safe;
+
+    /// Table-driven test covering ASCII input as well as characters which take 2 ("ä") and
+    /// 4 ("👎") bytes in UTF-8.
+    #[test]
+    fn test_truncate() {
+        // (maximum length in bytes, input, expected output)
+        for (max_len, input, expected_output) in &[
+            (0usize, "a", ""),
+            (1, "a", "a"),
+            (1, "ä", ""),
+            (2, "ä", "ä"),
+            (3, "äa", "äa"),
+            (4, "äa", "äa"),
+            (1, "👎", ""),
+            (2, "👎", ""),
+            (3, "👎", ""),
+            (4, "👎", "👎"),
+            (1, "abc", "a"),
+            (2, "abc", "ab"),
+        ] {
+            let actual_output = truncate_str_unicode_safe(input, *max_len);
+            assert_eq!(
+                &actual_output, expected_output,
+                "{:#?} truncated to {} bytes should be {:#?}, but is {:#?}",
+                input, max_len, expected_output, actual_output
+            );
+        }
+    }
+}
diff --git a/custom_mutators/rust/example/Cargo.toml b/custom_mutators/rust/example/Cargo.toml
new file mode 100644
index 00000000..070d23b1
--- /dev/null
+++ b/custom_mutators/rust/example/Cargo.toml
@@ -0,0 +1,15 @@
+[package]
+name = "example_mutator"
+version = "0.1.0"
+authors = ["Julius Hohnerlein <julihoh@users.noreply.github.com>"]
+edition = "2018"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+custom_mutator = { path = "../custom_mutator" }
+
+[[example]]
+name = "example_mutator"
+path = "./src/example_mutator.rs"
+crate-type = ["cdylib"]
\ No newline at end of file
diff --git a/custom_mutators/rust/example/src/example_mutator.rs b/custom_mutators/rust/example/src/example_mutator.rs
new file mode 100644
index 00000000..c4711dd1
--- /dev/null
+++ b/custom_mutators/rust/example/src/example_mutator.rs
@@ -0,0 +1,50 @@
+#![cfg(unix)]
+#![allow(unused_variables)]
+
+use custom_mutator::{export_mutator, CustomMutator};
+
+/// Minimal example mutator without any state: it reverses the input in place.
+struct ExampleMutator;
+
+impl CustomMutator for ExampleMutator {
+    type Error = ();
+
+    /// The seed is not needed, as the mutation is deterministic.
+    fn init(seed: u32) -> Result<Self, Self::Error> {
+        Ok(Self)
+    }
+
+    /// Reverses `buffer` in place and returns it, so no additional memory is needed.
+    fn fuzz<'b, 's: 'b>(
+        &'s mut self,
+        buffer: &'b mut [u8],
+        add_buff: Option<&[u8]>,
+        max_size: usize,
+    ) -> Result<Option<&'b [u8]>, Self::Error> {
+        buffer.reverse();
+        Ok(Some(buffer))
+    }
+}
+
+/// Example mutator which returns a buffer owned by the mutator instead of mutating the
+/// input in place. This type is not exported; it demonstrates that the slice returned
+/// from `fuzz` may borrow from `self`.
+struct OwnBufferExampleMutator {
+    /// holds the most recently produced test case
+    own_buffer: Vec<u8>,
+}
+
+impl CustomMutator for OwnBufferExampleMutator {
+    type Error = ();
+
+    /// Starts with an empty buffer; the seed is not needed, as the mutation is deterministic.
+    fn init(seed: u32) -> Result<Self, Self::Error> {
+        Ok(Self {
+            own_buffer: Vec::new(),
+        })
+    }
+
+    /// Copies the input into the mutator's own buffer, reverses the copy and returns it.
+    fn fuzz<'b, 's: 'b>(
+        &'s mut self,
+        buffer: &'b mut [u8],
+        add_buff: Option<&[u8]>,
+        max_size: usize,
+    ) -> Result<Option<&'b [u8]>, ()> {
+        // without this copy the buffer would stay empty forever and every call would
+        // return an empty test case
+        self.own_buffer.clear();
+        self.own_buffer.extend_from_slice(buffer);
+        self.own_buffer.reverse();
+        Ok(Some(self.own_buffer.as_slice()))
+    }
+}
+
+export_mutator!(ExampleMutator);
diff --git a/custom_mutators/rust/example_lain/Cargo.toml b/custom_mutators/rust/example_lain/Cargo.toml
new file mode 100644
index 00000000..29d606a4
--- /dev/null
+++ b/custom_mutators/rust/example_lain/Cargo.toml
@@ -0,0 +1,16 @@
+[package]
+name = "example_lain"
+version = "0.1.0"
+authors = ["Julius Hohnerlein <julihoh@users.noreply.github.com>"]
+edition = "2018"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+custom_mutator = { path = "../custom_mutator" }
+lain="0.5"
+
+[[example]]
+name = "example_lain"
+path = "./src/lain_mutator.rs"
+crate-type = ["cdylib"]
\ No newline at end of file
diff --git a/custom_mutators/rust/example_lain/rust-toolchain b/custom_mutators/rust/example_lain/rust-toolchain
new file mode 100644
index 00000000..07ade694
--- /dev/null
+++ b/custom_mutators/rust/example_lain/rust-toolchain
@@ -0,0 +1 @@
+nightly
\ No newline at end of file
diff --git a/custom_mutators/rust/example_lain/src/lain_mutator.rs b/custom_mutators/rust/example_lain/src/lain_mutator.rs
new file mode 100644
index 00000000..7099aeae
--- /dev/null
+++ b/custom_mutators/rust/example_lain/src/lain_mutator.rs
@@ -0,0 +1,61 @@
+#![cfg(unix)]
+
+use custom_mutator::{export_mutator, CustomMutator};
+use lain::{
+    mutator::Mutator,
+    prelude::*,
+    rand::{rngs::StdRng, SeedableRng},
+};
+
+/// The structure which is generated by lain and serialized into the test case.
+/// The `#[lain(...)]` attributes are interpreted by lain's derive macros — presumably they
+/// constrain the generated values (bit widths, value ranges, ignored fields); see the lain
+/// documentation for their exact meaning.
+#[derive(Debug, Mutatable, NewFuzzed, BinarySerialize)]
+struct MyStruct {
+    field_1: u8,
+
+    #[lain(bits = 3)]
+    field_2: u8,
+
+    #[lain(bits = 5)]
+    field_3: u8,
+
+    #[lain(min = 5, max = 10000)]
+    field_4: u32,
+
+    #[lain(ignore)]
+    ignored_field: u64,
+}
+
+/// Custom mutator which generates test cases with lain instead of mutating the current input.
+struct LainMutator {
+    /// lain mutator, seeded in `init`
+    mutator: Mutator<StdRng>,
+    /// holds the serialized test case which is returned from `fuzz`
+    buffer: Vec<u8>,
+}
+
+impl CustomMutator for LainMutator {
+    type Error = ();
+
+    /// Seeds the random number generator of the lain mutator with the seed passed by AFL++.
+    fn init(seed: u32) -> Result<Self, ()> {
+        Ok(Self {
+            mutator: Mutator::new(StdRng::seed_from_u64(seed as u64)),
+            buffer: Vec::new(),
+        })
+    }
+
+    /// Generates a new `MyStruct` and returns its big endian serialization.
+    /// Returns an error if the serialization would be larger than `max_size`.
+    fn fuzz<'b, 's: 'b>(
+        &'s mut self,
+        _buffer: &'b mut [u8],
+        _add_buff: Option<&[u8]>,
+        max_size: usize,
+    ) -> Result<Option<&'b [u8]>, ()> {
+        // we just sample an instance of MyStruct, ignoring the current input
+        let instance = MyStruct::new_fuzzed(&mut self.mutator, None);
+        let size = instance.serialized_size();
+        if size > max_size {
+            return Err(());
+        }
+        // reuse the allocation of the previous test case
+        self.buffer.clear();
+        self.buffer.reserve(size);
+        instance.binary_serialize::<_, BigEndian>(&mut self.buffer);
+        Ok(Some(self.buffer.as_slice()))
+    }
+}
+
+export_mutator!(LainMutator);
diff --git a/docs/Changelog.md b/docs/Changelog.md
index e9efdf38..6b7ebf15 100644
--- a/docs/Changelog.md
+++ b/docs/Changelog.md
@@ -8,59 +8,130 @@
 Want to stay in the loop on major new features? Join our mailing list by
 sending a mail to <afl-users+subscribe@googlegroups.com>.
 
+### Version ++3.13a (development)
+  - ...
 
-### Version ++3.01a (dev)
+### Version ++3.12c (release)
+  - afl-fuzz:
+    - added AFL_TARGET_ENV variable to pass extra env vars to the target
+      (for things like LD_LIBRARY_PATH)
+    - fix map detection, AFL_MAP_SIZE not needed anymore for most cases
+    - fix counting favorites (just a display thing)
+  - afl-cc:
+    - fix cmplog rtn (rare crash and not being able to gather ptr data)
+    - fix our own PCGUARD implementation to compile with llvm 10.0.1
+    - link runtime not to shared libs
+    - ensure shared libraries are properly built and instrumented
+    - AFL_LLVM_INSTRUMENT_ALLOW/DENY were not implemented for LTO, added
+    - show correct LLVM PCGUARD NATIVE mode when auto switching to it
+      and keep fsanitize-coverage-*list=...
+      Short mnemonic NATIVE is now also accepted.
+  - qemu_mode (thanks @realmadsci):
+    - move AFL_PRELOAD and AFL_USE_QASAN logic inside afl-qemu-trace
+    - add AFL_QEMU_CUSTOM_BIN
+  - unicorn_mode
+    - accidentally removed the subfolder from github, re-added
+  - added DEFAULT_PERMISSION to config.h for all files created, default
+    to 0600
+
+### Version ++3.11c (release)
+  - afl-fuzz:
+    - better auto detection of map size
+    - fix sanitizer settings (bug since 3.10c)
+    - fix an off-by-one overwrite in cmplog
+    - add non-unicode variants from unicode-looking dictionary entries
+    - Rust custom mutator API improvements
+    - Imported crash stats painted yellow on resume (only new ones are red)
+  - afl-cc:
+    - added AFL_NOOPT that will just pass everything to the normal
+      gcc/clang compiler without any changes - to pass weird configure
+      scripts
+    - fixed a crash that can occur with ASAN + CMPLOG together plus
+      better support for unicode (thanks to @stbergmann for reporting!)
+    - fixed a crash in LAF transform for empty strings
+    - handle erroneous setups in which multiple afl-compiler-rt are
+      compiled into the target. This now also supports dlopen()
+      instrumented libs loaded before the forkserver and even after the
+      forkserver is started (then with collisions though)
+    - the compiler rt was added also in object building (-c) which
+      should have been fixed years ago but somewhere got lost :(
+    - Renamed CTX to CALLER, added correct/real CTX implementation to
+      CLASSIC
+  - qemu_mode:
+    - added AFL_QEMU_EXCLUDE_RANGES env by @realmadsci, thanks!
+    - if no new/updated checkout is wanted, build with:
+      NO_CHECKOUT=1 ./build_qemu_support.sh
+    - we no longer perform a "git drop"
+  - afl-cmin: support filenames with spaces
+
+### Version ++3.10c (release)
   - Mac OS ARM64 support
   - Android support fixed and updated by Joey Jiaojg - thanks!
   - New selective instrumentation option with __AFL_COVERAGE_* commands
     to be placed in the source code.
     Check out instrumentation/README.instrument_list.md
   - afl-fuzz
-    - Making AFL_MAP_SIZE obsolete - afl-fuzz now learns on start the
-      target map size
+    - Making AFL_MAP_SIZE (mostly) obsolete - afl-fuzz now learns on
+      start the target map size
     - upgraded cmplog/redqueen: solving for floating point, solving
       transformations (e.g. toupper, tolower, to/from hex, xor,
-      arithmetics, etc.). this is costly hence new command line option
-      -l that sets the intensity (values 1 to 3). recommended is 1 or 2.
-    - added `AFL_CMPLOG_ONLY_NEW` to not use cmplog on initial testcases from
-      `-i` or resumes (as these have most likely already been done)
+      arithmetics, etc.). This is costly hence new command line option
+      `-l` that sets the intensity (values 1 to 3). Recommended is 2.
+    - added `AFL_CMPLOG_ONLY_NEW` to not use cmplog on initial seeds
+      from `-i` or resumes (these have most likely already been done)
     - fix crash for very, very fast targets+systems (thanks to mhlakhani
       for reporting)
-    - if determinstic mode is active (-D, or -M without -d) then we sync
-      after every queue entry as this can take very long time otherwise
+    - on restarts (`-i`)/autoresume (AFL_AUTORESUME) the stats are now
+      reloaded and used, thanks to Vimal Joseph for this patch! 
+    - changed the meaning of '+' of the '-t' option, it now means to
+      auto-calculate the timeout with the value given being the max
+      timeout. The original meaning of skipping timeouts instead of
+      abort is now inherent to the -t option.
+    - if deterministic mode is active (`-D`, or `-M` without `-d`) then
+      we sync after every queue entry as this can take very long time
+      otherwise
+    - added minimum SYNC_TIME to include/config.h (30 minutes default)
     - better detection if a target needs a large shared map
-    - fix for -Z
+    - fix for `-Z`
+    - fixed a few crashes
     - switched to an even faster RNG
     - added hghwng's patch for faster trace map analysis
+    - printing suggestions for mistyped `AFL_` env variables
+    - added Rust bindings for custom mutators (thanks @julihoh)
   - afl-cc
     - allow instrumenting LLVMFuzzerTestOneInput
     - fixed endless loop for allow/blocklist lines starting with a
       comment (thanks to Zherya for reporting)
     - cmplog/redqueen now also tracks floating point, _ExtInt() + 128bit
     - cmplog/redqueen can now process basic libc++ and libstdc++
-      std::string comparisons (though no position or length type variants)
-    - added support for __afl_coverage_interesting() for LTO and
-      and our own PCGUARD (llvm 10.0.1+), read more about this function
-      and selective coverage in instrumentation/README.instrument_list.md
+      std::string comparisons (no position or length type variants)
+    - added support for __afl_coverage_interesting() for LTO and our
+      own PCGUARD (llvm 10.0.1+), read more about this function and
+      selective coverage in instrumentation/README.instrument_list.md
     - added AFL_LLVM_INSTRUMENT option NATIVE for native clang pc-guard
       support (less performant than our own), GCC for old afl-gcc and
       CLANG for old afl-clang
+    - fixed a potential crash in the LAF feature
+    - workaround for llvm bitcast lto bug
+    - workaround for llvm 13
   - qemuafl
-    - ported QASan to qemuafl! see qemu_mode/libqasan/README.md
+    - QASan (address sanitizer for Qemu) ported to qemuafl!
+      See qemu_mode/libqasan/README.md
     - solved some persistent mode bugs (thanks Dil4rd)
     - solved an issue when dumping the memory maps (thanks wizche)
     - Android support for QASan
   - unicornafl
-    - Substential speed gains in python bindings for certain use cases
+    - Substantial speed gains in python bindings for certain use cases
     - Improved rust bindings
-    - Added a new example harness to compare python, c, and rust bindings
+    - Added a new example harness to compare python, c and rust bindings
+  - afl-cmin and afl-showmap now support the -f option
+  - afl-plot now also generates a graph on the discovered edges
   - changed default: no memory limit for afl-cmin and afl-cmin.bash
-  - warn on any _AFL and __AFL env vars
-  - LLVM mode is now compiled with -j4, unicorn with all cores. qemu was
-    already building with all cores, the gcc plugin needs only one.
+  - warn on any _AFL and __AFL env vars.
+  - set AFL_IGNORE_UNKNOWN_ENVS to not warn on unknown AFL_... env vars
   - added dummy Makefile to instrumentation/
   - Updated utils/afl_frida to be 5% faster, 7% on x86_x64
-  - Added AFL_KILL_SIGNAL env variable (thanks @v-p-b)
+  - Added `AFL_KILL_SIGNAL` env variable (thanks @v-p-b)
   - @Edznux added a nice documentation on how to use rpc.statsd with
     afl++ in docs/rpc_statsd.md, thanks!
 
diff --git a/docs/custom_mutators.md b/docs/custom_mutators.md
index a2c544e3..61d711e4 100644
--- a/docs/custom_mutators.md
+++ b/docs/custom_mutators.md
@@ -4,6 +4,11 @@ This file describes how you can implement custom mutations to be used in AFL.
 For now, we support C/C++ library and Python module, collectivelly named as the
 custom mutator.
 
+There is also experimental support for Rust in `custom_mutators/rust`.
+Please refer to that directory for documentation.
+Run ```cargo doc -p custom_mutator --open``` in that directory to view the
+documentation in your web browser.
+
 Implemented by
 - C/C++ library (`*.so`): Khaled Yakdan from Code Intelligence (<yakdan@code-intelligence.de>)
 - Python module: Christian Holler from Mozilla (<choller@mozilla.com>)
diff --git a/docs/docs.md b/docs/docs.md
new file mode 100644
index 00000000..ed6ec85e
--- /dev/null
+++ b/docs/docs.md
@@ -0,0 +1,122 @@
+# Restructure afl++'s documentation
+
+## About us
+
+We are dedicated to everything around fuzzing, our main and most well known
+contribution is the fuzzer `afl++` which is part of all major Unix
+distributions (e.g. Debian, Arch, FreeBSD, etc.) and is deployed on Google's
+oss-fuzz and clusterfuzz. It is rated the top fuzzer on Google's fuzzbench.
+
+We are four individuals from Europe supported by a large community.
+
+All our tools are open source.
+
+## About the afl++ fuzzer project
+
+afl++ inherited its documentation from the original Google afl project.
+Since then it has been massively improved - feature and performance wise -
+and although the documentation has likewise been continued it has grown out
+of proportion.
+The documentation is done by non-natives to the English language, plus
+none of us has a writer background.
+
+We see questions on afl++ usage on mailing lists (e.g. afl-users), discord
+channels, web forums and as issues in our repository.
+
+This only increases as afl++ has been on the top of Google's fuzzbench
+statistics (which measures the performance of fuzzers) and is now being
+integrated in Google's oss-fuzz and clusterfuzz - and is in many Unix
+packaging repositories, e.g. Debian, FreeBSD, etc.
+
+afl++ now has 44 (!) documentation files with 13k total lines of content.
+This is way too much.
+
+Hence afl++ needs a complete overhaul of its documentation, both on an
+organisation/structural level as well as the content.
+
+Overall the following actions have to be performed:
+  * Create a better structure of documentation so it is easier to find the
+    information that is being looked for, combining and/or splitting up the
+    existing documents as needed.
+  * Rewrite some documentation to remove duplication. Some information is
+    present several times in the documentation. Such duplicates should be
+    reduced to where needed so that we have as little bloat as possible.
+  * The documents have been written and modified by a lot of different people,
+    most of them non-native English speakers. Hence an overall review where
+    parts should be rewritten has to be performed and then the rewrite done.
+  * Create a cheat-sheet for a very short best-setup build and run of afl++
+  * Pictures explain more than 1000 words. We need at least 4 images that
+    explain the workflow with afl++:
+      - the build workflow
+      - the fuzzing workflow
+      - the fuzzing campaign management workflow
+      - the overall workflow that is an overview of the above
+      - maybe more? where the technical writer deems it necessary for
+        understanding.
+
+Requirements:
+  * Documentation has to be in Markdown format
+  * Images have to be either in SVG or PNG format.
+  * All documentation should be (moved) in(to) docs/
+
+The project does not require writing new documentation or tutorials beside the
+cheat sheet. The technical information for the cheat sheet will be provided by
+us.
+
+## Metrics
+
+afl++ is the highest performing fuzzer publicly available - but is also the
+most feature rich and complex. With the publicity of afl++'s success and
+deployment in Google projects internally and externally and availability as
+a package on most Linux distributions we see more and more issues being
+created and help requests on our Discord channel that would not be
+necessary if people would have read through all our documentation - which
+is unrealistic.
+
+We expect the new documentation after this project to be cleaner, more easily
+accessible and lighter to digest by our users, resulting in far fewer
+help requests. On the other hand the number of users using afl++ should
+increase as well as it will be more accessible which would also increase
+questions again - but overall resulting in a reduction of help requests.
+
+In numbers: we currently have per week on average 5 issues on Github,
+10 questions on discord and 1 on mailing lists that would not be necessary
+with perfect documentation and perfect people.
+
+We would consider this project a success if afterwards we only have
+2 issues on Github and 3 questions on discord anymore that would be answered
+by reading the documentation. The mailing list is usually used by the most
+novice users and we don't expect any fewer questions there.
+
+## Project Budget
+
+We have zero experience with technical writers, so this is very hard for us
+to calculate. We expect it to be a lot of work though because of the amount
+of documentation we have that needs to be restructured and partially rewritten
+(44 documents with 13k total lines of content).
+
+We assume the daily rate of a very good and experienced technical writer in
+times of a pandemic to be ~500$ (according to web research), and calculate
+the overall amount of work to be around 20 days for everything incl. the
+graphics (but again - this is basically just guessing).
+
+Technical Writer                                              10000$
+Volunteer stipends                                                0$ (waived)
+T-Shirts for the top 10 contributors and helpers to this documentation project:
+	10 afl++ logo t-shirts 		20$ each		200$
+	10 shipping cost of t-shirts    10$ each		100$
+
+Total: 10.300$
+(in the submission form 10.280$ was entered)
+
+## Additional Information
+
+We have participated in Google Summer of Code in 2020 and hope to be selected
+again in 2021.
+
+We have no experience with a technical writer, but we will support that person
+with video calls, chats, emails and messaging, provide all necessary information
+and write the technical content that is required for the success of this project.
+It is clear to us that a technical writer knows how to write, but cannot know
+the technical details of complex tooling like afl++. This guidance, input,
+etc. has to come from us.
diff --git a/docs/env_variables.md b/docs/env_variables.md
index 41733f1b..de6b4bd8 100644
--- a/docs/env_variables.md
+++ b/docs/env_variables.md
@@ -5,6 +5,10 @@
   users or for some types of custom fuzzing setups. See [README.md](README.md) for the general
   instruction manual.
 
+  Note that most tools will warn on any unknown AFL environment variables.
+  This is for warning on typos that can happen. If you want to disable this
+  check then set the `AFL_IGNORE_UNKNOWN_ENVS` environment variable.
+
 ## 1) Settings for all compilers
 
 Starting with afl++ 3.0 there is only one compiler: afl-cc
@@ -18,11 +22,21 @@ To select the different instrumentation modes this can be done by
 `MODE` can be one of `LTO` (afl-clang-lto*), `LLVM` (afl-clang-fast*), `GCC_PLUGIN`
 (afl-g*-fast) or `GCC` (afl-gcc/afl-g++).
 
-
 Because (with the exception of the --afl-MODE command line option) the
 compile-time tools do not accept afl specific command-line options, they
 make fairly broad use of environmental variables instead:
 
+  - Some build/configure scripts break with afl++ compilers. To be able to
+    pass them, do:
+```
+       export CC=afl-cc
+       export CXX=afl-c++
+       export AFL_NOOPT=1
+       ./configure --disable-shared --disable-werror
+       unset AFL_NOOPT
+       make
+```
+
   - Most afl tools do not print any output if stdout/stderr are redirected.
     If you want to get the output into a file then set the `AFL_DEBUG`
     environment variable.
@@ -379,6 +393,10 @@ checks or alter some of the more exotic semantics of the tool:
 
   - In QEMU mode (-Q), `AFL_PATH` will be searched for afl-qemu-trace.
 
+  - In QEMU mode (-Q), setting `AFL_QEMU_CUSTOM_BIN` causes afl-fuzz to skip
+    prepending `afl-qemu-trace` to your command line. Use this if you wish to use a
+    custom afl-qemu-trace or if you need to modify the afl-qemu-trace arguments.
+
   - Setting `AFL_CYCLE_SCHEDULES` will switch to a different schedule everytime
     a cycle is finished.
 
@@ -390,6 +408,12 @@ checks or alter some of the more exotic semantics of the tool:
     without disrupting the afl-fuzz process itself. This is useful, among other
     things, for bootstrapping libdislocator.so.
 
+  - Setting `AFL_TARGET_ENV` causes AFL++ to set extra environment variables
+    for the target binary. Example: `AFL_TARGET_ENV="VAR1=1 VAR2='a b c'" afl-fuzz ... `
+    This exists mostly for things like `LD_LIBRARY_PATH` but it would theoretically
+    allow fuzzing of AFL++ itself (with 'target' AFL++ using some AFL_ vars that
+    would disrupt work of 'fuzzer' AFL++).
+
   - Setting `AFL_NO_UI` inhibits the UI altogether, and just periodically prints
     some basic stats. This behavior is also automatically triggered when the
     output from afl-fuzz is redirected to a file or to a pipe.
@@ -410,7 +434,8 @@ checks or alter some of the more exotic semantics of the tool:
     and RECORD:000000,cnt:000009 being the crash case.
 
   - If you are Jakub, you may need `AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES`.
-    Others need not apply.
+    Others need not apply, unless they also want to disable the
+    `/proc/sys/kernel/core_pattern` check.
 
   - Benchmarking only: `AFL_BENCH_JUST_ONE` causes the fuzzer to exit after
     processing the first queue entry; and `AFL_BENCH_UNTIL_CRASH` causes it to
@@ -457,6 +482,7 @@ checks or alter some of the more exotic semantics of the tool:
     `banner` corresponds to the name of the fuzzer provided through `-M/-S`.
     `afl_version` corresponds to the currently running afl version (e.g `++3.0c`).
     Default (empty/non present) will add no tags to the metrics.
+    See [rpc_statsd.md](rpc_statsd.md) for more information.
 
   - Setting `AFL_CRASH_EXITCODE` sets the exit code afl treats as crash.
     For example, if `AFL_CRASH_EXITCODE='-1'` is set, each input resulting
@@ -523,6 +549,12 @@ The QEMU wrapper used to instrument binary-only code supports several settings:
     stack pointer in which QEMU can find the return address when `start addr` is
     hit.
 
+  - With `AFL_USE_QASAN` you can enable QEMU AddressSanitizer for dynamically
+    linked binaries.
+
+  - With `AFL_QEMU_FORCE_DFL` you force QEMU to ignore the registered signal
+    handlers of the target.
+
 ## 6) Settings for afl-cmin
 
 The corpus minimization script offers very little customization:
diff --git a/docs/ideas.md b/docs/ideas.md
index 7cbe60a5..0130cf61 100644
--- a/docs/ideas.md
+++ b/docs/ideas.md
@@ -3,6 +3,42 @@
 In the following, we describe a variety of ideas that could be implemented
 for future AFL++ versions.
 
+# GSoC 2021
+
+All GSoC 2021 projects will be in the Rust development language!
+
+## UI for libaflrs
+
+Write a user interface to libaflrs, the upcoming backend of afl++.
+This might look like the afl-fuzz UI, but you can improve on it - and should!
+
+## Schedulers for libaflrs
+
+A scheduler is a mechanism that selects items from the fuzzing corpus based
+on strategy and randomness. One scheduler might focus on long paths,
+another on rarity of edges discovered, still another on a combination of
+things. Some of the schedulers in afl++ have to be ported, but you are free
+to come up with your own if you want to - and see how it performs.
+
+## Forkserver support for libaflrs
+
+The current libaflrs implementation fuzzes in-memory, however obviously we
+want to support afl instrumented binaries as well.
+Hence forkserver support needs to be implemented - forking off the target
+and talking to the target via a socketpair and the communication protocol
+within.
+
+## More Observers for libaflrs
+
+An observer is measuring functionality that looks at the target being fuzzed
+and documents something about it. In traditional fuzzing this is the coverage
+in the target, however we want to add various more observers, e.g. stack depth,
+heap usage, etc. - this is a topic for an experienced Rust developer.
+
+# Generic ideas and wishlist - NOT PART OF GSoC 2021 !
+
+The below list is not part of GSoC 2021.
+
 ## Analysis software
 
 Currently analysis is done by using afl-plot, which is rather outdated.
diff --git a/docs/rpc_statsd.md b/docs/rpc_statsd.md
index 02f72be6..fb97aa09 100644
--- a/docs/rpc_statsd.md
+++ b/docs/rpc_statsd.md
@@ -1,6 +1,6 @@
 # Remote monitoring with StatsD
 
-StatsD allows you to receive and aggregate metrics from a wide range of application and retransmit them to the backend of your choice.
+StatsD allows you to receive and aggregate metrics from a wide range of applications and retransmit them to the backend of your choice.
 This enables you to create nice and readable dashboards containing all the information you need on your fuzzer instances.
 No need to write your own statistics parsing system, deploy and maintain it to all your instances, sync with your graph rendering system...
 
@@ -45,7 +45,7 @@ For more information on these env vars, check out `docs/env_variables.md`.
 
 The simplest way of using this feature is to use any metric provider and change the host/port of your StatsD daemon,
 with `AFL_STATSD_HOST` and `AFL_STATSD_PORT`, if required (defaults are `localhost` and port `8125`).
-To get started, here are some instruction with free and open source tools.
+To get started, here are some instructions with free and open source tools.
 The following setup is based on Prometheus, statsd_exporter and Grafana.
 Grafana here is not mandatory, but gives you some nice graphs and features.
 
@@ -131,7 +131,7 @@ mappings:
 
 Run `docker-compose up -d`.
 
-Everything should be now setup, you are now able to run your fuzzers with
+Everything should now be set up, you are now able to run your fuzzers with
 
 ```
 AFL_STATSD_TAGS_FLAVOR=dogstatsd AFL_STATSD=1 afl-fuzz -M test-fuzzer-1 -i i -o o ./bin/my-application @@
@@ -139,5 +139,5 @@ AFL_STATSD_TAGS_FLAVOR=dogstatsd AFL_STATSD=1 afl-fuzz -S test-fuzzer-2 -i i -o
 ...
 ```
 
-This setup may be modified before use in production environment. Depending on your needs: addind passwords, creating volumes for storage,
+This setup may be modified before use in a production environment. Depending on your needs: adding passwords, creating volumes for storage,
 tweaking the metrics gathering to get host metrics (CPU, RAM ...).
diff --git a/dynamic_list.txt b/dynamic_list.txt
index 3c0b054f..f0e54d92 100644
--- a/dynamic_list.txt
+++ b/dynamic_list.txt
@@ -5,6 +5,43 @@
   "__afl_auto_init";
   "__afl_area_initial";
   "__afl_prev_loc";
+  "__afl_prev_caller";
+  "__afl_prev_ctx";
+  "__afl_final_loc";
+  "__afl_map_addr";
+  "__afl_dictionary";
+  "__afl_dictionary_len";
+  "__afl_selective_coverage";
+  "__afl_selective_coverage_start_off";
+  "__afl_selective_coverage_temp";
+  "__afl_coverage_discard";
+  "__afl_coverage_skip";
+  "__afl_coverage_on";
+  "__afl_coverage_off";
+  "__afl_coverage_interesting";
+  "__afl_fuzz_len";
+  "__afl_fuzz_ptr";
   "__sanitizer_cov_trace_pc_guard";
   "__sanitizer_cov_trace_pc_guard_init";
+  "__cmplog_ins_hook1";
+  "__cmplog_ins_hook2";
+  "__cmplog_ins_hook4";
+  "__cmplog_ins_hookN";
+  "__cmplog_ins_hook16";
+  "__sanitizer_cov_trace_cmp1";
+  "__sanitizer_cov_trace_const_cmp1";
+  "__sanitizer_cov_trace_cmp2";
+  "__sanitizer_cov_trace_const_cmp2";
+  "__sanitizer_cov_trace_cmp4";
+  "__sanitizer_cov_trace_const_cmp4";
+  "__sanitizer_cov_trace_cmp8";
+  "__sanitizer_cov_trace_const_cmp8";
+  "__sanitizer_cov_trace_cmp16";
+  "__sanitizer_cov_trace_const_cmp16";
+  "__sanitizer_cov_trace_switch";
+  "__cmplog_rtn_hook";
+  "__cmplog_rtn_gcc_stdstring_cstring";
+  "__cmplog_rtn_gcc_stdstring_stdstring";
+  "__cmplog_rtn_llvm_stdstring_cstring";
+  "__cmplog_rtn_llvm_stdstring_stdstring";
 };
diff --git a/include/afl-fuzz.h b/include/afl-fuzz.h
index b1fba884..691ba148 100644
--- a/include/afl-fuzz.h
+++ b/include/afl-fuzz.h
@@ -390,7 +390,7 @@ typedef struct afl_env_vars {
       *afl_hang_tmout, *afl_forksrv_init_tmout, *afl_skip_crashes, *afl_preload,
       *afl_max_det_extras, *afl_statsd_host, *afl_statsd_port,
       *afl_crash_exitcode, *afl_statsd_tags_flavor, *afl_testcache_size,
-      *afl_testcache_entries, *afl_kill_signal, *afl_persistent_record;
+      *afl_testcache_entries, *afl_kill_signal, *afl_target_env, *afl_persistent_record;
 
 } afl_env_vars_t;
 
@@ -425,7 +425,8 @@ typedef struct afl_state {
     really makes no sense to haul them around as function parameters. */
   u64 orig_hit_cnt_puppet, last_limit_time_start, tmp_pilot_time,
       total_pacemaker_time, total_puppet_find, temp_puppet_find, most_time_key,
-      most_time, most_execs_key, most_execs, old_hit_count, force_ui_update;
+      most_time, most_execs_key, most_execs, old_hit_count, force_ui_update,
+      prev_run_time;
 
   MOpt_globals_t mopt_globals_core, mopt_globals_pilot;
 
@@ -569,6 +570,7 @@ typedef struct afl_state {
       blocks_eff_total,                 /* Blocks subject to effector maps  */
       blocks_eff_select,                /* Blocks selected as fuzzable      */
       start_time,                       /* Unix start time (ms)             */
+      last_sync_time,                   /* Time of last sync                */
       last_path_time,                   /* Time for most recent path (ms)   */
       last_crash_time,                  /* Time for most recent crash (ms)  */
       last_hang_time;                   /* Time for most recent hang (ms)   */
@@ -648,6 +650,7 @@ typedef struct afl_state {
   u32 cmplog_max_filesize;
   u32 cmplog_lvl;
   u32 colorize_success;
+  u8  cmplog_enable_arith, cmplog_enable_transform;
 
   struct afl_pass_stat *pass_stats;
   struct cmp_map *      orig_cmp_map;
@@ -1059,6 +1062,7 @@ u8 has_new_bits_unclassified(afl_state_t *, u8 *);
 void load_extras_file(afl_state_t *, u8 *, u32 *, u32 *, u32);
 void load_extras(afl_state_t *, u8 *);
 void dedup_extras(afl_state_t *);
+void deunicode_extras(afl_state_t *);
 void add_extra(afl_state_t *afl, u8 *mem, u32 len);
 void maybe_add_auto(afl_state_t *, u8 *, u32);
 void save_auto(afl_state_t *);
@@ -1067,9 +1071,10 @@ void destroy_extras(afl_state_t *);
 
 /* Stats */
 
+void load_stats_file(afl_state_t *);
 void write_setup_file(afl_state_t *, u32, char **);
-void write_stats_file(afl_state_t *, double, double, double);
-void maybe_update_plot_file(afl_state_t *, double, double);
+void write_stats_file(afl_state_t *, u32, double, double, double);
+void maybe_update_plot_file(afl_state_t *, u32, double, double);
 void show_stats(afl_state_t *);
 void show_init_stats(afl_state_t *);
 
diff --git a/include/common.h b/include/common.h
index bb8831f2..7bba9e91 100644
--- a/include/common.h
+++ b/include/common.h
@@ -39,6 +39,7 @@
 #define STRINGIFY_VAL_SIZE_MAX (16)
 
 void detect_file_args(char **argv, u8 *prog_in, bool *use_stdin);
+void print_suggested_envs(char *mispelled_env);
 void check_environment_vars(char **env);
 
 char **argv_cpy_dup(int argc, char **argv);
@@ -47,7 +48,10 @@ void   argv_cpy_free(char **argv);
 char **get_qemu_argv(u8 *own_loc, u8 **target_path_p, int argc, char **argv);
 char **get_wine_argv(u8 *own_loc, u8 **target_path_p, int argc, char **argv);
 char * get_afl_env(char *env);
-u8 *   get_libqasan_path(u8 *own_loc);
+
+/* Extract env vars from input string and set them using setenv()
+   For use with AFL_TARGET_ENV, ... */
+bool extract_and_set_env(u8 *env_str);
 
 extern u8  be_quiet;
 extern u8 *doc_path;                    /* path to documentation dir        */
@@ -57,6 +61,10 @@ extern u8 *doc_path;                    /* path to documentation dir        */
 
 u8 *find_binary(u8 *fname);
 
+/* find an afl binary */
+
+u8 *find_afl_binary(u8 *own_loc, u8 *fname);
+
 /* Parses the kill signal environment variable, FATALs on error.
   If the env is not set, sets the env to default_signal for the signal handlers
   and returns the default_signal. */
diff --git a/include/config.h b/include/config.h
index f6dbfae0..ab4c49f2 100644
--- a/include/config.h
+++ b/include/config.h
@@ -10,7 +10,7 @@
                      Dominik Maier <mail@dmnk.co>
 
    Copyright 2016, 2017 Google Inc. All rights reserved.
-   Copyright 2019-2020 AFLplusplus Project. All rights reserved.
+   Copyright 2019-2021 AFLplusplus Project. All rights reserved.
 
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
@@ -25,8 +25,8 @@
 
 /* Version string: */
 
-// c = release, d = volatile github dev, e = experimental branch
-#define VERSION "++3.01a"
+// c = release, a = volatile github dev, e = experimental branch
+#define VERSION "++3.13a"
 
 /******************************************************
  *                                                    *
@@ -34,28 +34,41 @@
  *                                                    *
  ******************************************************/
 
+/* Default shared memory map size. Most targets just need a coverage map
+   between 20-250kb. Plus there is an auto-detection feature in afl-fuzz.
+   However if a target has problematic constructors and init arrays then
+   this can fail. Hence afl-fuzz deploys a larger default map. The largest
+   map seen so far is the xlsx fuzzer for libreoffice which is 5MB.
+   At runtime this value can be overridden via AFL_MAP_SIZE.
+   Default: 8MB (defined in bytes) */
+#define DEFAULT_SHMEM_SIZE (8 * 1024 * 1024)
+
+/* Default file permission umode when creating files (default: 0600) */
+#define DEFAULT_PERMISSION 0600
+
 /* CMPLOG/REDQUEEN TUNING
  *
- * Here you can tuning and solving options for cmplog.
+ * Here you can modify tuning and solving options for CMPLOG.
  * Note that these are run-time options for afl-fuzz, no target
  * recompilation required.
  *
  */
 
-/* Enable transform following (XOR/ADD/SUB manipulations, hex en/decoding) */
-// #define CMPLOG_TRANSFORM
+/* if TRANSFORM is enabled with '-l T', this additionally enables base64
+   encoding/decoding */
+// #define CMPLOG_SOLVE_TRANSFORM_BASE64
 
-/* if TRANSFORM is enabled, this additionally enables base64 en/decoding */
-// #define CMPLOG_TRANSFORM_BASE64
+/* If a redqueen pass finds more than one solution, try to combine them? */
+#define CMPLOG_COMBINE
 
-/* Minimum % of the corpus to perform cmplog on. Default: 20% */
-#define CMPLOG_CORPUS_PERCENT 20U
+/* Minimum % of the corpus to perform cmplog on. Default: 5% */
+#define CMPLOG_CORPUS_PERCENT 5U
 
-/* Number of potential posititions from which we decide the cmplog becomes
-   useless, default 16384 */
-#define CMPLOG_POSITIONS_MAX 16384U
+/* Number of potential positions from which we decide if cmplog becomes
+   useless, default 12288 */
+#define CMPLOG_POSITIONS_MAX (12 * 1024)
 
-/* Maximum allowed fails per CMP value. Default: 32 * 3 */
+/* Maximum allowed fails per CMP value. Default: 96 */
 #define CMPLOG_FAIL_MAX 96
 
 /* Now non-cmplog configuration options */
@@ -288,6 +301,11 @@
 
 #define SYNC_INTERVAL 8
 
+/* Sync time (minimum time between syncing in ms, time is halved for -M main
+   nodes) - default is 30 minutes: */
+
+#define SYNC_TIME (30 * 60 * 1000)
+
 /* Output directory reuse grace period (minutes): */
 
 #define OUTPUT_GRACE 25
diff --git a/include/coverage-32.h b/include/coverage-32.h
index a5cc498c..ca36c29f 100644
--- a/include/coverage-32.h
+++ b/include/coverage-32.h
@@ -97,7 +97,7 @@ inline void discover_word(u8 *ret, u32 *current, u32 *virgin) {
 #define PACK_SIZE 16
 inline u32 skim(const u32 *virgin, const u32 *current, const u32 *current_end) {
 
-  for (; current != current_end; virgin += 4, current += 4) {
+  for (; current < current_end; virgin += 4, current += 4) {
 
     if (current[0] && classify_word(current[0]) & virgin[0]) return 1;
     if (current[1] && classify_word(current[1]) & virgin[1]) return 1;
diff --git a/include/coverage-64.h b/include/coverage-64.h
index 0ede5fa5..54fe9d33 100644
--- a/include/coverage-64.h
+++ b/include/coverage-64.h
@@ -145,7 +145,7 @@ inline u32 skim(const u64 *virgin, const u64 *current, const u64 *current_end) {
 
   __m256i zeroes = _mm256_setzero_si256();
 
-  for (; current != current_end; virgin += 4, current += 4) {
+  for (; current < current_end; virgin += 4, current += 4) {
 
     __m256i value = *(__m256i *)current;
     __m256i cmp = _mm256_cmpeq_epi64(value, zeroes);
@@ -172,7 +172,7 @@ inline u32 skim(const u64 *virgin, const u64 *current, const u64 *current_end) {
   #define PACK_SIZE 32
 inline u32 skim(const u64 *virgin, const u64 *current, const u64 *current_end) {
 
-  for (; current != current_end; virgin += 4, current += 4) {
+  for (; current < current_end; virgin += 4, current += 4) {
 
     if (current[0] && classify_word(current[0]) & virgin[0]) return 1;
     if (current[1] && classify_word(current[1]) & virgin[1]) return 1;
diff --git a/include/envs.h b/include/envs.h
index 6ba69f26..37adeff2 100644
--- a/include/envs.h
+++ b/include/envs.h
@@ -42,6 +42,7 @@ static char *afl_environment_variables[] = {
     "AFL_DEBUG_CHILD",
     "AFL_DEBUG_GDB",
     "AFL_DISABLE_TRIM",
+    "AFL_DISABLE_LLVM_INSTRUMENTATION",
     "AFL_DONT_OPTIMIZE",
     "AFL_DRIVER_STDERR_DUPLICATE_FILENAME",
     "AFL_DUMB_FORKSRV",
@@ -50,6 +51,7 @@ static char *afl_environment_variables[] = {
     "AFL_FAST_CAL",
     "AFL_FORCE_UI",
     "AFL_FUZZER_ARGS",  // oss-fuzz
+    "AFL_GDB",
     "AFL_GCC_ALLOWLIST",
     "AFL_GCC_DENYLIST",
     "AFL_GCC_BLOCKLIST",
@@ -61,6 +63,7 @@ static char *afl_environment_variables[] = {
     "AFL_FORKSRV_INIT_TMOUT",
     "AFL_HARDEN",
     "AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES",
+    "AFL_IGNORE_UNKNOWN_ENVS",
     "AFL_IMPORT_FIRST",
     "AFL_INST_LIBS",
     "AFL_INST_RATIO",
@@ -79,7 +82,9 @@ static char *afl_environment_variables[] = {
     "AFL_LLVM_BLOCKLIST",
     "AFL_LLVM_CMPLOG",
     "AFL_LLVM_INSTRIM",
+    "AFL_LLVM_CALLER",
     "AFL_LLVM_CTX",
+    "AFL_LLVM_CTX_K",
     "AFL_LLVM_DICT2FILE",
     "AFL_LLVM_DOCUMENT_IDS",
     "AFL_LLVM_INSTRIM_LOOPHEAD",
@@ -117,20 +122,25 @@ static char *afl_environment_variables[] = {
     "AFL_NO_PYTHON",
     "AFL_UNTRACER_FILE",
     "AFL_LLVM_USE_TRACE_PC",
-    "AFL_NO_X86",  // not really an env but we dont want to warn on it
     "AFL_MAP_SIZE",
     "AFL_MAPSIZE",
     "AFL_MAX_DET_EXTRAS",
+    "AFL_NO_X86",  // not really an env but we dont want to warn on it
+    "AFL_NOOPT",
+    "AFL_PASSTHROUGH",
     "AFL_PATH",
     "AFL_PERFORMANCE_FILE",
     "AFL_PERSISTENT_RECORD",
     "AFL_PRELOAD",
+    "AFL_TARGET_ENV",
     "AFL_PYTHON_MODULE",
+    "AFL_QEMU_CUSTOM_BIN",
     "AFL_QEMU_COMPCOV",
     "AFL_QEMU_COMPCOV_DEBUG",
     "AFL_QEMU_DEBUG_MAPS",
     "AFL_QEMU_DISABLE_CACHE",
     "AFL_QEMU_DRIVER_NO_HOOK",
+    "AFL_QEMU_FORCE_DFL",
     "AFL_QEMU_PERSISTENT_ADDR",
     "AFL_QEMU_PERSISTENT_CNT",
     "AFL_QEMU_PERSISTENT_GPR",
@@ -140,6 +150,7 @@ static char *afl_environment_variables[] = {
     "AFL_QEMU_PERSISTENT_RETADDR_OFFSET",
     "AFL_QEMU_PERSISTENT_EXITS",
     "AFL_QEMU_INST_RANGES",
+    "AFL_QEMU_EXCLUDE_RANGES",
     "AFL_QEMU_SNAPSHOT",
     "AFL_QUIET",
     "AFL_RANDOM_ALLOC_CANARY",
diff --git a/instrumentation/LLVMInsTrim.so.cc b/instrumentation/LLVMInsTrim.so.cc
index 235ee30f..62de6ec5 100644
--- a/instrumentation/LLVMInsTrim.so.cc
+++ b/instrumentation/LLVMInsTrim.so.cc
@@ -38,7 +38,7 @@ typedef long double max_align_t;
 
 #include "MarkNodes.h"
 #include "afl-llvm-common.h"
-#include "llvm-ngram-coverage.h"
+#include "llvm-alternative-coverage.h"
 
 #include "config.h"
 #include "debug.h"
@@ -135,7 +135,7 @@ struct InsTrim : public ModulePass {
     unsigned int PrevLocSize = 0;
     char *       ngram_size_str = getenv("AFL_LLVM_NGRAM_SIZE");
     if (!ngram_size_str) ngram_size_str = getenv("AFL_NGRAM_SIZE");
-    char *ctx_str = getenv("AFL_LLVM_CTX");
+    char *caller_str = getenv("AFL_LLVM_CALLER");
 
 #ifdef AFL_HAVE_VECTOR_INTRINSICS
     unsigned int ngram_size = 0;
@@ -197,9 +197,9 @@ struct InsTrim : public ModulePass {
                            GlobalValue::ExternalLinkage, 0, "__afl_area_ptr");
     GlobalVariable *AFLPrevLoc;
     GlobalVariable *AFLContext = NULL;
-    LoadInst *      PrevCtx = NULL;  // for CTX sensitive coverage
+    LoadInst *      PrevCaller = NULL;  // for CALLER sensitive coverage
 
-    if (ctx_str)
+    if (caller_str)
 #if defined(__ANDROID__) || defined(__HAIKU__)
       AFLContext = new GlobalVariable(
           M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_ctx");
@@ -398,11 +398,11 @@ struct InsTrim : public ModulePass {
         unsigned int cur_loc;
 
         // Context sensitive coverage
-        if (ctx_str && &BB == &F.getEntryBlock()) {
+        if (caller_str && &BB == &F.getEntryBlock()) {
 
-          PrevCtx = IRB.CreateLoad(AFLContext);
-          PrevCtx->setMetadata(M.getMDKindID("nosanitize"),
-                               MDNode::get(C, None));
+          PrevCaller = IRB.CreateLoad(AFLContext);
+          PrevCaller->setMetadata(M.getMDKindID("nosanitize"),
+                                  MDNode::get(C, None));
 
           // does the function have calls? and is any of the calls larger than
           // one basic block?
@@ -441,7 +441,7 @@ struct InsTrim : public ModulePass {
 
           }
 
-        }  // END of ctx_str
+        }  // END of caller_str
 
         if (MarkSetOpt && MS.find(&BB) == MS.end()) { continue; }
 
@@ -459,7 +459,7 @@ struct InsTrim : public ModulePass {
             BasicBlock *PBB = *PI;
             auto        It = PredMap.insert({PBB, genLabel()});
             unsigned    Label = It.first->second;
-            cur_loc = Label;
+            // cur_loc = Label;
             PN->addIncoming(ConstantInt::get(Int32Ty, Label), PBB);
 
           }
@@ -485,9 +485,9 @@ struct InsTrim : public ModulePass {
 #endif
           PrevLocTrans = IRB.CreateZExt(PrevLoc, IRB.getInt32Ty());
 
-        if (ctx_str)
+        if (caller_str)
           PrevLocTrans =
-              IRB.CreateZExt(IRB.CreateXor(PrevLocTrans, PrevCtx), Int32Ty);
+              IRB.CreateZExt(IRB.CreateXor(PrevLocTrans, PrevCaller), Int32Ty);
 
         /* Load SHM pointer */
         LoadInst *MapPtr = IRB.CreateLoad(AFLMapPtr);
@@ -535,16 +535,17 @@ struct InsTrim : public ModulePass {
         IRB.CreateStore(Incr, MapPtrIdx)
             ->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
 
-        if (ctx_str && has_calls) {
+        if (caller_str && has_calls) {
 
-          // in CTX mode we have to restore the original context for the
+          // in CALLER mode we have to restore the original context for the
           // caller - she might be calling other functions which need the
-          // correct CTX
+          // correct CALLER
           Instruction *Inst = BB.getTerminator();
           if (isa<ReturnInst>(Inst) || isa<ResumeInst>(Inst)) {
 
             IRBuilder<> Post_IRB(Inst);
-            StoreInst * RestoreCtx = Post_IRB.CreateStore(PrevCtx, AFLContext);
+            StoreInst * RestoreCtx =
+                Post_IRB.CreateStore(PrevCaller, AFLContext);
             RestoreCtx->setMetadata(M.getMDKindID("nosanitize"),
                                     MDNode::get(C, None));
 
diff --git a/instrumentation/README.cmplog.md b/instrumentation/README.cmplog.md
index 5f855e1f..a796c7a7 100644
--- a/instrumentation/README.cmplog.md
+++ b/instrumentation/README.cmplog.md
@@ -1,10 +1,11 @@
 # CmpLog instrumentation
 
-The CmpLog instrumentation enables the logging of the comparisons operands in a
+The CmpLog instrumentation enables logging of comparison operands in a
 shared memory.
 
 These values can be used by various mutators built on top of it.
-At the moment we support the RedQueen mutator (input-2-state instructions only).
+At the moment we support the RedQueen mutator (input-2-state instructions only), 
+for details see [the RedQueen paper](https://www.syssec.ruhr-uni-bochum.de/media/emma/veroeffentlichungen/2018/12/17/NDSS19-Redqueen.pdf).
 
 ## Build
 
@@ -13,7 +14,7 @@ program.
 
 The first version is built using the regular AFL++ instrumentation.
 
-The second one, the CmpLog binary, with setting AFL_LLVM_CMPLOG during the compilation.
+The second one, the CmpLog binary, is built by setting `AFL_LLVM_CMPLOG` during compilation.
 
 For example:
 
@@ -26,11 +27,12 @@ export AFL_LLVM_CMPLOG=1
 ./configure --cc=~/path/to/afl-clang-fast
 make
 cp ./program ./program.cmplog
+unset AFL_LLVM_CMPLOG
 ```
 
 ## Use
 
-AFL++ has the new -c option that needs to be used to specify the CmpLog binary (the second
+AFL++ has the new `-c` option that needs to be used to specify the CmpLog binary (the second
 build).
 
 For example:
diff --git a/instrumentation/README.ctx.md b/instrumentation/README.ctx.md
index caf2c09a..335e9921 100644
--- a/instrumentation/README.ctx.md
+++ b/instrumentation/README.ctx.md
@@ -4,14 +4,19 @@
 
 This is an LLVM-based implementation of the context sensitive branch coverage.
 
-Basically every function gets its own ID and that ID is combined with the
-edges of the called functions.
+Basically every function gets its own ID and, every time an edge is logged,
+all the IDs in the callstack are hashed and combined with the edge transition
+hash to augment the classic edge coverage with the information about the
+calling context.
 
 So if both function A and function B call a function C, the coverage
 collected in C will be different.
 
 In math the coverage is collected as follows:
-`map[current_location_ID ^ previous_location_ID >> 1 ^ previous_callee_ID] += 1`
+`map[current_location_ID ^ previous_location_ID >> 1 ^ hash_callstack_IDs] += 1`
+
+The callstack hash is produced by XOR-ing the function IDs to avoid explosion with
+recursive functions.
 
 ## Usage
 
@@ -20,3 +25,14 @@ Set the `AFL_LLVM_INSTRUMENT=CTX` or `AFL_LLVM_CTX=1` environment variable.
 It is highly recommended to increase the MAP_SIZE_POW2 definition in
 config.h to at least 18 and maybe up to 20 for this as otherwise too
 many map collisions occur.
+
+## Caller Branch Coverage
+
+If the context sensitive coverage introduces too many collisions and becomes
+detrimental, the user can choose to augment edge coverage with just the
+called function ID, instead of the entire callstack hash.
+
+In math the coverage is collected as follows:
+`map[current_location_ID ^ previous_location_ID >> 1 ^ previous_callee_ID] += 1`
+
+Set the `AFL_LLVM_INSTRUMENT=CALLER` or `AFL_LLVM_CALLER=1` environment variable.
diff --git a/instrumentation/README.gcc_plugin.md b/instrumentation/README.gcc_plugin.md
index 12449efd..230ceb73 100644
--- a/instrumentation/README.gcc_plugin.md
+++ b/instrumentation/README.gcc_plugin.md
@@ -3,16 +3,20 @@
 See [../README.md](../README.md) for the general instruction manual.
 See [README.llvm.md](README.llvm.md) for the LLVM-based instrumentation.
 
+This document describes how to build and use `afl-gcc-fast` and `afl-g++-fast`,
+which instrument the target with the help of gcc plugins.
+
 TLDR:
-  * `apt-get install gcc-VERSION-plugin-dev`
-  * `make`
-  * gcc and g++ must point to the gcc-VERSION you you have to set AFL_CC/AFL_CXX
+  * check the version of your gcc compiler: `gcc --version`
+  * `apt-get install gcc-VERSION-plugin-dev` or similar to install headers for gcc plugins
+  * `gcc` and `g++` must match the gcc-VERSION you installed headers for. You can set `AFL_CC`/`AFL_CXX`
     to point to these!
-  * just use afl-gcc-fast/afl-g++-fast normally like you would afl-clang-fast
+  * `make`
+  * just use `afl-gcc-fast`/`afl-g++-fast` normally like you would do with `afl-clang-fast`
 
 ## 1) Introduction
 
-The code in this directory allows you to instrument programs for AFL using
+The code in this directory allows to instrument programs for AFL using
 true compiler-level instrumentation, instead of the more crude
 assembly-level rewriting approach taken by afl-gcc and afl-clang. This has
 several interesting properties:
@@ -27,10 +31,10 @@ several interesting properties:
 
   - The instrumentation is CPU-independent. At least in principle, you should
     be able to rely on it to fuzz programs on non-x86 architectures (after
-    building afl-fuzz with AFL_NOX86=1).
+    building `afl-fuzz` with `AFL_NOX86=1`).
 
   - Because the feature relies on the internals of GCC, it is gcc-specific
-    and will *not* work with LLVM (see ../llvm_mode for an alternative).
+    and will *not* work with LLVM (see [README.llvm.md](README.llvm.md) for an alternative).
 
 Once this implementation is shown to be sufficiently robust and portable, it
 will probably replace afl-gcc. For now, it can be built separately and
@@ -41,29 +45,32 @@ The idea and much of the implementation comes from Laszlo Szekeres.
 ## 2) How to use
 
 In order to leverage this mechanism, you need to have modern enough GCC
-(>= version 4.5.0) and the plugin headers installed on your system. That
+(>= version 4.5.0) and the plugin development headers installed on your system. That
 should be all you need. On Debian machines, these headers can be acquired by
 installing the `gcc-VERSION-plugin-dev` packages.
 
-To build the instrumentation itself, type 'make'. This will generate binaries
-called afl-gcc-fast and afl-g++-fast in the parent directory. 
+To build the instrumentation itself, type `make`. This will generate binaries
+called `afl-gcc-fast` and `afl-g++-fast` in the parent directory. 
 
 The gcc and g++ compiler links have to point to gcc-VERSION - or set these
-by pointing the environment variables AFL_CC/AFL_CXX to them.
-If the CC/CXX have been overridden, those compilers will be used from
-those wrappers without using AFL_CXX/AFL_CC settings.
+by pointing the environment variables `AFL_CC`/`AFL_CXX` to them.
+If the `CC`/`CXX` environment variables have been set, those compilers will be 
+preferred over those from the `AFL_CC`/`AFL_CXX` settings.
 
 Once this is done, you can instrument third-party code in a way similar to the
 standard operating mode of AFL, e.g.:
-
-  CC=/path/to/afl/afl-gcc-fast ./configure [...options...]
+```
+  CC=/path/to/afl/afl-gcc-fast
+  CXX=/path/to/afl/afl-g++-fast
+  export CC CXX
+  ./configure [...options...]
   make
+```
+Note: We also used `CXX` to set the C++ compiler to `afl-g++-fast` for C++ code.
 
-Be sure to also include CXX set to afl-g++-fast for C++ code.
-
-The tool honors roughly the same environmental variables as afl-gcc (see
-[env_variables.md](../docs/env_variables.md). This includes AFL_INST_RATIO,
-AFL_USE_ASAN, AFL_HARDEN, and AFL_DONT_OPTIMIZE.
+The tool honors roughly the same environmental variables as `afl-gcc` (see
+[env_variables.md](../docs/env_variables.md)). This includes `AFL_INST_RATIO`,
+`AFL_USE_ASAN`, `AFL_HARDEN`, and `AFL_DONT_OPTIMIZE`.
 
 Note: if you want the GCC plugin to be installed on your system for all
 users, you need to build it before issuing 'make install' in the parent
@@ -72,7 +79,7 @@ directory.
 ## 3) Gotchas, feedback, bugs
 
 This is an early-stage mechanism, so field reports are welcome. You can send bug
-reports to afl@aflplus.plus
+reports to afl@aflplus.plus.
 
 ## 4) Bonus feature #1: deferred initialization
 
@@ -88,7 +95,7 @@ file before getting to the fuzzed data.
 In such cases, it's beneficial to initialize the forkserver a bit later, once
 most of the initialization work is already done, but before the binary attempts
 to read the fuzzed input and parse it; in some cases, this can offer a 10x+
-performance gain. You can implement delayed initialization in LLVM mode in a
+performance gain. You can implement delayed initialization in GCC mode in a
 fairly simple way.
 
 First, locate a suitable location in the code where the delayed cloning can
@@ -117,7 +124,7 @@ With the location selected, add this code in the appropriate spot:
 ```
 
 You don't need the #ifdef guards, but they will make the program still work as
-usual when compiled with a tool other than afl-gcc-fast/afl-clang-fast.
+usual when compiled with a compiler other than afl-gcc-fast/afl-clang-fast.
 
 Finally, recompile the program with afl-gcc-fast (afl-gcc or afl-clang will
 *not* generate a deferred-initialization binary) - and you should be all set!
@@ -127,7 +134,7 @@ Finally, recompile the program with afl-gcc-fast (afl-gcc or afl-clang will
 Some libraries provide APIs that are stateless, or whose state can be reset in
 between processing different input files. When such a reset is performed, a
 single long-lived process can be reused to try out multiple test cases,
-eliminating the need for repeated fork() calls and the associated OS overhead.
+eliminating the need for repeated `fork()` calls and the associated OS overhead.
 
 The basic structure of the program that does this would be:
 
@@ -160,5 +167,9 @@ wary of memory leaks and the state of file descriptors.
 When running in this mode, the execution paths will inherently vary a bit
 depending on whether the input loop is being entered for the first time or
 executed again. To avoid spurious warnings, the feature implies
-AFL_NO_VAR_CHECK and hides the "variable path" warnings in the UI.
+`AFL_NO_VAR_CHECK` and hides the "variable path" warnings in the UI.
+
+## 6) Bonus feature #3: selective instrumentation
 
+It can be more effective for fuzzing to only instrument parts of the code.
+For details see [README.instrument_list.md](README.instrument_list.md).
diff --git a/instrumentation/README.instrument_list.md b/instrumentation/README.instrument_list.md
index b7dfb40c..2116d24c 100644
--- a/instrumentation/README.instrument_list.md
+++ b/instrumentation/README.instrument_list.md
@@ -47,10 +47,10 @@ A special function is `__afl_coverage_interesting`.
 To use this, you must define `void __afl_coverage_interesting(u8 val, u32 id);`.
 Then you can use this function globally, where the `val` parameter can be set
 by you, the `id` parameter is for afl-fuzz and will be overwritten.
-Note that useful parameters are for `val` are: 1, 2, 3, 4, 8, 16, 32, 64, 128.
+Note that useful parameters for `val` are: 1, 2, 3, 4, 8, 16, 32, 64, 128.
 A value of e.g. 33 will be seen as 32 for coverage purposes.
 
-## 3) Selective instrumenation with AFL_LLVM_ALLOWLIST/AFL_LLVM_DENYLIST
+## 3) Selective instrumentation with AFL_LLVM_ALLOWLIST/AFL_LLVM_DENYLIST
 
 This feature is equivalent to llvm 12 sancov feature and allows to specify
 on a filename and/or function name level to instrument these or skip them.
diff --git a/instrumentation/README.lto.md b/instrumentation/README.lto.md
index a2814173..39f6465a 100644
--- a/instrumentation/README.lto.md
+++ b/instrumentation/README.lto.md
@@ -88,16 +88,35 @@ apt-get install -y clang-12 clang-tools-12 libc++1-12 libc++-12-dev \
 ### Building llvm yourself (version 12)
 
 Building llvm from github takes quite some long time and is not painless:
-```
+```sh
 sudo apt install binutils-dev  # this is *essential*!
-git clone https://github.com/llvm/llvm-project
+git clone --depth=1 https://github.com/llvm/llvm-project
 cd llvm-project
 mkdir build
 cd build
-cmake -DLLVM_ENABLE_PROJECTS='clang;clang-tools-extra;compiler-rt;libclc;libcxx;libcxxabi;libunwind;lld' -DCMAKE_BUILD_TYPE=Release -DLLVM_BINUTILS_INCDIR=/usr/include/ ../llvm/
-make -j $(nproc)
-export PATH=`pwd`/bin:$PATH
-export LLVM_CONFIG=`pwd`/bin/llvm-config
+
+# Add -G Ninja if ninja-build installed
+# "Building with ninja significantly improves your build time, especially with
+# incremental builds, and improves your memory usage."
+cmake \
+    -DCLANG_INCLUDE_DOCS="OFF" \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DLLVM_BINUTILS_INCDIR=/usr/include/ \
+    -DLLVM_BUILD_LLVM_DYLIB="ON" \
+    -DLLVM_ENABLE_BINDINGS="OFF" \
+    -DLLVM_ENABLE_PROJECTS='clang;compiler-rt;libcxx;libcxxabi;libunwind;lld' \
+    -DLLVM_ENABLE_WARNINGS="OFF" \
+    -DLLVM_INCLUDE_BENCHMARKS="OFF" \
+    -DLLVM_INCLUDE_DOCS="OFF" \
+    -DLLVM_INCLUDE_EXAMPLES="OFF" \
+    -DLLVM_INCLUDE_TESTS="OFF" \
+    -DLLVM_LINK_LLVM_DYLIB="ON" \
+    -DLLVM_TARGETS_TO_BUILD="host" \
+    ../llvm/
+cmake --build . -j4
+export PATH="$(pwd)/bin:$PATH"
+export LLVM_CONFIG="$(pwd)/bin/llvm-config"
+export LD_LIBRARY_PATH="$(llvm-config --libdir)${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
 cd /path/to/AFLplusplus/
 make
 sudo make install
diff --git a/instrumentation/README.neverzero.md b/instrumentation/README.neverzero.md
index 5c894d6e..49104e00 100644
--- a/instrumentation/README.neverzero.md
+++ b/instrumentation/README.neverzero.md
@@ -16,7 +16,7 @@ at a very little cost (one instruction per edge).
 (The alternative of saturated counters has been tested also and proved to be
 inferior in terms of path discovery.)
 
-This is implemented in afl-gcc, however for llvm_mode this is optional if
+This is implemented in afl-gcc and afl-gcc-fast, however for llvm_mode this is optional if
 the llvm version is below 9 - as there is a perfomance bug that is only fixed
 in version 9 and onwards.
 
diff --git a/instrumentation/README.ngram.md b/instrumentation/README.ngram.md
index de3ba432..da61ef32 100644
--- a/instrumentation/README.ngram.md
+++ b/instrumentation/README.ngram.md
@@ -10,8 +10,8 @@ by Jinghan Wang, et. al.
 Note that the original implementation (available
 [here](https://github.com/bitsecurerlab/afl-sensitive))
 is built on top of AFL's QEMU mode.
-This is essentially a port that uses LLVM vectorized instructions to achieve
-the same results when compiling source code.
+This is essentially a port that uses LLVM vectorized instructions (available from
+llvm versions 4.0.1 and higher) to achieve the same results when compiling source code.
 
 In math the branch coverage is performed as follows:
 `map[current_location ^ prev_location[0] >> 1 ^ prev_location[1] >> 1 ^ ... up to n-1`] += 1`
diff --git a/instrumentation/README.out_of_line.md b/instrumentation/README.out_of_line.md
index aad215b6..2264f91f 100644
--- a/instrumentation/README.out_of_line.md
+++ b/instrumentation/README.out_of_line.md
@@ -1,18 +1,16 @@
-===========================================
-Using afl++ without inlined instrumentation
-===========================================
+## Using afl++ without inlined instrumentation
 
   This file describes how you can disable inlining of instrumentation.
 
 
 By default, the GCC plugin will duplicate the effects of calling
-__afl_trace (see afl-gcc-rt.o.c) in instrumented code, instead of
+`__afl_trace` (see `afl-gcc-rt.o.c`) in instrumented code, instead of
 issuing function calls.
 
 The calls are presumed to be slower, more so because the rt file
 itself is not optimized by the compiler.
 
-Setting AFL_GCC_OUT_OF_LINE=1 in the environment while compiling code
+Setting `AFL_GCC_OUT_OF_LINE=1` in the environment while compiling code
 with the plugin will disable this inlining, issuing calls to the
 unoptimized runtime instead.
 
diff --git a/instrumentation/README.persistent_mode.md b/instrumentation/README.persistent_mode.md
index 2cf76adf..24f81ea0 100644
--- a/instrumentation/README.persistent_mode.md
+++ b/instrumentation/README.persistent_mode.md
@@ -16,7 +16,7 @@ Examples can be found in [utils/persistent_mode](../utils/persistent_mode).
 ## 2) TLDR;
 
 Example `fuzz_target.c`:
-```
+```c
 #include "what_you_need_for_your_target.h"
 
 __AFL_FUZZ_INIT();
@@ -60,14 +60,14 @@ The speed increase is usually x10 to x20.
 If you want to be able to compile the target without afl-clang-fast/lto then
 add this just after the includes:
 
-```
+```c
 #ifndef __AFL_FUZZ_TESTCASE_LEN
   ssize_t fuzz_len;
   #define __AFL_FUZZ_TESTCASE_LEN fuzz_len
   unsigned char fuzz_buf[1024000];
   #define __AFL_FUZZ_TESTCASE_BUF fuzz_buf
   #define __AFL_FUZZ_INIT() void sync(void);
-  #define __AFL_LOOP(x) ((fuzz_len = read(0, fuzz_buf, sizeof(fuzz_buf))) > 0 ?
+  #define __AFL_LOOP(x) ((fuzz_len = read(0, fuzz_buf, sizeof(fuzz_buf))) > 0 ? 1 : 0)
   #define __AFL_INIT() sync() 
 #endif
 ```
@@ -75,7 +75,7 @@ add this just after the includes:
 ## 3) Deferred initialization
 
 AFL tries to optimize performance by executing the targeted binary just once,
-stopping it just before main(), and then cloning this "main" process to get
+stopping it just before `main()`, and then cloning this "main" process to get
 a steady supply of targets to fuzz.
 
 Although this approach eliminates much of the OS-, linker- and libc-level
@@ -97,7 +97,7 @@ a location after:
   - The creation of any vital threads or child processes - since the forkserver
     can't clone them easily.
 
-  - The initialization of timers via setitimer() or equivalent calls.
+  - The initialization of timers via `setitimer()` or equivalent calls.
 
   - The creation of temporary files, network sockets, offset-sensitive file
     descriptors, and similar shared-state resources - but only provided that
@@ -150,9 +150,9 @@ the impact of memory leaks and similar glitches; 1000 is a good starting point,
 and going much higher increases the likelihood of hiccups without giving you
 any real performance benefits.
 
-A more detailed template is shown in ../utils/persistent_mode/.
-Similarly to the previous mode, the feature works only with afl-clang-fast; #ifdef
-guards can be used to suppress it when using other compilers.
+A more detailed template is shown in `../utils/persistent_mode/`.
+Similarly to the previous mode, the feature works only with afl-clang-fast; 
+`#ifdef` guards can be used to suppress it when using other compilers.
 
 Note that as with the previous mode, the feature is easy to misuse; if you
 do not fully reset the critical state, you may end up with false positives or
@@ -161,7 +161,7 @@ wary of memory leaks and of the state of file descriptors.
 
 PS. Because there are task switches still involved, the mode isn't as fast as
 "pure" in-process fuzzing offered, say, by LLVM's LibFuzzer; but it is a lot
-faster than the normal fork() model, and compared to in-process fuzzing,
+faster than the normal `fork()` model, and compared to in-process fuzzing,
 should be a lot more robust.
 
 ## 5) Shared memory fuzzing
@@ -174,17 +174,17 @@ Setting this up is very easy:
 
 After the includes set the following macro:
 
-```
+```c
 __AFL_FUZZ_INIT();
 ```
 Directly at the start of main - or if you are using the deferred forkserver
-with `__AFL_INIT()`  then *after* `__AFL_INIT? :
-```
+with `__AFL_INIT()` then *after* `__AFL_INIT()` :
+```c
   unsigned char *buf = __AFL_FUZZ_TESTCASE_BUF;
 ```
 
 Then as first line after the `__AFL_LOOP` while loop:
-```
+```c
   int len = __AFL_FUZZ_TESTCASE_LEN;
 ```
 and that is all!
diff --git a/instrumentation/SanitizerCoverageLTO.so.cc b/instrumentation/SanitizerCoverageLTO.so.cc
index e3490847..6dd390e6 100644
--- a/instrumentation/SanitizerCoverageLTO.so.cc
+++ b/instrumentation/SanitizerCoverageLTO.so.cc
@@ -507,6 +507,7 @@ bool ModuleSanitizerCoverage::instrumentModule(
   Zero = ConstantInt::get(Int8Tyi, 0);
   One = ConstantInt::get(Int8Tyi, 1);
 
+  initInstrumentList();
   scanForDangerousFunctions(&M);
   Mo = &M;
 
@@ -733,7 +734,7 @@ bool ModuleSanitizerCoverage::instrumentModule(
                             Var->getInitializer())) {
 
                       HasStr2 = true;
-                      Str2 = Array->getAsString().str();
+                      Str2 = Array->getRawDataValues().str();
 
                     }
 
@@ -760,7 +761,7 @@ bool ModuleSanitizerCoverage::instrumentModule(
                   if (literalLength + 1 == optLength) {
 
                     Str2.append("\0", 1);  // add null byte
-                    addedNull = true;
+                    // addedNull = true;
 
                   }
 
@@ -809,7 +810,7 @@ bool ModuleSanitizerCoverage::instrumentModule(
                             Var->getInitializer())) {
 
                       HasStr1 = true;
-                      Str1 = Array->getAsString().str();
+                      Str1 = Array->getRawDataValues().str();
 
                     }
 
@@ -849,15 +850,18 @@ bool ModuleSanitizerCoverage::instrumentModule(
               thestring = Str2;
 
             optLen = thestring.length();
+            if (optLen < 2 || (optLen == 2 && !thestring[1])) { continue; }
 
             if (isMemcmp || isStrncmp || isStrncasecmp) {
 
               Value *      op2 = callInst->getArgOperand(2);
               ConstantInt *ilen = dyn_cast<ConstantInt>(op2);
+
               if (ilen) {
 
                 uint64_t literalLength = optLen;
                 optLen = ilen->getZExtValue();
+                if (optLen < 2) { continue; }
                 if (literalLength + 1 == optLen) {  // add null byte
                   thestring.append("\0", 1);
                   addedNull = true;
@@ -872,17 +876,21 @@ bool ModuleSanitizerCoverage::instrumentModule(
             // was not already added
             if (!isMemcmp) {
 
-              if (addedNull == false) {
+              if (addedNull == false && thestring[optLen - 1] != '\0') {
 
                 thestring.append("\0", 1);  // add null byte
                 optLen++;
 
               }
 
-              // ensure we do not have garbage
-              size_t offset = thestring.find('\0', 0);
-              if (offset + 1 < optLen) optLen = offset + 1;
-              thestring = thestring.substr(0, optLen);
+              if (!isStdString) {
+
+                // ensure we do not have garbage
+                size_t offset = thestring.find('\0', 0);
+                if (offset + 1 < optLen) optLen = offset + 1;
+                thestring = thestring.substr(0, optLen);
+
+              }
 
             }
 
@@ -1222,7 +1230,7 @@ void ModuleSanitizerCoverage::instrumentFunction(
 
   // afl++ START
   if (!F.size()) return;
-  if (isIgnoreFunction(&F)) return;
+  if (!isInInstrumentList(&F)) return;
   // afl++ END
 
   if (Options.CoverageType >= SanitizerCoverageOptions::SCK_Edge)
@@ -1284,10 +1292,17 @@ GlobalVariable *ModuleSanitizerCoverage::CreateFunctionLocalArrayInSection(
       *CurModule, ArrayTy, false, GlobalVariable::PrivateLinkage,
       Constant::getNullValue(ArrayTy), "__sancov_gen_");
 
+#if LLVM_VERSION_MAJOR > 12
+  if (TargetTriple.supportsCOMDAT() &&
+      (TargetTriple.isOSBinFormatELF() || !F.isInterposable()))
+    if (auto Comdat = getOrCreateFunctionComdat(F, TargetTriple))
+      Array->setComdat(Comdat);
+#else
   if (TargetTriple.supportsCOMDAT() && !F.isInterposable())
     if (auto Comdat =
             GetOrCreateFunctionComdat(F, TargetTriple, CurModuleUniqueId))
       Array->setComdat(Comdat);
+#endif
   Array->setSection(getSectionName(Section));
   Array->setAlignment(Align(DL->getTypeStoreSize(Ty).getFixedSize()));
   GlobalsToAppendToUsed.push_back(Array);
diff --git a/instrumentation/SanitizerCoveragePCGUARD.so.cc b/instrumentation/SanitizerCoveragePCGUARD.so.cc
index 5d6d6703..09cda9e2 100644
--- a/instrumentation/SanitizerCoveragePCGUARD.so.cc
+++ b/instrumentation/SanitizerCoveragePCGUARD.so.cc
@@ -10,6 +10,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/EHPersonalities.h"
@@ -34,11 +35,11 @@
 #include "llvm/InitializePasses.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/SpecialCaseList.h"
 #if LLVM_MAJOR > 10 || (LLVM_MAJOR == 10 && LLVM_MINOR > 0)
   #include "llvm/Support/VirtualFileSystem.h"
 #endif
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Instrumentation.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -47,65 +48,6 @@
 #include "debug.h"
 #include "afl-llvm-common.h"
 
-namespace llvm {
-
-/// This is the ModuleSanitizerCoverage pass used in the new pass manager. The
-/// pass instruments functions for coverage, adds initialization calls to the
-/// module for trace PC guards and 8bit counters if they are requested, and
-/// appends globals to llvm.compiler.used.
-class ModuleSanitizerCoveragePass
-    : public PassInfoMixin<ModuleSanitizerCoveragePass> {
-
- public:
-  explicit ModuleSanitizerCoveragePass(
-      SanitizerCoverageOptions        Options = SanitizerCoverageOptions(),
-      const std::vector<std::string> &AllowlistFiles =
-          std::vector<std::string>(),
-      const std::vector<std::string> &BlocklistFiles =
-          std::vector<std::string>())
-      : Options(Options) {
-
-    if (AllowlistFiles.size() > 0)
-      Allowlist = SpecialCaseList::createOrDie(AllowlistFiles
-#if LLVM_MAJOR > 10 || (LLVM_MAJOR == 10 && LLVM_MINOR > 0)
-                                               ,
-                                               *vfs::getRealFileSystem()
-#endif
-      );
-    if (BlocklistFiles.size() > 0)
-      Blocklist = SpecialCaseList::createOrDie(BlocklistFiles
-#if LLVM_MAJOR > 10 || (LLVM_MAJOR == 10 && LLVM_MINOR > 0)
-                                               ,
-                                               *vfs::getRealFileSystem()
-#endif
-      );
-
-  }
-
-  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
-  static bool       isRequired() {
-
-    return true;
-
-  }
-
- private:
-  SanitizerCoverageOptions Options;
-
-  std::unique_ptr<SpecialCaseList> Allowlist;
-  std::unique_ptr<SpecialCaseList> Blocklist;
-
-};
-
-// Insert SanitizerCoverage instrumentation.
-ModulePass *createModuleSanitizerCoverageLegacyPassPass(
-    const SanitizerCoverageOptions &Options = SanitizerCoverageOptions(),
-    const std::vector<std::string> &AllowlistFiles = std::vector<std::string>(),
-    const std::vector<std::string> &BlocklistFiles =
-        std::vector<std::string>());
-
-}  // namespace llvm
-
 using namespace llvm;
 
 #define DEBUG_TYPE "sancov"
@@ -156,96 +98,8 @@ static const char *const SanCovLowestStackName = "__sancov_lowest_stack";
 
 static char *skip_nozero;
 
-/*
-static cl::opt<int> ClCoverageLevel(
-    "sanitizer-coverage-level",
-    cl::desc("Sanitizer Coverage. 0: none, 1: entry block, 2: all blocks, "
-             "3: all blocks and critical edges"),
-    cl::Hidden, cl::init(3));
-
-static cl::opt<bool> ClTracePC("sanitizer-coverage-trace-pc",
-                               cl::desc("Experimental pc tracing"), cl::Hidden,
-                               cl::init(false));
-
-static cl::opt<bool> ClTracePCGuard("sanitizer-coverage-trace-pc-guard",
-                                    cl::desc("pc tracing with a guard"),
-                                    cl::Hidden, cl::init(true));
-
-// If true, we create a global variable that contains PCs of all instrumented
-// BBs, put this global into a named section, and pass this section's bounds
-// to __sanitizer_cov_pcs_init.
-// This way the coverage instrumentation does not need to acquire the PCs
-// at run-time. Works with trace-pc-guard, inline-8bit-counters, and
-// inline-bool-flag.
-static cl::opt<bool> ClCreatePCTable("sanitizer-coverage-pc-table",
-                                     cl::desc("create a static PC table"),
-                                     cl::Hidden, cl::init(false));
-
-static cl::opt<bool> ClInline8bitCounters(
-    "sanitizer-coverage-inline-8bit-counters",
-    cl::desc("increments 8-bit counter for every edge"), cl::Hidden,
-    cl::init(false));
-
-static cl::opt<bool> ClInlineBoolFlag(
-    "sanitizer-coverage-inline-bool-flag",
-    cl::desc("sets a boolean flag for every edge"), cl::Hidden,
-    cl::init(false));
-
-static cl::opt<bool> ClCMPTracing(
-    "sanitizer-coverage-trace-compares",
-    cl::desc("Tracing of CMP and similar instructions"), cl::Hidden,
-    cl::init(false));
-
-static cl::opt<bool> ClDIVTracing("sanitizer-coverage-trace-divs",
-                                  cl::desc("Tracing of DIV instructions"),
-                                  cl::Hidden, cl::init(false));
-
-static cl::opt<bool> ClGEPTracing("sanitizer-coverage-trace-geps",
-                                  cl::desc("Tracing of GEP instructions"),
-                                  cl::Hidden, cl::init(false));
-
-static cl::opt<bool> ClPruneBlocks(
-    "sanitizer-coverage-prune-blocks",
-    cl::desc("Reduce the number of instrumented blocks"), cl::Hidden,
-    cl::init(true));
-
-static cl::opt<bool> ClStackDepth("sanitizer-coverage-stack-depth",
-                                  cl::desc("max stack depth tracing"),
-                                  cl::Hidden, cl::init(false));
-*/
 namespace {
 
-/*
-SanitizerCoverageOptions getOptions(int LegacyCoverageLevel) {
-
-  SanitizerCoverageOptions Res;
-  switch (LegacyCoverageLevel) {
-
-    case 0:
-      Res.CoverageType = SanitizerCoverageOptions::SCK_None;
-      break;
-    case 1:
-      Res.CoverageType = SanitizerCoverageOptions::SCK_Function;
-      break;
-    case 2:
-      Res.CoverageType = SanitizerCoverageOptions::SCK_BB;
-      break;
-    case 3:
-      Res.CoverageType = SanitizerCoverageOptions::SCK_Edge;
-      break;
-    case 4:
-      Res.CoverageType = SanitizerCoverageOptions::SCK_Edge;
-      Res.IndirectCalls = true;
-      break;
-
-  }
-
-  return Res;
-
-}
-
-*/
-
 SanitizerCoverageOptions OverrideFromCL(SanitizerCoverageOptions Options) {
 
   // Sets CoverageType and IndirectCalls.
@@ -281,12 +135,14 @@ class ModuleSanitizerCoverage {
 
  public:
   ModuleSanitizerCoverage(
-      const SanitizerCoverageOptions &Options = SanitizerCoverageOptions(),
-      const SpecialCaseList *         Allowlist = nullptr,
-      const SpecialCaseList *         Blocklist = nullptr)
-      : Options(OverrideFromCL(Options)),
-        Allowlist(Allowlist),
-        Blocklist(Blocklist) {
+      const SanitizerCoverageOptions &Options = SanitizerCoverageOptions()
+#if LLVM_MAJOR > 10
+          ,
+      const SpecialCaseList *Allowlist = nullptr,
+      const SpecialCaseList *Blocklist = nullptr
+#endif
+      )
+      : Options(OverrideFromCL(Options)) {
 
   }
 
@@ -356,9 +212,6 @@ class ModuleSanitizerCoverage {
 
   SanitizerCoverageOptions Options;
 
-  const SpecialCaseList *Allowlist;
-  const SpecialCaseList *Blocklist;
-
   uint32_t        instr = 0;
   GlobalVariable *AFLMapPtr = NULL;
   ConstantInt *   One = NULL;
@@ -370,27 +223,17 @@ class ModuleSanitizerCoverageLegacyPass : public ModulePass {
 
  public:
   ModuleSanitizerCoverageLegacyPass(
-      const SanitizerCoverageOptions &Options = SanitizerCoverageOptions(),
+      const SanitizerCoverageOptions &Options = SanitizerCoverageOptions()
+#if LLVM_VERSION_MAJOR > 10
+          ,
       const std::vector<std::string> &AllowlistFiles =
           std::vector<std::string>(),
       const std::vector<std::string> &BlocklistFiles =
-          std::vector<std::string>())
+          std::vector<std::string>()
+#endif
+          )
       : ModulePass(ID), Options(Options) {
 
-    if (AllowlistFiles.size() > 0)
-      Allowlist = SpecialCaseList::createOrDie(AllowlistFiles
-#if LLVM_MAJOR > 10 || (LLVM_MAJOR == 10 && LLVM_MINOR > 0)
-                                               ,
-                                               *vfs::getRealFileSystem()
-#endif
-      );
-    if (BlocklistFiles.size() > 0)
-      Blocklist = SpecialCaseList::createOrDie(BlocklistFiles
-#if LLVM_MAJOR > 10 || (LLVM_MAJOR == 10 && LLVM_MINOR > 0)
-                                               ,
-                                               *vfs::getRealFileSystem()
-#endif
-      );
     initializeModuleSanitizerCoverageLegacyPassPass(
         *PassRegistry::getPassRegistry());
 
@@ -398,8 +241,12 @@ class ModuleSanitizerCoverageLegacyPass : public ModulePass {
 
   bool runOnModule(Module &M) override {
 
-    ModuleSanitizerCoverage ModuleSancov(Options, Allowlist.get(),
-                                         Blocklist.get());
+    ModuleSanitizerCoverage ModuleSancov(Options
+#if LLVM_MAJOR > 10
+                                         ,
+                                         Allowlist.get(), Blocklist.get()
+#endif
+    );
     auto DTCallback = [this](Function &F) -> const DominatorTree * {
 
       return &this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
@@ -444,8 +291,12 @@ class ModuleSanitizerCoverageLegacyPass : public ModulePass {
 PreservedAnalyses ModuleSanitizerCoveragePass::run(Module &               M,
                                                    ModuleAnalysisManager &MAM) {
 
-  ModuleSanitizerCoverage ModuleSancov(Options, Allowlist.get(),
-                                       Blocklist.get());
+  ModuleSanitizerCoverage ModuleSancov(Options
+#if LLVM_MAJOR > 10
+                                       ,
+                                       Allowlist.get(), Blocklist.get()
+#endif
+  );
   auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
   auto  DTCallback = [&FAM](Function &F) -> const DominatorTree * {
 
@@ -564,12 +415,6 @@ bool ModuleSanitizerCoverage::instrumentModule(
   }
 
   if (Options.CoverageType == SanitizerCoverageOptions::SCK_None) return false;
-  if (Allowlist &&
-      !Allowlist->inSection("coverage", "src", M.getSourceFileName()))
-    return false;
-  if (Blocklist &&
-      Blocklist->inSection("coverage", "src", M.getSourceFileName()))
-    return false;
   C = &(M.getContext());
   DL = &M.getDataLayout();
   CurModule = &M;
@@ -842,9 +687,6 @@ void ModuleSanitizerCoverage::instrumentFunction(
   if (F.hasPersonalityFn() &&
       isAsynchronousEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
     return;
-  if (Allowlist && !Allowlist->inSection("coverage", "fun", F.getName()))
-    return;
-  if (Blocklist && Blocklist->inSection("coverage", "fun", F.getName())) return;
   if (Options.CoverageType >= SanitizerCoverageOptions::SCK_Edge)
     SplitAllCriticalEdges(
         F, CriticalEdgeSplittingOptions().setIgnoreUnreachableDests());
@@ -915,10 +757,18 @@ GlobalVariable *ModuleSanitizerCoverage::CreateFunctionLocalArrayInSection(
       *CurModule, ArrayTy, false, GlobalVariable::PrivateLinkage,
       Constant::getNullValue(ArrayTy), "__sancov_gen_");
 
+#if LLVM_VERSION_MAJOR > 12
+  if (TargetTriple.supportsCOMDAT() &&
+      (TargetTriple.isOSBinFormatELF() || !F.isInterposable()))
+    if (auto Comdat = getOrCreateFunctionComdat(F, TargetTriple))
+      Array->setComdat(Comdat);
+#else
   if (TargetTriple.supportsCOMDAT() && !F.isInterposable())
     if (auto Comdat =
             GetOrCreateFunctionComdat(F, TargetTriple, CurModuleUniqueId))
       Array->setComdat(Comdat);
+#endif
+
   Array->setSection(getSectionName(Section));
 #if LLVM_MAJOR > 10 || (LLVM_MAJOR == 10 && LLVM_MINOR > 0)
   Array->setAlignment(Align(DL->getTypeStoreSize(Ty).getFixedSize()));
@@ -1088,7 +938,7 @@ void ModuleSanitizerCoverage::InjectTraceForSwitch(
 
       }
 
-      llvm::sort(Initializers.begin() + 2, Initializers.end(),
+      llvm::sort(drop_begin(Initializers, 2),
                  [](const Constant *A, const Constant *B) {
 
                    return cast<ConstantInt>(A)->getLimitedValue() <
@@ -1136,10 +986,10 @@ void ModuleSanitizerCoverage::InjectTraceForGep(
   for (auto GEP : GepTraceTargets) {
 
     IRBuilder<> IRB(GEP);
-    for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I)
-      if (!isa<ConstantInt>(*I) && (*I)->getType()->isIntegerTy())
+    for (Use &Idx : GEP->indices())
+      if (!isa<ConstantInt>(Idx) && Idx->getType()->isIntegerTy())
         IRB.CreateCall(SanCovTraceGepFunction,
-                       {IRB.CreateIntCast(*I, IntptrTy, true)});
+                       {IRB.CreateIntCast(Idx, IntptrTy, true)});
 
   }
 
@@ -1354,12 +1204,20 @@ INITIALIZE_PASS_END(ModuleSanitizerCoverageLegacyPass, "sancov",
                     false)
 
 ModulePass *llvm::createModuleSanitizerCoverageLegacyPassPass(
-    const SanitizerCoverageOptions &Options,
+    const SanitizerCoverageOptions &Options
+#if LLVM_MAJOR > 10
+    ,
     const std::vector<std::string> &AllowlistFiles,
-    const std::vector<std::string> &BlocklistFiles) {
+    const std::vector<std::string> &BlocklistFiles
+#endif
+) {
 
-  return new ModuleSanitizerCoverageLegacyPass(Options, AllowlistFiles,
-                                               BlocklistFiles);
+  return new ModuleSanitizerCoverageLegacyPass(Options
+#if LLVM_MAJOR > 10
+                                               ,
+                                               AllowlistFiles, BlocklistFiles
+#endif
+  );
 
 }
 
diff --git a/instrumentation/afl-compiler-rt.o.c b/instrumentation/afl-compiler-rt.o.c
index c24173af..f241447a 100644
--- a/instrumentation/afl-compiler-rt.o.c
+++ b/instrumentation/afl-compiler-rt.o.c
@@ -20,7 +20,7 @@
 #include "config.h"
 #include "types.h"
 #include "cmplog.h"
-#include "llvm-ngram-coverage.h"
+#include "llvm-alternative-coverage.h"
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -34,6 +34,7 @@
 #include <errno.h>
 
 #include <sys/mman.h>
+#include <sys/syscall.h>
 #ifndef __HAIKU__
   #include <sys/shm.h>
 #endif
@@ -70,7 +71,7 @@
    run. It will end up as .comm, so it shouldn't be too wasteful. */
 
 #if MAP_SIZE <= 65536
-  #define MAP_INITIAL_SIZE 256000
+  #define MAP_INITIAL_SIZE 2097152
 #else
   #define MAP_INITIAL_SIZE MAP_SIZE
 #endif
@@ -96,10 +97,12 @@ int __afl_selective_coverage_temp = 1;
 
 #if defined(__ANDROID__) || defined(__HAIKU__)
 PREV_LOC_T __afl_prev_loc[NGRAM_SIZE_MAX];
+PREV_LOC_T __afl_prev_caller[CTX_MAX_K];
 u32        __afl_prev_ctx;
 u32        __afl_cmp_counter;
 #else
 __thread PREV_LOC_T __afl_prev_loc[NGRAM_SIZE_MAX];
+__thread PREV_LOC_T __afl_prev_caller[CTX_MAX_K];
 __thread u32        __afl_prev_ctx;
 __thread u32        __afl_cmp_counter;
 #endif
@@ -122,6 +125,21 @@ static u8 is_persistent;
 
 static u8 _is_sancov;
 
+/* Debug? */
+
+static u32 __afl_debug;
+
+/* Already initialized markers */
+
+u32 __afl_already_initialized_shm;
+u32 __afl_already_initialized_forkserver;
+u32 __afl_already_initialized_first;
+u32 __afl_already_initialized_second;
+
+/* Dummy pipe for area_is_valid() */
+
+static int __afl_dummy_fd[2] = {2, 2};
+
 /* ensure we kill the child on termination */
 
 void at_exit(int signal) {
@@ -171,7 +189,7 @@ static void __afl_map_shm_fuzz() {
 
   char *id_str = getenv(SHM_FUZZ_ENV_VAR);
 
-  if (getenv("AFL_DEBUG")) {
+  if (__afl_debug) {
 
     fprintf(stderr, "DEBUG: fuzzcase shmem %s\n", id_str ? id_str : "none");
 
@@ -186,7 +204,7 @@ static void __afl_map_shm_fuzz() {
     int         shm_fd = -1;
 
     /* create the shared memory segment as if it was a file */
-    shm_fd = shm_open(shm_file_path, O_RDWR, 0600);
+    shm_fd = shm_open(shm_file_path, O_RDWR, DEFAULT_PERMISSION);
     if (shm_fd == -1) {
 
       fprintf(stderr, "shm_open() failed for fuzz\n");
@@ -217,7 +235,7 @@ static void __afl_map_shm_fuzz() {
     __afl_fuzz_len = (u32 *)map;
     __afl_fuzz_ptr = map + sizeof(u32);
 
-    if (getenv("AFL_DEBUG")) {
+    if (__afl_debug) {
 
       fprintf(stderr, "DEBUG: successfully got fuzzing shared memory\n");
 
@@ -237,6 +255,9 @@ static void __afl_map_shm_fuzz() {
 
 static void __afl_map_shm(void) {
 
+  if (__afl_already_initialized_shm) return;
+  __afl_already_initialized_shm = 1;
+
   // if we are not running in afl ensure the map exists
   if (!__afl_area_ptr) { __afl_area_ptr = __afl_area_ptr_dummy; }
 
@@ -244,8 +265,12 @@ static void __afl_map_shm(void) {
 
   if (__afl_final_loc) {
 
-    if (__afl_final_loc % 32)
-      __afl_final_loc = (((__afl_final_loc + 31) >> 5) << 5);
+    if (__afl_final_loc % 64) {
+
+      __afl_final_loc = (((__afl_final_loc + 63) >> 6) << 6);
+
+    }
+
     __afl_map_size = __afl_final_loc;
 
     if (__afl_final_loc > MAP_SIZE) {
@@ -290,18 +315,23 @@ static void __afl_map_shm(void) {
      early-stage __afl_area_initial region that is needed to allow some really
      hacky .init code to work correctly in projects such as OpenSSL. */
 
-  if (getenv("AFL_DEBUG"))
+  if (__afl_debug) {
+
     fprintf(stderr,
-            "DEBUG: id_str %s, __afl_area_ptr %p, __afl_area_initial %p, "
-            "__afl_map_addr 0x%llx, MAP_SIZE %u, __afl_final_loc %u, "
+            "DEBUG: (1) id_str %s, __afl_area_ptr %p, __afl_area_initial %p, "
+            "__afl_area_ptr_dummy 0x%p, __afl_map_addr 0x%llx, MAP_SIZE %u, "
+            "__afl_final_loc %u, "
             "max_size_forkserver %u/0x%x\n",
             id_str == NULL ? "<null>" : id_str, __afl_area_ptr,
-            __afl_area_initial, __afl_map_addr, MAP_SIZE, __afl_final_loc,
-            FS_OPT_MAX_MAPSIZE, FS_OPT_MAX_MAPSIZE);
+            __afl_area_initial, __afl_area_ptr_dummy, __afl_map_addr, MAP_SIZE,
+            __afl_final_loc, FS_OPT_MAX_MAPSIZE, FS_OPT_MAX_MAPSIZE);
+
+  }
 
   if (id_str) {
 
-    if (__afl_area_ptr && __afl_area_ptr != __afl_area_initial) {
+    if (__afl_area_ptr && __afl_area_ptr != __afl_area_initial &&
+        __afl_area_ptr != __afl_area_ptr_dummy) {
 
       if (__afl_map_addr) {
 
@@ -323,7 +353,7 @@ static void __afl_map_shm(void) {
     unsigned char *shm_base = NULL;
 
     /* create the shared memory segment as if it was a file */
-    shm_fd = shm_open(shm_file_path, O_RDWR, 0600);
+    shm_fd = shm_open(shm_file_path, O_RDWR, DEFAULT_PERMISSION);
     if (shm_fd == -1) {
 
       fprintf(stderr, "shm_open() failed\n");
@@ -346,17 +376,18 @@ static void __afl_map_shm(void) {
 
     }
 
-    if (shm_base == MAP_FAILED) {
+    close(shm_fd);
+    shm_fd = -1;
 
-      close(shm_fd);
-      shm_fd = -1;
+    if (shm_base == MAP_FAILED) {
 
       fprintf(stderr, "mmap() failed\n");
+      perror("mmap for map");
+
       if (__afl_map_addr)
         send_forkserver_error(FS_ERROR_MAP_ADDR);
       else
         send_forkserver_error(FS_ERROR_MMAP);
-      perror("mmap for map");
 
       exit(2);
 
@@ -368,8 +399,8 @@ static void __afl_map_shm(void) {
 
     if (__afl_map_size && __afl_map_size > MAP_SIZE) {
 
-      u8 *map_env = getenv("AFL_MAP_SIZE");
-      if (!map_env || atoi(map_env) < MAP_SIZE) {
+      u8 *map_env = (u8 *)getenv("AFL_MAP_SIZE");
+      if (!map_env || atoi((char *)map_env) < MAP_SIZE) {
 
         send_forkserver_error(FS_ERROR_MAP_SIZE);
         _exit(1);
@@ -378,7 +409,7 @@ static void __afl_map_shm(void) {
 
     }
 
-    __afl_area_ptr = shmat(shm_id, (void *)__afl_map_addr, 0);
+    __afl_area_ptr = (u8 *)shmat(shm_id, (void *)__afl_map_addr, 0);
 
     /* Whooooops. */
 
@@ -405,9 +436,9 @@ static void __afl_map_shm(void) {
 
              __afl_map_addr) {
 
-    __afl_area_ptr =
-        mmap((void *)__afl_map_addr, __afl_map_size, PROT_READ | PROT_WRITE,
-             MAP_FIXED_NOREPLACE | MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+    __afl_area_ptr = (u8 *)mmap(
+        (void *)__afl_map_addr, __afl_map_size, PROT_READ | PROT_WRITE,
+        MAP_FIXED_NOREPLACE | MAP_SHARED | MAP_ANONYMOUS, -1, 0);
 
     if (__afl_area_ptr == MAP_FAILED) {
 
@@ -425,7 +456,7 @@ static void __afl_map_shm(void) {
 
     if (__afl_final_loc > MAP_INITIAL_SIZE) {
 
-      __afl_area_ptr = malloc(__afl_final_loc);
+      __afl_area_ptr = (u8 *)malloc(__afl_final_loc);
 
     }
 
@@ -435,11 +466,24 @@ static void __afl_map_shm(void) {
 
   __afl_area_ptr_backup = __afl_area_ptr;
 
+  if (__afl_debug) {
+
+    fprintf(stderr,
+            "DEBUG: (2) id_str %s, __afl_area_ptr %p, __afl_area_initial %p, "
+            "__afl_area_ptr_dummy 0x%p, __afl_map_addr 0x%llx, MAP_SIZE "
+            "%u, __afl_final_loc %u, "
+            "max_size_forkserver %u/0x%x\n",
+            id_str == NULL ? "<null>" : id_str, __afl_area_ptr,
+            __afl_area_initial, __afl_area_ptr_dummy, __afl_map_addr, MAP_SIZE,
+            __afl_final_loc, FS_OPT_MAX_MAPSIZE, FS_OPT_MAX_MAPSIZE);
+
+  }
+
   if (__afl_selective_coverage) {
 
     if (__afl_map_size > MAP_INITIAL_SIZE) {
 
-      __afl_area_ptr_dummy = malloc(__afl_map_size);
+      __afl_area_ptr_dummy = (u8 *)malloc(__afl_map_size);
 
       if (__afl_area_ptr_dummy) {
 
@@ -463,7 +507,7 @@ static void __afl_map_shm(void) {
 
   id_str = getenv(CMPLOG_SHM_ENV_VAR);
 
-  if (getenv("AFL_DEBUG")) {
+  if (__afl_debug) {
 
     fprintf(stderr, "DEBUG: cmplog id_str %s\n",
             id_str == NULL ? "<null>" : id_str);
@@ -472,13 +516,19 @@ static void __afl_map_shm(void) {
 
   if (id_str) {
 
+    if ((__afl_dummy_fd[1] = open("/dev/null", O_WRONLY)) < 0) {
+
+      if (pipe(__afl_dummy_fd) < 0) { __afl_dummy_fd[1] = 1; }
+
+    }
+
 #ifdef USEMMAP
     const char *    shm_file_path = id_str;
     int             shm_fd = -1;
     struct cmp_map *shm_base = NULL;
 
     /* create the shared memory segment as if it was a file */
-    shm_fd = shm_open(shm_file_path, O_RDWR, 0600);
+    shm_fd = shm_open(shm_file_path, O_RDWR, DEFAULT_PERMISSION);
     if (shm_fd == -1) {
 
       perror("shm_open() failed\n");
@@ -505,7 +555,7 @@ static void __afl_map_shm(void) {
 #else
     u32 shm_id = atoi(id_str);
 
-    __afl_cmp_map = shmat(shm_id, NULL, 0);
+    __afl_cmp_map = (struct cmp_map *)shmat(shm_id, NULL, 0);
 #endif
 
     __afl_cmp_map_backup = __afl_cmp_map;
@@ -522,6 +572,58 @@ static void __afl_map_shm(void) {
 
 }
 
+/* Undo __afl_map_shm(): detach the coverage map and, if one is configured,
+   the cmplog map, park __afl_area_ptr on the dummy area, clear the marker. */
+static void __afl_unmap_shm(void) {
+
+  if (!__afl_already_initialized_shm) return;
+
+  char *id_str = getenv(SHM_ENV_VAR);
+
+  if (id_str) {
+
+#ifdef USEMMAP
+
+    munmap((void *)__afl_area_ptr, __afl_map_size);
+
+#else
+
+    shmdt((void *)__afl_area_ptr);
+
+#endif
+
+  } else if ((!__afl_area_ptr || __afl_area_ptr == __afl_area_initial) &&
+
+             __afl_map_addr) {
+
+    munmap((void *)__afl_map_addr, __afl_map_size);
+
+  }
+
+  __afl_area_ptr = __afl_area_ptr_dummy;
+
+  id_str = getenv(CMPLOG_SHM_ENV_VAR);
+
+  if (id_str) {
+
+#ifdef USEMMAP
+    /* NOTE(review): assumes __afl_map_shm mapped sizeof(struct cmp_map) */
+    munmap((void *)__afl_cmp_map, sizeof(struct cmp_map));
+
+#else
+
+    shmdt((void *)__afl_cmp_map);
+
+#endif
+
+    __afl_cmp_map = NULL;
+
+  }
+
+  __afl_already_initialized_shm = 0;
+
+}
+
 #ifdef __linux__
 static void __afl_start_snapshots(void) {
 
@@ -550,7 +652,7 @@ static void __afl_start_snapshots(void) {
 
     if (read(FORKSRV_FD, &was_killed, 4) != 4) { _exit(1); }
 
-    if (getenv("AFL_DEBUG")) {
+    if (__afl_debug) {
 
       fprintf(stderr, "target forkserver recv: %08x\n", was_killed);
 
@@ -627,7 +729,7 @@ static void __afl_start_snapshots(void) {
       static uint32_t counter = 0;
       char            fn[32];
       sprintf(fn, "%09u:forkserver", counter);
-      s32 fd_doc = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0600);
+      s32 fd_doc = open(fn, O_WRONLY | O_CREAT | O_TRUNC, DEFAULT_PERMISSION);
       if (fd_doc >= 0) {
 
         if (write(fd_doc, __afl_fuzz_ptr, *__afl_fuzz_len) != *__afl_fuzz_len) {
@@ -727,6 +829,9 @@ static void __afl_start_snapshots(void) {
 
 static void __afl_start_forkserver(void) {
 
+  if (__afl_already_initialized_forkserver) return;
+  __afl_already_initialized_forkserver = 1;
+
   struct sigaction orig_action;
   sigaction(SIGTERM, NULL, &orig_action);
   old_sigterm_handler = orig_action.sa_handler;
@@ -777,7 +882,7 @@ static void __afl_start_forkserver(void) {
 
     if (read(FORKSRV_FD, &was_killed, 4) != 4) _exit(1);
 
-    if (getenv("AFL_DEBUG")) {
+    if (__afl_debug) {
 
       fprintf(stderr, "target forkserver recv: %08x\n", was_killed);
 
@@ -855,7 +960,7 @@ static void __afl_start_forkserver(void) {
       static uint32_t counter = 0;
       char            fn[32];
       sprintf(fn, "%09u:forkserver", counter);
-      s32 fd_doc = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0600);
+      s32 fd_doc = open(fn, O_WRONLY | O_CREAT | O_TRUNC, DEFAULT_PERMISSION);
       if (fd_doc >= 0) {
 
         if (write(fd_doc, __afl_fuzz_ptr, *__afl_fuzz_len) != *__afl_fuzz_len) {
@@ -1012,11 +1117,14 @@ void __afl_manual_init(void) {
     __afl_sharedmem_fuzzing = 0;
     if (__afl_area_ptr == NULL) __afl_area_ptr = __afl_area_ptr_dummy;
 
-    if (getenv("AFL_DEBUG"))
+    if (__afl_debug) {
+
       fprintf(stderr,
               "DEBUG: disabled instrumentation because of "
               "AFL_DISABLE_LLVM_INSTRUMENTATION\n");
 
+    }
+
   }
 
   if (!init_done) {
@@ -1056,6 +1164,11 @@ __attribute__((constructor(CTOR_PRIO))) void __afl_auto_early(void) {
 
 __attribute__((constructor(1))) void __afl_auto_second(void) {
 
+  if (__afl_already_initialized_second) return;
+  __afl_already_initialized_second = 1;
+
+  if (getenv("AFL_DEBUG")) { __afl_debug = 1; }
+
   if (getenv("AFL_DISABLE_LLVM_INSTRUMENTATION")) return;
   u8 *ptr;
 
@@ -1080,17 +1193,18 @@ __attribute__((constructor(1))) void __afl_auto_second(void) {
 
   }
 
-}
+}  // ptr memleak report is a false positive
 
 /* preset __afl_area_ptr #1 - at constructor level 0 global variables have
    not been set */
 
 __attribute__((constructor(0))) void __afl_auto_first(void) {
 
-  if (getenv("AFL_DISABLE_LLVM_INSTRUMENTATION")) return;
-  u8 *ptr;
+  if (__afl_already_initialized_first) return;
+  __afl_already_initialized_first = 1;
 
-  ptr = (u8 *)malloc(1024000);
+  if (getenv("AFL_DISABLE_LLVM_INSTRUMENTATION")) return;
+  u8 *ptr = (u8 *)malloc(MAP_INITIAL_SIZE);
 
   if (ptr && (ssize_t)ptr != -1) {
 
@@ -1099,7 +1213,7 @@ __attribute__((constructor(0))) void __afl_auto_first(void) {
 
   }
 
-}
+}  // ptr memleak report is a false positive
 
 /* The following stuff deals with supporting -fsanitize-coverage=trace-pc-guard.
    It remains non-operational in the traditional, plugin-backed LLVM mode.
@@ -1167,11 +1281,13 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) {
 
   _is_sancov = 1;
 
-  if (getenv("AFL_DEBUG")) {
+  if (__afl_debug) {
 
     fprintf(stderr,
-            "Running __sanitizer_cov_trace_pc_guard_init: %p-%p (%lu edges)\n",
-            start, stop, stop - start);
+            "Running __sanitizer_cov_trace_pc_guard_init: %p-%p (%lu edges) "
+            "after_fs=%u\n",
+            start, stop, (unsigned long)(stop - start),
+            __afl_already_initialized_forkserver);
 
   }
 
@@ -1187,6 +1303,40 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) {
 
   }
 
+  /* instrumented code is loaded *after* our forkserver is up. this is a
+     problem. We cannot prevent collisions then :( */
+  if (__afl_already_initialized_forkserver &&
+      __afl_final_loc + 1 + stop - start > __afl_map_size) {
+
+    if (__afl_debug) {
+
+      fprintf(stderr, "Warning: new instrumented code after the forkserver!\n");
+
+    }
+
+    __afl_final_loc = 2;
+
+    if (1 + stop - start > __afl_map_size) {
+
+      *(start++) = ++__afl_final_loc;
+
+      while (start < stop) {
+
+        if (R(100) < inst_ratio)
+          *start = ++__afl_final_loc % __afl_map_size;
+        else
+          *start = 0;
+
+        start++;
+
+      }
+
+      return;
+
+    }
+
+  }
+
   /* Make sure that the first element in the range is always set - we use that
      to avoid duplicate calls (which can happen as an artifact of the underlying
      implementation in LLVM). */
@@ -1204,6 +1354,28 @@ void __sanitizer_cov_trace_pc_guard_init(uint32_t *start, uint32_t *stop) {
 
   }
 
+  if (__afl_debug) {
+
+    fprintf(stderr,
+            "Done __sanitizer_cov_trace_pc_guard_init: __afl_final_loc = %u\n",
+            __afl_final_loc);
+
+  }
+
+  if (__afl_already_initialized_shm && __afl_final_loc > __afl_map_size) {
+
+    if (__afl_debug) {
+
+      fprintf(stderr, "Reinit shm necessary (+%u)\n",
+              __afl_final_loc - __afl_map_size);
+
+    }
+
+    __afl_unmap_shm();
+    __afl_map_shm();
+
+  }
+
 }
 
 ///// CmpLog instrumentation
@@ -1455,24 +1627,48 @@ void __sanitizer_cov_trace_cmp1(uint8_t arg1, uint8_t arg2) {
 
 }
 
+void __sanitizer_cov_trace_const_cmp1(uint8_t arg1, uint8_t arg2) {
+  /* 1-byte const_cmp callback: pass both operands to the cmplog hook */
+  __cmplog_ins_hook1(arg1, arg2, 0);
+
+}
+
 void __sanitizer_cov_trace_cmp2(uint16_t arg1, uint16_t arg2) {
 
   __cmplog_ins_hook2(arg1, arg2, 0);
 
 }
 
+void __sanitizer_cov_trace_const_cmp2(uint16_t arg1, uint16_t arg2) {
+  /* 2-byte const_cmp callback: same hook call as __sanitizer_cov_trace_cmp2 */
+  __cmplog_ins_hook2(arg1, arg2, 0);
+
+}
+
 void __sanitizer_cov_trace_cmp4(uint32_t arg1, uint32_t arg2) {
 
   __cmplog_ins_hook4(arg1, arg2, 0);
 
 }
 
+void __sanitizer_cov_trace_const_cmp4(uint32_t arg1, uint32_t arg2) {
+  /* 4-byte const_cmp callback: same hook call as __sanitizer_cov_trace_cmp4 */
+  __cmplog_ins_hook4(arg1, arg2, 0);
+
+}
+
 void __sanitizer_cov_trace_cmp8(uint64_t arg1, uint64_t arg2) {
 
   __cmplog_ins_hook8(arg1, arg2, 0);
 
 }
 
+void __sanitizer_cov_trace_const_cmp8(uint64_t arg1, uint64_t arg2) {
+  /* 8-byte const_cmp callback: same hook call as __sanitizer_cov_trace_cmp8 */
+  __cmplog_ins_hook8(arg1, arg2, 0);
+
+}
+
 #ifdef WORD_SIZE_64
 void __sanitizer_cov_trace_cmp16(uint128_t arg1, uint128_t arg2) {
 
@@ -1480,6 +1676,12 @@ void __sanitizer_cov_trace_cmp16(uint128_t arg1, uint128_t arg2) {
 
 }
 
+void __sanitizer_cov_trace_const_cmp16(uint128_t arg1, uint128_t arg2) {
+  /* 16-byte const_cmp callback: pass both operands to the cmplog hook */
+  __cmplog_ins_hook16(arg1, arg2, 0);
+
+}
+
 #endif
 
 void __sanitizer_cov_trace_switch(uint64_t val, uint64_t *cases) {
@@ -1523,17 +1725,43 @@ void __sanitizer_cov_trace_switch(uint64_t val, uint64_t *cases) {
 
 }
 
+__attribute__((weak)) void *__asan_region_is_poisoned(void *beg, size_t size) {
+  /* weak fallback: report every region as not poisoned (NULL) */
+  return NULL;
+
+}
+
 // POSIX shenanigan to see if an area is mapped.
 // If it is mapped as X-only, we have a problem, so maybe we should add a check
 // to avoid to call it on .text addresses
-static int area_is_mapped(void *ptr, size_t len) {
+static int area_is_valid(void *ptr, size_t len) {
+
+  if (unlikely(!ptr || __asan_region_is_poisoned(ptr, len))) { return 0; }
+
+  long r = syscall(SYS_write, __afl_dummy_fd[1], ptr, len);
+
+  if (r <= 0 || r > len) return 0;
+
+  // even if the write succeeds this can be a false positive if we cross
+  // a page boundary. who knows why.
+
+  char *p = (char *)ptr;
+  long  page_size = sysconf(_SC_PAGE_SIZE);
+  char *page = (char *)((uintptr_t)p & ~(page_size - 1)) + page_size;
 
-  char *p = ptr;
-  char *page = (char *)((uintptr_t)p & ~(sysconf(_SC_PAGE_SIZE) - 1));
+  if (page > p + len) {
 
-  int r = msync(page, (p - page) + len, MS_ASYNC);
-  if (r < 0) return errno != ENOMEM;
-  return 1;
+    // no, not crossing a page boundary
+    return (int)r;
+
+  } else {
+
+    // yes, it crosses a boundary, hence we can only return the length of the
+    // rest of the first page; we cannot detect whether the next page is
+    // valid or not, neither by SYS_write nor by msync() :-(
+    return (int)(page - p);
+
+  }
 
 }
 
@@ -1541,20 +1769,25 @@ void __cmplog_rtn_hook(u8 *ptr1, u8 *ptr2) {
 
   /*
     u32 i;
-    if (!area_is_mapped(ptr1, 32) || !area_is_mapped(ptr2, 32)) return;
+    if (area_is_valid(ptr1, 32) <= 0 || area_is_valid(ptr2, 32) <= 0) return;
     fprintf(stderr, "rtn arg0=");
-    for (i = 0; i < 24; i++)
+    for (i = 0; i < 32; i++)
       fprintf(stderr, "%02x", ptr1[i]);
     fprintf(stderr, " arg1=");
-    for (i = 0; i < 24; i++)
+    for (i = 0; i < 32; i++)
       fprintf(stderr, "%02x", ptr2[i]);
     fprintf(stderr, "\n");
   */
 
   if (unlikely(!__afl_cmp_map)) return;
+  // fprintf(stderr, "RTN1 %p %p\n", ptr1, ptr2);
+  int l1, l2;
+  if ((l1 = area_is_valid(ptr1, 32)) <= 0 ||
+      (l2 = area_is_valid(ptr2, 32)) <= 0)
+    return;
+  int len = MIN(l1, l2);
 
-  if (!area_is_mapped(ptr1, 32) || !area_is_mapped(ptr2, 32)) return;
-
+  // fprintf(stderr, "RTN2 %u\n", len);
   uintptr_t k = (uintptr_t)__builtin_return_address(0);
   k = (k >> 4) ^ (k << 8);
   k &= CMP_MAP_W - 1;
@@ -1564,17 +1797,17 @@ void __cmplog_rtn_hook(u8 *ptr1, u8 *ptr2) {
   if (__afl_cmp_map->headers[k].type != CMP_TYPE_RTN) {
 
     __afl_cmp_map->headers[k].type = CMP_TYPE_RTN;
-    hits = 0;
     __afl_cmp_map->headers[k].hits = 1;
-    __afl_cmp_map->headers[k].shape = 31;
+    __afl_cmp_map->headers[k].shape = len - 1;
+    hits = 0;
 
   } else {
 
     hits = __afl_cmp_map->headers[k].hits++;
 
-    if (__afl_cmp_map->headers[k].shape < 31) {
+    if (__afl_cmp_map->headers[k].shape < len) {
 
-      __afl_cmp_map->headers[k].shape = 31;
+      __afl_cmp_map->headers[k].shape = len - 1;
 
     }
 
@@ -1582,9 +1815,10 @@ void __cmplog_rtn_hook(u8 *ptr1, u8 *ptr2) {
 
   hits &= CMP_MAP_RTN_H - 1;
   __builtin_memcpy(((struct cmpfn_operands *)__afl_cmp_map->log[k])[hits].v0,
-                   ptr1, 32);
+                   ptr1, len);
   __builtin_memcpy(((struct cmpfn_operands *)__afl_cmp_map->log[k])[hits].v1,
-                   ptr2, 32);
+                   ptr2, len);
+  // fprintf(stderr, "RTN3\n");
 
 }
 
@@ -1629,12 +1863,20 @@ static u8 *get_llvm_stdstring(u8 *string) {
 
 void __cmplog_rtn_gcc_stdstring_cstring(u8 *stdstring, u8 *cstring) {
 
+  if (unlikely(!__afl_cmp_map)) return;  // no cmp map pointer set
+  if (area_is_valid(stdstring, 32) <= 0 || area_is_valid(cstring, 32) <= 0)
+    return;  // an argument failed area_is_valid(): do not touch it
+
   __cmplog_rtn_hook(get_gcc_stdstring(stdstring), cstring);
 
 }
 
 void __cmplog_rtn_gcc_stdstring_stdstring(u8 *stdstring1, u8 *stdstring2) {
 
+  if (unlikely(!__afl_cmp_map)) return;
+  if (area_is_valid(stdstring1, 32) <= 0 || area_is_valid(stdstring2, 32) <= 0)
+    return;
+
   __cmplog_rtn_hook(get_gcc_stdstring(stdstring1),
                     get_gcc_stdstring(stdstring2));
 
@@ -1642,12 +1884,20 @@ void __cmplog_rtn_gcc_stdstring_stdstring(u8 *stdstring1, u8 *stdstring2) {
 
 void __cmplog_rtn_llvm_stdstring_cstring(u8 *stdstring, u8 *cstring) {
 
+  if (unlikely(!__afl_cmp_map)) return;  // no cmp map pointer set
+  if (area_is_valid(stdstring, 32) <= 0 || area_is_valid(cstring, 32) <= 0)
+    return;  // an argument failed area_is_valid(): do not touch it
+
   __cmplog_rtn_hook(get_llvm_stdstring(stdstring), cstring);
 
 }
 
 void __cmplog_rtn_llvm_stdstring_stdstring(u8 *stdstring1, u8 *stdstring2) {
 
+  if (unlikely(!__afl_cmp_map)) return;
+  if (area_is_valid(stdstring1, 32) <= 0 || area_is_valid(stdstring2, 32) <= 0)
+    return;
+
   __cmplog_rtn_hook(get_llvm_stdstring(stdstring1),
                     get_llvm_stdstring(stdstring2));
 
diff --git a/instrumentation/afl-llvm-common.cc b/instrumentation/afl-llvm-common.cc
index a27c4069..74943fb2 100644
--- a/instrumentation/afl-llvm-common.cc
+++ b/instrumentation/afl-llvm-common.cc
@@ -60,20 +60,25 @@ bool isIgnoreFunction(const llvm::Function *F) {
       "asan.",
       "llvm.",
       "sancov.",
-      "__ubsan_",
+      "__ubsan",
       "ign.",
-      "__afl_",
+      "__afl",
       "_fini",
-      "__libc_csu",
+      "__libc_",
       "__asan",
       "__msan",
       "__cmplog",
       "__sancov",
+      "__san",
+      "__cxx_",
+      "__decide_deferred",
+      "_GLOBAL",
+      "_ZZN6__asan",
+      "_ZZN6__lsan",
       "msan.",
       "LLVMFuzzerM",
       "LLVMFuzzerC",
       "LLVMFuzzerI",
-      "__decide_deferred",
       "maybe_duplicate_stderr",
       "discard_output",
       "close_stdout",
@@ -89,6 +94,20 @@ bool isIgnoreFunction(const llvm::Function *F) {
 
   }
 
+  static const char *ignoreSubstringList[] = {
+
+      "__asan",       "__msan",     "__ubsan", "__lsan",
+      "__san",        "__sanitize", "__cxx",   "_GLOBAL__",
+      "DebugCounter", "DwarfDebug", "DebugLoc"
+
+  };
+
+  for (auto const &ignoreListFunc : ignoreSubstringList) {
+
+    if (F->getName().contains(ignoreListFunc)) { return true; }
+
+  }
+
   return false;
 
 }
@@ -351,7 +370,7 @@ static std::string getSourceName(llvm::Function *F) {
 
     if (cDILoc) { instFilename = cDILoc->getFilename(); }
 
-    if (instFilename.str().empty()) {
+    if (instFilename.str().empty() && cDILoc) {
 
       /* If the original location is empty, try using the inlined location
        */
diff --git a/instrumentation/afl-llvm-dict2file.so.cc b/instrumentation/afl-llvm-dict2file.so.cc
index a4b33732..c954054b 100644
--- a/instrumentation/afl-llvm-dict2file.so.cc
+++ b/instrumentation/afl-llvm-dict2file.so.cc
@@ -90,7 +90,7 @@ void dict2file(int fd, u8 *mem, u32 len) {
   j = 1;
   for (i = 0; i < len; i++) {
 
-    if (isprint(mem[i])) {
+    if (isprint(mem[i]) && mem[i] != '\\' && mem[i] != '"') {
 
       line[j++] = mem[i];
 
@@ -357,6 +357,7 @@ bool AFLdict2filePass::runOnModule(Module &M) {
           StringRef   TmpStr;
           bool        HasStr1;
           getConstantStringInfo(Str1P, TmpStr);
+
           if (TmpStr.empty()) {
 
             HasStr1 = false;
@@ -403,7 +404,7 @@ bool AFLdict2filePass::runOnModule(Module &M) {
                           dyn_cast<ConstantDataArray>(Var->getInitializer())) {
 
                     HasStr2 = true;
-                    Str2 = Array->getAsString().str();
+                    Str2 = Array->getRawDataValues().str();
 
                   }
 
@@ -430,7 +431,6 @@ bool AFLdict2filePass::runOnModule(Module &M) {
                 if (literalLength + 1 == optLength) {
 
                   Str2.append("\0", 1);  // add null byte
-                  addedNull = true;
 
                 }
 
@@ -480,7 +480,7 @@ bool AFLdict2filePass::runOnModule(Module &M) {
                           dyn_cast<ConstantDataArray>(Var->getInitializer())) {
 
                     HasStr1 = true;
-                    Str1 = Array->getAsString().str();
+                    Str1 = Array->getRawDataValues().str();
 
                   }
 
@@ -521,14 +521,18 @@ bool AFLdict2filePass::runOnModule(Module &M) {
 
           optLen = thestring.length();
 
+          if (optLen < 2 || (optLen == 2 && !thestring[1])) { continue; }
+
           if (isMemcmp || isStrncmp || isStrncasecmp) {
 
             Value *      op2 = callInst->getArgOperand(2);
             ConstantInt *ilen = dyn_cast<ConstantInt>(op2);
+
             if (ilen) {
 
               uint64_t literalLength = optLen;
               optLen = ilen->getZExtValue();
+              if (optLen < 2) { continue; }
               if (literalLength + 1 == optLen) {  // add null byte
                 thestring.append("\0", 1);
                 addedNull = true;
@@ -543,17 +547,21 @@ bool AFLdict2filePass::runOnModule(Module &M) {
           // was not already added
           if (!isMemcmp) {
 
-            if (addedNull == false) {
+            if (addedNull == false && thestring[optLen - 1] != '\0') {
 
               thestring.append("\0", 1);  // add null byte
               optLen++;
 
             }
 
-            // ensure we do not have garbage
-            size_t offset = thestring.find('\0', 0);
-            if (offset + 1 < optLen) optLen = offset + 1;
-            thestring = thestring.substr(0, optLen);
+            if (!isStdString) {
+
+              // ensure we do not have garbage
+              size_t offset = thestring.find('\0', 0);
+              if (offset + 1 < optLen) optLen = offset + 1;
+              thestring = thestring.substr(0, optLen);
+
+            }
 
           }
 
diff --git a/instrumentation/afl-llvm-lto-instrumentation.so.cc b/instrumentation/afl-llvm-lto-instrumentation.so.cc
index fa494f44..50306224 100644
--- a/instrumentation/afl-llvm-lto-instrumentation.so.cc
+++ b/instrumentation/afl-llvm-lto-instrumentation.so.cc
@@ -69,7 +69,8 @@ class AFLLTOPass : public ModulePass {
 
     if (getenv("AFL_DEBUG")) debug = 1;
     if ((ptr = getenv("AFL_LLVM_LTO_STARTID")) != NULL)
-      if ((afl_global_id = (uint32_t)atoi(ptr)) < 0 || afl_global_id >= MAP_SIZE)
+      if ((afl_global_id = (uint32_t)atoi(ptr)) < 0 ||
+          afl_global_id >= MAP_SIZE)
         FATAL("AFL_LLVM_LTO_STARTID value of \"%s\" is not between 0 and %u\n",
               ptr, MAP_SIZE - 1);
 
@@ -518,7 +519,7 @@ bool AFLLTOPass::runOnModule(Module &M) {
                             Var->getInitializer())) {
 
                       HasStr2 = true;
-                      Str2 = Array->getAsString().str();
+                      Str2 = Array->getRawDataValues().str();
 
                     }
 
@@ -545,7 +546,7 @@ bool AFLLTOPass::runOnModule(Module &M) {
                   if (literalLength + 1 == optLength) {
 
                     Str2.append("\0", 1);  // add null byte
-                    addedNull = true;
+                    // addedNull = true;
 
                   }
 
@@ -594,7 +595,7 @@ bool AFLLTOPass::runOnModule(Module &M) {
                             Var->getInitializer())) {
 
                       HasStr1 = true;
-                      Str1 = Array->getAsString().str();
+                      Str1 = Array->getRawDataValues().str();
 
                     }
 
@@ -634,15 +635,18 @@ bool AFLLTOPass::runOnModule(Module &M) {
               thestring = Str2;
 
             optLen = thestring.length();
+            if (optLen < 2 || (optLen == 2 && !thestring[1])) { continue; }
 
             if (isMemcmp || isStrncmp || isStrncasecmp) {
 
               Value *      op2 = callInst->getArgOperand(2);
               ConstantInt *ilen = dyn_cast<ConstantInt>(op2);
+
               if (ilen) {
 
                 uint64_t literalLength = optLen;
                 optLen = ilen->getZExtValue();
+                if (optLen < 2) { continue; }
                 if (literalLength + 1 == optLen) {  // add null byte
                   thestring.append("\0", 1);
                   addedNull = true;
@@ -657,17 +661,21 @@ bool AFLLTOPass::runOnModule(Module &M) {
             // was not already added
             if (!isMemcmp) {
 
-              if (addedNull == false) {
+              if (addedNull == false && thestring[optLen - 1] != '\0') {
 
                 thestring.append("\0", 1);  // add null byte
                 optLen++;
 
               }
 
-              // ensure we do not have garbage
-              size_t offset = thestring.find('\0', 0);
-              if (offset + 1 < optLen) optLen = offset + 1;
-              thestring = thestring.substr(0, optLen);
+              if (!isStdString) {
+
+                // ensure we do not have garbage
+                size_t offset = thestring.find('\0', 0);
+                if (offset + 1 < optLen) optLen = offset + 1;
+                thestring = thestring.substr(0, optLen);
+
+              }
 
             }
 
@@ -923,9 +931,7 @@ bool AFLLTOPass::runOnModule(Module &M) {
 
     if (getenv("AFL_LLVM_LTO_DONTWRITEID") == NULL) {
 
-      uint32_t write_loc = afl_global_id;
-
-      if (afl_global_id % 32) write_loc = (((afl_global_id + 32) >> 4) << 4);
+      uint32_t write_loc = (((afl_global_id + 63) >> 6) << 6);
 
       GlobalVariable *AFLFinalLoc = new GlobalVariable(
           M, Int32Ty, true, GlobalValue::ExternalLinkage, 0, "__afl_final_loc");
diff --git a/instrumentation/afl-llvm-pass.so.cc b/instrumentation/afl-llvm-pass.so.cc
index 57ff3b47..0f773aba 100644
--- a/instrumentation/afl-llvm-pass.so.cc
+++ b/instrumentation/afl-llvm-pass.so.cc
@@ -62,7 +62,7 @@ typedef long double max_align_t;
 #endif
 
 #include "afl-llvm-common.h"
-#include "llvm-ngram-coverage.h"
+#include "llvm-alternative-coverage.h"
 
 using namespace llvm;
 
@@ -82,9 +82,10 @@ class AFLCoverage : public ModulePass {
 
  protected:
   uint32_t ngram_size = 0;
+  uint32_t ctx_k = 0;
   uint32_t map_size = MAP_SIZE;
   uint32_t function_minimum_size = 1;
-  char *   ctx_str = NULL, *skip_nozero = NULL;
+  char *   ctx_str = NULL, *caller_str = NULL, *skip_nozero = NULL;
 
 };
 
@@ -183,10 +184,16 @@ bool AFLCoverage::runOnModule(Module &M) {
   skip_nozero = getenv("AFL_LLVM_SKIP_NEVERZERO");
 
   unsigned PrevLocSize = 0;
+  unsigned PrevCallerSize = 0;
 
   char *ngram_size_str = getenv("AFL_LLVM_NGRAM_SIZE");
   if (!ngram_size_str) ngram_size_str = getenv("AFL_NGRAM_SIZE");
+  char *ctx_k_str = getenv("AFL_LLVM_CTX_K");
+  if (!ctx_k_str) ctx_k_str = getenv("AFL_CTX_K");
   ctx_str = getenv("AFL_LLVM_CTX");
+  caller_str = getenv("AFL_LLVM_CALLER");
+
+  bool instrument_ctx = ctx_str || caller_str;
 
 #ifdef AFL_HAVE_VECTOR_INTRINSICS
   /* Decide previous location vector size (must be a power of two) */
@@ -204,6 +211,31 @@ bool AFLCoverage::runOnModule(Module &M) {
   if (ngram_size)
     PrevLocSize = ngram_size - 1;
   else
+    PrevLocSize = 1;
+
+  /* Decide K-ctx vector size (must be a power of two) */
+  VectorType *PrevCallerTy = NULL;
+
+  if (ctx_k_str)
+    if (sscanf(ctx_k_str, "%u", &ctx_k) != 1 || ctx_k < 1 || ctx_k > CTX_MAX_K)
+      FATAL("Bad value of AFL_CTX_K (must be between 1 and CTX_MAX_K (%u))",
+            CTX_MAX_K);
+
+  if (ctx_k == 1) {
+
+    ctx_k = 0;
+    instrument_ctx = true;
+    caller_str = ctx_k_str;  // Enable CALLER instead
+
+  }
+
+  if (ctx_k) {
+
+    PrevCallerSize = ctx_k;
+    instrument_ctx = true;
+
+  }
+
 #else
   if (ngram_size_str)
   #ifndef LLVM_VERSION_PATCH
@@ -217,8 +249,20 @@ bool AFLCoverage::runOnModule(Module &M) {
         "%d.%d.%d!",
         LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, LLVM_VERSION_PATCH);
   #endif
+  if (ctx_k_str)
+  #ifndef LLVM_VERSION_PATCH
+    FATAL(
+        "Sorry, K-CTX branch coverage is not supported with llvm version "
+        "%d.%d.%d!",
+        LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, 0);
+  #else
+    FATAL(
+        "Sorry, K-CTX branch coverage is not supported with llvm version "
+        "%d.%d.%d!",
+        LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, LLVM_VERSION_PATCH);
+  #endif
+  PrevLocSize = 1;
 #endif
-    PrevLocSize = 1;
 
 #ifdef AFL_HAVE_VECTOR_INTRINSICS
   int PrevLocVecSize = PowerOf2Ceil(PrevLocSize);
@@ -231,6 +275,17 @@ bool AFLCoverage::runOnModule(Module &M) {
     );
 #endif
 
+#ifdef AFL_HAVE_VECTOR_INTRINSICS
+  int PrevCallerVecSize = PowerOf2Ceil(PrevCallerSize);
+  if (ctx_k)
+    PrevCallerTy = VectorType::get(IntLocTy, PrevCallerVecSize
+  #if LLVM_VERSION_MAJOR >= 12
+                                   ,
+                                   false
+  #endif
+    );
+#endif
+
   /* Get globals for the SHM region and the previous location. Note that
      __afl_prev_loc is thread-local. */
 
@@ -238,9 +293,10 @@ bool AFLCoverage::runOnModule(Module &M) {
       new GlobalVariable(M, PointerType::get(Int8Ty, 0), false,
                          GlobalValue::ExternalLinkage, 0, "__afl_area_ptr");
   GlobalVariable *AFLPrevLoc;
+  GlobalVariable *AFLPrevCaller;
   GlobalVariable *AFLContext = NULL;
 
-  if (ctx_str)
+  if (ctx_str || caller_str)
 #if defined(__ANDROID__) || defined(__HAIKU__)
     AFLContext = new GlobalVariable(
         M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_ctx");
@@ -275,6 +331,31 @@ bool AFLCoverage::runOnModule(Module &M) {
 #endif
 
 #ifdef AFL_HAVE_VECTOR_INTRINSICS
+  if (ctx_k)
+  #if defined(__ANDROID__) || defined(__HAIKU__)
+    AFLPrevCaller = new GlobalVariable(
+        M, PrevCallerTy, /* isConstant */ false, GlobalValue::ExternalLinkage,
+        /* Initializer */ nullptr, "__afl_prev_caller");
+  #else
+    AFLPrevCaller = new GlobalVariable(
+        M, PrevCallerTy, /* isConstant */ false, GlobalValue::ExternalLinkage,
+        /* Initializer */ nullptr, "__afl_prev_caller",
+        /* InsertBefore */ nullptr, GlobalVariable::GeneralDynamicTLSModel,
+        /* AddressSpace */ 0, /* IsExternallyInitialized */ false);
+  #endif
+  else
+#endif
+#if defined(__ANDROID__) || defined(__HAIKU__)
+    AFLPrevCaller =
+        new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage, 0,
+                           "__afl_prev_caller");
+#else
+  AFLPrevCaller = new GlobalVariable(
+      M, Int32Ty, false, GlobalValue::ExternalLinkage, 0, "__afl_prev_caller",
+      0, GlobalVariable::GeneralDynamicTLSModel, 0, false);
+#endif
+
+#ifdef AFL_HAVE_VECTOR_INTRINSICS
   /* Create the vector shuffle mask for updating the previous block history.
      Note that the first element of the vector will store cur_loc, so just set
      it to undef to allow the optimizer to do its thing. */
@@ -288,13 +369,30 @@ bool AFLCoverage::runOnModule(Module &M) {
     PrevLocShuffle.push_back(ConstantInt::get(Int32Ty, PrevLocSize));
 
   Constant *PrevLocShuffleMask = ConstantVector::get(PrevLocShuffle);
+
+  Constant *                  PrevCallerShuffleMask = NULL;
+  SmallVector<Constant *, 32> PrevCallerShuffle = {UndefValue::get(Int32Ty)};
+
+  if (ctx_k) {
+
+    for (unsigned I = 0; I < PrevCallerSize - 1; ++I)
+      PrevCallerShuffle.push_back(ConstantInt::get(Int32Ty, I));
+
+    for (int I = PrevCallerSize; I < PrevCallerVecSize; ++I)
+      PrevCallerShuffle.push_back(ConstantInt::get(Int32Ty, PrevCallerSize));
+
+    PrevCallerShuffleMask = ConstantVector::get(PrevCallerShuffle);
+
+  }
+
 #endif
 
   // other constants we need
   ConstantInt *Zero = ConstantInt::get(Int8Ty, 0);
   ConstantInt *One = ConstantInt::get(Int8Ty, 1);
 
-  LoadInst *PrevCtx = NULL;  // CTX sensitive coverage
+  Value *   PrevCtx = NULL;     // CTX sensitive coverage
+  LoadInst *PrevCaller = NULL;  // K-CTX coverage
 
   /* Instrument all the things! */
 
@@ -318,12 +416,30 @@ bool AFLCoverage::runOnModule(Module &M) {
       IRBuilder<>          IRB(&(*IP));
 
       // Context sensitive coverage
-      if (ctx_str && &BB == &F.getEntryBlock()) {
+      if (instrument_ctx && &BB == &F.getEntryBlock()) {
+
+#ifdef AFL_HAVE_VECTOR_INTRINSICS
+        if (ctx_k) {
+
+          PrevCaller = IRB.CreateLoad(AFLPrevCaller);
+          PrevCaller->setMetadata(M.getMDKindID("nosanitize"),
+                                  MDNode::get(C, None));
+          PrevCtx =
+              IRB.CreateZExt(IRB.CreateXorReduce(PrevCaller), IRB.getInt32Ty());
+
+        } else
+
+#endif
+        {
 
-        // load the context ID of the previous function and write to to a local
-        // variable on the stack
-        PrevCtx = IRB.CreateLoad(AFLContext);
-        PrevCtx->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
+          // load the context ID of the previous function and write it to a
+          // local variable on the stack
+          LoadInst *PrevCtxLoad = IRB.CreateLoad(AFLContext);
+          PrevCtxLoad->setMetadata(M.getMDKindID("nosanitize"),
+                                   MDNode::get(C, None));
+          PrevCtx = PrevCtxLoad;
+
+        }
 
         // does the function have calls? and is any of the calls larger than one
         // basic block?
@@ -354,10 +470,32 @@ bool AFLCoverage::runOnModule(Module &M) {
         // if yes we store a context ID for this function in the global var
         if (has_calls) {
 
-          ConstantInt *NewCtx = ConstantInt::get(Int32Ty, AFL_R(map_size));
-          StoreInst *  StoreCtx = IRB.CreateStore(NewCtx, AFLContext);
-          StoreCtx->setMetadata(M.getMDKindID("nosanitize"),
-                                MDNode::get(C, None));
+          Value *NewCtx = ConstantInt::get(Int32Ty, AFL_R(map_size));
+#ifdef AFL_HAVE_VECTOR_INTRINSICS
+          if (ctx_k) {
+
+            Value *ShuffledPrevCaller = IRB.CreateShuffleVector(
+                PrevCaller, UndefValue::get(PrevCallerTy),
+                PrevCallerShuffleMask);
+            Value *UpdatedPrevCaller = IRB.CreateInsertElement(
+                ShuffledPrevCaller, NewCtx, (uint64_t)0);
+
+            StoreInst *Store =
+                IRB.CreateStore(UpdatedPrevCaller, AFLPrevCaller);
+            Store->setMetadata(M.getMDKindID("nosanitize"),
+                               MDNode::get(C, None));
+
+          } else
+
+#endif
+          {
+
+            if (ctx_str) NewCtx = IRB.CreateXor(PrevCtx, NewCtx);
+            StoreInst *StoreCtx = IRB.CreateStore(NewCtx, AFLContext);
+            StoreCtx->setMetadata(M.getMDKindID("nosanitize"),
+                                  MDNode::get(C, None));
+
+          }
 
         }
 
@@ -411,13 +549,20 @@ bool AFLCoverage::runOnModule(Module &M) {
 
         // in CTX mode we have to restore the original context for the caller -
         // she might be calling other functions which need the correct CTX
-        if (ctx_str && has_calls) {
+        if (instrument_ctx && has_calls) {
 
           Instruction *Inst = BB.getTerminator();
           if (isa<ReturnInst>(Inst) || isa<ResumeInst>(Inst)) {
 
             IRBuilder<> Post_IRB(Inst);
-            StoreInst * RestoreCtx = Post_IRB.CreateStore(PrevCtx, AFLContext);
+
+            StoreInst *RestoreCtx;
+  #ifdef AFL_HAVE_VECTOR_INTRINSICS
+            if (ctx_k)
+              RestoreCtx = IRB.CreateStore(PrevCaller, AFLPrevCaller);
+            else
+  #endif
+              RestoreCtx = Post_IRB.CreateStore(PrevCtx, AFLContext);
             RestoreCtx->setMetadata(M.getMDKindID("nosanitize"),
                                     MDNode::get(C, None));
 
@@ -458,7 +603,7 @@ bool AFLCoverage::runOnModule(Module &M) {
 #endif
         PrevLocTrans = PrevLoc;
 
-      if (ctx_str)
+      if (instrument_ctx)
         PrevLocTrans =
             IRB.CreateZExt(IRB.CreateXor(PrevLocTrans, PrevCtx), Int32Ty);
       else
@@ -538,19 +683,27 @@ bool AFLCoverage::runOnModule(Module &M) {
 
         Store = IRB.CreateStore(ConstantInt::get(Int32Ty, cur_loc >> 1),
                                 AFLPrevLoc);
+        Store->setMetadata(M.getMDKindID("nosanitize"), MDNode::get(C, None));
 
       }
 
       // in CTX mode we have to restore the original context for the caller -
       // she might be calling other functions which need the correct CTX.
       // Currently this is only needed for the Ubuntu clang-6.0 bug
-      if (ctx_str && has_calls) {
+      if (instrument_ctx && has_calls) {
 
         Instruction *Inst = BB.getTerminator();
         if (isa<ReturnInst>(Inst) || isa<ResumeInst>(Inst)) {
 
           IRBuilder<> Post_IRB(Inst);
-          StoreInst * RestoreCtx = Post_IRB.CreateStore(PrevCtx, AFLContext);
+
+          StoreInst *RestoreCtx;
+#ifdef AFL_HAVE_VECTOR_INTRINSICS
+          if (ctx_k)
+            RestoreCtx = IRB.CreateStore(PrevCaller, AFLPrevCaller);
+          else
+#endif
+            RestoreCtx = Post_IRB.CreateStore(PrevCtx, AFLContext);
           RestoreCtx->setMetadata(M.getMDKindID("nosanitize"),
                                   MDNode::get(C, None));
 
diff --git a/instrumentation/cmplog-instructions-pass.cc b/instrumentation/cmplog-instructions-pass.cc
index b5cc1882..ad334d3b 100644
--- a/instrumentation/cmplog-instructions-pass.cc
+++ b/instrumentation/cmplog-instructions-pass.cc
@@ -19,12 +19,13 @@
 #include <stdlib.h>
 #include <unistd.h>
 
+#include <iostream>
 #include <list>
 #include <string>
 #include <fstream>
 #include <sys/time.h>
-#include "llvm/Config/llvm-config.h"
 
+#include "llvm/Config/llvm-config.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/LegacyPassManager.h"
@@ -265,13 +266,20 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
       unsigned int  max_size = Val->getType()->getIntegerBitWidth(), cast_size;
       unsigned char do_cast = 0;
 
-      if (!SI->getNumCases() || max_size < 16 || max_size % 8) {
+      if (!SI->getNumCases() || max_size < 16) {
 
         // if (!be_quiet) errs() << "skip trivial switch..\n";
         continue;
 
       }
 
+      if (max_size % 8) {
+
+        max_size = (((max_size / 8) + 1) * 8);
+        do_cast = 1;
+
+      }
+
       IRBuilder<> IRB(SI->getParent());
       IRB.SetInsertPoint(SI);
 
@@ -310,36 +318,8 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
 
       if (do_cast) {
 
-        ConstantInt *cint = dyn_cast<ConstantInt>(Val);
-        if (cint) {
-
-          uint64_t val = cint->getZExtValue();
-          // fprintf(stderr, "ConstantInt: %lu\n", val);
-          switch (cast_size) {
-
-            case 8:
-              CompareTo = ConstantInt::get(Int8Ty, val);
-              break;
-            case 16:
-              CompareTo = ConstantInt::get(Int16Ty, val);
-              break;
-            case 32:
-              CompareTo = ConstantInt::get(Int32Ty, val);
-              break;
-            case 64:
-              CompareTo = ConstantInt::get(Int64Ty, val);
-              break;
-            case 128:
-              CompareTo = ConstantInt::get(Int128Ty, val);
-              break;
-
-          }
-
-        } else {
-
-          CompareTo = IRB.CreateBitCast(Val, IntegerType::get(C, cast_size));
-
-        }
+        CompareTo =
+            IRB.CreateIntCast(CompareTo, IntegerType::get(C, cast_size), false);
 
       }
 
@@ -361,27 +341,8 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
 
           if (do_cast) {
 
-            uint64_t val = cint->getZExtValue();
-            // fprintf(stderr, "ConstantInt: %lu\n", val);
-            switch (cast_size) {
-
-              case 8:
-                new_param = ConstantInt::get(Int8Ty, val);
-                break;
-              case 16:
-                new_param = ConstantInt::get(Int16Ty, val);
-                break;
-              case 32:
-                new_param = ConstantInt::get(Int32Ty, val);
-                break;
-              case 64:
-                new_param = ConstantInt::get(Int64Ty, val);
-                break;
-              case 128:
-                new_param = ConstantInt::get(Int128Ty, val);
-                break;
-
-            }
+            new_param =
+                IRB.CreateIntCast(cint, IntegerType::get(C, cast_size), false);
 
           }
 
@@ -457,7 +418,7 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
       IntegerType *        intTyOp0 = NULL;
       IntegerType *        intTyOp1 = NULL;
       unsigned             max_size = 0, cast_size = 0;
-      unsigned char        attr = 0, do_cast = 0;
+      unsigned char        attr = 0;
       std::vector<Value *> args;
 
       CmpInst *cmpInst = dyn_cast<CmpInst>(selectcmpInst);
@@ -523,7 +484,6 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
           max_size = 128;
 
         attr += 8;
-        do_cast = 1;
 
       } else {
 
@@ -540,7 +500,9 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
 
       }
 
-      if (!max_size || max_size % 8 || max_size < 16) { continue; }
+      if (!max_size || max_size < 16) { continue; }
+
+      if (max_size % 8) { max_size = (((max_size / 8) + 1) * 8); }
 
       if (max_size > 128) {
 
@@ -553,7 +515,6 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
         }
 
         max_size = 128;
-        do_cast = 1;
 
       }
 
@@ -569,92 +530,30 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
           break;
         default:
           cast_size = 128;
-          do_cast = 1;
 
       }
 
-      if (do_cast) {
-
-        // F*cking LLVM optimized out any kind of bitcasts of ConstantInt values
-        // creating illegal calls. WTF. So we have to work around this.
-
-        ConstantInt *cint = dyn_cast<ConstantInt>(op0);
-        if (cint) {
-
-          uint64_t val = cint->getZExtValue();
-          // fprintf(stderr, "ConstantInt: %lu\n", val);
-          ConstantInt *new_param = NULL;
-          switch (cast_size) {
-
-            case 8:
-              new_param = ConstantInt::get(Int8Ty, val);
-              break;
-            case 16:
-              new_param = ConstantInt::get(Int16Ty, val);
-              break;
-            case 32:
-              new_param = ConstantInt::get(Int32Ty, val);
-              break;
-            case 64:
-              new_param = ConstantInt::get(Int64Ty, val);
-              break;
-            case 128:
-              new_param = ConstantInt::get(Int128Ty, val);
-              break;
-
-          }
-
-          if (!new_param) { continue; }
-          args.push_back(new_param);
-
-        } else {
-
-          Value *V0 = IRB.CreateBitCast(op0, IntegerType::get(C, cast_size));
-          args.push_back(V0);
-
-        }
-
-        cint = dyn_cast<ConstantInt>(op1);
-        if (cint) {
-
-          uint64_t     val = cint->getZExtValue();
-          ConstantInt *new_param = NULL;
-          switch (cast_size) {
-
-            case 8:
-              new_param = ConstantInt::get(Int8Ty, val);
-              break;
-            case 16:
-              new_param = ConstantInt::get(Int16Ty, val);
-              break;
-            case 32:
-              new_param = ConstantInt::get(Int32Ty, val);
-              break;
-            case 64:
-              new_param = ConstantInt::get(Int64Ty, val);
-              break;
-            case 128:
-              new_param = ConstantInt::get(Int128Ty, val);
-              break;
-
-          }
-
-          if (!new_param) { continue; }
-          args.push_back(new_param);
-
-        } else {
-
-          Value *V1 = IRB.CreateBitCast(op1, IntegerType::get(C, cast_size));
-          args.push_back(V1);
-
-        }
-
-      } else {
-
-        args.push_back(op0);
-        args.push_back(op1);
-
-      }
+      // errs() << "[CMPLOG] cmp  " << *cmpInst << "(in function " <<
+      // cmpInst->getFunction()->getName() << ")\n";
+
+      // first bitcast to integer type of the same bitsize as the original
+      // type (this is a nop, if already integer)
+      Value *op0_i = IRB.CreateBitCast(
+          op0, IntegerType::get(C, op0->getType()->getPrimitiveSizeInBits()));
+      // then create an int cast, which does zext, trunc or bitcast. In our case
+      // usually zext to the next larger supported type (this is a nop if
+      // already the right type)
+      Value *V0 =
+          IRB.CreateIntCast(op0_i, IntegerType::get(C, cast_size), false);
+      args.push_back(V0);
+      Value *op1_i = IRB.CreateBitCast(
+          op1, IntegerType::get(C, op1->getType()->getPrimitiveSizeInBits()));
+      Value *V1 =
+          IRB.CreateIntCast(op1_i, IntegerType::get(C, cast_size), false);
+      args.push_back(V1);
+
+      // errs() << "[CMPLOG] casted parameters:\n0: " << *V0 << "\n1: " << *V1
+      // << "\n";
 
       ConstantInt *attribute = ConstantInt::get(Int8Ty, attr);
       args.push_back(attribute);
@@ -667,7 +566,7 @@ bool CmpLogInstructions::hookInstrs(Module &M) {
       }
 
       // fprintf(stderr, "_ExtInt(%u) castTo %u with attr %u didcast %u\n",
-      //         max_size, cast_size, attr, do_cast);
+      //         max_size, cast_size, attr);
 
       switch (cast_size) {
 
diff --git a/instrumentation/compare-transform-pass.so.cc b/instrumentation/compare-transform-pass.so.cc
index da5cf7e9..3ecba4e6 100644
--- a/instrumentation/compare-transform-pass.so.cc
+++ b/instrumentation/compare-transform-pass.so.cc
@@ -229,9 +229,9 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
                           dyn_cast<ConstantDataArray>(Var->getInitializer())) {
 
                     HasStr2 = true;
-                    Str2 = Array->getAsString();
+                    Str2 = Array->getRawDataValues();
                     valueMap[Str2P] = new std::string(Str2.str());
-                    fprintf(stderr, "glo2 %s\n", Str2.str().c_str());
+                    // fprintf(stderr, "glo2 %s\n", Str2.str().c_str());
 
                   }
 
@@ -254,7 +254,7 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
                             Var->getInitializer())) {
 
                       HasStr1 = true;
-                      Str1 = Array->getAsString();
+                      Str1 = Array->getRawDataValues();
                       valueMap[Str1P] = new std::string(Str1.str());
                       // fprintf(stderr, "glo1 %s\n", Str1.str().c_str());
 
@@ -316,7 +316,7 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
               uint64_t len = ilen->getZExtValue();
               // if len is zero this is a pointless call but allow real
               // implementation to worry about that
-              if (!len) continue;
+              if (len < 2) continue;
 
               if (isMemcmp) {
 
@@ -362,19 +362,22 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
     bool        HasStr1 = getConstantStringInfo(Str1P, Str1);
     bool        HasStr2 = getConstantStringInfo(Str2P, Str2);
     uint64_t    constStrLen, unrollLen, constSizedLen = 0;
-    bool        isMemcmp =
-        !callInst->getCalledFunction()->getName().compare(StringRef("memcmp"));
-    bool isSizedcmp = isMemcmp ||
-                      !callInst->getCalledFunction()->getName().compare(
-                          StringRef("strncmp")) ||
-                      !callInst->getCalledFunction()->getName().compare(
-                          StringRef("strncasecmp"));
+    bool        isMemcmp = false;
+    bool        isSizedcmp = false;
+    bool        isCaseInsensitive = false;
+    Function *  Callee = callInst->getCalledFunction();
+    if (Callee) {
+
+      isMemcmp = Callee->getName().compare("memcmp") == 0;
+      isSizedcmp = isMemcmp || Callee->getName().compare("strncmp") == 0 ||
+                   Callee->getName().compare("strncasecmp") == 0;
+      isCaseInsensitive = Callee->getName().compare("strcasecmp") == 0 ||
+                          Callee->getName().compare("strncasecmp") == 0;
+
+    }
+
     Value *sizedValue = isSizedcmp ? callInst->getArgOperand(2) : NULL;
     bool   isConstSized = sizedValue && isa<ConstantInt>(sizedValue);
-    bool isCaseInsensitive = !callInst->getCalledFunction()->getName().compare(
-                                 StringRef("strcasecmp")) ||
-                             !callInst->getCalledFunction()->getName().compare(
-                                 StringRef("strncasecmp"));
 
     if (!(HasStr1 || HasStr2)) {
 
@@ -391,7 +394,7 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
         if (val && !val->empty()) {
 
           Str2 = StringRef(*val);
-          HasStr2 = true;
+          // HasStr2 = true;
 
         }
 
@@ -417,15 +420,29 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
 
     }
 
+    if (TmpConstStr.length() < 2 ||
+        (TmpConstStr.length() == 2 && !TmpConstStr[1])) {
+
+      continue;
+
+    }
+
     // add null termination character implicit in c strings
-    TmpConstStr.append("\0", 1);
+    if (!isMemcmp && TmpConstStr[TmpConstStr.length() - 1]) {
+
+      TmpConstStr.append("\0", 1);
+
+    }
 
     // in the unusual case the const str has embedded null
     // characters, the string comparison functions should terminate
     // at the first null
-    if (!isMemcmp)
+    if (!isMemcmp) {
+
       TmpConstStr.assign(TmpConstStr, 0, TmpConstStr.find('\0') + 1);
 
+    }
+
     constStrLen = TmpConstStr.length();
     // prefer use of StringRef (in comparison to std::string a StringRef has
     // built-in runtime bounds checking, which makes debugging easier)
@@ -436,15 +453,6 @@ bool CompareTransform::transformCmps(Module &M, const bool processStrcmp,
     else
       unrollLen = constStrLen;
 
-    /*
-        if (!be_quiet)
-          errs() << callInst->getCalledFunction()->getName() << ": unroll len "
-                 << unrollLen
-                 << ((isSizedcmp && !isConstSized) ? ", variable n" : "") << ":
-       "
-                 << ConstStr << "\n";
-    */
-
     /* split before the call instruction */
     BasicBlock *bb = callInst->getParent();
     BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(callInst));
diff --git a/instrumentation/llvm-ngram-coverage.h b/instrumentation/llvm-alternative-coverage.h
index 666839c8..0d7b3957 100644
--- a/instrumentation/llvm-ngram-coverage.h
+++ b/instrumentation/llvm-alternative-coverage.h
@@ -14,5 +14,8 @@ typedef u64 PREV_LOC_T;
 /* Maximum ngram size */
 #define NGRAM_SIZE_MAX 16U
 
+/* Maximum K for top-K context sensitivity */
+#define CTX_MAX_K 32U
+
 #endif
 
diff --git a/instrumentation/split-compares-pass.so.cc b/instrumentation/split-compares-pass.so.cc
index b6d8c466..b02a89fb 100644
--- a/instrumentation/split-compares-pass.so.cc
+++ b/instrumentation/split-compares-pass.so.cc
@@ -149,8 +149,11 @@ bool SplitComparesTransform::simplifyFPCompares(Module &M) {
     auto op1 = FcmpInst->getOperand(1);
 
     /* find out what the new predicate is going to be */
-    auto               pred = dyn_cast<CmpInst>(FcmpInst)->getPredicate();
+    auto cmp_inst = dyn_cast<CmpInst>(FcmpInst);
+    if (!cmp_inst) { continue; }
+    auto               pred = cmp_inst->getPredicate();
     CmpInst::Predicate new_pred;
+
     switch (pred) {
 
       case CmpInst::FCMP_UGE:
@@ -276,8 +279,11 @@ bool SplitComparesTransform::simplifyCompares(Module &M) {
     auto op1 = IcmpInst->getOperand(1);
 
     /* find out what the new predicate is going to be */
-    auto               pred = dyn_cast<CmpInst>(IcmpInst)->getPredicate();
+    auto cmp_inst = dyn_cast<CmpInst>(IcmpInst);
+    if (!cmp_inst) { continue; }
+    auto               pred = cmp_inst->getPredicate();
     CmpInst::Predicate new_pred;
+
     switch (pred) {
 
       case CmpInst::ICMP_UGE:
@@ -407,12 +413,16 @@ bool SplitComparesTransform::simplifyIntSignedness(Module &M) {
     auto op1 = IcmpInst->getOperand(1);
 
     IntegerType *intTyOp0 = dyn_cast<IntegerType>(op0->getType());
+    if (!intTyOp0) { continue; }
     unsigned     bitw = intTyOp0->getBitWidth();
     IntegerType *IntType = IntegerType::get(C, bitw);
 
     /* get the new predicate */
-    auto               pred = dyn_cast<CmpInst>(IcmpInst)->getPredicate();
+    auto cmp_inst = dyn_cast<CmpInst>(IcmpInst);
+    if (!cmp_inst) { continue; }
+    auto               pred = cmp_inst->getPredicate();
     CmpInst::Predicate new_pred;
+
     if (pred == CmpInst::ICMP_SGT) {
 
       new_pred = CmpInst::ICMP_UGT;
@@ -602,12 +612,16 @@ size_t SplitComparesTransform::splitFPCompares(Module &M) {
     if (op_size != op1->getType()->getPrimitiveSizeInBits()) { continue; }
 
     const unsigned int sizeInBits = op0->getType()->getPrimitiveSizeInBits();
+
+    // BUG FIXME TODO: u64 does not work for > 64 bit ... e.g. 80 and 128 bit
+    if (sizeInBits > 64) { continue; }
+
     const unsigned int precision = sizeInBits == 32    ? 24
                                    : sizeInBits == 64  ? 53
                                    : sizeInBits == 128 ? 113
                                    : sizeInBits == 16  ? 11
-                                                      /* sizeInBits == 80 */
-                                                      : 65;
+                                   : sizeInBits == 80  ? 65
+                                                       : sizeInBits - 8;
 
     const unsigned           shiftR_exponent = precision - 1;
     const unsigned long long mask_fraction =
@@ -1111,7 +1125,9 @@ size_t SplitComparesTransform::splitIntCompares(Module &M, unsigned bitw) {
     auto op0 = IcmpInst->getOperand(0);
     auto op1 = IcmpInst->getOperand(1);
 
-    auto pred = dyn_cast<CmpInst>(IcmpInst)->getPredicate();
+    auto cmp_inst = dyn_cast<CmpInst>(IcmpInst);
+    if (!cmp_inst) { continue; }
+    auto pred = cmp_inst->getPredicate();
 
     BasicBlock *end_bb = bb->splitBasicBlock(BasicBlock::iterator(IcmpInst));
 
@@ -1300,12 +1316,9 @@ bool SplitComparesTransform::runOnModule(Module &M) {
 
     case 64:
       count += splitIntCompares(M, bitw);
-      /*
-            if (!be_quiet)
-              errs() << "Split-integer-compare-pass " << bitw << "bit: " <<
-         count
-                     << " split\n";
-      */
+      if (debug)
+        errs() << "Split-integer-compare-pass " << bitw << "bit: " << count
+               << " split\n";
       bitw >>= 1;
 #if LLVM_VERSION_MAJOR > 3 || \
     (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 7)
@@ -1313,12 +1326,9 @@ bool SplitComparesTransform::runOnModule(Module &M) {
 #endif
     case 32:
       count += splitIntCompares(M, bitw);
-      /*
-            if (!be_quiet)
-              errs() << "Split-integer-compare-pass " << bitw << "bit: " <<
-         count
-                     << " split\n";
-      */
+      if (debug)
+        errs() << "Split-integer-compare-pass " << bitw << "bit: " << count
+               << " split\n";
       bitw >>= 1;
 #if LLVM_VERSION_MAJOR > 3 || \
     (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR > 7)
@@ -1326,13 +1336,10 @@ bool SplitComparesTransform::runOnModule(Module &M) {
 #endif
     case 16:
       count += splitIntCompares(M, bitw);
-      /*
-            if (!be_quiet)
-              errs() << "Split-integer-compare-pass " << bitw << "bit: " <<
-         count
-                     << " split\n";
-      */
-      bitw >>= 1;
+      if (debug)
+        errs() << "Split-integer-compare-pass " << bitw << "bit: " << count
+               << " split\n";
+      // bitw >>= 1;
       break;
 
     default:
diff --git a/qemu_mode/QEMUAFL_VERSION b/qemu_mode/QEMUAFL_VERSION
index 97184973..8d95c359 100644
--- a/qemu_mode/QEMUAFL_VERSION
+++ b/qemu_mode/QEMUAFL_VERSION
@@ -1 +1 @@
-6ab6bf28de
+ddc4a9748d
diff --git a/qemu_mode/README.md b/qemu_mode/README.md
index 9818846d..a14cbe64 100644
--- a/qemu_mode/README.md
+++ b/qemu_mode/README.md
@@ -17,7 +17,7 @@ The idea and much of the initial implementation comes from Andrew Griffiths.
 The actual implementation on current QEMU (shipped as qemuafl) is from
 Andrea Fioraldi. Special thanks to abiondo that re-enabled TCG chaining.
 
-## 2) How to use
+## 2) How to use qemu_mode
 
 The feature is implemented with a patched QEMU. The simplest way
 to build it is to run ./build_qemu_support.sh. The script will download,
@@ -99,6 +99,13 @@ Just set AFL_QEMU_INST_RANGES=A,B,C...
 The format of the items in the list is either a range of addresses like 0x123-0x321
 or a module name like module.so (that is matched in the mapped object filename).
 
+Alternatively you can tell QEMU to ignore part of an address space for instrumentation.
+
+Just set AFL_QEMU_EXCLUDE_RANGES=A,B,C...
+
+The format of the items on the list is the same as for AFL_QEMU_INST_RANGES, and excluding ranges
+takes priority over any included ranges or AFL_INST_LIBS.
+
 ## 7) CompareCoverage
 
 CompareCoverage is a sub-instrumentation with effects similar to laf-intel.
@@ -176,7 +183,12 @@ Comparative measurements of execution speed or instrumentation coverage will be
 fairly meaningless if the optimization levels or instrumentation scopes don't
 match.
 
-## 12) Gotchas, feedback, bugs
+## 12) Other features
+
+With `AFL_QEMU_FORCE_DFL` you force QEMU to ignore the registered signal
+handlers of the target.
+
+## 13) Gotchas, feedback, bugs
 
 If you need to fix up checksums or do other cleanup on mutated test cases, see
 utils/custom_mutators/ for a viable solution.
@@ -197,19 +209,12 @@ with -march=core2, can help.
 Beyond that, this is an early-stage mechanism, so fields reports are welcome.
 You can send them to <afl-users@googlegroups.com>.
 
-## 13) Alternatives: static rewriting
+## 14) Alternatives: static rewriting
 
 Statically rewriting binaries just once, instead of attempting to translate
 them at run time, can be a faster alternative. That said, static rewriting is
 fraught with peril, because it depends on being able to properly and fully model
 program control flow without actually executing each and every code path.
 
-The best implementation is this one:
-
-  https://github.com/vanhauser-thc/afl-dyninst
-
-The issue however is Dyninst which is not rewriting the binaries so that
-they run stable. A lot of crashes happen, especially in C++ programs that
-use throw/catch. Try it first, and if it works for you be happy as it is
-2-3x as fast as qemu_mode, however usually not as fast as QEMU persistent mode.
-
+Check out the "Fuzzing binary-only targets" section in our main README.md and
+the docs/binaryonly_fuzzing.md document for more information and hints.
diff --git a/qemu_mode/build_qemu_support.sh b/qemu_mode/build_qemu_support.sh
index a435f6f6..38085389 100755
--- a/qemu_mode/build_qemu_support.sh
+++ b/qemu_mode/build_qemu_support.sh
@@ -59,51 +59,11 @@ if [ ! -f "../afl-showmap" ]; then
 
 fi
 
-PREREQ_NOTFOUND=
-for i in git wget sha384sum bison flex iconv patch pkg-config; do
-
-  T=`command -v "$i" 2>/dev/null`
-
-  if [ "$T" = "" ]; then
-
-    echo "[-] Error: '$i' not found, please install first."
-    PREREQ_NOTFOUND=1
-
-  fi
-
-done
-
-PYTHONBIN=`command -v python3 || command -v python || command -v python2`
-
-if [ "$PYTHONBIN" = "" ]; then
-  echo "[-] Error: 'python' not found, please install using 'sudo apt install python3'."
-  PREREQ_NOTFOUND=1
-fi
-
-
-if [ ! -d "/usr/include/glib-2.0/" -a ! -d "/usr/local/include/glib-2.0/" ]; then
-
-  echo "[-] Error: devel version of 'glib2' not found, please install first."
-  PREREQ_NOTFOUND=1
-
-fi
-
-if [ ! -d "/usr/include/pixman-1/" -a ! -d "/usr/local/include/pixman-1/" ]; then
-
-  echo "[-] Error: devel version of 'pixman-1' not found, please install first."
-  PREREQ_NOTFOUND=1
-
-fi
-
 if echo "$CC" | grep -qF /afl-; then
 
   echo "[-] Error: do not use afl-gcc or afl-clang to compile this tool."
-  PREREQ_NOTFOUND=1
-
-fi
-
-if [ "$PREREQ_NOTFOUND" = "1" ]; then
   exit 1
+
 fi
 
 echo "[+] All checks passed!"
@@ -131,9 +91,13 @@ test -d qemuafl || { echo "[-] Not checked out, please install git or check your
 echo "[+] Got qemuafl."
 
 cd "qemuafl" || exit 1
-echo "[*] Checking out $QEMUAFL_VERSION"
-sh -c 'git stash && git stash drop' 1>/dev/null 2>/dev/null
-git checkout "$QEMUAFL_VERSION" || echo Warning: could not check out to commit $QEMUAFL_VERSION
+if [ -n "$NO_CHECKOUT" ]; then
+  echo "[*] Skipping checkout to $QEMUAFL_VERSION"
+else
+  echo "[*] Checking out $QEMUAFL_VERSION"
+  sh -c 'git stash' 1>/dev/null 2>/dev/null
+  git checkout "$QEMUAFL_VERSION" || echo Warning: could not check out to commit $QEMUAFL_VERSION
+fi
 
 echo "[*] Making sure imported headers matches"
 cp "../../include/config.h" "./qemuafl/imported/" || exit 1
@@ -233,15 +197,13 @@ QEMU_CONF_FLAGS=" \
   --disable-xen \
   --disable-xen-pci-passthrough \
   --disable-xfsctl \
-  --enable-pie \
-  --python=${PYTHONBIN} \
   --target-list="${CPU_TARGET}-linux-user" \
   --without-default-devices \
   "
 
 if [ -n "${CROSS_PREFIX}" ]; then
 
-  QEMU_CONF_FLAGS="${QEMU_CONF_FLAGS} --cross-prefix=${CROSS_PREFIX}"
+  QEMU_CONF_FLAGS="$QEMU_CONF_FLAGS --cross-prefix=$CROSS_PREFIX"
 
 fi
 
@@ -249,10 +211,15 @@ if [ "$STATIC" = "1" ]; then
 
   echo Building STATIC binary
 
-  QEMU_CONF_FLAGS="${QEMU_CONF_FLAGS} \
+  QEMU_CONF_FLAGS="$QEMU_CONF_FLAGS \
     --static \
     --extra-cflags=-DAFL_QEMU_STATIC_BUILD=1 \
     "
+
+else
+
+  QEMU_CONF_FLAGS="${QEMU_CONF_FLAGS} --enable-pie "
+
 fi
 
 if [ "$DEBUG" = "1" ]; then
@@ -262,7 +229,7 @@ if [ "$DEBUG" = "1" ]; then
   # --enable-gcov might go here but incurs a mesonbuild error on meson
   # versions prior to 0.56:
   # https://github.com/qemu/meson/commit/903d5dd8a7dc1d6f8bef79e66d6ebc07c
-  QEMU_CONF_FLAGS="${QEMU_CONF_FLAGS} \
+  QEMU_CONF_FLAGS="$QEMU_CONF_FLAGS \
     --disable-strip \
     --enable-debug \
     --enable-debug-info \
@@ -275,7 +242,7 @@ if [ "$DEBUG" = "1" ]; then
 
 else
 
-  QEMU_CONF_FLAGS="${QEMU_CONF_FLAGS} \
+  QEMU_CONF_FLAGS="$QEMU_CONF_FLAGS \
     --disable-debug-info \
     --disable-debug-mutex \
     --disable-debug-tcg \
@@ -290,7 +257,7 @@ if [ "$PROFILING" = "1" ]; then
 
   echo Building PROFILED binary
 
-  QEMU_CONF_FLAGS="${QEMU_CONF_FLAGS} \
+  QEMU_CONF_FLAGS="$QEMU_CONF_FLAGS \
     --enable-gprof \
     --enable-profiler \
     "
@@ -298,7 +265,7 @@ if [ "$PROFILING" = "1" ]; then
 fi
 
 # shellcheck disable=SC2086
-./configure ${QEMU_CONF_FLAGS} || exit 1
+./configure $QEMU_CONF_FLAGS || exit 1
 
 echo "[+] Configuration complete."
 
@@ -364,18 +331,46 @@ ORIG_CROSS="$CROSS"
 
 if [ "$ORIG_CROSS" = "" ]; then
   CROSS=$CPU_TARGET-linux-gnu-gcc
+  if ! command -v "$CROSS" > /dev/null
+  then # works on Arch Linux
+    CROSS=$CPU_TARGET-pc-linux-gnu-gcc
+  fi
+  if ! command -v "$CROSS" > /dev/null && [ "$CPU_TARGET" = "i386" ]
+  then
+    CROSS=i686-linux-gnu-gcc
+    if ! command -v "$CROSS" > /dev/null
+    then # works on Arch Linux
+      CROSS=i686-pc-linux-gnu-gcc
+    fi
+    if ! command -v "$CROSS" > /dev/null && [ "`uname -m`" = "x86_64" ]
+    then # set -m32
+      test "$CC" = "" && CC="gcc"
+      CROSS="$CC"
+      CROSS_FLAGS=-m32
+    fi
+  fi
 fi
 
-if ! command -v "$CROSS" > /dev/null
-then
+if ! command -v "$CROSS" > /dev/null ; then
+  if [ "$CPU_TARGET" = "$(uname -m)" ] ; then
+    echo "[+] Building afl++ qemu support libraries with CC=$CC"
+    echo "[+] Building libcompcov ..."
+    make -C libcompcov && echo "[+] libcompcov ready"
+    echo "[+] Building unsigaction ..."
+    make -C unsigaction && echo "[+] unsigaction ready"
+    echo "[+] Building libqasan ..."
+    make -C libqasan && echo "[+] unsigaction ready"
+  else
     echo "[!] Cross compiler $CROSS could not be found, cannot compile libcompcov libqasan and unsigaction"
+  fi
 else
+  echo "[+] Building afl++ qemu support libraries with CC=\"$CROSS $CROSS_FLAGS\""
   echo "[+] Building libcompcov ..."
-  make -C libcompcov CC=$CROSS && echo "[+] libcompcov ready"
+  make -C libcompcov CC="$CROSS $CROSS_FLAGS" && echo "[+] libcompcov ready"
   echo "[+] Building unsigaction ..."
-  make -C unsigaction CC=$CROSS && echo "[+] unsigaction ready"
+  make -C unsigaction CC="$CROSS $CROSS_FLAGS" && echo "[+] unsigaction ready"
   echo "[+] Building libqasan ..."
-  make -C libqasan CC=$CROSS && echo "[+] unsigaction ready"
+  make -C libqasan CC="$CROSS $CROSS_FLAGS" && echo "[+] unsigaction ready"
 fi
 
 echo "[+] All done for qemu_mode, enjoy!"
diff --git a/qemu_mode/libcompcov/libcompcov.so.c b/qemu_mode/libcompcov/libcompcov.so.c
index 23f465a4..4fc84e62 100644
--- a/qemu_mode/libcompcov/libcompcov.so.c
+++ b/qemu_mode/libcompcov/libcompcov.so.c
@@ -29,6 +29,8 @@
 #include <sys/types.h>
 #include <sys/shm.h>
 #include <stdbool.h>
+#include <stdint.h>
+#include <inttypes.h>
 
 #include "types.h"
 #include "config.h"
@@ -159,14 +161,15 @@ static void __compcov_load(void) {
 
 }
 
-static void __compcov_trace(u64 cur_loc, const u8 *v0, const u8 *v1, size_t n) {
+static void __compcov_trace(uintptr_t cur_loc, const u8 *v0, const u8 *v1,
+                            size_t n) {
 
   size_t i;
 
   if (debug_fd != 1) {
 
     char debugbuf[4096];
-    snprintf(debugbuf, sizeof(debugbuf), "0x%llx %s %s %zu\n", cur_loc,
+    snprintf(debugbuf, sizeof(debugbuf), "0x%" PRIxPTR " %s %s %zu\n", cur_loc,
              v0 == NULL ? "(null)" : (char *)v0,
              v1 == NULL ? "(null)" : (char *)v1, n);
     write(debug_fd, debugbuf, strlen(debugbuf));
@@ -206,7 +209,7 @@ int strcmp(const char *str1, const char *str2) {
 
     if (n <= MAX_CMP_LENGTH) {
 
-      u64 cur_loc = (u64)retaddr;
+      uintptr_t cur_loc = (uintptr_t)retaddr;
       cur_loc = (cur_loc >> 4) ^ (cur_loc << 8);
       cur_loc &= MAP_SIZE - 1;
 
@@ -235,7 +238,7 @@ int strncmp(const char *str1, const char *str2, size_t len) {
 
     if (n <= MAX_CMP_LENGTH) {
 
-      u64 cur_loc = (u64)retaddr;
+      uintptr_t cur_loc = (uintptr_t)retaddr;
       cur_loc = (cur_loc >> 4) ^ (cur_loc << 8);
       cur_loc &= MAP_SIZE - 1;
 
@@ -265,7 +268,7 @@ int strcasecmp(const char *str1, const char *str2) {
 
     if (n <= MAX_CMP_LENGTH) {
 
-      u64 cur_loc = (u64)retaddr;
+      uintptr_t cur_loc = (uintptr_t)retaddr;
       cur_loc = (cur_loc >> 4) ^ (cur_loc << 8);
       cur_loc &= MAP_SIZE - 1;
 
@@ -296,7 +299,7 @@ int strncasecmp(const char *str1, const char *str2, size_t len) {
 
     if (n <= MAX_CMP_LENGTH) {
 
-      u64 cur_loc = (u64)retaddr;
+      uintptr_t cur_loc = (uintptr_t)retaddr;
       cur_loc = (cur_loc >> 4) ^ (cur_loc << 8);
       cur_loc &= MAP_SIZE - 1;
 
@@ -324,7 +327,7 @@ int memcmp(const void *mem1, const void *mem2, size_t len) {
 
     if (n <= MAX_CMP_LENGTH) {
 
-      u64 cur_loc = (u64)retaddr;
+      uintptr_t cur_loc = (uintptr_t)retaddr;
       cur_loc = (cur_loc >> 4) ^ (cur_loc << 8);
       cur_loc &= MAP_SIZE - 1;
 
diff --git a/qemu_mode/libqasan/README.md b/qemu_mode/libqasan/README.md
index b5c77044..4a241233 100644
--- a/qemu_mode/libqasan/README.md
+++ b/qemu_mode/libqasan/README.md
@@ -4,16 +4,25 @@ This library is the injected runtime used by QEMU AddressSanitizer (QASan).
 
 The original repository is [here](https://github.com/andreafioraldi/qasan).
 
-The version embedded in qemuafl is an updated version of just the usermode part and this runtime in injected via LD_PRELOAD (so works just for dynamically linked binaries).
+The version embedded in qemuafl is an updated version of just the usermode part
+and this runtime is injected via LD_PRELOAD (so works just for dynamically
+linked binaries).
 
-The usage is super simple, just set the env var `AFL_USE_QASAN=1` when fuzzing in qemu mode (-Q). afl-fuzz will automatically set AFL_PRELOAD to load this library and enable the QASan instrumentation in afl-qemu-trace.
+The usage is super simple, just set the env var `AFL_USE_QASAN=1` when fuzzing
+in qemu mode (-Q). afl-fuzz will automatically set AFL_PRELOAD to load this
+library and enable the QASan instrumentation in afl-qemu-trace.
 
-For debugging purposes, we still suggest to run the original QASan as the stacktrace support for ARM (just a debug feature, it does not affect the bug finding capabilities during fuzzing) is WIP.
+For debugging purposes, we still suggest running the original QASan, as the
+stacktrace support for ARM (just a debug feature, it does not affect the bug
+finding capabilities during fuzzing) is WIP.
 
-### When I should use QASan?
+### When should I use QASan?
 
-If your target binary is PIC x86_64, you should before give a try to [retrowrite](https://github.com/HexHive/retrowrite) for static rewriting.
+If your target binary is PIC x86_64, you should also give
+[retrowrite](https://github.com/HexHive/retrowrite) a try for static rewriting.
 
-If it fails, or if your binary is for another architecture, or you want to use persistent and snapshot mdoe, AFL++ QASan mode is what you want/have to use.
+If it fails, or if your binary is for another architecture, or you want to use
+persistent and snapshot mode, AFL++ QASan mode is what you want/have to use.
 
-Note that the overhead of libdislocator when combined with QEMU mode is much lower but it can catch less bugs. This is a short blanket, take your choice.
+Note that the overhead of libdislocator when combined with QEMU mode is much
+lower but it can catch fewer bugs. This is a trade-off, so take your pick.
diff --git a/qemu_mode/libqasan/dlmalloc.c b/qemu_mode/libqasan/dlmalloc.c
index 39ca4301..aff58ad5 100644
--- a/qemu_mode/libqasan/dlmalloc.c
+++ b/qemu_mode/libqasan/dlmalloc.c
@@ -1,3 +1,7 @@
+#include <features.h>
+
+#ifndef __GLIBC__
+
 /*
   This is a version (aka dlmalloc) of malloc/free/realloc written by
   Doug Lea and released to the public domain, as explained at
@@ -524,202 +528,203 @@ MAX_RELEASE_CHECK_RATE   default: 4095 unless not HAVE_MMAP
   improvement at the expense of carrying around more memory.
 */
 
-#define USE_DL_PREFIX
-
-/* Version identifier to allow people to support multiple versions */
-#ifndef DLMALLOC_VERSION
-  #define DLMALLOC_VERSION 20806
-#endif                                                  /* DLMALLOC_VERSION */
-
-#ifndef DLMALLOC_EXPORT
-  #define DLMALLOC_EXPORT extern
-#endif
-
-#ifndef WIN32
-  #ifdef _WIN32
-    #define WIN32 1
-  #endif                                                          /* _WIN32 */
-  #ifdef _WIN32_WCE
-    #define LACKS_FCNTL_H
-    #define WIN32 1
-  #endif                                                      /* _WIN32_WCE */
-#endif                                                             /* WIN32 */
-#ifdef WIN32
-  #define WIN32_LEAN_AND_MEAN
-  #include <windows.h>
-  #include <tchar.h>
-  #define HAVE_MMAP 1
-  #define HAVE_MORECORE 0
-  #define LACKS_UNISTD_H
-  #define LACKS_SYS_PARAM_H
-  #define LACKS_SYS_MMAN_H
-  #define LACKS_STRING_H
-  #define LACKS_STRINGS_H
-  #define LACKS_SYS_TYPES_H
-  #define LACKS_ERRNO_H
-  #define LACKS_SCHED_H
-  #ifndef MALLOC_FAILURE_ACTION
-    #define MALLOC_FAILURE_ACTION
-  #endif                                           /* MALLOC_FAILURE_ACTION */
-  #ifndef MMAP_CLEARS
-    #ifdef _WIN32_WCE                    /* WINCE reportedly does not clear */
-      #define MMAP_CLEARS 0
-    #else
-      #define MMAP_CLEARS 1
-    #endif                                                    /* _WIN32_WCE */
-  #endif                                                      /*MMAP_CLEARS */
-#endif                                                             /* WIN32 */
+  #define USE_DL_PREFIX
 
-#if defined(DARWIN) || defined(_DARWIN)
-  /* Mac OSX docs advise not to use sbrk; it seems better to use mmap */
-  #ifndef HAVE_MORECORE
-    #define HAVE_MORECORE 0
+  /* Version identifier to allow people to support multiple versions */
+  #ifndef DLMALLOC_VERSION
+    #define DLMALLOC_VERSION 20806
+  #endif                                                /* DLMALLOC_VERSION */
+
+  #ifndef DLMALLOC_EXPORT
+    #define DLMALLOC_EXPORT extern
+  #endif
+
+  #ifndef WIN32
+    #ifdef _WIN32
+      #define WIN32 1
+    #endif                                                        /* _WIN32 */
+    #ifdef _WIN32_WCE
+      #define LACKS_FCNTL_H
+      #define WIN32 1
+    #endif                                                    /* _WIN32_WCE */
+  #endif                                                           /* WIN32 */
+  #ifdef WIN32
+    #define WIN32_LEAN_AND_MEAN
+    #include <windows.h>
+    #include <tchar.h>
     #define HAVE_MMAP 1
-    /* OSX allocators provide 16 byte alignment */
-    #ifndef MALLOC_ALIGNMENT
-      #define MALLOC_ALIGNMENT ((size_t)16U)
-    #endif
-  #endif                                                   /* HAVE_MORECORE */
-#endif                                                            /* DARWIN */
-
-#ifndef LACKS_SYS_TYPES_H
-  #include <sys/types.h>                                      /* For size_t */
-#endif                                                 /* LACKS_SYS_TYPES_H */
-
-/* The maximum possible size_t value has all bits set */
-#define MAX_SIZE_T (~(size_t)0)
-
-#ifndef USE_LOCKS             /* ensure true if spin or recursive locks set */
-  #define USE_LOCKS                                      \
-    ((defined(USE_SPIN_LOCKS) && USE_SPIN_LOCKS != 0) || \
-     (defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0))
-#endif                                                         /* USE_LOCKS */
-
-#if USE_LOCKS   /* Spin locks for gcc >= 4.1, older gcc on x86, MSC >= 1310 */
-  #if ((defined(__GNUC__) &&                                         \
-        ((__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) || \
-         defined(__i386__) || defined(__x86_64__))) ||               \
-       (defined(_MSC_VER) && _MSC_VER >= 1310))
-    #ifndef USE_SPIN_LOCKS
-      #define USE_SPIN_LOCKS 1
-    #endif                                                /* USE_SPIN_LOCKS */
-  #elif USE_SPIN_LOCKS
-    #error "USE_SPIN_LOCKS defined without implementation"
-  #endif                                          /* ... locks available... */
-#elif !defined(USE_SPIN_LOCKS)
-  #define USE_SPIN_LOCKS 0
-#endif                                                         /* USE_LOCKS */
-
-#ifndef ONLY_MSPACES
-  #define ONLY_MSPACES 0
-#endif                                                      /* ONLY_MSPACES */
-#ifndef MSPACES
-  #if ONLY_MSPACES
-    #define MSPACES 1
-  #else                                                     /* ONLY_MSPACES */
-    #define MSPACES 0
-  #endif                                                    /* ONLY_MSPACES */
-#endif                                                           /* MSPACES */
-#ifndef MALLOC_ALIGNMENT
-  #define MALLOC_ALIGNMENT ((size_t)(2 * sizeof(void *)))
-#endif                                                  /* MALLOC_ALIGNMENT */
-#ifndef FOOTERS
-  #define FOOTERS 0
-#endif                                                           /* FOOTERS */
-#ifndef ABORT
-  #define ABORT abort()
-#endif                                                             /* ABORT */
-#ifndef ABORT_ON_ASSERT_FAILURE
-  #define ABORT_ON_ASSERT_FAILURE 1
-#endif                                           /* ABORT_ON_ASSERT_FAILURE */
-#ifndef PROCEED_ON_ERROR
-  #define PROCEED_ON_ERROR 0
-#endif                                                  /* PROCEED_ON_ERROR */
-
-#ifndef INSECURE
-  #define INSECURE 0
-#endif                                                          /* INSECURE */
-#ifndef MALLOC_INSPECT_ALL
-  #define MALLOC_INSPECT_ALL 0
-#endif                                                /* MALLOC_INSPECT_ALL */
-#ifndef HAVE_MMAP
-  #define HAVE_MMAP 1
-#endif                                                         /* HAVE_MMAP */
-#ifndef MMAP_CLEARS
-  #define MMAP_CLEARS 1
-#endif                                                       /* MMAP_CLEARS */
-#ifndef HAVE_MREMAP
-  #ifdef linux
-    #define HAVE_MREMAP 1
-    #define _GNU_SOURCE                     /* Turns on mremap() definition */
-  #else                                                            /* linux */
-    #define HAVE_MREMAP 0
-  #endif                                                           /* linux */
-#endif                                                       /* HAVE_MREMAP */
-#ifndef MALLOC_FAILURE_ACTION
-  #define MALLOC_FAILURE_ACTION errno = ENOMEM;
-#endif                                             /* MALLOC_FAILURE_ACTION */
-#ifndef HAVE_MORECORE
-  #if ONLY_MSPACES
     #define HAVE_MORECORE 0
-  #else                                                     /* ONLY_MSPACES */
-    #define HAVE_MORECORE 1
+    #define LACKS_UNISTD_H
+    #define LACKS_SYS_PARAM_H
+    #define LACKS_SYS_MMAN_H
+    #define LACKS_STRING_H
+    #define LACKS_STRINGS_H
+    #define LACKS_SYS_TYPES_H
+    #define LACKS_ERRNO_H
+    #define LACKS_SCHED_H
+    #ifndef MALLOC_FAILURE_ACTION
+      #define MALLOC_FAILURE_ACTION
+    #endif                                         /* MALLOC_FAILURE_ACTION */
+    #ifndef MMAP_CLEARS
+      #ifdef _WIN32_WCE                  /* WINCE reportedly does not clear */
+        #define MMAP_CLEARS 0
+      #else
+        #define MMAP_CLEARS 1
+      #endif                                                  /* _WIN32_WCE */
+    #endif                                                    /*MMAP_CLEARS */
+  #endif                                                           /* WIN32 */
+
+  #if defined(DARWIN) || defined(_DARWIN)
+    /* Mac OSX docs advise not to use sbrk; it seems better to use mmap */
+    #ifndef HAVE_MORECORE
+      #define HAVE_MORECORE 0
+      #define HAVE_MMAP 1
+      /* OSX allocators provide 16 byte alignment */
+      #ifndef MALLOC_ALIGNMENT
+        #define MALLOC_ALIGNMENT ((size_t)16U)
+      #endif
+    #endif                                                 /* HAVE_MORECORE */
+  #endif                                                          /* DARWIN */
+
+  #ifndef LACKS_SYS_TYPES_H
+    #include <sys/types.h>                                    /* For size_t */
+  #endif                                               /* LACKS_SYS_TYPES_H */
+
+  /* The maximum possible size_t value has all bits set */
+  #define MAX_SIZE_T (~(size_t)0)
+
+  #ifndef USE_LOCKS           /* ensure true if spin or recursive locks set */
+    #define USE_LOCKS                                      \
+      ((defined(USE_SPIN_LOCKS) && USE_SPIN_LOCKS != 0) || \
+       (defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0))
+  #endif                                                       /* USE_LOCKS */
+
+  #if USE_LOCKS /* Spin locks for gcc >= 4.1, older gcc on x86, MSC >= 1310 */
+    #if ((defined(__GNUC__) &&                                         \
+          ((__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) || \
+           defined(__i386__) || defined(__x86_64__))) ||               \
+         (defined(_MSC_VER) && _MSC_VER >= 1310))
+      #ifndef USE_SPIN_LOCKS
+        #define USE_SPIN_LOCKS 1
+      #endif                                              /* USE_SPIN_LOCKS */
+    #elif USE_SPIN_LOCKS
+      #error "USE_SPIN_LOCKS defined without implementation"
+    #endif                                        /* ... locks available... */
+  #elif !defined(USE_SPIN_LOCKS)
+    #define USE_SPIN_LOCKS 0
+  #endif                                                       /* USE_LOCKS */
+
+  #ifndef ONLY_MSPACES
+    #define ONLY_MSPACES 0
   #endif                                                    /* ONLY_MSPACES */
-#endif                                                     /* HAVE_MORECORE */
-#if !HAVE_MORECORE
-  #define MORECORE_CONTIGUOUS 0
-#else                                                     /* !HAVE_MORECORE */
-  #define MORECORE_DEFAULT sbrk
-  #ifndef MORECORE_CONTIGUOUS
-    #define MORECORE_CONTIGUOUS 1
-  #endif                                             /* MORECORE_CONTIGUOUS */
-#endif                                                     /* HAVE_MORECORE */
-#ifndef DEFAULT_GRANULARITY
-  #if (MORECORE_CONTIGUOUS || defined(WIN32))
-    #define DEFAULT_GRANULARITY (0)   /* 0 means to compute in init_mparams */
-  #else                                              /* MORECORE_CONTIGUOUS */
-    #define DEFAULT_GRANULARITY ((size_t)64U * (size_t)1024U)
-  #endif                                             /* MORECORE_CONTIGUOUS */
-#endif                                               /* DEFAULT_GRANULARITY */
-#ifndef DEFAULT_TRIM_THRESHOLD
-  #ifndef MORECORE_CANNOT_TRIM
-    #define DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U)
-  #else                                             /* MORECORE_CANNOT_TRIM */
-    #define DEFAULT_TRIM_THRESHOLD MAX_SIZE_T
-  #endif                                            /* MORECORE_CANNOT_TRIM */
-#endif                                            /* DEFAULT_TRIM_THRESHOLD */
-#ifndef DEFAULT_MMAP_THRESHOLD
-  #if HAVE_MMAP
-    #define DEFAULT_MMAP_THRESHOLD ((size_t)256U * (size_t)1024U)
-  #else                                                        /* HAVE_MMAP */
-    #define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T
-  #endif                                                       /* HAVE_MMAP */
-#endif                                            /* DEFAULT_MMAP_THRESHOLD */
-#ifndef MAX_RELEASE_CHECK_RATE
-  #if HAVE_MMAP
-    #define MAX_RELEASE_CHECK_RATE 4095
-  #else
-    #define MAX_RELEASE_CHECK_RATE MAX_SIZE_T
+  #ifndef MSPACES
+    #if ONLY_MSPACES
+      #define MSPACES 1
+    #else                                                   /* ONLY_MSPACES */
+      #define MSPACES 0
+    #endif                                                  /* ONLY_MSPACES */
+  #endif                                                         /* MSPACES */
+  #ifndef MALLOC_ALIGNMENT
+    #define MALLOC_ALIGNMENT ((size_t)(2 * sizeof(void *)))
+  #endif                                                /* MALLOC_ALIGNMENT */
+  #ifndef FOOTERS
+    #define FOOTERS 0
+  #endif                                                         /* FOOTERS */
+  #ifndef ABORT
+    #define ABORT abort()
+  #endif                                                           /* ABORT */
+  #ifndef ABORT_ON_ASSERT_FAILURE
+    #define ABORT_ON_ASSERT_FAILURE 1
+  #endif                                         /* ABORT_ON_ASSERT_FAILURE */
+  #ifndef PROCEED_ON_ERROR
+    #define PROCEED_ON_ERROR 0
+  #endif                                                /* PROCEED_ON_ERROR */
+
+  #ifndef INSECURE
+    #define INSECURE 0
+  #endif                                                        /* INSECURE */
+  #ifndef MALLOC_INSPECT_ALL
+    #define MALLOC_INSPECT_ALL 0
+  #endif                                              /* MALLOC_INSPECT_ALL */
+  #ifndef HAVE_MMAP
+    #define HAVE_MMAP 1
   #endif                                                       /* HAVE_MMAP */
-#endif                                            /* MAX_RELEASE_CHECK_RATE */
-#ifndef USE_BUILTIN_FFS
-  #define USE_BUILTIN_FFS 0
-#endif                                                   /* USE_BUILTIN_FFS */
-#ifndef USE_DEV_RANDOM
-  #define USE_DEV_RANDOM 0
-#endif                                                    /* USE_DEV_RANDOM */
-#ifndef NO_MALLINFO
-  #define NO_MALLINFO 0
-#endif                                                       /* NO_MALLINFO */
-#ifndef MALLINFO_FIELD_TYPE
-  #define MALLINFO_FIELD_TYPE size_t
-#endif                                               /* MALLINFO_FIELD_TYPE */
-#ifndef NO_MALLOC_STATS
-  #define NO_MALLOC_STATS 0
-#endif                                                   /* NO_MALLOC_STATS */
-#ifndef NO_SEGMENT_TRAVERSAL
-  #define NO_SEGMENT_TRAVERSAL 0
-#endif                                              /* NO_SEGMENT_TRAVERSAL */
+  #ifndef MMAP_CLEARS
+    #define MMAP_CLEARS 1
+  #endif                                                     /* MMAP_CLEARS */
+  #ifndef HAVE_MREMAP
+    #ifdef linux
+      #define HAVE_MREMAP 1
+      #define _GNU_SOURCE                   /* Turns on mremap() definition */
+    #else                                                          /* linux */
+      #define HAVE_MREMAP 0
+    #endif                                                         /* linux */
+  #endif                                                     /* HAVE_MREMAP */
+  #ifndef MALLOC_FAILURE_ACTION
+    #define MALLOC_FAILURE_ACTION errno = ENOMEM;
+  #endif                                           /* MALLOC_FAILURE_ACTION */
+  #ifndef HAVE_MORECORE
+    #if ONLY_MSPACES
+      #define HAVE_MORECORE 0
+    #else                                                   /* ONLY_MSPACES */
+      #define HAVE_MORECORE 1
+    #endif                                                  /* ONLY_MSPACES */
+  #endif                                                   /* HAVE_MORECORE */
+  #if !HAVE_MORECORE
+    #define MORECORE_CONTIGUOUS 0
+  #else                                                   /* !HAVE_MORECORE */
+    #define MORECORE_DEFAULT sbrk
+    #ifndef MORECORE_CONTIGUOUS
+      #define MORECORE_CONTIGUOUS 1
+    #endif                                           /* MORECORE_CONTIGUOUS */
+  #endif                                                   /* HAVE_MORECORE */
+  #ifndef DEFAULT_GRANULARITY
+    #if (MORECORE_CONTIGUOUS || defined(WIN32))
+      #define DEFAULT_GRANULARITY (0) /* 0 means to compute in init_mparams */
+    #else                                            /* MORECORE_CONTIGUOUS */
+      #define DEFAULT_GRANULARITY ((size_t)64U * (size_t)1024U)
+    #endif                                           /* MORECORE_CONTIGUOUS */
+  #endif                                             /* DEFAULT_GRANULARITY */
+  #ifndef DEFAULT_TRIM_THRESHOLD
+    #ifndef MORECORE_CANNOT_TRIM
+      #define DEFAULT_TRIM_THRESHOLD \
+        ((size_t)2U * (size_t)1024U * (size_t)1024U)
+    #else                                           /* MORECORE_CANNOT_TRIM */
+      #define DEFAULT_TRIM_THRESHOLD MAX_SIZE_T
+    #endif                                          /* MORECORE_CANNOT_TRIM */
+  #endif                                          /* DEFAULT_TRIM_THRESHOLD */
+  #ifndef DEFAULT_MMAP_THRESHOLD
+    #if HAVE_MMAP
+      #define DEFAULT_MMAP_THRESHOLD ((size_t)256U * (size_t)1024U)
+    #else                                                      /* HAVE_MMAP */
+      #define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T
+    #endif                                                     /* HAVE_MMAP */
+  #endif                                          /* DEFAULT_MMAP_THRESHOLD */
+  #ifndef MAX_RELEASE_CHECK_RATE
+    #if HAVE_MMAP
+      #define MAX_RELEASE_CHECK_RATE 4095
+    #else
+      #define MAX_RELEASE_CHECK_RATE MAX_SIZE_T
+    #endif                                                     /* HAVE_MMAP */
+  #endif                                          /* MAX_RELEASE_CHECK_RATE */
+  #ifndef USE_BUILTIN_FFS
+    #define USE_BUILTIN_FFS 0
+  #endif                                                 /* USE_BUILTIN_FFS */
+  #ifndef USE_DEV_RANDOM
+    #define USE_DEV_RANDOM 0
+  #endif                                                  /* USE_DEV_RANDOM */
+  #ifndef NO_MALLINFO
+    #define NO_MALLINFO 0
+  #endif                                                     /* NO_MALLINFO */
+  #ifndef MALLINFO_FIELD_TYPE
+    #define MALLINFO_FIELD_TYPE size_t
+  #endif                                             /* MALLINFO_FIELD_TYPE */
+  #ifndef NO_MALLOC_STATS
+    #define NO_MALLOC_STATS 0
+  #endif                                                 /* NO_MALLOC_STATS */
+  #ifndef NO_SEGMENT_TRAVERSAL
+    #define NO_SEGMENT_TRAVERSAL 0
+  #endif                                            /* NO_SEGMENT_TRAVERSAL */
 
 /*
   mallopt tuning options.  SVID/XPG defines four standard parameter
@@ -728,48 +733,48 @@ MAX_RELEASE_CHECK_RATE   default: 4095 unless not HAVE_MMAP
   malloc does support the following options.
 */
 
-#undef M_TRIM_THRESHOLD
-#undef M_GRANULARITY
-#undef M_MMAP_THRESHOLD
-#define M_TRIM_THRESHOLD (-1)
-#define M_GRANULARITY (-2)
-#define M_MMAP_THRESHOLD (-3)
+  #undef M_TRIM_THRESHOLD
+  #undef M_GRANULARITY
+  #undef M_MMAP_THRESHOLD
+  #define M_TRIM_THRESHOLD (-1)
+  #define M_GRANULARITY (-2)
+  #define M_MMAP_THRESHOLD (-3)
 
 /* ------------------------ Mallinfo declarations ------------------------ */
 
-#if !NO_MALLINFO
-/*
-  This version of malloc supports the standard SVID/XPG mallinfo
-  routine that returns a struct containing usage properties and
-  statistics. It should work on any system that has a
-  /usr/include/malloc.h defining struct mallinfo.  The main
-  declaration needed is the mallinfo struct that is returned (by-copy)
-  by mallinfo().  The malloinfo struct contains a bunch of fields that
-  are not even meaningful in this version of malloc.  These fields are
-  are instead filled by mallinfo() with other numbers that might be of
-  interest.
-
-  HAVE_USR_INCLUDE_MALLOC_H should be set if you have a
-  /usr/include/malloc.h file that includes a declaration of struct
-  mallinfo.  If so, it is included; else a compliant version is
-  declared below.  These must be precisely the same for mallinfo() to
-  work.  The original SVID version of this struct, defined on most
-  systems with mallinfo, declares all fields as ints. But some others
-  define as unsigned long. If your system defines the fields using a
-  type of different width than listed here, you MUST #include your
-  system version and #define HAVE_USR_INCLUDE_MALLOC_H.
-*/
+  #if !NO_MALLINFO
+  /*
+    This version of malloc supports the standard SVID/XPG mallinfo
+    routine that returns a struct containing usage properties and
+    statistics. It should work on any system that has a
+    /usr/include/malloc.h defining struct mallinfo.  The main
+    declaration needed is the mallinfo struct that is returned (by-copy)
+    by mallinfo().  The mallinfo struct contains a bunch of fields that
+    are not even meaningful in this version of malloc.  These fields are
+    instead filled by mallinfo() with other numbers that might be of
+    interest.
+
+    HAVE_USR_INCLUDE_MALLOC_H should be set if you have a
+    /usr/include/malloc.h file that includes a declaration of struct
+    mallinfo.  If so, it is included; else a compliant version is
+    declared below.  These must be precisely the same for mallinfo() to
+    work.  The original SVID version of this struct, defined on most
+    systems with mallinfo, declares all fields as ints. But some others
+    define as unsigned long. If your system defines the fields using a
+    type of different width than listed here, you MUST #include your
+    system version and #define HAVE_USR_INCLUDE_MALLOC_H.
+  */
 
-/* #define HAVE_USR_INCLUDE_MALLOC_H */
+  /* #define HAVE_USR_INCLUDE_MALLOC_H */
 
-  #ifdef HAVE_USR_INCLUDE_MALLOC_H
-    #include "/usr/include/malloc.h"
-  #else                                        /* HAVE_USR_INCLUDE_MALLOC_H */
-    #ifndef STRUCT_MALLINFO_DECLARED
-      /* HP-UX (and others?) redefines mallinfo unless _STRUCT_MALLINFO is
-       * defined */
-      #define _STRUCT_MALLINFO
-      #define STRUCT_MALLINFO_DECLARED 1
+    #ifdef HAVE_USR_INCLUDE_MALLOC_H
+      #include "/usr/include/malloc.h"
+    #else                                      /* HAVE_USR_INCLUDE_MALLOC_H */
+      #ifndef STRUCT_MALLINFO_DECLARED
+        /* HP-UX (and others?) redefines mallinfo unless _STRUCT_MALLINFO is
+         * defined */
+        #define _STRUCT_MALLINFO
+        #define STRUCT_MALLINFO_DECLARED 1
 struct mallinfo {
 
   MALLINFO_FIELD_TYPE arena;     /* non-mmapped space allocated from system */
@@ -785,71 +790,71 @@ struct mallinfo {
 
 };
 
-    #endif                                      /* STRUCT_MALLINFO_DECLARED */
-  #endif                                       /* HAVE_USR_INCLUDE_MALLOC_H */
-#endif                                                       /* NO_MALLINFO */
+      #endif                                    /* STRUCT_MALLINFO_DECLARED */
+    #endif                                     /* HAVE_USR_INCLUDE_MALLOC_H */
+  #endif                                                     /* NO_MALLINFO */
 
 /*
   Try to persuade compilers to inline. The most critical functions for
   inlining are defined as macros, so these aren't used for them.
 */
 
-#ifndef FORCEINLINE
-  #if defined(__GNUC__)
-    #define FORCEINLINE __inline __attribute__((always_inline))
-  #elif defined(_MSC_VER)
-    #define FORCEINLINE __forceinline
+  #ifndef FORCEINLINE
+    #if defined(__GNUC__)
+      #define FORCEINLINE __inline __attribute__((always_inline))
+    #elif defined(_MSC_VER)
+      #define FORCEINLINE __forceinline
+    #endif
   #endif
-#endif
-#ifndef NOINLINE
-  #if defined(__GNUC__)
-    #define NOINLINE __attribute__((noinline))
-  #elif defined(_MSC_VER)
-    #define NOINLINE __declspec(noinline)
-  #else
-    #define NOINLINE
+  #ifndef NOINLINE
+    #if defined(__GNUC__)
+      #define NOINLINE __attribute__((noinline))
+    #elif defined(_MSC_VER)
+      #define NOINLINE __declspec(noinline)
+    #else
+      #define NOINLINE
+    #endif
   #endif
-#endif
 
-#ifdef __cplusplus
+  #ifdef __cplusplus
 extern "C" {
 
+    #ifndef FORCEINLINE
+      #define FORCEINLINE inline
+    #endif
+  #endif                                                     /* __cplusplus */
   #ifndef FORCEINLINE
-    #define FORCEINLINE inline
+    #define FORCEINLINE
   #endif
-#endif                                                       /* __cplusplus */
-#ifndef FORCEINLINE
-  #define FORCEINLINE
-#endif
-
-#if !ONLY_MSPACES
-
-/* ------------------- Declarations of public routines ------------------- */
-
-  #ifndef USE_DL_PREFIX
-    #define dlcalloc calloc
-    #define dlfree free
-    #define dlmalloc malloc
-    #define dlmemalign memalign
-    #define dlposix_memalign posix_memalign
-    #define dlrealloc realloc
-    #define dlrealloc_in_place realloc_in_place
-    #define dlvalloc valloc
-    #define dlpvalloc pvalloc
-    #define dlmallinfo mallinfo
-    #define dlmallopt mallopt
-    #define dlmalloc_trim malloc_trim
-    #define dlmalloc_stats malloc_stats
-    #define dlmalloc_usable_size malloc_usable_size
-    #define dlmalloc_footprint malloc_footprint
-    #define dlmalloc_max_footprint malloc_max_footprint
-    #define dlmalloc_footprint_limit malloc_footprint_limit
-    #define dlmalloc_set_footprint_limit malloc_set_footprint_limit
-    #define dlmalloc_inspect_all malloc_inspect_all
-    #define dlindependent_calloc independent_calloc
-    #define dlindependent_comalloc independent_comalloc
-    #define dlbulk_free bulk_free
-  #endif                                                   /* USE_DL_PREFIX */
+
+  #if !ONLY_MSPACES
+
+  /* ------------------- Declarations of public routines ------------------- */
+
+    #ifndef USE_DL_PREFIX
+      #define dlcalloc calloc
+      #define dlfree free
+      #define dlmalloc malloc
+      #define dlmemalign memalign
+      #define dlposix_memalign posix_memalign
+      #define dlrealloc realloc
+      #define dlrealloc_in_place realloc_in_place
+      #define dlvalloc valloc
+      #define dlpvalloc pvalloc
+      #define dlmallinfo mallinfo
+      #define dlmallopt mallopt
+      #define dlmalloc_trim malloc_trim
+      #define dlmalloc_stats malloc_stats
+      #define dlmalloc_usable_size malloc_usable_size
+      #define dlmalloc_footprint malloc_footprint
+      #define dlmalloc_max_footprint malloc_max_footprint
+      #define dlmalloc_footprint_limit malloc_footprint_limit
+      #define dlmalloc_set_footprint_limit malloc_set_footprint_limit
+      #define dlmalloc_inspect_all malloc_inspect_all
+      #define dlindependent_calloc independent_calloc
+      #define dlindependent_comalloc independent_comalloc
+      #define dlbulk_free bulk_free
+    #endif                                                 /* USE_DL_PREFIX */
 
 /*
   malloc(size_t n)
@@ -1026,7 +1031,7 @@ DLMALLOC_EXPORT size_t dlmalloc_footprint_limit();
 */
 DLMALLOC_EXPORT size_t dlmalloc_set_footprint_limit(size_t bytes);
 
-  #if MALLOC_INSPECT_ALL
+    #if MALLOC_INSPECT_ALL
 /*
   malloc_inspect_all(void(*handler)(void *start,
                                     void *end,
@@ -1062,9 +1067,9 @@ DLMALLOC_EXPORT void dlmalloc_inspect_all(void (*handler)(void *, void *,
                                                           size_t, void *),
                                           void *arg);
 
-  #endif                                              /* MALLOC_INSPECT_ALL */
+    #endif                                            /* MALLOC_INSPECT_ALL */
 
-  #if !NO_MALLINFO
+    #if !NO_MALLINFO
 /*
   mallinfo()
   Returns (by copy) a struct containing various summary statistics:
@@ -1088,7 +1093,7 @@ DLMALLOC_EXPORT void dlmalloc_inspect_all(void (*handler)(void *, void *,
   thus be inaccurate.
 */
 DLMALLOC_EXPORT struct mallinfo dlmallinfo(void);
-  #endif                                                     /* NO_MALLINFO */
+    #endif                                                   /* NO_MALLINFO */
 
 /*
   independent_calloc(size_t n_elements, size_t element_size, void* chunks[]);
@@ -1282,9 +1287,9 @@ DLMALLOC_EXPORT void dlmalloc_stats(void);
 */
 size_t dlmalloc_usable_size(void *);
 
-#endif                                                      /* ONLY_MSPACES */
+  #endif                                                    /* ONLY_MSPACES */
 
-#if MSPACES
+  #if MSPACES
 
 /*
   mspace is an opaque type representing an independent
@@ -1408,13 +1413,13 @@ DLMALLOC_EXPORT size_t mspace_footprint(mspace msp);
 */
 DLMALLOC_EXPORT size_t mspace_max_footprint(mspace msp);
 
-  #if !NO_MALLINFO
+    #if !NO_MALLINFO
 /*
   mspace_mallinfo behaves as mallinfo, but reports properties of
   the given space.
 */
 DLMALLOC_EXPORT struct mallinfo mspace_mallinfo(mspace msp);
-  #endif                                                     /* NO_MALLINFO */
+    #endif                                                   /* NO_MALLINFO */
 
 /*
   malloc_usable_size(void* p) behaves the same as malloc_usable_size;
@@ -1438,13 +1443,13 @@ DLMALLOC_EXPORT int mspace_trim(mspace msp, size_t pad);
 */
 DLMALLOC_EXPORT int mspace_mallopt(int, int);
 
-#endif                                                           /* MSPACES */
+  #endif                                                         /* MSPACES */
 
-#ifdef __cplusplus
+  #ifdef __cplusplus
 
 }                                                      /* end of extern "C" */
 
-#endif                                                       /* __cplusplus */
+  #endif                                                     /* __cplusplus */
 
 /*
   ========================================================================
@@ -1458,169 +1463,170 @@ DLMALLOC_EXPORT int mspace_mallopt(int, int);
 
 /*------------------------------ internal #includes ---------------------- */
 
-#ifdef _MSC_VER
-  #pragma warning(disable : 4146)                 /* no "unsigned" warnings */
-#endif                                                          /* _MSC_VER */
-#if !NO_MALLOC_STATS
-  #include <stdio.h>                        /* for printing in malloc_stats */
-#endif                                                   /* NO_MALLOC_STATS */
-#ifndef LACKS_ERRNO_H
-  #include <errno.h>                           /* for MALLOC_FAILURE_ACTION */
-#endif                                                     /* LACKS_ERRNO_H */
-#ifdef DEBUG
-  #if ABORT_ON_ASSERT_FAILURE
-    #undef assert
-    #define assert(x) \
-      if (!(x)) ABORT
-  #else                                          /* ABORT_ON_ASSERT_FAILURE */
-    #include <assert.h>
-  #endif                                         /* ABORT_ON_ASSERT_FAILURE */
-#else                                                              /* DEBUG */
-  #ifndef assert
-    #define assert(x)
-  #endif
-  #define DEBUG 0
-#endif                                                             /* DEBUG */
-#if !defined(WIN32) && !defined(LACKS_TIME_H)
-  #include <time.h>                             /* for magic initialization */
-#endif                                                             /* WIN32 */
-#ifndef LACKS_STDLIB_H
-  #include <stdlib.h>                                        /* for abort() */
-#endif                                                    /* LACKS_STDLIB_H */
-#ifndef LACKS_STRING_H
-  #include <string.h>                                     /* for memset etc */
-#endif                                                    /* LACKS_STRING_H */
-#if USE_BUILTIN_FFS
-  #ifndef LACKS_STRINGS_H
-    #include <strings.h>                                         /* for ffs */
-  #endif                                                 /* LACKS_STRINGS_H */
-#endif                                                   /* USE_BUILTIN_FFS */
-#if HAVE_MMAP
-  #ifndef LACKS_SYS_MMAN_H
-    /* On some versions of linux, mremap decl in mman.h needs __USE_GNU set */
-    #if (defined(linux) && !defined(__USE_GNU))
-      #define __USE_GNU 1
-      #include <sys/mman.h>                                     /* for mmap */
-      #undef __USE_GNU
-    #else
-      #include <sys/mman.h>                                     /* for mmap */
-    #endif                                                         /* linux */
-  #endif                                                /* LACKS_SYS_MMAN_H */
-  #ifndef LACKS_FCNTL_H
-    #include <fcntl.h>
-  #endif                                                   /* LACKS_FCNTL_H */
-#endif                                                         /* HAVE_MMAP */
-#ifndef LACKS_UNISTD_H
-  #include <unistd.h>                                  /* for sbrk, sysconf */
-#else                                                     /* LACKS_UNISTD_H */
-  #if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__)
+  #ifdef _MSC_VER
+    #pragma warning(disable : 4146)               /* no "unsigned" warnings */
+  #endif                                                        /* _MSC_VER */
+  #if !NO_MALLOC_STATS
+    #include <stdio.h>                      /* for printing in malloc_stats */
+  #endif                                                 /* NO_MALLOC_STATS */
+  #ifndef LACKS_ERRNO_H
+    #include <errno.h>                         /* for MALLOC_FAILURE_ACTION */
+  #endif                                                   /* LACKS_ERRNO_H */
+  #ifdef DEBUG
+    #if ABORT_ON_ASSERT_FAILURE
+      #undef assert
+      #define assert(x) \
+        if (!(x)) ABORT
+    #else                                        /* ABORT_ON_ASSERT_FAILURE */
+      #include <assert.h>
+    #endif                                       /* ABORT_ON_ASSERT_FAILURE */
+  #else                                                            /* DEBUG */
+    #ifndef assert
+      #define assert(x)
+    #endif
+    #define DEBUG 0
+  #endif                                                           /* DEBUG */
+  #if !defined(WIN32) && !defined(LACKS_TIME_H)
+    #include <time.h>                           /* for magic initialization */
+  #endif                                                           /* WIN32 */
+  #ifndef LACKS_STDLIB_H
+    #include <stdlib.h>                                      /* for abort() */
+  #endif                                                  /* LACKS_STDLIB_H */
+  #ifndef LACKS_STRING_H
+    #include <string.h>                                   /* for memset etc */
+  #endif                                                  /* LACKS_STRING_H */
+  #if USE_BUILTIN_FFS
+    #ifndef LACKS_STRINGS_H
+      #include <strings.h>                                       /* for ffs */
+    #endif                                               /* LACKS_STRINGS_H */
+  #endif                                                 /* USE_BUILTIN_FFS */
+  #if HAVE_MMAP
+    #ifndef LACKS_SYS_MMAN_H
+      /* On some versions of linux, mremap decl in mman.h needs __USE_GNU set */
+      #if (defined(linux) && !defined(__USE_GNU))
+        #define __USE_GNU 1
+        #include <sys/mman.h>                                   /* for mmap */
+        #undef __USE_GNU
+      #else
+        #include <sys/mman.h>                                   /* for mmap */
+      #endif                                                       /* linux */
+    #endif                                              /* LACKS_SYS_MMAN_H */
+    #ifndef LACKS_FCNTL_H
+      #include <fcntl.h>
+    #endif                                                 /* LACKS_FCNTL_H */
+  #endif                                                       /* HAVE_MMAP */
+  #ifndef LACKS_UNISTD_H
+    #include <unistd.h>                                /* for sbrk, sysconf */
+  #else                                                   /* LACKS_UNISTD_H */
+    #if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__)
 extern void *sbrk(ptrdiff_t);
-  #endif                                                     /* FreeBSD etc */
-#endif                                                    /* LACKS_UNISTD_H */
+    #endif                                                   /* FreeBSD etc */
+  #endif                                                  /* LACKS_UNISTD_H */
 
-/* Declarations for locking */
-#if USE_LOCKS
-  #ifndef WIN32
-    #if defined(__SVR4) && defined(__sun)                        /* solaris */
-      #include <thread.h>
-    #elif !defined(LACKS_SCHED_H)
-      #include <sched.h>
-    #endif                                      /* solaris or LACKS_SCHED_H */
-    #if (defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0) || \
-        !USE_SPIN_LOCKS
-      #include <pthread.h>
-    #endif                                       /* USE_RECURSIVE_LOCKS ... */
-  #elif defined(_MSC_VER)
-    #ifndef _M_AMD64
-      /* These are already defined on AMD64 builds */
-      #ifdef __cplusplus
+  /* Declarations for locking */
+  #if USE_LOCKS
+    #ifndef WIN32
+      #if defined(__SVR4) && defined(__sun)                      /* solaris */
+        #include <thread.h>
+      #elif !defined(LACKS_SCHED_H)
+        #include <sched.h>
+      #endif                                    /* solaris or LACKS_SCHED_H */
+      #if (defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0) || \
+          !USE_SPIN_LOCKS
+        #include <pthread.h>
+      #endif                                     /* USE_RECURSIVE_LOCKS ... */
+    #elif defined(_MSC_VER)
+      #ifndef _M_AMD64
+        /* These are already defined on AMD64 builds */
+        #ifdef __cplusplus
 extern "C" {
 
-      #endif                                                 /* __cplusplus */
+        #endif                                               /* __cplusplus */
 LONG __cdecl _InterlockedCompareExchange(LONG volatile *Dest, LONG Exchange,
                                          LONG Comp);
 LONG __cdecl _InterlockedExchange(LONG volatile *Target, LONG Value);
-      #ifdef __cplusplus
+        #ifdef __cplusplus
 
 }
 
-      #endif                                                 /* __cplusplus */
-    #endif                                                      /* _M_AMD64 */
-    #pragma intrinsic(_InterlockedCompareExchange)
-    #pragma intrinsic(_InterlockedExchange)
-    #define interlockedcompareexchange _InterlockedCompareExchange
-    #define interlockedexchange _InterlockedExchange
-  #elif defined(WIN32) && defined(__GNUC__)
-    #define interlockedcompareexchange(a, b, c) \
-      __sync_val_compare_and_swap(a, c, b)
-    #define interlockedexchange __sync_lock_test_and_set
-  #endif                                                           /* Win32 */
-#else                                                          /* USE_LOCKS */
-#endif                                                         /* USE_LOCKS */
-
-#ifndef LOCK_AT_FORK
-  #define LOCK_AT_FORK 0
-#endif
-
-/* Declarations for bit scanning on win32 */
-#if defined(_MSC_VER) && _MSC_VER >= 1300
-  #ifndef BitScanForward                 /* Try to avoid pulling in WinNT.h */
-    #ifdef __cplusplus
+        #endif                                               /* __cplusplus */
+      #endif                                                    /* _M_AMD64 */
+      #pragma intrinsic(_InterlockedCompareExchange)
+      #pragma intrinsic(_InterlockedExchange)
+      #define interlockedcompareexchange _InterlockedCompareExchange
+      #define interlockedexchange _InterlockedExchange
+    #elif defined(WIN32) && defined(__GNUC__)
+      #define interlockedcompareexchange(a, b, c) \
+        __sync_val_compare_and_swap(a, c, b)
+      #define interlockedexchange __sync_lock_test_and_set
+    #endif                                                         /* Win32 */
+  #else                                                        /* USE_LOCKS */
+  #endif                                                       /* USE_LOCKS */
+
+  #ifndef LOCK_AT_FORK
+    #define LOCK_AT_FORK 0
+  #endif
+
+  /* Declarations for bit scanning on win32 */
+  #if defined(_MSC_VER) && _MSC_VER >= 1300
+    #ifndef BitScanForward               /* Try to avoid pulling in WinNT.h */
+      #ifdef __cplusplus
 extern "C" {
 
-    #endif                                                   /* __cplusplus */
+      #endif                                                 /* __cplusplus */
 unsigned char _BitScanForward(unsigned long *index, unsigned long mask);
 unsigned char _BitScanReverse(unsigned long *index, unsigned long mask);
-    #ifdef __cplusplus
+      #ifdef __cplusplus
 
 }
 
-    #endif                                                   /* __cplusplus */
+      #endif                                                 /* __cplusplus */
 
-    #define BitScanForward _BitScanForward
-    #define BitScanReverse _BitScanReverse
-    #pragma intrinsic(_BitScanForward)
-    #pragma intrinsic(_BitScanReverse)
-  #endif                                                  /* BitScanForward */
-#endif                               /* defined(_MSC_VER) && _MSC_VER>=1300 */
+      #define BitScanForward _BitScanForward
+      #define BitScanReverse _BitScanReverse
+      #pragma intrinsic(_BitScanForward)
+      #pragma intrinsic(_BitScanReverse)
+    #endif                                                /* BitScanForward */
+  #endif                             /* defined(_MSC_VER) && _MSC_VER>=1300 */
 
-#ifndef WIN32
-  #ifndef malloc_getpagesize
-    #ifdef _SC_PAGESIZE             /* some SVR4 systems omit an underscore */
-      #ifndef _SC_PAGE_SIZE
-        #define _SC_PAGE_SIZE _SC_PAGESIZE
+  #ifndef WIN32
+    #ifndef malloc_getpagesize
+      #ifdef _SC_PAGESIZE           /* some SVR4 systems omit an underscore */
+        #ifndef _SC_PAGE_SIZE
+          #define _SC_PAGE_SIZE _SC_PAGESIZE
+        #endif
       #endif
-    #endif
-    #ifdef _SC_PAGE_SIZE
-      #define malloc_getpagesize sysconf(_SC_PAGE_SIZE)
-    #else
-      #if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE)
-extern size_t getpagesize();
-        #define malloc_getpagesize getpagesize()
+      #ifdef _SC_PAGE_SIZE
+        #define malloc_getpagesize sysconf(_SC_PAGE_SIZE)
       #else
-        #ifdef WIN32               /* use supplied emulation of getpagesize */
+        #if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE)
+extern size_t getpagesize();
           #define malloc_getpagesize getpagesize()
         #else
-          #ifndef LACKS_SYS_PARAM_H
-            #include <sys/param.h>
-          #endif
-          #ifdef EXEC_PAGESIZE
-            #define malloc_getpagesize EXEC_PAGESIZE
+          #ifdef WIN32             /* use supplied emulation of getpagesize */
+            #define malloc_getpagesize getpagesize()
           #else
-            #ifdef NBPG
-              #ifndef CLSIZE
-                #define malloc_getpagesize NBPG
-              #else
-                #define malloc_getpagesize (NBPG * CLSIZE)
-              #endif
+            #ifndef LACKS_SYS_PARAM_H
+              #include <sys/param.h>
+            #endif
+            #ifdef EXEC_PAGESIZE
+              #define malloc_getpagesize EXEC_PAGESIZE
             #else
-              #ifdef NBPC
-                #define malloc_getpagesize NBPC
+              #ifdef NBPG
+                #ifndef CLSIZE
+                  #define malloc_getpagesize NBPG
+                #else
+                  #define malloc_getpagesize (NBPG * CLSIZE)
+                #endif
               #else
-                #ifdef PAGESIZE
-                  #define malloc_getpagesize PAGESIZE
-                #else                                         /* just guess */
-                  #define malloc_getpagesize ((size_t)4096U)
+                #ifdef NBPC
+                  #define malloc_getpagesize NBPC
+                #else
+                  #ifdef PAGESIZE
+                    #define malloc_getpagesize PAGESIZE
+                  #else                                       /* just guess */
+                    #define malloc_getpagesize ((size_t)4096U)
+                  #endif
                 #endif
               #endif
             #endif
@@ -1629,60 +1635,59 @@ extern size_t getpagesize();
       #endif
     #endif
   #endif
-#endif
 
-/* ------------------- size_t and alignment properties -------------------- */
+  /* ------------------- size_t and alignment properties -------------------- */
 
-/* The byte and bit size of a size_t */
-#define SIZE_T_SIZE (sizeof(size_t))
-#define SIZE_T_BITSIZE (sizeof(size_t) << 3)
+  /* The byte and bit size of a size_t */
+  #define SIZE_T_SIZE (sizeof(size_t))
+  #define SIZE_T_BITSIZE (sizeof(size_t) << 3)
 
-/* Some constants coerced to size_t */
-/* Annoying but necessary to avoid errors on some platforms */
-#define SIZE_T_ZERO ((size_t)0)
-#define SIZE_T_ONE ((size_t)1)
-#define SIZE_T_TWO ((size_t)2)
-#define SIZE_T_FOUR ((size_t)4)
-#define TWO_SIZE_T_SIZES (SIZE_T_SIZE << 1)
-#define FOUR_SIZE_T_SIZES (SIZE_T_SIZE << 2)
-#define SIX_SIZE_T_SIZES (FOUR_SIZE_T_SIZES + TWO_SIZE_T_SIZES)
-#define HALF_MAX_SIZE_T (MAX_SIZE_T / 2U)
+  /* Some constants coerced to size_t */
+  /* Annoying but necessary to avoid errors on some platforms */
+  #define SIZE_T_ZERO ((size_t)0)
+  #define SIZE_T_ONE ((size_t)1)
+  #define SIZE_T_TWO ((size_t)2)
+  #define SIZE_T_FOUR ((size_t)4)
+  #define TWO_SIZE_T_SIZES (SIZE_T_SIZE << 1)
+  #define FOUR_SIZE_T_SIZES (SIZE_T_SIZE << 2)
+  #define SIX_SIZE_T_SIZES (FOUR_SIZE_T_SIZES + TWO_SIZE_T_SIZES)
+  #define HALF_MAX_SIZE_T (MAX_SIZE_T / 2U)
 
-/* The bit mask value corresponding to MALLOC_ALIGNMENT */
-#define CHUNK_ALIGN_MASK (MALLOC_ALIGNMENT - SIZE_T_ONE)
+  /* The bit mask value corresponding to MALLOC_ALIGNMENT */
+  #define CHUNK_ALIGN_MASK (MALLOC_ALIGNMENT - SIZE_T_ONE)
 
-/* True if address a has acceptable alignment */
-#define is_aligned(A) (((size_t)((A)) & (CHUNK_ALIGN_MASK)) == 0)
+  /* True if address a has acceptable alignment */
+  #define is_aligned(A) (((size_t)((A)) & (CHUNK_ALIGN_MASK)) == 0)
 
-/* the number of bytes to offset an address to align it */
-#define align_offset(A)                                         \
-  ((((size_t)(A)&CHUNK_ALIGN_MASK) == 0)                        \
-       ? 0                                                      \
-       : ((MALLOC_ALIGNMENT - ((size_t)(A)&CHUNK_ALIGN_MASK)) & \
-          CHUNK_ALIGN_MASK))
+  /* the number of bytes to offset an address to align it */
+  #define align_offset(A)                                         \
+    ((((size_t)(A)&CHUNK_ALIGN_MASK) == 0)                        \
+         ? 0                                                      \
+         : ((MALLOC_ALIGNMENT - ((size_t)(A)&CHUNK_ALIGN_MASK)) & \
+            CHUNK_ALIGN_MASK))
 
-/* -------------------------- MMAP preliminaries ------------------------- */
+  /* -------------------------- MMAP preliminaries ------------------------- */
 
-/*
-   If HAVE_MORECORE or HAVE_MMAP are false, we just define calls and
-   checks to fail so compiler optimizer can delete code rather than
-   using so many "#if"s.
-*/
+  /*
+     If HAVE_MORECORE or HAVE_MMAP are false, we just define calls and
+     checks to fail so compiler optimizer can delete code rather than
+     using so many "#if"s.
+  */
 
-/* MORECORE and MMAP must return MFAIL on failure */
-#define MFAIL ((void *)(MAX_SIZE_T))
-#define CMFAIL ((char *)(MFAIL))                 /* defined for convenience */
+  /* MORECORE and MMAP must return MFAIL on failure */
+  #define MFAIL ((void *)(MAX_SIZE_T))
+  #define CMFAIL ((char *)(MFAIL))               /* defined for convenience */
 
-#if HAVE_MMAP
+  #if HAVE_MMAP
 
-  #ifndef WIN32
-    #define MMAP_PROT (PROT_READ | PROT_WRITE)
-    #if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
-      #define MAP_ANONYMOUS MAP_ANON
-    #endif                                                      /* MAP_ANON */
-    #ifdef MAP_ANONYMOUS
+    #ifndef WIN32
+      #define MMAP_PROT (PROT_READ | PROT_WRITE)
+      #if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
+        #define MAP_ANONYMOUS MAP_ANON
+      #endif                                                    /* MAP_ANON */
+      #ifdef MAP_ANONYMOUS
 
-      #define MMAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS)
+        #define MMAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS)
 
 static FORCEINLINE void *unixmmap(size_t size) {
 
@@ -1706,27 +1711,27 @@ static FORCEINLINE int unixmunmap(void *ptr, size_t size) {
 
 }
 
-      #define MMAP_DEFAULT(s) unixmmap(s)
-      #define MUNMAP_DEFAULT(a, s) unixmunmap((a), (s))
+        #define MMAP_DEFAULT(s) unixmmap(s)
+        #define MUNMAP_DEFAULT(a, s) unixmunmap((a), (s))
 
-    #else                                                  /* MAP_ANONYMOUS */
-      /*
-         Nearly all versions of mmap support MAP_ANONYMOUS, so the following
-         is unlikely to be needed, but is supplied just in case.
-      */
-      #define MMAP_FLAGS (MAP_PRIVATE)
+      #else                                                /* MAP_ANONYMOUS */
+        /*
+           Nearly all versions of mmap support MAP_ANONYMOUS, so the following
+           is unlikely to be needed, but is supplied just in case.
+        */
+        #define MMAP_FLAGS (MAP_PRIVATE)
 static int dev_zero_fd = -1;       /* Cached file descriptor for /dev/zero. */
-      #define MMAP_DEFAULT(s)                                        \
-        ((dev_zero_fd < 0)                                           \
-             ? (dev_zero_fd = open("/dev/zero", O_RDWR),             \
-                mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) \
-             : mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0))
-      #define MUNMAP_DEFAULT(a, s) munmap((a), (s))
-    #endif                                                 /* MAP_ANONYMOUS */
+        #define MMAP_DEFAULT(s)                                        \
+          ((dev_zero_fd < 0)                                           \
+               ? (dev_zero_fd = open("/dev/zero", O_RDWR),             \
+                  mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) \
+               : mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0))
+        #define MUNMAP_DEFAULT(a, s) munmap((a), (s))
+      #endif                                               /* MAP_ANONYMOUS */
 
-    #define DIRECT_MMAP_DEFAULT(s) MMAP_DEFAULT(s)
+      #define DIRECT_MMAP_DEFAULT(s) MMAP_DEFAULT(s)
 
-  #else                                                            /* WIN32 */
+    #else                                                          /* WIN32 */
 
 /* Win32 MMAP via VirtualAlloc */
 static FORCEINLINE void *win32mmap(size_t size) {
@@ -1775,14 +1780,14 @@ static FORCEINLINE int win32munmap(void *ptr, size_t size) {
 
 }
 
-    #define MMAP_DEFAULT(s) win32mmap(s)
-    #define MUNMAP_DEFAULT(a, s) win32munmap((a), (s))
-    #define DIRECT_MMAP_DEFAULT(s) win32direct_mmap(s)
-  #endif                                                           /* WIN32 */
-#endif                                                         /* HAVE_MMAP */
+      #define MMAP_DEFAULT(s) win32mmap(s)
+      #define MUNMAP_DEFAULT(a, s) win32munmap((a), (s))
+      #define DIRECT_MMAP_DEFAULT(s) win32direct_mmap(s)
+    #endif                                                         /* WIN32 */
+  #endif                                                       /* HAVE_MMAP */
 
-#if HAVE_MREMAP
-  #ifndef WIN32
+  #if HAVE_MREMAP
+    #ifndef WIN32
 
 static FORCEINLINE void *dlmremap(void *old_address, size_t old_size,
                                   size_t new_size, int flags) {
@@ -1796,75 +1801,75 @@ static FORCEINLINE void *dlmremap(void *old_address, size_t old_size,
 
 }
 
-    #define MREMAP_DEFAULT(addr, osz, nsz, mv) \
-      dlmremap((addr), (osz), (nsz), (mv))
-  #endif                                                           /* WIN32 */
-#endif                                                       /* HAVE_MREMAP */
+      #define MREMAP_DEFAULT(addr, osz, nsz, mv) \
+        dlmremap((addr), (osz), (nsz), (mv))
+    #endif                                                         /* WIN32 */
+  #endif                                                     /* HAVE_MREMAP */
+
+  /**
+   * Define CALL_MORECORE
+   */
+  #if HAVE_MORECORE
+    #ifdef MORECORE
+      #define CALL_MORECORE(S) MORECORE(S)
+    #else                                                       /* MORECORE */
+      #define CALL_MORECORE(S) MORECORE_DEFAULT(S)
+    #endif                                                      /* MORECORE */
+  #else                                                    /* HAVE_MORECORE */
+    #define CALL_MORECORE(S) MFAIL
+  #endif                                                   /* HAVE_MORECORE */
 
-/**
- * Define CALL_MORECORE
- */
-#if HAVE_MORECORE
-  #ifdef MORECORE
-    #define CALL_MORECORE(S) MORECORE(S)
-  #else                                                         /* MORECORE */
-    #define CALL_MORECORE(S) MORECORE_DEFAULT(S)
-  #endif                                                        /* MORECORE */
-#else                                                      /* HAVE_MORECORE */
-  #define CALL_MORECORE(S) MFAIL
-#endif                                                     /* HAVE_MORECORE */
-
-/**
- * Define CALL_MMAP/CALL_MUNMAP/CALL_DIRECT_MMAP
- */
-#if HAVE_MMAP
-  #define USE_MMAP_BIT (SIZE_T_ONE)
+  /**
+   * Define CALL_MMAP/CALL_MUNMAP/CALL_DIRECT_MMAP
+   */
+  #if HAVE_MMAP
+    #define USE_MMAP_BIT (SIZE_T_ONE)
+
+    #ifdef MMAP
+      #define CALL_MMAP(s) MMAP(s)
+    #else                                                           /* MMAP */
+      #define CALL_MMAP(s) MMAP_DEFAULT(s)
+    #endif                                                          /* MMAP */
+    #ifdef MUNMAP
+      #define CALL_MUNMAP(a, s) MUNMAP((a), (s))
+    #else                                                         /* MUNMAP */
+      #define CALL_MUNMAP(a, s) MUNMAP_DEFAULT((a), (s))
+    #endif                                                        /* MUNMAP */
+    #ifdef DIRECT_MMAP
+      #define CALL_DIRECT_MMAP(s) DIRECT_MMAP(s)
+    #else                                                    /* DIRECT_MMAP */
+      #define CALL_DIRECT_MMAP(s) DIRECT_MMAP_DEFAULT(s)
+    #endif                                                   /* DIRECT_MMAP */
+  #else                                                        /* HAVE_MMAP */
+    #define USE_MMAP_BIT (SIZE_T_ZERO)
 
-  #ifdef MMAP
+    #define MMAP(s) MFAIL
+    #define MUNMAP(a, s) (-1)
+    #define DIRECT_MMAP(s) MFAIL
+    #define CALL_DIRECT_MMAP(s) DIRECT_MMAP(s)
     #define CALL_MMAP(s) MMAP(s)
-  #else                                                             /* MMAP */
-    #define CALL_MMAP(s) MMAP_DEFAULT(s)
-  #endif                                                            /* MMAP */
-  #ifdef MUNMAP
     #define CALL_MUNMAP(a, s) MUNMAP((a), (s))
-  #else                                                           /* MUNMAP */
-    #define CALL_MUNMAP(a, s) MUNMAP_DEFAULT((a), (s))
-  #endif                                                          /* MUNMAP */
-  #ifdef DIRECT_MMAP
-    #define CALL_DIRECT_MMAP(s) DIRECT_MMAP(s)
-  #else                                                      /* DIRECT_MMAP */
-    #define CALL_DIRECT_MMAP(s) DIRECT_MMAP_DEFAULT(s)
-  #endif                                                     /* DIRECT_MMAP */
-#else                                                          /* HAVE_MMAP */
-  #define USE_MMAP_BIT (SIZE_T_ZERO)
-
-  #define MMAP(s) MFAIL
-  #define MUNMAP(a, s) (-1)
-  #define DIRECT_MMAP(s) MFAIL
-  #define CALL_DIRECT_MMAP(s) DIRECT_MMAP(s)
-  #define CALL_MMAP(s) MMAP(s)
-  #define CALL_MUNMAP(a, s) MUNMAP((a), (s))
-#endif                                                         /* HAVE_MMAP */
-
-/**
- * Define CALL_MREMAP
- */
-#if HAVE_MMAP && HAVE_MREMAP
-  #ifdef MREMAP
-    #define CALL_MREMAP(addr, osz, nsz, mv) MREMAP((addr), (osz), (nsz), (mv))
-  #else                                                           /* MREMAP */
-    #define CALL_MREMAP(addr, osz, nsz, mv) \
-      MREMAP_DEFAULT((addr), (osz), (nsz), (mv))
-  #endif                                                          /* MREMAP */
-#else                                           /* HAVE_MMAP && HAVE_MREMAP */
-  #define CALL_MREMAP(addr, osz, nsz, mv) MFAIL
-#endif                                          /* HAVE_MMAP && HAVE_MREMAP */
-
-/* mstate bit set if continguous morecore disabled or failed */
-#define USE_NONCONTIGUOUS_BIT (4U)
-
-/* segment bit set in create_mspace_with_base */
-#define EXTERN_BIT (8U)
+  #endif                                                       /* HAVE_MMAP */
+
+  /**
+   * Define CALL_MREMAP
+   */
+  #if HAVE_MMAP && HAVE_MREMAP
+    #ifdef MREMAP
+      #define CALL_MREMAP(addr, osz, nsz, mv) MREMAP((addr), (osz), (nsz), (mv))
+    #else                                                         /* MREMAP */
+      #define CALL_MREMAP(addr, osz, nsz, mv) \
+        MREMAP_DEFAULT((addr), (osz), (nsz), (mv))
+    #endif                                                        /* MREMAP */
+  #else                                         /* HAVE_MMAP && HAVE_MREMAP */
+    #define CALL_MREMAP(addr, osz, nsz, mv) MFAIL
+  #endif                                        /* HAVE_MMAP && HAVE_MREMAP */
+
+  /* mstate bit set if continguous morecore disabled or failed */
+  #define USE_NONCONTIGUOUS_BIT (4U)
+
+  /* segment bit set in create_mspace_with_base */
+  #define EXTERN_BIT (8U)
 
 /* --------------------------- Lock preliminaries ------------------------ */
 
@@ -1896,35 +1901,35 @@ static FORCEINLINE void *dlmremap(void *old_address, size_t old_size,
 
 */
 
-#if !USE_LOCKS
-  #define USE_LOCK_BIT (0U)
-  #define INITIAL_LOCK(l) (0)
-  #define DESTROY_LOCK(l) (0)
-  #define ACQUIRE_MALLOC_GLOBAL_LOCK()
-  #define RELEASE_MALLOC_GLOBAL_LOCK()
-
-#else
-  #if USE_LOCKS > 1
-  /* -----------------------  User-defined locks ------------------------ */
-  /* Define your own lock implementation here */
-  /* #define INITIAL_LOCK(lk)  ... */
-  /* #define DESTROY_LOCK(lk)  ... */
-  /* #define ACQUIRE_LOCK(lk)  ... */
-  /* #define RELEASE_LOCK(lk)  ... */
-  /* #define TRY_LOCK(lk) ... */
-  /* static MLOCK_T malloc_global_mutex = ... */
-
-  #elif USE_SPIN_LOCKS
-
-  /* First, define CAS_LOCK and CLEAR_LOCK on ints */
-  /* Note CAS_LOCK defined to return 0 on success */
-
-    #if defined(__GNUC__) && \
-        (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1))
-      #define CAS_LOCK(sl) __sync_lock_test_and_set(sl, 1)
-      #define CLEAR_LOCK(sl) __sync_lock_release(sl)
-
-    #elif (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)))
+  #if !USE_LOCKS
+    #define USE_LOCK_BIT (0U)
+    #define INITIAL_LOCK(l) (0)
+    #define DESTROY_LOCK(l) (0)
+    #define ACQUIRE_MALLOC_GLOBAL_LOCK()
+    #define RELEASE_MALLOC_GLOBAL_LOCK()
+
+  #else
+    #if USE_LOCKS > 1
+    /* -----------------------  User-defined locks ------------------------ */
+    /* Define your own lock implementation here */
+    /* #define INITIAL_LOCK(lk)  ... */
+    /* #define DESTROY_LOCK(lk)  ... */
+    /* #define ACQUIRE_LOCK(lk)  ... */
+    /* #define RELEASE_LOCK(lk)  ... */
+    /* #define TRY_LOCK(lk) ... */
+    /* static MLOCK_T malloc_global_mutex = ... */
+
+    #elif USE_SPIN_LOCKS
+
+    /* First, define CAS_LOCK and CLEAR_LOCK on ints */
+    /* Note CAS_LOCK defined to return 0 on success */
+
+      #if defined(__GNUC__) && \
+          (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1))
+        #define CAS_LOCK(sl) __sync_lock_test_and_set(sl, 1)
+        #define CLEAR_LOCK(sl) __sync_lock_release(sl)
+
+      #elif (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)))
 /* Custom spin locks for older gcc on x86 */
 static FORCEINLINE int x86_cas_lock(int *sl) {
 
@@ -1951,29 +1956,29 @@ static FORCEINLINE void x86_clear_lock(int *sl) {
 
 }
 
-      #define CAS_LOCK(sl) x86_cas_lock(sl)
-      #define CLEAR_LOCK(sl) x86_clear_lock(sl)
+        #define CAS_LOCK(sl) x86_cas_lock(sl)
+        #define CLEAR_LOCK(sl) x86_clear_lock(sl)
 
-    #else                                                      /* Win32 MSC */
-      #define CAS_LOCK(sl) interlockedexchange((volatile LONG *)sl, (LONG)1)
-      #define CLEAR_LOCK(sl) interlockedexchange((volatile LONG *)sl, (LONG)0)
+      #else                                                    /* Win32 MSC */
+        #define CAS_LOCK(sl) interlockedexchange((volatile LONG *)sl, (LONG)1)
+        #define CLEAR_LOCK(sl) interlockedexchange((volatile LONG *)sl, (LONG)0)
 
-    #endif                                       /* ... gcc spins locks ... */
+      #endif                                     /* ... gcc spins locks ... */
 
-    /* How to yield for a spin lock */
-    #define SPINS_PER_YIELD 63
-    #if defined(_MSC_VER)
-      #define SLEEP_EX_DURATION 50                 /* delay for yield/sleep */
-      #define SPIN_LOCK_YIELD SleepEx(SLEEP_EX_DURATION, FALSE)
-    #elif defined(__SVR4) && defined(__sun)                      /* solaris */
-      #define SPIN_LOCK_YIELD thr_yield();
-    #elif !defined(LACKS_SCHED_H)
-      #define SPIN_LOCK_YIELD sched_yield();
-    #else
-      #define SPIN_LOCK_YIELD
-    #endif                                                 /* ... yield ... */
+      /* How to yield for a spin lock */
+      #define SPINS_PER_YIELD 63
+      #if defined(_MSC_VER)
+        #define SLEEP_EX_DURATION 50               /* delay for yield/sleep */
+        #define SPIN_LOCK_YIELD SleepEx(SLEEP_EX_DURATION, FALSE)
+      #elif defined(__SVR4) && defined(__sun)                    /* solaris */
+        #define SPIN_LOCK_YIELD thr_yield();
+      #elif !defined(LACKS_SCHED_H)
+        #define SPIN_LOCK_YIELD sched_yield();
+      #else
+        #define SPIN_LOCK_YIELD
+      #endif                                               /* ... yield ... */
 
-    #if !defined(USE_RECURSIVE_LOCKS) || USE_RECURSIVE_LOCKS == 0
+      #if !defined(USE_RECURSIVE_LOCKS) || USE_RECURSIVE_LOCKS == 0
 /* Plain spin locks use single word (embedded in malloc_states) */
 static int spin_acquire_lock(int *sl) {
 
@@ -1988,30 +1993,30 @@ static int spin_acquire_lock(int *sl) {
 
 }
 
-      #define MLOCK_T int
-      #define TRY_LOCK(sl) !CAS_LOCK(sl)
-      #define RELEASE_LOCK(sl) CLEAR_LOCK(sl)
-      #define ACQUIRE_LOCK(sl) (CAS_LOCK(sl) ? spin_acquire_lock(sl) : 0)
-      #define INITIAL_LOCK(sl) (*sl = 0)
-      #define DESTROY_LOCK(sl) (0)
+        #define MLOCK_T int
+        #define TRY_LOCK(sl) !CAS_LOCK(sl)
+        #define RELEASE_LOCK(sl) CLEAR_LOCK(sl)
+        #define ACQUIRE_LOCK(sl) (CAS_LOCK(sl) ? spin_acquire_lock(sl) : 0)
+        #define INITIAL_LOCK(sl) (*sl = 0)
+        #define DESTROY_LOCK(sl) (0)
 static MLOCK_T malloc_global_mutex = 0;
 
-    #else                                            /* USE_RECURSIVE_LOCKS */
-      /* types for lock owners */
-      #ifdef WIN32
-        #define THREAD_ID_T DWORD
-        #define CURRENT_THREAD GetCurrentThreadId()
-        #define EQ_OWNER(X, Y) ((X) == (Y))
-      #else
-        /*
-          Note: the following assume that pthread_t is a type that can be
-          initialized to (casted) zero. If this is not the case, you will need
-          to somehow redefine these or not use spin locks.
-        */
-        #define THREAD_ID_T pthread_t
-        #define CURRENT_THREAD pthread_self()
-        #define EQ_OWNER(X, Y) pthread_equal(X, Y)
-      #endif
+      #else                                          /* USE_RECURSIVE_LOCKS */
+        /* types for lock owners */
+        #ifdef WIN32
+          #define THREAD_ID_T DWORD
+          #define CURRENT_THREAD GetCurrentThreadId()
+          #define EQ_OWNER(X, Y) ((X) == (Y))
+        #else
+          /*
+            Note: the following assume that pthread_t is a type that can be
+            initialized to (casted) zero. If this is not the case, you will need
+            to somehow redefine these or not use spin locks.
+          */
+          #define THREAD_ID_T pthread_t
+          #define CURRENT_THREAD pthread_self()
+          #define EQ_OWNER(X, Y) pthread_equal(X, Y)
+        #endif
 
 struct malloc_recursive_lock {
 
@@ -2021,7 +2026,7 @@ struct malloc_recursive_lock {
 
 };
 
-      #define MLOCK_T struct malloc_recursive_lock
+        #define MLOCK_T struct malloc_recursive_lock
 static MLOCK_T malloc_global_mutex = {0, 0, (THREAD_ID_T)0};
 
 static FORCEINLINE void recursive_release_lock(MLOCK_T *lk) {
@@ -2084,23 +2089,23 @@ static FORCEINLINE int recursive_try_lock(MLOCK_T *lk) {
 
 }
 
-      #define RELEASE_LOCK(lk) recursive_release_lock(lk)
-      #define TRY_LOCK(lk) recursive_try_lock(lk)
-      #define ACQUIRE_LOCK(lk) recursive_acquire_lock(lk)
+        #define RELEASE_LOCK(lk) recursive_release_lock(lk)
+        #define TRY_LOCK(lk) recursive_try_lock(lk)
+        #define ACQUIRE_LOCK(lk) recursive_acquire_lock(lk)
+        #define INITIAL_LOCK(lk) \
+          ((lk)->threadid = (THREAD_ID_T)0, (lk)->sl = 0, (lk)->c = 0)
+        #define DESTROY_LOCK(lk) (0)
+      #endif                                         /* USE_RECURSIVE_LOCKS */
+
+    #elif defined(WIN32)                         /* Win32 critical sections */
+      #define MLOCK_T CRITICAL_SECTION
+      #define ACQUIRE_LOCK(lk) (EnterCriticalSection(lk), 0)
+      #define RELEASE_LOCK(lk) LeaveCriticalSection(lk)
+      #define TRY_LOCK(lk) TryEnterCriticalSection(lk)
       #define INITIAL_LOCK(lk) \
-        ((lk)->threadid = (THREAD_ID_T)0, (lk)->sl = 0, (lk)->c = 0)
-      #define DESTROY_LOCK(lk) (0)
-    #endif                                           /* USE_RECURSIVE_LOCKS */
-
-  #elif defined(WIN32)                           /* Win32 critical sections */
-    #define MLOCK_T CRITICAL_SECTION
-    #define ACQUIRE_LOCK(lk) (EnterCriticalSection(lk), 0)
-    #define RELEASE_LOCK(lk) LeaveCriticalSection(lk)
-    #define TRY_LOCK(lk) TryEnterCriticalSection(lk)
-    #define INITIAL_LOCK(lk) \
-      (!InitializeCriticalSectionAndSpinCount((lk), 0x80000000 | 4000))
-    #define DESTROY_LOCK(lk) (DeleteCriticalSection(lk), 0)
-    #define NEED_GLOBAL_LOCK_INIT
+        (!InitializeCriticalSectionAndSpinCount((lk), 0x80000000 | 4000))
+      #define DESTROY_LOCK(lk) (DeleteCriticalSection(lk), 0)
+      #define NEED_GLOBAL_LOCK_INIT
 
 static MLOCK_T       malloc_global_mutex;
 static volatile LONG malloc_global_mutex_status;
@@ -2128,23 +2133,24 @@ static void init_malloc_global_mutex() {
 
 }
 
-  #else                                             /* pthreads-based locks */
-    #define MLOCK_T pthread_mutex_t
-    #define ACQUIRE_LOCK(lk) pthread_mutex_lock(lk)
-    #define RELEASE_LOCK(lk) pthread_mutex_unlock(lk)
-    #define TRY_LOCK(lk) (!pthread_mutex_trylock(lk))
-    #define INITIAL_LOCK(lk) pthread_init_lock(lk)
-    #define DESTROY_LOCK(lk) pthread_mutex_destroy(lk)
+    #else                                           /* pthreads-based locks */
+      #define MLOCK_T pthread_mutex_t
+      #define ACQUIRE_LOCK(lk) pthread_mutex_lock(lk)
+      #define RELEASE_LOCK(lk) pthread_mutex_unlock(lk)
+      #define TRY_LOCK(lk) (!pthread_mutex_trylock(lk))
+      #define INITIAL_LOCK(lk) pthread_init_lock(lk)
+      #define DESTROY_LOCK(lk) pthread_mutex_destroy(lk)
 
-    #if defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0 && \
-        defined(linux) && !defined(PTHREAD_MUTEX_RECURSIVE)
+      #if defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0 && \
+          defined(linux) && !defined(PTHREAD_MUTEX_RECURSIVE)
 /* Cope with old-style linux recursive lock initialization by adding */
 /* skipped internal declaration from pthread.h */
 extern int pthread_mutexattr_setkind_np __P((pthread_mutexattr_t * __attr,
                                              int __kind));
-      #define PTHREAD_MUTEX_RECURSIVE PTHREAD_MUTEX_RECURSIVE_NP
-      #define pthread_mutexattr_settype(x, y) pthread_mutexattr_setkind_np(x, y)
-    #endif                                       /* USE_RECURSIVE_LOCKS ... */
+        #define PTHREAD_MUTEX_RECURSIVE PTHREAD_MUTEX_RECURSIVE_NP
+        #define pthread_mutexattr_settype(x, y) \
+          pthread_mutexattr_setkind_np(x, y)
+      #endif                                     /* USE_RECURSIVE_LOCKS ... */
 
 static MLOCK_T malloc_global_mutex = PTHREAD_MUTEX_INITIALIZER;
 
@@ -2152,29 +2158,29 @@ static int pthread_init_lock(MLOCK_T *lk) {
 
   pthread_mutexattr_t attr;
   if (pthread_mutexattr_init(&attr)) return 1;
-    #if defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0
+      #if defined(USE_RECURSIVE_LOCKS) && USE_RECURSIVE_LOCKS != 0
   if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE)) return 1;
-    #endif
+      #endif
   if (pthread_mutex_init(lk, &attr)) return 1;
   if (pthread_mutexattr_destroy(&attr)) return 1;
   return 0;
 
 }
 
-  #endif                                              /* ... lock types ... */
+    #endif                                            /* ... lock types ... */
 
-  /* Common code for all lock types */
-  #define USE_LOCK_BIT (2U)
+    /* Common code for all lock types */
+    #define USE_LOCK_BIT (2U)
 
-  #ifndef ACQUIRE_MALLOC_GLOBAL_LOCK
-    #define ACQUIRE_MALLOC_GLOBAL_LOCK() ACQUIRE_LOCK(&malloc_global_mutex);
-  #endif
+    #ifndef ACQUIRE_MALLOC_GLOBAL_LOCK
+      #define ACQUIRE_MALLOC_GLOBAL_LOCK() ACQUIRE_LOCK(&malloc_global_mutex);
+    #endif
 
-  #ifndef RELEASE_MALLOC_GLOBAL_LOCK
-    #define RELEASE_MALLOC_GLOBAL_LOCK() RELEASE_LOCK(&malloc_global_mutex);
-  #endif
+    #ifndef RELEASE_MALLOC_GLOBAL_LOCK
+      #define RELEASE_MALLOC_GLOBAL_LOCK() RELEASE_LOCK(&malloc_global_mutex);
+    #endif
 
-#endif                                                         /* USE_LOCKS */
+  #endif                                                       /* USE_LOCKS */
 
 /* -----------------------  Chunk representations ------------------------ */
 
@@ -2331,39 +2337,39 @@ typedef unsigned int         flag_t;   /* The type of various bit flag sets */
 
 /* ------------------- Chunks sizes and alignments ----------------------- */
 
-#define MCHUNK_SIZE (sizeof(mchunk))
+  #define MCHUNK_SIZE (sizeof(mchunk))
 
-#if FOOTERS
-  #define CHUNK_OVERHEAD (TWO_SIZE_T_SIZES)
-#else                                                            /* FOOTERS */
-  #define CHUNK_OVERHEAD (SIZE_T_SIZE)
-#endif                                                           /* FOOTERS */
+  #if FOOTERS
+    #define CHUNK_OVERHEAD (TWO_SIZE_T_SIZES)
+  #else                                                          /* FOOTERS */
+    #define CHUNK_OVERHEAD (SIZE_T_SIZE)
+  #endif                                                         /* FOOTERS */
 
-/* MMapped chunks need a second word of overhead ... */
-#define MMAP_CHUNK_OVERHEAD (TWO_SIZE_T_SIZES)
-/* ... and additional padding for fake next-chunk at foot */
-#define MMAP_FOOT_PAD (FOUR_SIZE_T_SIZES)
+  /* MMapped chunks need a second word of overhead ... */
+  #define MMAP_CHUNK_OVERHEAD (TWO_SIZE_T_SIZES)
+  /* ... and additional padding for fake next-chunk at foot */
+  #define MMAP_FOOT_PAD (FOUR_SIZE_T_SIZES)
 
-/* The smallest size we can malloc is an aligned minimal chunk */
-#define MIN_CHUNK_SIZE ((MCHUNK_SIZE + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)
+  /* The smallest size we can malloc is an aligned minimal chunk */
+  #define MIN_CHUNK_SIZE ((MCHUNK_SIZE + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)
 
-/* conversion from malloc headers to user pointers, and back */
-#define chunk2mem(p) ((void *)((char *)(p) + TWO_SIZE_T_SIZES))
-#define mem2chunk(mem) ((mchunkptr)((char *)(mem)-TWO_SIZE_T_SIZES))
-/* chunk associated with aligned address A */
-#define align_as_chunk(A) (mchunkptr)((A) + align_offset(chunk2mem(A)))
+  /* conversion from malloc headers to user pointers, and back */
+  #define chunk2mem(p) ((void *)((char *)(p) + TWO_SIZE_T_SIZES))
+  #define mem2chunk(mem) ((mchunkptr)((char *)(mem)-TWO_SIZE_T_SIZES))
+  /* chunk associated with aligned address A */
+  #define align_as_chunk(A) (mchunkptr)((A) + align_offset(chunk2mem(A)))
 
-/* Bounds on request (not chunk) sizes. */
-#define MAX_REQUEST ((-MIN_CHUNK_SIZE) << 2)
-#define MIN_REQUEST (MIN_CHUNK_SIZE - CHUNK_OVERHEAD - SIZE_T_ONE)
+  /* Bounds on request (not chunk) sizes. */
+  #define MAX_REQUEST ((-MIN_CHUNK_SIZE) << 2)
+  #define MIN_REQUEST (MIN_CHUNK_SIZE - CHUNK_OVERHEAD - SIZE_T_ONE)
 
-/* pad request bytes into a usable size */
-#define pad_request(req) \
-  (((req) + CHUNK_OVERHEAD + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)
+  /* pad request bytes into a usable size */
+  #define pad_request(req) \
+    (((req) + CHUNK_OVERHEAD + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)
 
-/* pad request, checking for minimum (but not maximum) */
-#define request2size(req) \
-  (((req) < MIN_REQUEST) ? MIN_CHUNK_SIZE : pad_request(req))
+  /* pad request, checking for minimum (but not maximum) */
+  #define request2size(req) \
+    (((req) < MIN_REQUEST) ? MIN_CHUNK_SIZE : pad_request(req))
 
 /* ------------------ Operations on head and foot fields ----------------- */
 
@@ -2375,60 +2381,60 @@ typedef unsigned int         flag_t;   /* The type of various bit flag sets */
   FLAG4_BIT is not used by this malloc, but might be useful in extensions.
 */
 
-#define PINUSE_BIT (SIZE_T_ONE)
-#define CINUSE_BIT (SIZE_T_TWO)
-#define FLAG4_BIT (SIZE_T_FOUR)
-#define INUSE_BITS (PINUSE_BIT | CINUSE_BIT)
-#define FLAG_BITS (PINUSE_BIT | CINUSE_BIT | FLAG4_BIT)
+  #define PINUSE_BIT (SIZE_T_ONE)
+  #define CINUSE_BIT (SIZE_T_TWO)
+  #define FLAG4_BIT (SIZE_T_FOUR)
+  #define INUSE_BITS (PINUSE_BIT | CINUSE_BIT)
+  #define FLAG_BITS (PINUSE_BIT | CINUSE_BIT | FLAG4_BIT)
 
-/* Head value for fenceposts */
-#define FENCEPOST_HEAD (INUSE_BITS | SIZE_T_SIZE)
+  /* Head value for fenceposts */
+  #define FENCEPOST_HEAD (INUSE_BITS | SIZE_T_SIZE)
 
-/* extraction of fields from head words */
-#define cinuse(p) ((p)->head & CINUSE_BIT)
-#define pinuse(p) ((p)->head & PINUSE_BIT)
-#define flag4inuse(p) ((p)->head & FLAG4_BIT)
-#define is_inuse(p) (((p)->head & INUSE_BITS) != PINUSE_BIT)
-#define is_mmapped(p) (((p)->head & INUSE_BITS) == 0)
+  /* extraction of fields from head words */
+  #define cinuse(p) ((p)->head & CINUSE_BIT)
+  #define pinuse(p) ((p)->head & PINUSE_BIT)
+  #define flag4inuse(p) ((p)->head & FLAG4_BIT)
+  #define is_inuse(p) (((p)->head & INUSE_BITS) != PINUSE_BIT)
+  #define is_mmapped(p) (((p)->head & INUSE_BITS) == 0)
 
-#define chunksize(p) ((p)->head & ~(FLAG_BITS))
+  #define chunksize(p) ((p)->head & ~(FLAG_BITS))
 
-#define clear_pinuse(p) ((p)->head &= ~PINUSE_BIT)
-#define set_flag4(p) ((p)->head |= FLAG4_BIT)
-#define clear_flag4(p) ((p)->head &= ~FLAG4_BIT)
+  #define clear_pinuse(p) ((p)->head &= ~PINUSE_BIT)
+  #define set_flag4(p) ((p)->head |= FLAG4_BIT)
+  #define clear_flag4(p) ((p)->head &= ~FLAG4_BIT)
 
-/* Treat space at ptr +/- offset as a chunk */
-#define chunk_plus_offset(p, s) ((mchunkptr)(((char *)(p)) + (s)))
-#define chunk_minus_offset(p, s) ((mchunkptr)(((char *)(p)) - (s)))
+  /* Treat space at ptr +/- offset as a chunk */
+  #define chunk_plus_offset(p, s) ((mchunkptr)(((char *)(p)) + (s)))
+  #define chunk_minus_offset(p, s) ((mchunkptr)(((char *)(p)) - (s)))
 
-/* Ptr to next or previous physical malloc_chunk. */
-#define next_chunk(p) ((mchunkptr)(((char *)(p)) + ((p)->head & ~FLAG_BITS)))
-#define prev_chunk(p) ((mchunkptr)(((char *)(p)) - ((p)->prev_foot)))
+  /* Ptr to next or previous physical malloc_chunk. */
+  #define next_chunk(p) ((mchunkptr)(((char *)(p)) + ((p)->head & ~FLAG_BITS)))
+  #define prev_chunk(p) ((mchunkptr)(((char *)(p)) - ((p)->prev_foot)))
 
-/* extract next chunk's pinuse bit */
-#define next_pinuse(p) ((next_chunk(p)->head) & PINUSE_BIT)
+  /* extract next chunk's pinuse bit */
+  #define next_pinuse(p) ((next_chunk(p)->head) & PINUSE_BIT)
 
-/* Get/set size at footer */
-#define get_foot(p, s) (((mchunkptr)((char *)(p) + (s)))->prev_foot)
-#define set_foot(p, s) (((mchunkptr)((char *)(p) + (s)))->prev_foot = (s))
+  /* Get/set size at footer */
+  #define get_foot(p, s) (((mchunkptr)((char *)(p) + (s)))->prev_foot)
+  #define set_foot(p, s) (((mchunkptr)((char *)(p) + (s)))->prev_foot = (s))
 
-/* Set size, pinuse bit, and foot */
-#define set_size_and_pinuse_of_free_chunk(p, s) \
-  ((p)->head = (s | PINUSE_BIT), set_foot(p, s))
+  /* Set size, pinuse bit, and foot */
+  #define set_size_and_pinuse_of_free_chunk(p, s) \
+    ((p)->head = (s | PINUSE_BIT), set_foot(p, s))
 
-/* Set size, pinuse bit, foot, and clear next pinuse */
-#define set_free_with_pinuse(p, s, n) \
-  (clear_pinuse(n), set_size_and_pinuse_of_free_chunk(p, s))
+  /* Set size, pinuse bit, foot, and clear next pinuse */
+  #define set_free_with_pinuse(p, s, n) \
+    (clear_pinuse(n), set_size_and_pinuse_of_free_chunk(p, s))
 
-/* Get the internal overhead associated with chunk p */
-#define overhead_for(p) (is_mmapped(p) ? MMAP_CHUNK_OVERHEAD : CHUNK_OVERHEAD)
+  /* Get the internal overhead associated with chunk p */
+  #define overhead_for(p) (is_mmapped(p) ? MMAP_CHUNK_OVERHEAD : CHUNK_OVERHEAD)
 
-/* Return true if malloced space is not necessarily cleared */
-#if MMAP_CLEARS
-  #define calloc_must_clear(p) (!is_mmapped(p))
-#else                                                        /* MMAP_CLEARS */
-  #define calloc_must_clear(p) (1)
-#endif                                                       /* MMAP_CLEARS */
+  /* Return true if malloced space is not necessarily cleared */
+  #if MMAP_CLEARS
+    #define calloc_must_clear(p) (!is_mmapped(p))
+  #else                                                      /* MMAP_CLEARS */
+    #define calloc_must_clear(p) (1)
+  #endif                                                     /* MMAP_CLEARS */
 
 /* ---------------------- Overlaid data structures ----------------------- */
 
@@ -2539,8 +2545,8 @@ typedef struct malloc_tree_chunk  tchunk;
 typedef struct malloc_tree_chunk *tchunkptr;
 typedef struct malloc_tree_chunk *tbinptr;     /* The type of bins of trees */
 
-/* A little helper macro for trees */
-#define leftmost_child(t) ((t)->child[0] != 0 ? (t)->child[0] : (t)->child[1])
+  /* A little helper macro for trees */
+  #define leftmost_child(t) ((t)->child[0] != 0 ? (t)->child[0] : (t)->child[1])
 
 /* ----------------------------- Segments -------------------------------- */
 
@@ -2608,108 +2614,108 @@ struct malloc_segment {
 
 };
 
-#define is_mmapped_segment(S) ((S)->sflags & USE_MMAP_BIT)
-#define is_extern_segment(S) ((S)->sflags & EXTERN_BIT)
+  #define is_mmapped_segment(S) ((S)->sflags & USE_MMAP_BIT)
+  #define is_extern_segment(S) ((S)->sflags & EXTERN_BIT)
 
 typedef struct malloc_segment  msegment;
 typedef struct malloc_segment *msegmentptr;
 
-/* ---------------------------- malloc_state ----------------------------- */
+  /* ---------------------------- malloc_state ----------------------------- */
 
-/*
-   A malloc_state holds all of the bookkeeping for a space.
-   The main fields are:
-
-  Top
-    The topmost chunk of the currently active segment. Its size is
-    cached in topsize.  The actual size of topmost space is
-    topsize+TOP_FOOT_SIZE, which includes space reserved for adding
-    fenceposts and segment records if necessary when getting more
-    space from the system.  The size at which to autotrim top is
-    cached from mparams in trim_check, except that it is disabled if
-    an autotrim fails.
-
-  Designated victim (dv)
-    This is the preferred chunk for servicing small requests that
-    don't have exact fits.  It is normally the chunk split off most
-    recently to service another small request.  Its size is cached in
-    dvsize. The link fields of this chunk are not maintained since it
-    is not kept in a bin.
-
-  SmallBins
-    An array of bin headers for free chunks.  These bins hold chunks
-    with sizes less than MIN_LARGE_SIZE bytes. Each bin contains
-    chunks of all the same size, spaced 8 bytes apart.  To simplify
-    use in double-linked lists, each bin header acts as a malloc_chunk
-    pointing to the real first node, if it exists (else pointing to
-    itself).  This avoids special-casing for headers.  But to avoid
-    waste, we allocate only the fd/bk pointers of bins, and then use
-    repositioning tricks to treat these as the fields of a chunk.
-
-  TreeBins
-    Treebins are pointers to the roots of trees holding a range of
-    sizes. There are 2 equally spaced treebins for each power of two
-    from TREE_SHIFT to TREE_SHIFT+16. The last bin holds anything
-    larger.
-
-  Bin maps
-    There is one bit map for small bins ("smallmap") and one for
-    treebins ("treemap).  Each bin sets its bit when non-empty, and
-    clears the bit when empty.  Bit operations are then used to avoid
-    bin-by-bin searching -- nearly all "search" is done without ever
-    looking at bins that won't be selected.  The bit maps
-    conservatively use 32 bits per map word, even if on 64bit system.
-    For a good description of some of the bit-based techniques used
-    here, see Henry S. Warren Jr's book "Hacker's Delight" (and
-    supplement at http://hackersdelight.org/). Many of these are
-    intended to reduce the branchiness of paths through malloc etc, as
-    well as to reduce the number of memory locations read or written.
-
-  Segments
-    A list of segments headed by an embedded malloc_segment record
-    representing the initial space.
-
-  Address check support
-    The least_addr field is the least address ever obtained from
-    MORECORE or MMAP. Attempted frees and reallocs of any address less
-    than this are trapped (unless INSECURE is defined).
-
-  Magic tag
-    A cross-check field that should always hold same value as mparams.magic.
-
-  Max allowed footprint
-    The maximum allowed bytes to allocate from system (zero means no limit)
-
-  Flags
-    Bits recording whether to use MMAP, locks, or contiguous MORECORE
-
-  Statistics
-    Each space keeps track of current and maximum system memory
-    obtained via MORECORE or MMAP.
-
-  Trim support
-    Fields holding the amount of unused topmost memory that should trigger
-    trimming, and a counter to force periodic scanning to release unused
-    non-topmost segments.
-
-  Locking
-    If USE_LOCKS is defined, the "mutex" lock is acquired and released
-    around every public call using this mspace.
-
-  Extension support
-    A void* pointer and a size_t field that can be used to help implement
-    extensions to this malloc.
-*/
+  /*
+     A malloc_state holds all of the bookkeeping for a space.
+     The main fields are:
+
+    Top
+      The topmost chunk of the currently active segment. Its size is
+      cached in topsize.  The actual size of topmost space is
+      topsize+TOP_FOOT_SIZE, which includes space reserved for adding
+      fenceposts and segment records if necessary when getting more
+      space from the system.  The size at which to autotrim top is
+      cached from mparams in trim_check, except that it is disabled if
+      an autotrim fails.
+
+    Designated victim (dv)
+      This is the preferred chunk for servicing small requests that
+      don't have exact fits.  It is normally the chunk split off most
+      recently to service another small request.  Its size is cached in
+      dvsize. The link fields of this chunk are not maintained since it
+      is not kept in a bin.
+
+    SmallBins
+      An array of bin headers for free chunks.  These bins hold chunks
+      with sizes less than MIN_LARGE_SIZE bytes. Each bin contains
+      chunks of all the same size, spaced 8 bytes apart.  To simplify
+      use in double-linked lists, each bin header acts as a malloc_chunk
+      pointing to the real first node, if it exists (else pointing to
+      itself).  This avoids special-casing for headers.  But to avoid
+      waste, we allocate only the fd/bk pointers of bins, and then use
+      repositioning tricks to treat these as the fields of a chunk.
+
+    TreeBins
+      Treebins are pointers to the roots of trees holding a range of
+      sizes. There are 2 equally spaced treebins for each power of two
+      from TREE_SHIFT to TREE_SHIFT+16. The last bin holds anything
+      larger.
+
+    Bin maps
+      There is one bit map for small bins ("smallmap") and one for
+      treebins ("treemap").  Each bin sets its bit when non-empty, and
+      clears the bit when empty.  Bit operations are then used to avoid
+      bin-by-bin searching -- nearly all "search" is done without ever
+      looking at bins that won't be selected.  The bit maps
+      conservatively use 32 bits per map word, even on 64-bit systems.
+      For a good description of some of the bit-based techniques used
+      here, see Henry S. Warren Jr's book "Hacker's Delight" (and
+      supplement at http://hackersdelight.org/). Many of these are
+      intended to reduce the branchiness of paths through malloc etc, as
+      well as to reduce the number of memory locations read or written.
+
+    Segments
+      A list of segments headed by an embedded malloc_segment record
+      representing the initial space.
+
+    Address check support
+      The least_addr field is the least address ever obtained from
+      MORECORE or MMAP. Attempted frees and reallocs of any address less
+      than this are trapped (unless INSECURE is defined).
+
+    Magic tag
+      A cross-check field that should always hold same value as mparams.magic.
+
+    Max allowed footprint
+      The maximum allowed bytes to allocate from system (zero means no limit)
+
+    Flags
+      Bits recording whether to use MMAP, locks, or contiguous MORECORE
+
+    Statistics
+      Each space keeps track of current and maximum system memory
+      obtained via MORECORE or MMAP.
+
+    Trim support
+      Fields holding the amount of unused topmost memory that should trigger
+      trimming, and a counter to force periodic scanning to release unused
+      non-topmost segments.
+
+    Locking
+      If USE_LOCKS is defined, the "mutex" lock is acquired and released
+      around every public call using this mspace.
+
+    Extension support
+      A void* pointer and a size_t field that can be used to help implement
+      extensions to this malloc.
+  */
 
-/* Bin types, widths and sizes */
-#define NSMALLBINS (32U)
-#define NTREEBINS (32U)
-#define SMALLBIN_SHIFT (3U)
-#define SMALLBIN_WIDTH (SIZE_T_ONE << SMALLBIN_SHIFT)
-#define TREEBIN_SHIFT (8U)
-#define MIN_LARGE_SIZE (SIZE_T_ONE << TREEBIN_SHIFT)
-#define MAX_SMALL_SIZE (MIN_LARGE_SIZE - SIZE_T_ONE)
-#define MAX_SMALL_REQUEST (MAX_SMALL_SIZE - CHUNK_ALIGN_MASK - CHUNK_OVERHEAD)
+  /* Bin types, widths and sizes */
+  #define NSMALLBINS (32U)
+  #define NTREEBINS (32U)
+  #define SMALLBIN_SHIFT (3U)
+  #define SMALLBIN_WIDTH (SIZE_T_ONE << SMALLBIN_SHIFT)
+  #define TREEBIN_SHIFT (8U)
+  #define MIN_LARGE_SIZE (SIZE_T_ONE << TREEBIN_SHIFT)
+  #define MAX_SMALL_SIZE (MIN_LARGE_SIZE - SIZE_T_ONE)
+  #define MAX_SMALL_REQUEST (MAX_SMALL_SIZE - CHUNK_ALIGN_MASK - CHUNK_OVERHEAD)
 
 struct malloc_state {
 
@@ -2729,9 +2735,9 @@ struct malloc_state {
   size_t    max_footprint;
   size_t    footprint_limit;                         /* zero means no limit */
   flag_t    mflags;
-#if USE_LOCKS
+  #if USE_LOCKS
   MLOCK_T mutex;             /* locate lock among fields that rarely change */
-#endif                                                         /* USE_LOCKS */
+  #endif                                                       /* USE_LOCKS */
   msegment seg;
   void *   extp;                     /* Unused but available for extensions */
   size_t   exts;
@@ -2762,74 +2768,75 @@ struct malloc_params {
 
 static struct malloc_params mparams;
 
-/* Ensure mparams initialized */
-#define ensure_initialization() (void)(mparams.magic != 0 || init_mparams())
+  /* Ensure mparams initialized */
+  #define ensure_initialization() (void)(mparams.magic != 0 || init_mparams())
 
-#if !ONLY_MSPACES
+  #if !ONLY_MSPACES
 
 /* The global malloc_state used for all non-"mspace" calls */
 static struct malloc_state _gm_;
-  #define gm (&_gm_)
-  #define is_global(M) ((M) == &_gm_)
+    #define gm (&_gm_)
+    #define is_global(M) ((M) == &_gm_)
 
-#endif                                                     /* !ONLY_MSPACES */
+  #endif                                                   /* !ONLY_MSPACES */
 
-#define is_initialized(M) ((M)->top != 0)
+  #define is_initialized(M) ((M)->top != 0)
 
 /* -------------------------- system alloc setup ------------------------- */
 
 /* Operations on mflags */
 
-#define use_lock(M) ((M)->mflags & USE_LOCK_BIT)
-#define enable_lock(M) ((M)->mflags |= USE_LOCK_BIT)
-#if USE_LOCKS
-  #define disable_lock(M) ((M)->mflags &= ~USE_LOCK_BIT)
-#else
-  #define disable_lock(M)
-#endif
-
-#define use_mmap(M) ((M)->mflags & USE_MMAP_BIT)
-#define enable_mmap(M) ((M)->mflags |= USE_MMAP_BIT)
-#if HAVE_MMAP
-  #define disable_mmap(M) ((M)->mflags &= ~USE_MMAP_BIT)
-#else
-  #define disable_mmap(M)
-#endif
-
-#define use_noncontiguous(M) ((M)->mflags & USE_NONCONTIGUOUS_BIT)
-#define disable_contiguous(M) ((M)->mflags |= USE_NONCONTIGUOUS_BIT)
-
-#define set_lock(M, L) \
-  ((M)->mflags =       \
-       (L) ? ((M)->mflags | USE_LOCK_BIT) : ((M)->mflags & ~USE_LOCK_BIT))
-
-/* page-align a size */
-#define page_align(S) \
-  (((S) + (mparams.page_size - SIZE_T_ONE)) & ~(mparams.page_size - SIZE_T_ONE))
-
-/* granularity-align a size */
-#define granularity_align(S)                    \
-  (((S) + (mparams.granularity - SIZE_T_ONE)) & \
-   ~(mparams.granularity - SIZE_T_ONE))
-
-/* For mmap, use granularity alignment on windows, else page-align */
-#ifdef WIN32
-  #define mmap_align(S) granularity_align(S)
-#else
-  #define mmap_align(S) page_align(S)
-#endif
-
-/* For sys_alloc, enough padding to ensure can malloc request on success */
-#define SYS_ALLOC_PADDING (TOP_FOOT_SIZE + MALLOC_ALIGNMENT)
-
-#define is_page_aligned(S) \
-  (((size_t)(S) & (mparams.page_size - SIZE_T_ONE)) == 0)
-#define is_granularity_aligned(S) \
-  (((size_t)(S) & (mparams.granularity - SIZE_T_ONE)) == 0)
-
-/*  True if segment S holds address A */
-#define segment_holds(S, A) \
-  ((char *)(A) >= S->base && (char *)(A) < S->base + S->size)
+  #define use_lock(M) ((M)->mflags & USE_LOCK_BIT)
+  #define enable_lock(M) ((M)->mflags |= USE_LOCK_BIT)
+  #if USE_LOCKS
+    #define disable_lock(M) ((M)->mflags &= ~USE_LOCK_BIT)
+  #else
+    #define disable_lock(M)
+  #endif
+
+  #define use_mmap(M) ((M)->mflags & USE_MMAP_BIT)
+  #define enable_mmap(M) ((M)->mflags |= USE_MMAP_BIT)
+  #if HAVE_MMAP
+    #define disable_mmap(M) ((M)->mflags &= ~USE_MMAP_BIT)
+  #else
+    #define disable_mmap(M)
+  #endif
+
+  #define use_noncontiguous(M) ((M)->mflags & USE_NONCONTIGUOUS_BIT)
+  #define disable_contiguous(M) ((M)->mflags |= USE_NONCONTIGUOUS_BIT)
+
+  #define set_lock(M, L) \
+    ((M)->mflags =       \
+         (L) ? ((M)->mflags | USE_LOCK_BIT) : ((M)->mflags & ~USE_LOCK_BIT))
+
+  /* page-align a size */
+  #define page_align(S)                         \
+    (((S) + (mparams.page_size - SIZE_T_ONE)) & \
+     ~(mparams.page_size - SIZE_T_ONE))
+
+  /* granularity-align a size */
+  #define granularity_align(S)                    \
+    (((S) + (mparams.granularity - SIZE_T_ONE)) & \
+     ~(mparams.granularity - SIZE_T_ONE))
+
+  /* For mmap, use granularity alignment on windows, else page-align */
+  #ifdef WIN32
+    #define mmap_align(S) granularity_align(S)
+  #else
+    #define mmap_align(S) page_align(S)
+  #endif
+
+  /* For sys_alloc, enough padding so the request can be malloced on success */
+  #define SYS_ALLOC_PADDING (TOP_FOOT_SIZE + MALLOC_ALIGNMENT)
+
+  #define is_page_aligned(S) \
+    (((size_t)(S) & (mparams.page_size - SIZE_T_ONE)) == 0)
+  #define is_granularity_aligned(S) \
+    (((size_t)(S) & (mparams.granularity - SIZE_T_ONE)) == 0)
+
+  /*  True if segment S holds address A */
+  #define segment_holds(S, A) \
+    ((char *)(A) >= S->base && (char *)(A) < S->base + S->size)
 
 /* Return segment holding given address */
 static msegmentptr segment_holding(mstate m, char *addr) {
@@ -2857,20 +2864,20 @@ static int has_segment_link(mstate m, msegmentptr ss) {
 
 }
 
-#ifndef MORECORE_CANNOT_TRIM
-  #define should_trim(M, s) ((s) > (M)->trim_check)
-#else                                               /* MORECORE_CANNOT_TRIM */
-  #define should_trim(M, s) (0)
-#endif                                              /* MORECORE_CANNOT_TRIM */
+  #ifndef MORECORE_CANNOT_TRIM
+    #define should_trim(M, s) ((s) > (M)->trim_check)
+  #else                                             /* MORECORE_CANNOT_TRIM */
+    #define should_trim(M, s) (0)
+  #endif                                            /* MORECORE_CANNOT_TRIM */
 
-/*
-  TOP_FOOT_SIZE is padding at the end of a segment, including space
-  that may be needed to place segment records and fenceposts when new
-  noncontiguous segments are added.
-*/
-#define TOP_FOOT_SIZE                                                        \
-  (align_offset(chunk2mem(0)) + pad_request(sizeof(struct malloc_segment)) + \
-   MIN_CHUNK_SIZE)
+  /*
+    TOP_FOOT_SIZE is padding at the end of a segment, including space
+    that may be needed to place segment records and fenceposts when new
+    noncontiguous segments are added.
+  */
+  #define TOP_FOOT_SIZE                                                        \
+    (align_offset(chunk2mem(0)) + pad_request(sizeof(struct malloc_segment)) + \
+     MIN_CHUNK_SIZE)
 
 /* -------------------------------  Hooks -------------------------------- */
 
@@ -2880,25 +2887,25 @@ static int has_segment_link(mstate m, msegmentptr ss) {
   anything you like.
 */
 
-#if USE_LOCKS
-  #define PREACTION(M) ((use_lock(M)) ? ACQUIRE_LOCK(&(M)->mutex) : 0)
-  #define POSTACTION(M)                           \
-    {                                             \
-                                                  \
-      if (use_lock(M)) RELEASE_LOCK(&(M)->mutex); \
-                                                  \
-    }
-#else                                                          /* USE_LOCKS */
+  #if USE_LOCKS
+    #define PREACTION(M) ((use_lock(M)) ? ACQUIRE_LOCK(&(M)->mutex) : 0)
+    #define POSTACTION(M)                           \
+      {                                             \
+                                                    \
+        if (use_lock(M)) RELEASE_LOCK(&(M)->mutex); \
+                                                    \
+      }
+  #else                                                        /* USE_LOCKS */
 
-  #ifndef PREACTION
-    #define PREACTION(M) (0)
-  #endif                                                       /* PREACTION */
+    #ifndef PREACTION
+      #define PREACTION(M) (0)
+    #endif                                                     /* PREACTION */
 
-  #ifndef POSTACTION
-    #define POSTACTION(M)
-  #endif                                                      /* POSTACTION */
+    #ifndef POSTACTION
+      #define POSTACTION(M)
+    #endif                                                    /* POSTACTION */
 
-#endif                                                         /* USE_LOCKS */
+  #endif                                                       /* USE_LOCKS */
 
 /*
   CORRUPTION_ERROR_ACTION is triggered upon detected bad addresses.
@@ -2908,7 +2915,7 @@ static int has_segment_link(mstate m, msegmentptr ss) {
   useful in custom actions that try to help diagnose errors.
 */
 
-#if PROCEED_ON_ERROR
+  #if PROCEED_ON_ERROR
 
 /* A count of the number of corruption errors causing resets */
 int malloc_corruption_error_count;
@@ -2916,39 +2923,39 @@ int malloc_corruption_error_count;
 /* default corruption action */
 static void reset_on_error(mstate m);
 
-  #define CORRUPTION_ERROR_ACTION(m) reset_on_error(m)
-  #define USAGE_ERROR_ACTION(m, p)
+    #define CORRUPTION_ERROR_ACTION(m) reset_on_error(m)
+    #define USAGE_ERROR_ACTION(m, p)
 
-#else                                                   /* PROCEED_ON_ERROR */
+  #else                                                 /* PROCEED_ON_ERROR */
 
-  #ifndef CORRUPTION_ERROR_ACTION
-    #define CORRUPTION_ERROR_ACTION(m) ABORT
-  #endif                                         /* CORRUPTION_ERROR_ACTION */
+    #ifndef CORRUPTION_ERROR_ACTION
+      #define CORRUPTION_ERROR_ACTION(m) ABORT
+    #endif                                       /* CORRUPTION_ERROR_ACTION */
 
-  #ifndef USAGE_ERROR_ACTION
-    #define USAGE_ERROR_ACTION(m, p) ABORT
-  #endif                                              /* USAGE_ERROR_ACTION */
+    #ifndef USAGE_ERROR_ACTION
+      #define USAGE_ERROR_ACTION(m, p) ABORT
+    #endif                                            /* USAGE_ERROR_ACTION */
 
-#endif                                                  /* PROCEED_ON_ERROR */
+  #endif                                                /* PROCEED_ON_ERROR */
 
 /* -------------------------- Debugging setup ---------------------------- */
 
-#if !DEBUG
+  #if !DEBUG
 
-  #define check_free_chunk(M, P)
-  #define check_inuse_chunk(M, P)
-  #define check_malloced_chunk(M, P, N)
-  #define check_mmapped_chunk(M, P)
-  #define check_malloc_state(M)
-  #define check_top_chunk(M, P)
+    #define check_free_chunk(M, P)
+    #define check_inuse_chunk(M, P)
+    #define check_malloced_chunk(M, P, N)
+    #define check_mmapped_chunk(M, P)
+    #define check_malloc_state(M)
+    #define check_top_chunk(M, P)
 
-#else                                                              /* DEBUG */
-  #define check_free_chunk(M, P) do_check_free_chunk(M, P)
-  #define check_inuse_chunk(M, P) do_check_inuse_chunk(M, P)
-  #define check_top_chunk(M, P) do_check_top_chunk(M, P)
-  #define check_malloced_chunk(M, P, N) do_check_malloced_chunk(M, P, N)
-  #define check_mmapped_chunk(M, P) do_check_mmapped_chunk(M, P)
-  #define check_malloc_state(M) do_check_malloc_state(M)
+  #else                                                            /* DEBUG */
+    #define check_free_chunk(M, P) do_check_free_chunk(M, P)
+    #define check_inuse_chunk(M, P) do_check_inuse_chunk(M, P)
+    #define check_top_chunk(M, P) do_check_top_chunk(M, P)
+    #define check_malloced_chunk(M, P, N) do_check_malloced_chunk(M, P, N)
+    #define check_mmapped_chunk(M, P) do_check_mmapped_chunk(M, P)
+    #define check_malloc_state(M) do_check_malloc_state(M)
 
 static void   do_check_any_chunk(mstate m, mchunkptr p);
 static void   do_check_top_chunk(mstate m, mchunkptr p);
@@ -2962,193 +2969,194 @@ static void   do_check_smallbin(mstate m, bindex_t i);
 static void   do_check_malloc_state(mstate m);
 static int    bin_find(mstate m, mchunkptr x);
 static size_t traverse_and_check(mstate m);
-#endif                                                             /* DEBUG */
+  #endif                                                           /* DEBUG */
 
 /* ---------------------------- Indexing Bins ---------------------------- */
 
-#define is_small(s) (((s) >> SMALLBIN_SHIFT) < NSMALLBINS)
-#define small_index(s) (bindex_t)((s) >> SMALLBIN_SHIFT)
-#define small_index2size(i) ((i) << SMALLBIN_SHIFT)
-#define MIN_SMALL_INDEX (small_index(MIN_CHUNK_SIZE))
-
-/* addressing by index. See above about smallbin repositioning */
-#define smallbin_at(M, i) ((sbinptr)((char *)&((M)->smallbins[(i) << 1])))
-#define treebin_at(M, i) (&((M)->treebins[i]))
-
-/* assign tree index for size S to variable I. Use x86 asm if possible  */
-#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
-  #define compute_tree_index(S, I)                                         \
-    {                                                                      \
-                                                                           \
-      unsigned int X = S >> TREEBIN_SHIFT;                                 \
-      if (X == 0)                                                          \
-        I = 0;                                                             \
-      else if (X > 0xFFFF)                                                 \
-        I = NTREEBINS - 1;                                                 \
-      else {                                                               \
-                                                                           \
-        unsigned int K = (unsigned)sizeof(X) * __CHAR_BIT__ - 1 -          \
-                         (unsigned)__builtin_clz(X);                       \
-        I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT - 1)) & 1))); \
-                                                                           \
-      }                                                                    \
-                                                                           \
-    }
+  #define is_small(s) (((s) >> SMALLBIN_SHIFT) < NSMALLBINS)
+  #define small_index(s) (bindex_t)((s) >> SMALLBIN_SHIFT)
+  #define small_index2size(i) ((i) << SMALLBIN_SHIFT)
+  #define MIN_SMALL_INDEX (small_index(MIN_CHUNK_SIZE))
+
+  /* addressing by index. See above about smallbin repositioning */
+  #define smallbin_at(M, i) ((sbinptr)((char *)&((M)->smallbins[(i) << 1])))
+  #define treebin_at(M, i) (&((M)->treebins[i]))
+
+  /* assign tree index for size S to variable I. Use x86 asm if possible  */
+  #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
+    #define compute_tree_index(S, I)                                         \
+      {                                                                      \
+                                                                             \
+        unsigned int X = S >> TREEBIN_SHIFT;                                 \
+        if (X == 0)                                                          \
+          I = 0;                                                             \
+        else if (X > 0xFFFF)                                                 \
+          I = NTREEBINS - 1;                                                 \
+        else {                                                               \
+                                                                             \
+          unsigned int K = (unsigned)sizeof(X) * __CHAR_BIT__ - 1 -          \
+                           (unsigned)__builtin_clz(X);                       \
+          I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT - 1)) & 1))); \
+                                                                             \
+        }                                                                    \
+                                                                             \
+      }
 
-#elif defined(__INTEL_COMPILER)
-  #define compute_tree_index(S, I)                                         \
-    {                                                                      \
-                                                                           \
-      size_t X = S >> TREEBIN_SHIFT;                                       \
-      if (X == 0)                                                          \
-        I = 0;                                                             \
-      else if (X > 0xFFFF)                                                 \
-        I = NTREEBINS - 1;                                                 \
-      else {                                                               \
-                                                                           \
-        unsigned int K = _bit_scan_reverse(X);                             \
-        I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT - 1)) & 1))); \
-                                                                           \
-      }                                                                    \
-                                                                           \
-    }
+  #elif defined(__INTEL_COMPILER)
+    #define compute_tree_index(S, I)                                         \
+      {                                                                      \
+                                                                             \
+        size_t X = S >> TREEBIN_SHIFT;                                       \
+        if (X == 0)                                                          \
+          I = 0;                                                             \
+        else if (X > 0xFFFF)                                                 \
+          I = NTREEBINS - 1;                                                 \
+        else {                                                               \
+                                                                             \
+          unsigned int K = _bit_scan_reverse(X);                             \
+          I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT - 1)) & 1))); \
+                                                                             \
+        }                                                                    \
+                                                                             \
+      }
 
-#elif defined(_MSC_VER) && _MSC_VER >= 1300
-  #define compute_tree_index(S, I)                                         \
-    {                                                                      \
-                                                                           \
-      size_t X = S >> TREEBIN_SHIFT;                                       \
-      if (X == 0)                                                          \
-        I = 0;                                                             \
-      else if (X > 0xFFFF)                                                 \
-        I = NTREEBINS - 1;                                                 \
-      else {                                                               \
-                                                                           \
-        unsigned int K;                                                    \
-        _BitScanReverse((DWORD *)&K, (DWORD)X);                            \
-        I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT - 1)) & 1))); \
-                                                                           \
-      }                                                                    \
-                                                                           \
-    }
+  #elif defined(_MSC_VER) && _MSC_VER >= 1300
+    #define compute_tree_index(S, I)                                         \
+      {                                                                      \
+                                                                             \
+        size_t X = S >> TREEBIN_SHIFT;                                       \
+        if (X == 0)                                                          \
+          I = 0;                                                             \
+        else if (X > 0xFFFF)                                                 \
+          I = NTREEBINS - 1;                                                 \
+        else {                                                               \
+                                                                             \
+          unsigned int K;                                                    \
+          _BitScanReverse((DWORD *)&K, (DWORD)X);                            \
+          I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT - 1)) & 1))); \
+                                                                             \
+        }                                                                    \
+                                                                             \
+      }
 
-#else                                                               /* GNUC */
-  #define compute_tree_index(S, I)                             \
-    {                                                          \
-                                                               \
-      size_t X = S >> TREEBIN_SHIFT;                           \
-      if (X == 0)                                              \
-        I = 0;                                                 \
-      else if (X > 0xFFFF)                                     \
-        I = NTREEBINS - 1;                                     \
-      else {                                                   \
-                                                               \
-        unsigned int Y = (unsigned int)X;                      \
-        unsigned int N = ((Y - 0x100) >> 16) & 8;              \
-        unsigned int K = (((Y <<= N) - 0x1000) >> 16) & 4;     \
-        N += K;                                                \
-        N += K = (((Y <<= K) - 0x4000) >> 16) & 2;             \
-        K = 14 - N + ((Y <<= K) >> 15);                        \
-        I = (K << 1) + ((S >> (K + (TREEBIN_SHIFT - 1)) & 1)); \
-                                                               \
-      }                                                        \
-                                                               \
-    }
-#endif                                                              /* GNUC */
+  #else                                                             /* GNUC */
+    #define compute_tree_index(S, I)                             \
+      {                                                          \
+                                                                 \
+        size_t X = S >> TREEBIN_SHIFT;                           \
+        if (X == 0)                                              \
+          I = 0;                                                 \
+        else if (X > 0xFFFF)                                     \
+          I = NTREEBINS - 1;                                     \
+        else {                                                   \
+                                                                 \
+          unsigned int Y = (unsigned int)X;                      \
+          unsigned int N = ((Y - 0x100) >> 16) & 8;              \
+          unsigned int K = (((Y <<= N) - 0x1000) >> 16) & 4;     \
+          N += K;                                                \
+          N += K = (((Y <<= K) - 0x4000) >> 16) & 2;             \
+          K = 14 - N + ((Y <<= K) >> 15);                        \
+          I = (K << 1) + ((S >> (K + (TREEBIN_SHIFT - 1)) & 1)); \
+                                                                 \
+        }                                                        \
+                                                                 \
+      }
+  #endif                                                            /* GNUC */
 
-/* Bit representing maximum resolved size in a treebin at i */
-#define bit_for_tree_index(i) \
-  (i == NTREEBINS - 1) ? (SIZE_T_BITSIZE - 1) : (((i) >> 1) + TREEBIN_SHIFT - 2)
+  /* Bit representing maximum resolved size in a treebin at i */
+  #define bit_for_tree_index(i)                 \
+    (i == NTREEBINS - 1) ? (SIZE_T_BITSIZE - 1) \
+                         : (((i) >> 1) + TREEBIN_SHIFT - 2)
 
-/* Shift placing maximum resolved bit in a treebin at i as sign bit */
-#define leftshift_for_tree_index(i) \
-  ((i == NTREEBINS - 1)             \
-       ? 0                          \
-       : ((SIZE_T_BITSIZE - SIZE_T_ONE) - (((i) >> 1) + TREEBIN_SHIFT - 2)))
+  /* Shift placing maximum resolved bit in a treebin at i as sign bit */
+  #define leftshift_for_tree_index(i) \
+    ((i == NTREEBINS - 1)             \
+         ? 0                          \
+         : ((SIZE_T_BITSIZE - SIZE_T_ONE) - (((i) >> 1) + TREEBIN_SHIFT - 2)))
 
-/* The size of the smallest chunk held in bin with index i */
-#define minsize_for_tree_index(i)                 \
-  ((SIZE_T_ONE << (((i) >> 1) + TREEBIN_SHIFT)) | \
-   (((size_t)((i)&SIZE_T_ONE)) << (((i) >> 1) + TREEBIN_SHIFT - 1)))
+  /* The size of the smallest chunk held in bin with index i */
+  #define minsize_for_tree_index(i)                 \
+    ((SIZE_T_ONE << (((i) >> 1) + TREEBIN_SHIFT)) | \
+     (((size_t)((i)&SIZE_T_ONE)) << (((i) >> 1) + TREEBIN_SHIFT - 1)))
 
-/* ------------------------ Operations on bin maps ----------------------- */
+  /* ------------------------ Operations on bin maps ----------------------- */
 
-/* bit corresponding to given index */
-#define idx2bit(i) ((binmap_t)(1) << (i))
+  /* bit corresponding to given index */
+  #define idx2bit(i) ((binmap_t)(1) << (i))
 
-/* Mark/Clear bits with given index */
-#define mark_smallmap(M, i) ((M)->smallmap |= idx2bit(i))
-#define clear_smallmap(M, i) ((M)->smallmap &= ~idx2bit(i))
-#define smallmap_is_marked(M, i) ((M)->smallmap & idx2bit(i))
+  /* Mark/Clear bits with given index */
+  #define mark_smallmap(M, i) ((M)->smallmap |= idx2bit(i))
+  #define clear_smallmap(M, i) ((M)->smallmap &= ~idx2bit(i))
+  #define smallmap_is_marked(M, i) ((M)->smallmap & idx2bit(i))
 
-#define mark_treemap(M, i) ((M)->treemap |= idx2bit(i))
-#define clear_treemap(M, i) ((M)->treemap &= ~idx2bit(i))
-#define treemap_is_marked(M, i) ((M)->treemap & idx2bit(i))
+  #define mark_treemap(M, i) ((M)->treemap |= idx2bit(i))
+  #define clear_treemap(M, i) ((M)->treemap &= ~idx2bit(i))
+  #define treemap_is_marked(M, i) ((M)->treemap & idx2bit(i))
 
-/* isolate the least set bit of a bitmap */
-#define least_bit(x) ((x) & -(x))
+  /* isolate the least set bit of a bitmap */
+  #define least_bit(x) ((x) & -(x))
 
-/* mask with all bits to left of least bit of x on */
-#define left_bits(x) ((x << 1) | -(x << 1))
+  /* mask with all bits to left of least bit of x on */
+  #define left_bits(x) ((x << 1) | -(x << 1))
 
-/* mask with all bits to left of or equal to least bit of x on */
-#define same_or_left_bits(x) ((x) | -(x))
+  /* mask with all bits to left of or equal to least bit of x on */
+  #define same_or_left_bits(x) ((x) | -(x))
 
 /* index corresponding to given bit. Use x86 asm if possible */
 
-#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
-  #define compute_bit2idx(X, I) \
-    {                           \
-                                \
-      unsigned int J;           \
-      J = __builtin_ctz(X);     \
-      I = (bindex_t)J;          \
-                                \
-    }
+  #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
+    #define compute_bit2idx(X, I) \
+      {                           \
+                                  \
+        unsigned int J;           \
+        J = __builtin_ctz(X);     \
+        I = (bindex_t)J;          \
+                                  \
+      }
 
-#elif defined(__INTEL_COMPILER)
-  #define compute_bit2idx(X, I) \
-    {                           \
-                                \
-      unsigned int J;           \
-      J = _bit_scan_forward(X); \
-      I = (bindex_t)J;          \
-                                \
-    }
+  #elif defined(__INTEL_COMPILER)
+    #define compute_bit2idx(X, I) \
+      {                           \
+                                  \
+        unsigned int J;           \
+        J = _bit_scan_forward(X); \
+        I = (bindex_t)J;          \
+                                  \
+      }
 
-#elif defined(_MSC_VER) && _MSC_VER >= 1300
-  #define compute_bit2idx(X, I)        \
-    {                                  \
-                                       \
-      unsigned int J;                  \
-      _BitScanForward((DWORD *)&J, X); \
-      I = (bindex_t)J;                 \
-                                       \
-    }
+  #elif defined(_MSC_VER) && _MSC_VER >= 1300
+    #define compute_bit2idx(X, I)        \
+      {                                  \
+                                         \
+        unsigned int J;                  \
+        _BitScanForward((DWORD *)&J, X); \
+        I = (bindex_t)J;                 \
+                                         \
+      }
 
-#elif USE_BUILTIN_FFS
-  #define compute_bit2idx(X, I) I = ffs(X) - 1
-
-#else
-  #define compute_bit2idx(X, I)            \
-    {                                      \
-                                           \
-      unsigned int Y = X - 1;              \
-      unsigned int K = Y >> (16 - 4) & 16; \
-      unsigned int N = K;                  \
-      Y >>= K;                             \
-      N += K = Y >> (8 - 3) & 8;           \
-      Y >>= K;                             \
-      N += K = Y >> (4 - 2) & 4;           \
-      Y >>= K;                             \
-      N += K = Y >> (2 - 1) & 2;           \
-      Y >>= K;                             \
-      N += K = Y >> (1 - 0) & 1;           \
-      Y >>= K;                             \
-      I = (bindex_t)(N + Y);               \
-                                           \
-    }
-#endif                                                              /* GNUC */
+  #elif USE_BUILTIN_FFS
+    #define compute_bit2idx(X, I) I = ffs(X) - 1
+
+  #else
+    #define compute_bit2idx(X, I)            \
+      {                                      \
+                                             \
+        unsigned int Y = X - 1;              \
+        unsigned int K = Y >> (16 - 4) & 16; \
+        unsigned int N = K;                  \
+        Y >>= K;                             \
+        N += K = Y >> (8 - 3) & 8;           \
+        Y >>= K;                             \
+        N += K = Y >> (4 - 2) & 4;           \
+        Y >>= K;                             \
+        N += K = Y >> (2 - 1) & 2;           \
+        Y >>= K;                             \
+        N += K = Y >> (1 - 0) & 1;           \
+        Y >>= K;                             \
+        I = (bindex_t)(N + Y);               \
+                                             \
+      }
+  #endif                                                            /* GNUC */
 
 /* ----------------------- Runtime Check Support ------------------------- */
 
@@ -3178,92 +3186,92 @@ static size_t traverse_and_check(mstate m);
   next, etc). This turns out to be cheaper than relying on hashes.
 */
 
-#if !INSECURE
-  /* Check if address a is at least as high as any from MORECORE or MMAP */
-  #define ok_address(M, a) ((char *)(a) >= (M)->least_addr)
-  /* Check if address of next chunk n is higher than base chunk p */
-  #define ok_next(p, n) ((char *)(p) < (char *)(n))
-  /* Check if p has inuse status */
-  #define ok_inuse(p) is_inuse(p)
-  /* Check if p has its pinuse bit on */
-  #define ok_pinuse(p) pinuse(p)
-
-#else                                                          /* !INSECURE */
-  #define ok_address(M, a) (1)
-  #define ok_next(b, n) (1)
-  #define ok_inuse(p) (1)
-  #define ok_pinuse(p) (1)
-#endif                                                         /* !INSECURE */
-
-#if (FOOTERS && !INSECURE)
-  /* Check if (alleged) mstate m has expected magic field */
-  #define ok_magic(M) ((M)->magic == mparams.magic)
-#else                                             /* (FOOTERS && !INSECURE) */
-  #define ok_magic(M) (1)
-#endif                                            /* (FOOTERS && !INSECURE) */
-
-/* In gcc, use __builtin_expect to minimize impact of checks */
-#if !INSECURE
-  #if defined(__GNUC__) && __GNUC__ >= 3
-    #define RTCHECK(e) __builtin_expect(e, 1)
-  #else                                                             /* GNUC */
-    #define RTCHECK(e) (e)
-  #endif                                                            /* GNUC */
-#else                                                          /* !INSECURE */
-  #define RTCHECK(e) (1)
-#endif                                                         /* !INSECURE */
+  #if !INSECURE
+    /* Check if address a is at least as high as any from MORECORE or MMAP */
+    #define ok_address(M, a) ((char *)(a) >= (M)->least_addr)
+    /* Check if address of next chunk n is higher than base chunk p */
+    #define ok_next(p, n) ((char *)(p) < (char *)(n))
+    /* Check if p has inuse status */
+    #define ok_inuse(p) is_inuse(p)
+    /* Check if p has its pinuse bit on */
+    #define ok_pinuse(p) pinuse(p)
+
+  #else                                                        /* !INSECURE */
+    #define ok_address(M, a) (1)
+    #define ok_next(b, n) (1)
+    #define ok_inuse(p) (1)
+    #define ok_pinuse(p) (1)
+  #endif                                                       /* !INSECURE */
+
+  #if (FOOTERS && !INSECURE)
+    /* Check if (alleged) mstate m has expected magic field */
+    #define ok_magic(M) ((M)->magic == mparams.magic)
+  #else                                           /* (FOOTERS && !INSECURE) */
+    #define ok_magic(M) (1)
+  #endif                                          /* (FOOTERS && !INSECURE) */
+
+  /* In gcc, use __builtin_expect to minimize impact of checks */
+  #if !INSECURE
+    #if defined(__GNUC__) && __GNUC__ >= 3
+      #define RTCHECK(e) __builtin_expect(e, 1)
+    #else                                                           /* GNUC */
+      #define RTCHECK(e) (e)
+    #endif                                                          /* GNUC */
+  #else                                                        /* !INSECURE */
+    #define RTCHECK(e) (1)
+  #endif                                                       /* !INSECURE */
 
 /* macros to set up inuse chunks with or without footers */
 
-#if !FOOTERS
+  #if !FOOTERS
 
-  #define mark_inuse_foot(M, p, s)
+    #define mark_inuse_foot(M, p, s)
 
-  /* Macros for setting head/foot of non-mmapped chunks */
+    /* Macros for setting head/foot of non-mmapped chunks */
 
-  /* Set cinuse bit and pinuse bit of next chunk */
-  #define set_inuse(M, p, s)                                  \
-    ((p)->head = (((p)->head & PINUSE_BIT) | s | CINUSE_BIT), \
-     ((mchunkptr)(((char *)(p)) + (s)))->head |= PINUSE_BIT)
+    /* Set cinuse bit and pinuse bit of next chunk */
+    #define set_inuse(M, p, s)                                  \
+      ((p)->head = (((p)->head & PINUSE_BIT) | s | CINUSE_BIT), \
+       ((mchunkptr)(((char *)(p)) + (s)))->head |= PINUSE_BIT)
 
-  /* Set cinuse and pinuse of this chunk and pinuse of next chunk */
-  #define set_inuse_and_pinuse(M, p, s)         \
-    ((p)->head = (s | PINUSE_BIT | CINUSE_BIT), \
-     ((mchunkptr)(((char *)(p)) + (s)))->head |= PINUSE_BIT)
+    /* Set cinuse and pinuse of this chunk and pinuse of next chunk */
+    #define set_inuse_and_pinuse(M, p, s)         \
+      ((p)->head = (s | PINUSE_BIT | CINUSE_BIT), \
+       ((mchunkptr)(((char *)(p)) + (s)))->head |= PINUSE_BIT)
 
-  /* Set size, cinuse and pinuse bit of this chunk */
-  #define set_size_and_pinuse_of_inuse_chunk(M, p, s) \
-    ((p)->head = (s | PINUSE_BIT | CINUSE_BIT))
+    /* Set size, cinuse and pinuse bit of this chunk */
+    #define set_size_and_pinuse_of_inuse_chunk(M, p, s) \
+      ((p)->head = (s | PINUSE_BIT | CINUSE_BIT))
 
-#else                                                            /* FOOTERS */
+  #else                                                          /* FOOTERS */
 
-  /* Set foot of inuse chunk to be xor of mstate and seed */
-  #define mark_inuse_foot(M, p, s)                 \
-    (((mchunkptr)((char *)(p) + (s)))->prev_foot = \
-         ((size_t)(M) ^ mparams.magic))
+    /* Set foot of inuse chunk to be xor of mstate and seed */
+    #define mark_inuse_foot(M, p, s)                 \
+      (((mchunkptr)((char *)(p) + (s)))->prev_foot = \
+           ((size_t)(M) ^ mparams.magic))
 
-  #define get_mstate_for(p)                                            \
-    ((mstate)(((mchunkptr)((char *)(p) + (chunksize(p))))->prev_foot ^ \
-              mparams.magic))
+    #define get_mstate_for(p)                                            \
+      ((mstate)(((mchunkptr)((char *)(p) + (chunksize(p))))->prev_foot ^ \
+                mparams.magic))
 
-  #define set_inuse(M, p, s)                                   \
-    ((p)->head = (((p)->head & PINUSE_BIT) | s | CINUSE_BIT),  \
-     (((mchunkptr)(((char *)(p)) + (s)))->head |= PINUSE_BIT), \
-     mark_inuse_foot(M, p, s))
+    #define set_inuse(M, p, s)                                   \
+      ((p)->head = (((p)->head & PINUSE_BIT) | s | CINUSE_BIT),  \
+       (((mchunkptr)(((char *)(p)) + (s)))->head |= PINUSE_BIT), \
+       mark_inuse_foot(M, p, s))
 
-  #define set_inuse_and_pinuse(M, p, s)                        \
-    ((p)->head = (s | PINUSE_BIT | CINUSE_BIT),                \
-     (((mchunkptr)(((char *)(p)) + (s)))->head |= PINUSE_BIT), \
-     mark_inuse_foot(M, p, s))
+    #define set_inuse_and_pinuse(M, p, s)                        \
+      ((p)->head = (s | PINUSE_BIT | CINUSE_BIT),                \
+       (((mchunkptr)(((char *)(p)) + (s)))->head |= PINUSE_BIT), \
+       mark_inuse_foot(M, p, s))
 
-  #define set_size_and_pinuse_of_inuse_chunk(M, p, s) \
-    ((p)->head = (s | PINUSE_BIT | CINUSE_BIT), mark_inuse_foot(M, p, s))
+    #define set_size_and_pinuse_of_inuse_chunk(M, p, s) \
+      ((p)->head = (s | PINUSE_BIT | CINUSE_BIT), mark_inuse_foot(M, p, s))
 
-#endif                                                          /* !FOOTERS */
+  #endif                                                        /* !FOOTERS */
 
 /* ---------------------------- setting mparams -------------------------- */
 
-#if LOCK_AT_FORK
+  #if LOCK_AT_FORK
 static void pre_fork(void) {
 
   ACQUIRE_LOCK(&(gm)->mutex);
@@ -3282,14 +3290,14 @@ static void post_fork_child(void) {
 
 }
 
-#endif                                                      /* LOCK_AT_FORK */
+  #endif                                                    /* LOCK_AT_FORK */
 
 /* Initialize mparams */
 static int init_mparams(void) {
 
-#ifdef NEED_GLOBAL_LOCK_INIT
+  #ifdef NEED_GLOBAL_LOCK_INIT
   if (malloc_global_mutex_status <= 0) init_malloc_global_mutex();
-#endif
+  #endif
 
   ACQUIRE_MALLOC_GLOBAL_LOCK();
   if (mparams.magic == 0) {
@@ -3298,10 +3306,10 @@ static int init_mparams(void) {
     size_t psize;
     size_t gsize;
 
-#ifndef WIN32
+  #ifndef WIN32
     psize = malloc_getpagesize;
     gsize = ((DEFAULT_GRANULARITY != 0) ? DEFAULT_GRANULARITY : psize);
-#else                                                              /* WIN32 */
+  #else                                                            /* WIN32 */
     {
 
       SYSTEM_INFO system_info;
@@ -3313,7 +3321,7 @@ static int init_mparams(void) {
 
     }
 
-#endif                                                             /* WIN32 */
+  #endif                                                           /* WIN32 */
 
     /* Sanity-check configuration:
        size_t must be unsigned and as wide as pointer type.
@@ -3332,25 +3340,25 @@ static int init_mparams(void) {
     mparams.page_size = psize;
     mparams.mmap_threshold = DEFAULT_MMAP_THRESHOLD;
     mparams.trim_threshold = DEFAULT_TRIM_THRESHOLD;
-#if MORECORE_CONTIGUOUS
+  #if MORECORE_CONTIGUOUS
     mparams.default_mflags = USE_LOCK_BIT | USE_MMAP_BIT;
-#else                                                /* MORECORE_CONTIGUOUS */
+  #else                                              /* MORECORE_CONTIGUOUS */
     mparams.default_mflags =
         USE_LOCK_BIT | USE_MMAP_BIT | USE_NONCONTIGUOUS_BIT;
-#endif                                               /* MORECORE_CONTIGUOUS */
+  #endif                                             /* MORECORE_CONTIGUOUS */
 
-#if !ONLY_MSPACES
+  #if !ONLY_MSPACES
     /* Set up lock for main malloc area */
     gm->mflags = mparams.default_mflags;
     (void)INITIAL_LOCK(&gm->mutex);
-#endif
-#if LOCK_AT_FORK
+  #endif
+  #if LOCK_AT_FORK
     pthread_atfork(&pre_fork, &post_fork_parent, &post_fork_child);
-#endif
+  #endif
 
     {
 
-#if USE_DEV_RANDOM
+  #if USE_DEV_RANDOM
       int           fd;
       unsigned char buf[sizeof(size_t)];
       /* Try to use /dev/urandom, else fall back on using time */
@@ -3362,14 +3370,14 @@ static int init_mparams(void) {
 
       } else
 
-#endif                                                    /* USE_DEV_RANDOM */
-#ifdef WIN32
+  #endif                                                  /* USE_DEV_RANDOM */
+  #ifdef WIN32
         magic = (size_t)(GetTickCount() ^ (size_t)0x55555555U);
-#elif defined(LACKS_TIME_H)
+  #elif defined(LACKS_TIME_H)
       magic = (size_t)&magic ^ (size_t)0x55555555U;
-#else
+  #else
       magic = (size_t)(time(0) ^ (size_t)0x55555555U);
-#endif
+  #endif
       magic |= (size_t)8U;                                /* ensure nonzero */
       magic &= ~(size_t)7U;      /* improve chances of fault for bad values */
       /* Until memory modes commonly available, use volatile-write */
@@ -3414,7 +3422,7 @@ static int change_mparam(int param_number, int value) {
 
 }
 
-#if DEBUG
+  #if DEBUG
 /* ------------------------- Debugging Support --------------------------- */
 
 /* Check properties of any chunk, whether free, inuse, mmapped etc  */
@@ -3751,11 +3759,11 @@ static void do_check_malloc_state(mstate m) {
 
 }
 
-#endif                                                             /* DEBUG */
+  #endif                                                           /* DEBUG */
 
 /* ----------------------------- statistics ------------------------------ */
 
-#if !NO_MALLINFO
+  #if !NO_MALLINFO
 static struct mallinfo internal_mallinfo(mstate m) {
 
   struct mallinfo nm = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
@@ -3810,9 +3818,9 @@ static struct mallinfo internal_mallinfo(mstate m) {
 
 }
 
-#endif                                                      /* !NO_MALLINFO */
+  #endif                                                    /* !NO_MALLINFO */
 
-#if !NO_MALLOC_STATS
+  #if !NO_MALLOC_STATS
 static void internal_malloc_stats(mstate m) {
 
   ensure_initialization();
@@ -3855,344 +3863,355 @@ static void internal_malloc_stats(mstate m) {
 
 }
 
-#endif                                                   /* NO_MALLOC_STATS */
+  #endif                                                 /* NO_MALLOC_STATS */
 
-/* ----------------------- Operations on smallbins ----------------------- */
+  /* ----------------------- Operations on smallbins ----------------------- */
 
-/*
-  Various forms of linking and unlinking are defined as macros.  Even
-  the ones for trees, which are very long but have very short typical
-  paths.  This is ugly but reduces reliance on inlining support of
-  compilers.
-*/
+  /*
+    Various forms of linking and unlinking are defined as macros.  Even
+    the ones for trees, which are very long but have very short typical
+    paths.  This is ugly but reduces reliance on inlining support of
+    compilers.
+  */
 
-/* Link a free chunk into a smallbin  */
-#define insert_small_chunk(M, P, S)         \
-  {                                         \
-                                            \
-    bindex_t  I = small_index(S);           \
-    mchunkptr B = smallbin_at(M, I);        \
-    mchunkptr F = B;                        \
-    assert(S >= MIN_CHUNK_SIZE);            \
-    if (!smallmap_is_marked(M, I))          \
-      mark_smallmap(M, I);                  \
-    else if (RTCHECK(ok_address(M, B->fd))) \
-      F = B->fd;                            \
-    else {                                  \
-                                            \
-      CORRUPTION_ERROR_ACTION(M);           \
-                                            \
-    }                                       \
-    B->fd = P;                              \
-    F->bk = P;                              \
-    P->fd = F;                              \
-    P->bk = B;                              \
-                                            \
-  }
+  /* Link a free chunk into a smallbin  */
+  #define insert_small_chunk(M, P, S)         \
+    {                                         \
+                                              \
+      bindex_t  I = small_index(S);           \
+      mchunkptr B = smallbin_at(M, I);        \
+      mchunkptr F = B;                        \
+      assert(S >= MIN_CHUNK_SIZE);            \
+      if (!smallmap_is_marked(M, I))          \
+        mark_smallmap(M, I);                  \
+      else if (RTCHECK(ok_address(M, B->fd))) \
+        F = B->fd;                            \
+      else {                                  \
+                                              \
+        CORRUPTION_ERROR_ACTION(M);           \
+                                              \
+      }                                       \
+      B->fd = P;                              \
+      F->bk = P;                              \
+      P->fd = F;                              \
+      P->bk = B;                              \
+                                              \
+    }
 
-/* Unlink a chunk from a smallbin  */
-#define unlink_small_chunk(M, P, S)                                            \
-  {                                                                            \
-                                                                               \
-    mchunkptr F = P->fd;                                                       \
-    mchunkptr B = P->bk;                                                       \
-    bindex_t  I = small_index(S);                                              \
-    assert(P != B);                                                            \
-    assert(P != F);                                                            \
-    assert(chunksize(P) == small_index2size(I));                               \
-    if (RTCHECK(F == smallbin_at(M, I) || (ok_address(M, F) && F->bk == P))) { \
-                                                                               \
-      if (B == F) {                                                            \
-                                                                               \
-        clear_smallmap(M, I);                                                  \
-                                                                               \
-      } else if (RTCHECK(B == smallbin_at(M, I) ||                             \
-                         (ok_address(M, B) && B->fd == P))) {                  \
-                                                                               \
-        F->bk = B;                                                             \
-        B->fd = F;                                                             \
-                                                                               \
-      } else {                                                                 \
-                                                                               \
-        CORRUPTION_ERROR_ACTION(M);                                            \
-                                                                               \
-      }                                                                        \
-                                                                               \
-    } else {                                                                   \
-                                                                               \
-      CORRUPTION_ERROR_ACTION(M);                                              \
-                                                                               \
-    }                                                                          \
-                                                                               \
-  }
+  /* Unlink a chunk from a smallbin  */
+  #define unlink_small_chunk(M, P, S)                           \
+    {                                                           \
+                                                                \
+      mchunkptr F = P->fd;                                      \
+      mchunkptr B = P->bk;                                      \
+      bindex_t  I = small_index(S);                             \
+      assert(P != B);                                           \
+      assert(P != F);                                           \
+      assert(chunksize(P) == small_index2size(I));              \
+      if (RTCHECK(F == smallbin_at(M, I) ||                     \
+                  (ok_address(M, F) && F->bk == P))) {          \
+                                                                \
+        if (B == F) {                                           \
+                                                                \
+          clear_smallmap(M, I);                                 \
+                                                                \
+        } else if (RTCHECK(B == smallbin_at(M, I) ||            \
+                                                                \
+                                                                \
+                           (ok_address(M, B) && B->fd == P))) { \
+                                                                \
+          F->bk = B;                                            \
+          B->fd = F;                                            \
+                                                                \
+        } else {                                                \
+                                                                \
+          CORRUPTION_ERROR_ACTION(M);                           \
+                                                                \
+        }                                                       \
+                                                                \
+      } else {                                                  \
+                                                                \
+        CORRUPTION_ERROR_ACTION(M);                             \
+                                                                \
+      }                                                         \
+                                                                \
+    }
 
-/* Unlink the first chunk from a smallbin */
-#define unlink_first_small_chunk(M, B, P, I)              \
-  {                                                       \
-                                                          \
-    mchunkptr F = P->fd;                                  \
-    assert(P != B);                                       \
-    assert(P != F);                                       \
-    assert(chunksize(P) == small_index2size(I));          \
-    if (B == F) {                                         \
-                                                          \
-      clear_smallmap(M, I);                               \
-                                                          \
-    } else if (RTCHECK(ok_address(M, F) && F->bk == P)) { \
-                                                          \
-      F->bk = B;                                          \
-      B->fd = F;                                          \
-                                                          \
-    } else {                                              \
-                                                          \
-      CORRUPTION_ERROR_ACTION(M);                         \
-                                                          \
-    }                                                     \
-                                                          \
-  }
+  /* Unlink the first chunk from a smallbin */
+  #define unlink_first_small_chunk(M, B, P, I)              \
+    {                                                       \
+                                                            \
+      mchunkptr F = P->fd;                                  \
+      assert(P != B);                                       \
+      assert(P != F);                                       \
+      assert(chunksize(P) == small_index2size(I));          \
+      if (B == F) {                                         \
+                                                            \
+        clear_smallmap(M, I);                               \
+                                                            \
+      } else if (RTCHECK(ok_address(M, F) && F->bk == P)) { \
+                                                            \
+        F->bk = B;                                          \
+        B->fd = F;                                          \
+                                                            \
+      } else {                                              \
+                                                            \
+        CORRUPTION_ERROR_ACTION(M);                         \
+                                                            \
+      }                                                     \
+                                                            \
+    }
 
-/* Replace dv node, binning the old one */
-/* Used only when dvsize known to be small */
-#define replace_dv(M, P, S)           \
-  {                                   \
-                                      \
-    size_t DVS = M->dvsize;           \
-    assert(is_small(DVS));            \
-    if (DVS != 0) {                   \
-                                      \
-      mchunkptr DV = M->dv;           \
-      insert_small_chunk(M, DV, DVS); \
-                                      \
-    }                                 \
-    M->dvsize = S;                    \
-    M->dv = P;                        \
-                                      \
-  }
+  /* Replace dv node, binning the old one */
+  /* Used only when dvsize known to be small */
+  #define replace_dv(M, P, S)           \
+    {                                   \
+                                        \
+      size_t DVS = M->dvsize;           \
+      assert(is_small(DVS));            \
+      if (DVS != 0) {                   \
+                                        \
+        mchunkptr DV = M->dv;           \
+        insert_small_chunk(M, DV, DVS); \
+                                        \
+      }                                 \
+      M->dvsize = S;                    \
+      M->dv = P;                        \
+                                        \
+    }
+
+  /* ------------------------- Operations on trees ------------------------- */
+
+  /* Insert chunk into tree */
+  #define insert_large_chunk(M, X, S)                                  \
+    {                                                                  \
+                                                                       \
+      tbinptr *H;                                                      \
+      bindex_t I;                                                      \
+      compute_tree_index(S, I);                                        \
+      H = treebin_at(M, I);                                            \
+      X->index = I;                                                    \
+      X->child[0] = X->child[1] = 0;                                   \
+      if (!treemap_is_marked(M, I)) {                                  \
+                                                                       \
+        mark_treemap(M, I);                                            \
+        *H = X;                                                        \
+        X->parent = (tchunkptr)H;                                      \
+        X->fd = X->bk = X;                                             \
+                                                                       \
+      } else {                                                         \
+                                                                       \
+        tchunkptr T = *H;                                              \
+        size_t    K = S << leftshift_for_tree_index(I);                \
+        for (;;) {                                                     \
+                                                                       \
+          if (chunksize(T) != S) {                                     \
+                                                                       \
+            tchunkptr *C =                                             \
+                &(T->child[(K >> (SIZE_T_BITSIZE - SIZE_T_ONE)) & 1]); \
+            K <<= 1;                                                   \
+            if (*C != 0)                                               \
+              T = *C;                                                  \
+            else if (RTCHECK(ok_address(M, C))) {                      \
+                                                                       \
+              *C = X;                                                  \
+              X->parent = T;                                           \
+              X->fd = X->bk = X;                                       \
+              break;                                                   \
+                                                                       \
+            } else {                                                   \
+                                                                       \
+              CORRUPTION_ERROR_ACTION(M);                              \
+              break;                                                   \
+                                                                       \
+            }                                                          \
+                                                                       \
+          } else {                                                     \
+                                                                       \
+            tchunkptr F = T->fd;                                       \
+            if (RTCHECK(ok_address(M, T) && ok_address(M, F))) {       \
+                                                                       \
+              T->fd = F->bk = X;                                       \
+              X->fd = F;                                               \
+              X->bk = T;                                               \
+              X->parent = 0;                                           \
+              break;                                                   \
+                                                                       \
+            } else {                                                   \
+                                                                       \
+              CORRUPTION_ERROR_ACTION(M);                              \
+              break;                                                   \
+                                                                       \
+            }                                                          \
+                                                                       \
+          }                                                            \
+                                                                       \
+        }                                                              \
+                                                                       \
+      }                                                                \
+                                                                       \
+    }
 
-/* ------------------------- Operations on trees ------------------------- */
+/*
+  Unlink steps:
 
-/* Insert chunk into tree */
-#define insert_large_chunk(M, X, S)                                  \
-  {                                                                  \
+  1. If x is a chained node, unlink it from its same-sized fd/bk links
+     and choose its bk node as its replacement.
+  2. If x was the last node of its size, but not a leaf node, it must
+     be replaced with a leaf node (not merely one with an open left or
+     right), to make sure that lefts and rights of descendents
+     correspond properly to bit masks.  We use the rightmost descendent
+     of x.  We could use any other leaf, but this is easy to locate and
+     tends to counteract removal of leftmosts elsewhere, and so keeps
+     paths shorter than minimally guaranteed.  This doesn't loop much
+     because on average a node in a tree is near the bottom.
+  3. If x is the base of a chain (i.e., has parent links) relink
+     x's parent and children to x's replacement (or null if none).
+*/
+
+  #define unlink_large_chunk(M, X)                                   \
+    {                                                                \
+                                                                     \
+      tchunkptr XP = X->parent;                                      \
+      tchunkptr R;                                                   \
+      if (X->bk != X) {                                              \
                                                                      \
-    tbinptr *H;                                                      \
-    bindex_t I;                                                      \
-    compute_tree_index(S, I);                                        \
-    H = treebin_at(M, I);                                            \
-    X->index = I;                                                    \
-    X->child[0] = X->child[1] = 0;                                   \
-    if (!treemap_is_marked(M, I)) {                                  \
+        tchunkptr F = X->fd;                                         \
+        R = X->bk;                                                   \
+        if (RTCHECK(ok_address(M, F) && F->bk == X && R->fd == X)) { \
                                                                      \
-      mark_treemap(M, I);                                            \
-      *H = X;                                                        \
-      X->parent = (tchunkptr)H;                                      \
-      X->fd = X->bk = X;                                             \
+          F->bk = R;                                                 \
+          R->fd = F;                                                 \
                                                                      \
-    } else {                                                         \
+        } else {                                                     \
+                                                                     \
+          CORRUPTION_ERROR_ACTION(M);                                \
                                                                      \
-      tchunkptr T = *H;                                              \
-      size_t    K = S << leftshift_for_tree_index(I);                \
-      for (;;) {                                                     \
+        }                                                            \
                                                                      \
-        if (chunksize(T) != S) {                                     \
+      } else {                                                       \
                                                                      \
-          tchunkptr *C =                                             \
-              &(T->child[(K >> (SIZE_T_BITSIZE - SIZE_T_ONE)) & 1]); \
-          K <<= 1;                                                   \
-          if (*C != 0)                                               \
-            T = *C;                                                  \
-          else if (RTCHECK(ok_address(M, C))) {                      \
+        tchunkptr *RP;                                               \
+        if (((R = *(RP = &(X->child[1]))) != 0) ||                   \
+            ((R = *(RP = &(X->child[0]))) != 0)) {                   \
                                                                      \
-            *C = X;                                                  \
-            X->parent = T;                                           \
-            X->fd = X->bk = X;                                       \
-            break;                                                   \
+          tchunkptr *CP;                                             \
+          while ((*(CP = &(R->child[1])) != 0) ||                    \
+                 (*(CP = &(R->child[0])) != 0)) {                    \
                                                                      \
-          } else {                                                   \
+            R = *(RP = CP);                                          \
+                                                                     \
+          }                                                          \
+          if (RTCHECK(ok_address(M, RP)))                            \
+            *RP = 0;                                                 \
+          else {                                                     \
                                                                      \
             CORRUPTION_ERROR_ACTION(M);                              \
-            break;                                                   \
                                                                      \
           }                                                          \
                                                                      \
-        } else {                                                     \
+        }                                                            \
+                                                                     \
+      }                                                              \
+      if (XP != 0) {                                                 \
                                                                      \
-          tchunkptr F = T->fd;                                       \
-          if (RTCHECK(ok_address(M, T) && ok_address(M, F))) {       \
+        tbinptr *H = treebin_at(M, X->index);                        \
+        if (X == *H) {                                               \
                                                                      \
-            T->fd = F->bk = X;                                       \
-            X->fd = F;                                               \
-            X->bk = T;                                               \
-            X->parent = 0;                                           \
-            break;                                                   \
+          if ((*H = R) == 0) clear_treemap(M, X->index);             \
                                                                      \
-          } else {                                                   \
+        } else if (RTCHECK(ok_address(M, XP))) {                     \
                                                                      \
-            CORRUPTION_ERROR_ACTION(M);                              \
-            break;                                                   \
+          if (XP->child[0] == X)                                     \
+            XP->child[0] = R;                                        \
+          else                                                       \
+            XP->child[1] = R;                                        \
+                                                                     \
+        } else                                                       \
+                                                                     \
+                                                                     \
+          CORRUPTION_ERROR_ACTION(M);                                \
+        if (R != 0) {                                                \
+                                                                     \
+          if (RTCHECK(ok_address(M, R))) {                           \
+                                                                     \
+            tchunkptr C0, C1;                                        \
+            R->parent = XP;                                          \
+            if ((C0 = X->child[0]) != 0) {                           \
+                                                                     \
+              if (RTCHECK(ok_address(M, C0))) {                      \
+                                                                     \
+                R->child[0] = C0;                                    \
+                C0->parent = R;                                      \
+                                                                     \
+              } else                                                 \
                                                                      \
-          }                                                          \
+                                                                     \
+                CORRUPTION_ERROR_ACTION(M);                          \
+                                                                     \
+            }                                                        \
+            if ((C1 = X->child[1]) != 0) {                           \
+                                                                     \
+              if (RTCHECK(ok_address(M, C1))) {                      \
+                                                                     \
+                R->child[1] = C1;                                    \
+                C1->parent = R;                                      \
+                                                                     \
+              } else                                                 \
+                                                                     \
+                                                                     \
+                CORRUPTION_ERROR_ACTION(M);                          \
+                                                                     \
+            }                                                        \
+                                                                     \
+          } else                                                     \
+                                                                     \
+                                                                     \
+            CORRUPTION_ERROR_ACTION(M);                              \
                                                                      \
         }                                                            \
                                                                      \
       }                                                              \
                                                                      \
-    }                                                                \
-                                                                     \
-  }
-
-/*
-  Unlink steps:
-
-  1. If x is a chained node, unlink it from its same-sized fd/bk links
-     and choose its bk node as its replacement.
-  2. If x was the last node of its size, but not a leaf node, it must
-     be replaced with a leaf node (not merely one with an open left or
-     right), to make sure that lefts and rights of descendents
-     correspond properly to bit masks.  We use the rightmost descendent
-     of x.  We could use any other leaf, but this is easy to locate and
-     tends to counteract removal of leftmosts elsewhere, and so keeps
-     paths shorter than minimally guaranteed.  This doesn't loop much
-     because on average a node in a tree is near the bottom.
-  3. If x is the base of a chain (i.e., has parent links) relink
-     x's parent and children to x's replacement (or null if none).
-*/
-
-#define unlink_large_chunk(M, X)                                   \
-  {                                                                \
-                                                                   \
-    tchunkptr XP = X->parent;                                      \
-    tchunkptr R;                                                   \
-    if (X->bk != X) {                                              \
-                                                                   \
-      tchunkptr F = X->fd;                                         \
-      R = X->bk;                                                   \
-      if (RTCHECK(ok_address(M, F) && F->bk == X && R->fd == X)) { \
-                                                                   \
-        F->bk = R;                                                 \
-        R->fd = F;                                                 \
-                                                                   \
-      } else {                                                     \
-                                                                   \
-        CORRUPTION_ERROR_ACTION(M);                                \
-                                                                   \
-      }                                                            \
-                                                                   \
-    } else {                                                       \
-                                                                   \
-      tchunkptr *RP;                                               \
-      if (((R = *(RP = &(X->child[1]))) != 0) ||                   \
-          ((R = *(RP = &(X->child[0]))) != 0)) {                   \
-                                                                   \
-        tchunkptr *CP;                                             \
-        while ((*(CP = &(R->child[1])) != 0) ||                    \
-               (*(CP = &(R->child[0])) != 0)) {                    \
-                                                                   \
-          R = *(RP = CP);                                          \
-                                                                   \
-        }                                                          \
-        if (RTCHECK(ok_address(M, RP)))                            \
-          *RP = 0;                                                 \
-        else {                                                     \
-                                                                   \
-          CORRUPTION_ERROR_ACTION(M);                              \
-                                                                   \
-        }                                                          \
-                                                                   \
-      }                                                            \
-                                                                   \
-    }                                                              \
-    if (XP != 0) {                                                 \
-                                                                   \
-      tbinptr *H = treebin_at(M, X->index);                        \
-      if (X == *H) {                                               \
-                                                                   \
-        if ((*H = R) == 0) clear_treemap(M, X->index);             \
-                                                                   \
-      } else if (RTCHECK(ok_address(M, XP))) {                     \
-                                                                   \
-        if (XP->child[0] == X)                                     \
-          XP->child[0] = R;                                        \
-        else                                                       \
-          XP->child[1] = R;                                        \
-                                                                   \
-      } else                                                       \
-        CORRUPTION_ERROR_ACTION(M);                                \
-      if (R != 0) {                                                \
-                                                                   \
-        if (RTCHECK(ok_address(M, R))) {                           \
-                                                                   \
-          tchunkptr C0, C1;                                        \
-          R->parent = XP;                                          \
-          if ((C0 = X->child[0]) != 0) {                           \
-                                                                   \
-            if (RTCHECK(ok_address(M, C0))) {                      \
-                                                                   \
-              R->child[0] = C0;                                    \
-              C0->parent = R;                                      \
-                                                                   \
-            } else                                                 \
-              CORRUPTION_ERROR_ACTION(M);                          \
-                                                                   \
-          }                                                        \
-          if ((C1 = X->child[1]) != 0) {                           \
-                                                                   \
-            if (RTCHECK(ok_address(M, C1))) {                      \
-                                                                   \
-              R->child[1] = C1;                                    \
-              C1->parent = R;                                      \
-                                                                   \
-            } else                                                 \
-              CORRUPTION_ERROR_ACTION(M);                          \
-                                                                   \
-          }                                                        \
-                                                                   \
-        } else                                                     \
-          CORRUPTION_ERROR_ACTION(M);                              \
-                                                                   \
-      }                                                            \
-                                                                   \
-    }                                                              \
-                                                                   \
-  }
+    }
 
 /* Relays to large vs small bin operations */
 
-#define insert_chunk(M, P, S)                         \
-  if (is_small(S)) insert_small_chunk(M, P, S) else { \
-                                                      \
-      tchunkptr TP = (tchunkptr)(P);                  \
-      insert_large_chunk(M, TP, S);                   \
-                                                      \
-    }
+  #define insert_chunk(M, P, S)                         \
+    if (is_small(S)) insert_small_chunk(M, P, S) else { \
+                                                        \
+        tchunkptr TP = (tchunkptr)(P);                  \
+        insert_large_chunk(M, TP, S);                   \
+                                                        \
+      }
 
-#define unlink_chunk(M, P, S)                         \
-  if (is_small(S)) unlink_small_chunk(M, P, S) else { \
-                                                      \
-      tchunkptr TP = (tchunkptr)(P);                  \
-      unlink_large_chunk(M, TP);                      \
-                                                      \
-    }
+  #define unlink_chunk(M, P, S)                         \
+    if (is_small(S)) unlink_small_chunk(M, P, S) else { \
+                                                        \
+        tchunkptr TP = (tchunkptr)(P);                  \
+        unlink_large_chunk(M, TP);                      \
+                                                        \
+      }
 
 /* Relays to internal calls to malloc/free from realloc, memalign etc */
 
-#if ONLY_MSPACES
-  #define internal_malloc(m, b) mspace_malloc(m, b)
-  #define internal_free(m, mem) mspace_free(m, mem);
-#else                                                       /* ONLY_MSPACES */
-  #if MSPACES
-    #define internal_malloc(m, b) \
-      ((m == gm) ? dlmalloc(b) : mspace_malloc(m, b))
-    #define internal_free(m, mem) \
-      if (m == gm)                \
-        dlfree(mem);              \
-      else                        \
-        mspace_free(m, mem);
-  #else                                                          /* MSPACES */
-    #define internal_malloc(m, b) dlmalloc(b)
-    #define internal_free(m, mem) dlfree(mem)
-  #endif                                                         /* MSPACES */
-#endif                                                      /* ONLY_MSPACES */
+  #if ONLY_MSPACES
+    #define internal_malloc(m, b) mspace_malloc(m, b)
+    #define internal_free(m, mem) mspace_free(m, mem);
+  #else                                                     /* ONLY_MSPACES */
+    #if MSPACES
+      #define internal_malloc(m, b) \
+        ((m == gm) ? dlmalloc(b) : mspace_malloc(m, b))
+      #define internal_free(m, mem) \
+        if (m == gm)                \
+          dlfree(mem);              \
+        else                        \
+          mspace_free(m, mem);
+    #else                                                        /* MSPACES */
+      #define internal_malloc(m, b) dlmalloc(b)
+      #define internal_free(m, mem) dlfree(mem)
+    #endif                                                       /* MSPACES */
+  #endif                                                    /* ONLY_MSPACES */
 
 /* -----------------------  Direct-mmapping chunks ----------------------- */
 
@@ -4317,7 +4336,7 @@ static void init_bins(mstate m) {
 
 }
 
-#if PROCEED_ON_ERROR
+  #if PROCEED_ON_ERROR
 
 /* default corruption action */
 static void reset_on_error(mstate m) {
@@ -4337,7 +4356,7 @@ static void reset_on_error(mstate m) {
 
 }
 
-#endif                                                  /* PROCEED_ON_ERROR */
+  #endif                                                /* PROCEED_ON_ERROR */
 
 /* Allocate chunk and prepend remainder with chunk in successor base. */
 static void *prepend_alloc(mstate m, char *newbase, char *oldbase, size_t nb) {
@@ -4629,11 +4648,11 @@ static void *sys_alloc(mstate m, size_t nb) {
       m->magic = mparams.magic;
       m->release_checks = MAX_RELEASE_CHECK_RATE;
       init_bins(m);
-#if !ONLY_MSPACES
+  #if !ONLY_MSPACES
       if (is_global(m))
         init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE);
       else
-#endif
+  #endif
       {
 
         /* Offset top by embedded malloc_state */
@@ -5099,36 +5118,36 @@ static void *tmalloc_small(mstate m, size_t nb) {
 
 }
 
-#if !ONLY_MSPACES
+  #if !ONLY_MSPACES
 
 void *dlmalloc(size_t bytes) {
 
-  /*
-     Basic algorithm:
-     If a small request (< 256 bytes minus per-chunk overhead):
-       1. If one exists, use a remainderless chunk in associated smallbin.
-          (Remainderless means that there are too few excess bytes to
-          represent as a chunk.)
-       2. If it is big enough, use the dv chunk, which is normally the
-          chunk adjacent to the one used for the most recent small request.
-       3. If one exists, split the smallest available chunk in a bin,
-          saving remainder in dv.
-       4. If it is big enough, use the top chunk.
-       5. If available, get memory from system and use it
-     Otherwise, for a large request:
-       1. Find the smallest available binned chunk that fits, and use it
-          if it is better fitting than dv chunk, splitting if necessary.
-       2. If better fitting than any binned chunk, use the dv chunk.
-       3. If it is big enough, use the top chunk.
-       4. If request size >= mmap threshold, try to directly mmap this chunk.
-       5. If available, get memory from system and use it
-
-     The ugly goto's here ensure that postaction occurs along all paths.
-  */
+    /*
+       Basic algorithm:
+       If a small request (< 256 bytes minus per-chunk overhead):
+         1. If one exists, use a remainderless chunk in associated smallbin.
+            (Remainderless means that there are too few excess bytes to
+            represent as a chunk.)
+         2. If it is big enough, use the dv chunk, which is normally the
+            chunk adjacent to the one used for the most recent small request.
+         3. If one exists, split the smallest available chunk in a bin,
+            saving remainder in dv.
+         4. If it is big enough, use the top chunk.
+         5. If available, get memory from system and use it
+       Otherwise, for a large request:
+         1. Find the smallest available binned chunk that fits, and use it
+            if it is better fitting than dv chunk, splitting if necessary.
+         2. If better fitting than any binned chunk, use the dv chunk.
+         3. If it is big enough, use the top chunk.
+         4. If request size >= mmap threshold, try to directly mmap this chunk.
+         5. If available, get memory from system and use it
+
+       The ugly goto's here ensure that postaction occurs along all paths.
+    */
 
-  #if USE_LOCKS
+    #if USE_LOCKS
   ensure_initialization();    /* initialize in sys_alloc if not using locks */
-  #endif
+    #endif
 
   if (!PREACTION(gm)) {
 
@@ -5275,7 +5294,7 @@ void dlfree(void *mem) {
   if (mem != 0) {
 
     mchunkptr p = mem2chunk(mem);
-  #if FOOTERS
+    #if FOOTERS
     mstate fm = get_mstate_for(p);
     if (!ok_magic(fm)) {
 
@@ -5284,9 +5303,9 @@ void dlfree(void *mem) {
 
     }
 
-  #else                                                          /* FOOTERS */
-    #define fm gm
-  #endif                                                         /* FOOTERS */
+    #else                                                        /* FOOTERS */
+      #define fm gm
+    #endif                                                       /* FOOTERS */
     if (!PREACTION(fm)) {
 
       check_inuse_chunk(fm, p);
@@ -5403,9 +5422,9 @@ void dlfree(void *mem) {
 
   }
 
-  #if !FOOTERS
-    #undef fm
-  #endif                                                         /* FOOTERS */
+    #if !FOOTERS
+      #undef fm
+    #endif                                                       /* FOOTERS */
 
 }
 
@@ -5429,7 +5448,7 @@ void *dlcalloc(size_t n_elements, size_t elem_size) {
 
 }
 
-#endif                                                     /* !ONLY_MSPACES */
+  #endif                                                   /* !ONLY_MSPACES */
 
 /* ------------ Internal support for realloc, memalign, etc -------------- */
 
@@ -5747,7 +5766,7 @@ static void **ialloc(mstate m, size_t n_elements, size_t *sizes, int opts,
 
   }
 
-#if DEBUG
+  #if DEBUG
   if (marray != chunks) {
 
     /* final element must have exactly exhausted chunk */
@@ -5768,7 +5787,7 @@ static void **ialloc(mstate m, size_t n_elements, size_t *sizes, int opts,
   for (i = 0; i != n_elements; ++i)
     check_inuse_chunk(m, mem2chunk(marray[i]));
 
-#endif                                                             /* DEBUG */
+  #endif                                                           /* DEBUG */
 
   POSTACTION(m);
   return marray;
@@ -5796,7 +5815,7 @@ static size_t internal_bulk_free(mstate m, void *array[], size_t nelem) {
 
         mchunkptr p = mem2chunk(mem);
         size_t    psize = chunksize(p);
-#if FOOTERS
+  #if FOOTERS
         if (get_mstate_for(p) != m) {
 
           ++unfreed;
@@ -5804,7 +5823,7 @@ static size_t internal_bulk_free(mstate m, void *array[], size_t nelem) {
 
         }
 
-#endif
+  #endif
         check_inuse_chunk(m, p);
         *a = 0;
         if (RTCHECK(ok_address(m, p) && ok_inuse(p))) {
@@ -5841,8 +5860,8 @@ static size_t internal_bulk_free(mstate m, void *array[], size_t nelem) {
 
 }
 
-/* Traversal */
-#if MALLOC_INSPECT_ALL
+  /* Traversal */
+  #if MALLOC_INSPECT_ALL
 static void internal_inspect_all(mstate m,
                                  void (*handler)(void *start, void *end,
                                                  size_t used_bytes,
@@ -5894,11 +5913,11 @@ static void internal_inspect_all(mstate m,
 
 }
 
-#endif                                                /* MALLOC_INSPECT_ALL */
+  #endif                                              /* MALLOC_INSPECT_ALL */
 
 /* ------------------ Exported realloc, memalign, etc -------------------- */
 
-#if !ONLY_MSPACES
+  #if !ONLY_MSPACES
 
 void *dlrealloc(void *oldmem, size_t bytes) {
 
@@ -5913,21 +5932,21 @@ void *dlrealloc(void *oldmem, size_t bytes) {
 
   }
 
-  #ifdef REALLOC_ZERO_BYTES_FREES
+    #ifdef REALLOC_ZERO_BYTES_FREES
   else if (bytes == 0) {
 
     dlfree(oldmem);
 
   }
 
-  #endif                                        /* REALLOC_ZERO_BYTES_FREES */
+    #endif                                      /* REALLOC_ZERO_BYTES_FREES */
   else {
 
     size_t    nb = request2size(bytes);
     mchunkptr oldp = mem2chunk(oldmem);
-  #if !FOOTERS
+    #if !FOOTERS
     mstate m = gm;
-  #else                                                          /* FOOTERS */
+    #else                                                        /* FOOTERS */
     mstate m = get_mstate_for(oldp);
     if (!ok_magic(m)) {
 
@@ -5936,7 +5955,7 @@ void *dlrealloc(void *oldmem, size_t bytes) {
 
     }
 
-  #endif                                                         /* FOOTERS */
+    #endif                                                       /* FOOTERS */
     if (!PREACTION(m)) {
 
       mchunkptr newp = try_realloc_chunk(m, oldp, nb, 1);
@@ -5980,9 +5999,9 @@ void *dlrealloc_in_place(void *oldmem, size_t bytes) {
 
       size_t    nb = request2size(bytes);
       mchunkptr oldp = mem2chunk(oldmem);
-  #if !FOOTERS
+    #if !FOOTERS
       mstate m = gm;
-  #else                                                          /* FOOTERS */
+    #else                                                        /* FOOTERS */
       mstate m = get_mstate_for(oldp);
       if (!ok_magic(m)) {
 
@@ -5991,7 +6010,7 @@ void *dlrealloc_in_place(void *oldmem, size_t bytes) {
 
       }
 
-  #endif                                                         /* FOOTERS */
+    #endif                                                       /* FOOTERS */
       if (!PREACTION(m)) {
 
         mchunkptr newp = try_realloc_chunk(m, oldp, nb, 0);
@@ -6091,7 +6110,7 @@ size_t dlbulk_free(void *array[], size_t nelem) {
 
 }
 
-  #if MALLOC_INSPECT_ALL
+    #if MALLOC_INSPECT_ALL
 void dlmalloc_inspect_all(void (*handler)(void *start, void *end,
                                           size_t used_bytes,
                                           void * callback_arg),
@@ -6107,7 +6126,7 @@ void dlmalloc_inspect_all(void (*handler)(void *start, void *end,
 
 }
 
-  #endif                                              /* MALLOC_INSPECT_ALL */
+    #endif                                            /* MALLOC_INSPECT_ALL */
 
 int dlmalloc_trim(size_t pad) {
 
@@ -6155,23 +6174,23 @@ size_t dlmalloc_set_footprint_limit(size_t bytes) {
 
 }
 
-  #if !NO_MALLINFO
+    #if !NO_MALLINFO
 struct mallinfo dlmallinfo(void) {
 
   return internal_mallinfo(gm);
 
 }
 
-  #endif                                                     /* NO_MALLINFO */
+    #endif                                                   /* NO_MALLINFO */
 
-  #if !NO_MALLOC_STATS
+    #if !NO_MALLOC_STATS
 void dlmalloc_stats() {
 
   internal_malloc_stats(gm);
 
 }
 
-  #endif                                                 /* NO_MALLOC_STATS */
+    #endif                                               /* NO_MALLOC_STATS */
 
 int dlmallopt(int param_number, int value) {
 
@@ -6192,11 +6211,11 @@ size_t dlmalloc_usable_size(void *mem) {
 
 }
 
-#endif                                                     /* !ONLY_MSPACES */
+  #endif                                                   /* !ONLY_MSPACES */
 
 /* ----------------------------- user mspaces ---------------------------- */
 
-#if MSPACES
+  #if MSPACES
 
 static mstate init_user_mstate(char *tbase, size_t tsize) {
 
@@ -6476,12 +6495,12 @@ void mspace_free(mspace msp, void *mem) {
   if (mem != 0) {
 
     mchunkptr p = mem2chunk(mem);
-  #if FOOTERS
+    #if FOOTERS
     mstate fm = get_mstate_for(p);
     (void)msp;                         /* placate people compiling -Wunused */
-  #else                                                          /* FOOTERS */
+    #else                                                        /* FOOTERS */
     mstate fm = (mstate)msp;
-  #endif                                                         /* FOOTERS */
+    #endif                                                       /* FOOTERS */
     if (!ok_magic(fm)) {
 
       USAGE_ERROR_ACTION(fm, p);
@@ -6648,21 +6667,21 @@ void *mspace_realloc(mspace msp, void *oldmem, size_t bytes) {
 
   }
 
-  #ifdef REALLOC_ZERO_BYTES_FREES
+    #ifdef REALLOC_ZERO_BYTES_FREES
   else if (bytes == 0) {
 
     mspace_free(msp, oldmem);
 
   }
 
-  #endif                                        /* REALLOC_ZERO_BYTES_FREES */
+    #endif                                      /* REALLOC_ZERO_BYTES_FREES */
   else {
 
     size_t    nb = request2size(bytes);
     mchunkptr oldp = mem2chunk(oldmem);
-  #if !FOOTERS
+    #if !FOOTERS
     mstate m = (mstate)msp;
-  #else                                                          /* FOOTERS */
+    #else                                                        /* FOOTERS */
     mstate m = get_mstate_for(oldp);
     if (!ok_magic(m)) {
 
@@ -6671,7 +6690,7 @@ void *mspace_realloc(mspace msp, void *oldmem, size_t bytes) {
 
     }
 
-  #endif                                                         /* FOOTERS */
+    #endif                                                       /* FOOTERS */
     if (!PREACTION(m)) {
 
       mchunkptr newp = try_realloc_chunk(m, oldp, nb, 1);
@@ -6715,9 +6734,9 @@ void *mspace_realloc_in_place(mspace msp, void *oldmem, size_t bytes) {
 
       size_t    nb = request2size(bytes);
       mchunkptr oldp = mem2chunk(oldmem);
-  #if !FOOTERS
+    #if !FOOTERS
       mstate m = (mstate)msp;
-  #else                                                          /* FOOTERS */
+    #else                                                        /* FOOTERS */
       mstate m = get_mstate_for(oldp);
       (void)msp;                       /* placate people compiling -Wunused */
       if (!ok_magic(m)) {
@@ -6727,7 +6746,7 @@ void *mspace_realloc_in_place(mspace msp, void *oldmem, size_t bytes) {
 
       }
 
-  #endif                                                         /* FOOTERS */
+    #endif                                                       /* FOOTERS */
       if (!PREACTION(m)) {
 
         mchunkptr newp = try_realloc_chunk(m, oldp, nb, 0);
@@ -6801,7 +6820,7 @@ size_t mspace_bulk_free(mspace msp, void *array[], size_t nelem) {
 
 }
 
-  #if MALLOC_INSPECT_ALL
+    #if MALLOC_INSPECT_ALL
 void mspace_inspect_all(mspace msp,
                         void (*handler)(void *start, void *end,
                                         size_t used_bytes, void *callback_arg),
@@ -6825,7 +6844,7 @@ void mspace_inspect_all(mspace msp,
 
 }
 
-  #endif                                              /* MALLOC_INSPECT_ALL */
+    #endif                                            /* MALLOC_INSPECT_ALL */
 
 int mspace_trim(mspace msp, size_t pad) {
 
@@ -6850,7 +6869,7 @@ int mspace_trim(mspace msp, size_t pad) {
 
 }
 
-  #if !NO_MALLOC_STATS
+    #if !NO_MALLOC_STATS
 void mspace_malloc_stats(mspace msp) {
 
   mstate ms = (mstate)msp;
@@ -6866,7 +6885,7 @@ void mspace_malloc_stats(mspace msp) {
 
 }
 
-  #endif                                                 /* NO_MALLOC_STATS */
+    #endif                                               /* NO_MALLOC_STATS */
 
 size_t mspace_footprint(mspace msp) {
 
@@ -6946,7 +6965,7 @@ size_t mspace_set_footprint_limit(mspace msp, size_t bytes) {
 
 }
 
-  #if !NO_MALLINFO
+    #if !NO_MALLINFO
 struct mallinfo mspace_mallinfo(mspace msp) {
 
   mstate ms = (mstate)msp;
@@ -6955,7 +6974,7 @@ struct mallinfo mspace_mallinfo(mspace msp) {
 
 }
 
-  #endif                                                     /* NO_MALLINFO */
+    #endif                                                   /* NO_MALLINFO */
 
 size_t mspace_usable_size(const void *mem) {
 
@@ -6976,7 +6995,7 @@ int mspace_mallopt(int param_number, int value) {
 
 }
 
-#endif                                                           /* MSPACES */
+  #endif                                                         /* MSPACES */
 
 /* -------------------- Alternative MORECORE functions ------------------- */
 
@@ -7305,3 +7324,5 @@ History:
 
 */
 
+#endif  // __GLIBC__
+
diff --git a/qemu_mode/libqasan/hooks.c b/qemu_mode/libqasan/hooks.c
index 3bb4cc42..0e6c3e08 100644
--- a/qemu_mode/libqasan/hooks.c
+++ b/qemu_mode/libqasan/hooks.c
@@ -26,6 +26,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "libqasan.h"
 #include "map_macro.h"
 
+ssize_t (*__lq_libc_write)(int, const void *, size_t);
+ssize_t (*__lq_libc_read)(int, void *, size_t);
 char *(*__lq_libc_fgets)(char *, int, FILE *);
 int (*__lq_libc_atoi)(const char *);
 long (*__lq_libc_atol)(const char *);
@@ -35,6 +37,8 @@ void __libqasan_init_hooks(void) {
 
   __libqasan_init_malloc();
 
+  __lq_libc_write = ASSERT_DLSYM(write);
+  __lq_libc_read = ASSERT_DLSYM(read);
   __lq_libc_fgets = ASSERT_DLSYM(fgets);
   __lq_libc_atoi = ASSERT_DLSYM(atoi);
   __lq_libc_atol = ASSERT_DLSYM(atol);
@@ -42,6 +46,32 @@ void __libqasan_init_hooks(void) {
 
 }
 
+ssize_t write(int fd, const void *buf, size_t count) {
+  /* write() hook: log the call, QASAN_LOAD buf, forward to __lq_libc_write. */
+  void *rtv = __builtin_return_address(0);  /* only used in the debug log */
+  /* NOTE(review): __lq_libc_write is NULL until __libqasan_init_hooks runs. */
+  QASAN_DEBUG("%14p: write(%d, %p, %zu)\n", rtv, fd, buf, count);
+  QASAN_LOAD(buf, count);  /* on all count bytes, before the forwarded call */
+  ssize_t r = __lq_libc_write(fd, buf, count);  /* ASSERT_DLSYM(write) */
+  QASAN_DEBUG("\t\t = %zd\n", r);
+
+  return r;
+
+}
+
+ssize_t read(int fd, void *buf, size_t count) {
+  /* read() hook: log the call, QASAN_STORE buf, forward to __lq_libc_read. */
+  void *rtv = __builtin_return_address(0);  /* only used in the debug log */
+  /* NOTE(review): __lq_libc_read is NULL until __libqasan_init_hooks runs. */
+  QASAN_DEBUG("%14p: read(%d, %p, %zu)\n", rtv, fd, buf, count);
+  QASAN_STORE(buf, count);  /* on all count bytes, before the forwarded call */
+  ssize_t r = __lq_libc_read(fd, buf, count);  /* ASSERT_DLSYM(read) */
+  QASAN_DEBUG("\t\t = %zd\n", r);
+
+  return r;
+
+}
+
 #ifdef __ANDROID__
 size_t malloc_usable_size(const void *ptr) {
 
@@ -54,7 +84,7 @@ size_t malloc_usable_size(void *ptr) {
 
   QASAN_DEBUG("%14p: malloc_usable_size(%p)\n", rtv, ptr);
   size_t r = __libqasan_malloc_usable_size((void *)ptr);
-  QASAN_DEBUG("\t\t = %ld\n", r);
+  QASAN_DEBUG("\t\t = %zu\n", r);
 
   return r;
 
@@ -64,7 +94,7 @@ void *malloc(size_t size) {
 
   void *rtv = __builtin_return_address(0);
 
-  QASAN_DEBUG("%14p: malloc(%ld)\n", rtv, size);
+  QASAN_DEBUG("%14p: malloc(%zu)\n", rtv, size);
   void *r = __libqasan_malloc(size);
   QASAN_DEBUG("\t\t = %p\n", r);
 
@@ -76,7 +106,7 @@ void *calloc(size_t nmemb, size_t size) {
 
   void *rtv = __builtin_return_address(0);
 
-  QASAN_DEBUG("%14p: calloc(%ld, %ld)\n", rtv, nmemb, size);
+  QASAN_DEBUG("%14p: calloc(%zu, %zu)\n", rtv, nmemb, size);
   void *r = __libqasan_calloc(nmemb, size);
   QASAN_DEBUG("\t\t = %p\n", r);
 
@@ -88,7 +118,7 @@ void *realloc(void *ptr, size_t size) {
 
   void *rtv = __builtin_return_address(0);
 
-  QASAN_DEBUG("%14p: realloc(%p, %ld)\n", rtv, ptr, size);
+  QASAN_DEBUG("%14p: realloc(%p, %zu)\n", rtv, ptr, size);
   void *r = __libqasan_realloc(ptr, size);
   QASAN_DEBUG("\t\t = %p\n", r);
 
@@ -100,7 +130,7 @@ int posix_memalign(void **memptr, size_t alignment, size_t size) {
 
   void *rtv = __builtin_return_address(0);
 
-  QASAN_DEBUG("%14p: posix_memalign(%p, %ld, %ld)\n", rtv, memptr, alignment,
+  QASAN_DEBUG("%14p: posix_memalign(%p, %zu, %zu)\n", rtv, memptr, alignment,
               size);
   int r = __libqasan_posix_memalign(memptr, alignment, size);
   QASAN_DEBUG("\t\t = %d [*memptr = %p]\n", r, *memptr);
@@ -113,7 +143,7 @@ void *memalign(size_t alignment, size_t size) {
 
   void *rtv = __builtin_return_address(0);
 
-  QASAN_DEBUG("%14p: memalign(%ld, %ld)\n", rtv, alignment, size);
+  QASAN_DEBUG("%14p: memalign(%zu, %zu)\n", rtv, alignment, size);
   void *r = __libqasan_memalign(alignment, size);
   QASAN_DEBUG("\t\t = %p\n", r);
 
@@ -125,7 +155,7 @@ void *aligned_alloc(size_t alignment, size_t size) {
 
   void *rtv = __builtin_return_address(0);
 
-  QASAN_DEBUG("%14p: aligned_alloc(%ld, %ld)\n", rtv, alignment, size);
+  QASAN_DEBUG("%14p: aligned_alloc(%zu, %zu)\n", rtv, alignment, size);
   void *r = __libqasan_aligned_alloc(alignment, size);
   QASAN_DEBUG("\t\t = %p\n", r);
 
@@ -137,7 +167,7 @@ void *valloc(size_t size) {
 
   void *rtv = __builtin_return_address(0);
 
-  QASAN_DEBUG("%14p: valloc(%ld)\n", rtv, size);
+  QASAN_DEBUG("%14p: valloc(%zu)\n", rtv, size);
   void *r = __libqasan_memalign(sysconf(_SC_PAGESIZE), size);
   QASAN_DEBUG("\t\t = %p\n", r);
 
@@ -149,7 +179,7 @@ void *pvalloc(size_t size) {
 
   void *rtv = __builtin_return_address(0);
 
-  QASAN_DEBUG("%14p: pvalloc(%ld)\n", rtv, size);
+  QASAN_DEBUG("%14p: pvalloc(%zu)\n", rtv, size);
   size_t page_size = sysconf(_SC_PAGESIZE);
   size = (size & (page_size - 1)) + page_size;
   void *r = __libqasan_memalign(page_size, size);
@@ -174,7 +204,9 @@ char *fgets(char *s, int size, FILE *stream) {
 
   QASAN_DEBUG("%14p: fgets(%p, %d, %p)\n", rtv, s, size, stream);
   QASAN_STORE(s, size);
+#ifndef __ANDROID__
   QASAN_LOAD(stream, sizeof(FILE));
+#endif
   char *r = __lq_libc_fgets(s, size, stream);
   QASAN_DEBUG("\t\t = %p\n", r);
 
@@ -186,7 +218,7 @@ int memcmp(const void *s1, const void *s2, size_t n) {
 
   void *rtv = __builtin_return_address(0);
 
-  QASAN_DEBUG("%14p: memcmp(%p, %p, %ld)\n", rtv, s1, s2, n);
+  QASAN_DEBUG("%14p: memcmp(%p, %p, %zu)\n", rtv, s1, s2, n);
   QASAN_LOAD(s1, n);
   QASAN_LOAD(s2, n);
   int r = __libqasan_memcmp(s1, s2, n);
@@ -200,7 +232,7 @@ void *memcpy(void *dest, const void *src, size_t n) {
 
   void *rtv = __builtin_return_address(0);
 
-  QASAN_DEBUG("%14p: memcpy(%p, %p, %ld)\n", rtv, dest, src, n);
+  QASAN_DEBUG("%14p: memcpy(%p, %p, %zu)\n", rtv, dest, src, n);
   QASAN_LOAD(src, n);
   QASAN_STORE(dest, n);
   void *r = __libqasan_memcpy(dest, src, n);
@@ -214,7 +246,7 @@ void *mempcpy(void *dest, const void *src, size_t n) {
 
   void *rtv = __builtin_return_address(0);
 
-  QASAN_DEBUG("%14p: mempcpy(%p, %p, %ld)\n", rtv, dest, src, n);
+  QASAN_DEBUG("%14p: mempcpy(%p, %p, %zu)\n", rtv, dest, src, n);
   QASAN_LOAD(src, n);
   QASAN_STORE(dest, n);
   void *r = (uint8_t *)__libqasan_memcpy(dest, src, n) + n;
@@ -228,7 +260,7 @@ void *memmove(void *dest, const void *src, size_t n) {
 
   void *rtv = __builtin_return_address(0);
 
-  QASAN_DEBUG("%14p: memmove(%p, %p, %ld)\n", rtv, dest, src, n);
+  QASAN_DEBUG("%14p: memmove(%p, %p, %zu)\n", rtv, dest, src, n);
   QASAN_LOAD(src, n);
   QASAN_STORE(dest, n);
   void *r = __libqasan_memmove(dest, src, n);
@@ -242,7 +274,7 @@ void *memset(void *s, int c, size_t n) {
 
   void *rtv = __builtin_return_address(0);
 
-  QASAN_DEBUG("%14p: memset(%p, %d, %ld)\n", rtv, s, c, n);
+  QASAN_DEBUG("%14p: memset(%p, %d, %zu)\n", rtv, s, c, n);
   QASAN_STORE(s, n);
   void *r = __libqasan_memset(s, c, n);
   QASAN_DEBUG("\t\t = %p\n", r);
@@ -255,7 +287,7 @@ void *memchr(const void *s, int c, size_t n) {
 
   void *rtv = __builtin_return_address(0);
 
-  QASAN_DEBUG("%14p: memchr(%p, %d, %ld)\n", rtv, s, c, n);
+  QASAN_DEBUG("%14p: memchr(%p, %d, %zu)\n", rtv, s, c, n);
   void *r = __libqasan_memchr(s, c, n);
   if (r == NULL)
     QASAN_LOAD(s, n);
@@ -271,7 +303,7 @@ void *memrchr(const void *s, int c, size_t n) {
 
   void *rtv = __builtin_return_address(0);
 
-  QASAN_DEBUG("%14p: memrchr(%p, %d, %ld)\n", rtv, s, c, n);
+  QASAN_DEBUG("%14p: memrchr(%p, %d, %zu)\n", rtv, s, c, n);
   QASAN_LOAD(s, n);
   void *r = __libqasan_memrchr(s, c, n);
   QASAN_DEBUG("\t\t = %p\n", r);
@@ -285,7 +317,7 @@ void *memmem(const void *haystack, size_t haystacklen, const void *needle,
 
   void *rtv = __builtin_return_address(0);
 
-  QASAN_DEBUG("%14p: memmem(%p, %ld, %p, %ld)\n", rtv, haystack, haystacklen,
+  QASAN_DEBUG("%14p: memmem(%p, %zu, %p, %zu)\n", rtv, haystack, haystacklen,
               needle, needlelen);
   QASAN_LOAD(haystack, haystacklen);
   QASAN_LOAD(needle, needlelen);
@@ -301,7 +333,7 @@ void bzero(void *s, size_t n) {
 
   void *rtv = __builtin_return_address(0);
 
-  QASAN_DEBUG("%14p: bzero(%p, %ld)\n", rtv, s, n);
+  QASAN_DEBUG("%14p: bzero(%p, %zu)\n", rtv, s, n);
   QASAN_STORE(s, n);
   __libqasan_memset(s, 0, n);
 
@@ -313,7 +345,7 @@ void explicit_bzero(void *s, size_t n) {
 
   void *rtv = __builtin_return_address(0);
 
-  QASAN_DEBUG("%14p: bzero(%p, %ld)\n", rtv, s, n);
+  QASAN_DEBUG("%14p: bzero(%p, %zu)\n", rtv, s, n);
   QASAN_STORE(s, n);
   __libqasan_memset(s, 0, n);
 
@@ -323,7 +355,7 @@ int bcmp(const void *s1, const void *s2, size_t n) {
 
   void *rtv = __builtin_return_address(0);
 
-  QASAN_DEBUG("%14p: bcmp(%p, %p, %ld)\n", rtv, s1, s2, n);
+  QASAN_DEBUG("%14p: bcmp(%p, %p, %zu)\n", rtv, s1, s2, n);
   QASAN_LOAD(s1, n);
   QASAN_LOAD(s2, n);
   int r = __libqasan_bcmp(s1, s2, n);
@@ -381,7 +413,7 @@ int strncasecmp(const char *s1, const char *s2, size_t n) {
 
   void *rtv = __builtin_return_address(0);
 
-  QASAN_DEBUG("%14p: strncasecmp(%p, %p, %ld)\n", rtv, s1, s2, n);
+  QASAN_DEBUG("%14p: strncasecmp(%p, %p, %zu)\n", rtv, s1, s2, n);
   size_t l1 = __libqasan_strnlen(s1, n);
   QASAN_LOAD(s1, l1);
   size_t l2 = __libqasan_strnlen(s2, n);
@@ -431,7 +463,7 @@ int strncmp(const char *s1, const char *s2, size_t n) {
 
   void *rtv = __builtin_return_address(0);
 
-  QASAN_DEBUG("%14p: strncmp(%p, %p, %ld)\n", rtv, s1, s2, n);
+  QASAN_DEBUG("%14p: strncmp(%p, %p, %zu)\n", rtv, s1, s2, n);
   size_t l1 = __libqasan_strnlen(s1, n);
   QASAN_LOAD(s1, l1);
   size_t l2 = __libqasan_strnlen(s2, n);
@@ -462,7 +494,7 @@ char *strncpy(char *dest, const char *src, size_t n) {
 
   void *rtv = __builtin_return_address(0);
 
-  QASAN_DEBUG("%14p: strncpy(%p, %p, %ld)\n", rtv, dest, src, n);
+  QASAN_DEBUG("%14p: strncpy(%p, %p, %zu)\n", rtv, dest, src, n);
   size_t l = __libqasan_strnlen(src, n);
   QASAN_STORE(dest, n);
   void *r;
@@ -521,7 +553,7 @@ size_t strlen(const char *s) {
   QASAN_DEBUG("%14p: strlen(%p)\n", rtv, s);
   size_t r = __libqasan_strlen(s);
   QASAN_LOAD(s, r + 1);
-  QASAN_DEBUG("\t\t = %ld\n", r);
+  QASAN_DEBUG("\t\t = %zu\n", r);
 
   return r;
 
@@ -531,10 +563,10 @@ size_t strnlen(const char *s, size_t n) {
 
   void *rtv = __builtin_return_address(0);
 
-  QASAN_DEBUG("%14p: strnlen(%p, %ld)\n", rtv, s, n);
+  QASAN_DEBUG("%14p: strnlen(%p, %zu)\n", rtv, s, n);
   size_t r = __libqasan_strnlen(s, n);
   QASAN_LOAD(s, r);
-  QASAN_DEBUG("\t\t = %ld\n", r);
+  QASAN_DEBUG("\t\t = %zu\n", r);
 
   return r;
 
@@ -621,7 +653,7 @@ size_t wcslen(const wchar_t *s) {
   QASAN_DEBUG("%14p: wcslen(%p)\n", rtv, s);
   size_t r = __libqasan_wcslen(s);
   QASAN_LOAD(s, sizeof(wchar_t) * (r + 1));
-  QASAN_DEBUG("\t\t = %ld\n", r);
+  QASAN_DEBUG("\t\t = %zu\n", r);
 
   return r;
 
diff --git a/qemu_mode/libqasan/libqasan.c b/qemu_mode/libqasan/libqasan.c
index 11b50270..9fc4ef7a 100644
--- a/qemu_mode/libqasan/libqasan.c
+++ b/qemu_mode/libqasan/libqasan.c
@@ -72,7 +72,7 @@ void __libqasan_print_maps(void) {
 
   QASAN_LOG("QEMU-AddressSanitizer (v%s)\n", QASAN_VERSTR);
   QASAN_LOG(
-      "Copyright (C) 2019-2020 Andrea Fioraldi <andreafioraldi@gmail.com>\n");
+      "Copyright (C) 2019-2021 Andrea Fioraldi <andreafioraldi@gmail.com>\n");
   QASAN_LOG("\n");
 
   if (__qasan_log) __libqasan_print_maps();
diff --git a/qemu_mode/libqasan/malloc.c b/qemu_mode/libqasan/malloc.c
index f8237826..6fe6fc8c 100644
--- a/qemu_mode/libqasan/malloc.c
+++ b/qemu_mode/libqasan/malloc.c
@@ -24,6 +24,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *******************************************************************************/
 
 #include "libqasan.h"
+#include <features.h>
 #include <errno.h>
 #include <stddef.h>
 #include <assert.h>
@@ -65,9 +66,26 @@ struct chunk_struct {
 
 };
 
+#ifdef __GLIBC__
+
+void *(*__lq_libc_malloc)(size_t);
+void (*__lq_libc_free)(void *);
+  #define backend_malloc __lq_libc_malloc
+  #define backend_free __lq_libc_free
+
+  #define TMP_ZONE_SIZE 4096
+static int           __tmp_alloc_zone_idx;
+static unsigned char __tmp_alloc_zone[TMP_ZONE_SIZE];
+
+#else
+
 // From dlmalloc.c
-void *dlmalloc(size_t);
-void  dlfree(void *);
+void *                    dlmalloc(size_t);
+void                      dlfree(void *);
+  #define backend_malloc dlmalloc
+  #define backend_free dlfree
+
+#endif
 
 int __libqasan_malloc_initialized;
 
@@ -102,9 +120,9 @@ static int quanratine_push(struct chunk_begin *ck) {
     quarantine_bytes -= tmp->requested_size;
 
     if (tmp->aligned_orig)
-      dlfree(tmp->aligned_orig);
+      backend_free(tmp->aligned_orig);
     else
-      dlfree(tmp);
+      backend_free(tmp);
 
   }
 
@@ -122,6 +140,11 @@ void __libqasan_init_malloc(void) {
 
   if (__libqasan_malloc_initialized) return;
 
+#ifdef __GLIBC__
+  __lq_libc_malloc = dlsym(RTLD_NEXT, "malloc");
+  __lq_libc_free = dlsym(RTLD_NEXT, "free");
+#endif
+
   LOCK_INIT(&quarantine_lock, PTHREAD_PROCESS_PRIVATE);
 
   __libqasan_malloc_initialized = 1;
@@ -136,19 +159,36 @@ size_t __libqasan_malloc_usable_size(void *ptr) {
   char *p = ptr;
   p -= sizeof(struct chunk_begin);
 
+  // Validate that the chunk marker is readable (a crude check
+  // to verify that ptr is a valid malloc region before we dereference it)
+  QASAN_LOAD(p, sizeof(struct chunk_begin) - REDZONE_SIZE);
   return ((struct chunk_begin *)p)->requested_size;
 
 }
 
 void *__libqasan_malloc(size_t size) {
 
-  if (!__libqasan_malloc_initialized) { __libqasan_init_malloc(); }
+  if (!__libqasan_malloc_initialized) {
+
+    __libqasan_init_malloc();
+
+#ifdef __GLIBC__
+    void *r = &__tmp_alloc_zone[__tmp_alloc_zone_idx];
+
+    if (size & (ALLOC_ALIGN_SIZE - 1))
+      __tmp_alloc_zone_idx +=
+          (size & ~(ALLOC_ALIGN_SIZE - 1)) + ALLOC_ALIGN_SIZE;
+    else
+      __tmp_alloc_zone_idx += size;
+
+    return r;
+#endif
 
-  if (!__libqasan_malloc_initialized) __libqasan_init_malloc();
+  }
 
   int state = QASAN_SWAP(QASAN_DISABLED);  // disable qasan for this thread
 
-  struct chunk_begin *p = dlmalloc(sizeof(struct chunk_struct) + size);
+  struct chunk_begin *p = backend_malloc(sizeof(struct chunk_struct) + size);
 
   QASAN_SWAP(state);
 
@@ -179,9 +219,18 @@ void __libqasan_free(void *ptr) {
 
   if (!ptr) return;
 
+#ifdef __GLIBC__
+  if (ptr >= (void *)__tmp_alloc_zone &&
+      ptr < ((void *)__tmp_alloc_zone + TMP_ZONE_SIZE))
+    return;
+#endif
+
   struct chunk_begin *p = ptr;
   p -= 1;
 
+  // Validate that the chunk marker is readable (a crude check
+  // to verify that ptr is a valid malloc region before we dereference it)
+  QASAN_LOAD(p, sizeof(struct chunk_begin) - REDZONE_SIZE);
   size_t n = p->requested_size;
 
   QASAN_STORE(ptr, n);
@@ -190,9 +239,9 @@ void __libqasan_free(void *ptr) {
   if (!quanratine_push(p)) {
 
     if (p->aligned_orig)
-      dlfree(p->aligned_orig);
+      backend_free(p->aligned_orig);
     else
-      dlfree(p);
+      backend_free(p);
 
   }
 
@@ -210,6 +259,17 @@ void *__libqasan_calloc(size_t nmemb, size_t size) {
 
   size *= nmemb;
 
+#ifdef __GLIBC__
+  if (!__libqasan_malloc_initialized) {
+
+    void *r = &__tmp_alloc_zone[__tmp_alloc_zone_idx];
+    __tmp_alloc_zone_idx += size;
+    return r;
+
+  }
+
+#endif
+
   char *p = __libqasan_malloc(size);
   if (!p) return NULL;
 
@@ -252,7 +312,7 @@ int __libqasan_posix_memalign(void **ptr, size_t align, size_t len) {
 
   int state = QASAN_SWAP(QASAN_DISABLED);  // disable qasan for this thread
 
-  char *orig = dlmalloc(sizeof(struct chunk_struct) + size);
+  char *orig = backend_malloc(sizeof(struct chunk_struct) + size);
 
   QASAN_SWAP(state);
 
diff --git a/qemu_mode/libqasan/uninstrument.c b/qemu_mode/libqasan/uninstrument.c
index e75a09eb..5bf841a3 100644
--- a/qemu_mode/libqasan/uninstrument.c
+++ b/qemu_mode/libqasan/uninstrument.c
@@ -1,7 +1,7 @@
 /*
 
 This code is DEPRECATED!
-I'm keeping it here cause maybe the unistrumentation of a function is needed
+I'm keeping it here cause maybe the uninstrumentation of a function is needed
 for some strange reason.
 
 */
diff --git a/qemu_mode/qemuafl b/qemu_mode/qemuafl
-Subproject 6ab6bf28decb3e36eee43ffbd4a3bfd052dbbb5
+Subproject 0fb212daab492411b3e323bc18a3074c1aecfd3
diff --git a/src/afl-analyze.c b/src/afl-analyze.c
index 20aef2da..e106cd31 100644
--- a/src/afl-analyze.c
+++ b/src/afl-analyze.c
@@ -212,7 +212,7 @@ static s32 write_to_file(u8 *path, u8 *mem, u32 len) {
 
   unlink(path);                                            /* Ignore errors */
 
-  ret = open(path, O_RDWR | O_CREAT | O_EXCL, 0600);
+  ret = open(path, O_RDWR | O_CREAT | O_EXCL, DEFAULT_PERMISSION);
 
   if (ret < 0) { PFATAL("Unable to create '%s'", path); }
 
@@ -785,6 +785,7 @@ static void set_up_environment(void) {
          "abort_on_error=1:"
          "detect_leaks=0:"
          "allocator_may_return_null=1:"
+         "detect_odr_violation=0:"
          "symbolize=0:"
          "handle_segv=0:"
          "handle_sigbus=0:"
@@ -821,38 +822,7 @@ static void set_up_environment(void) {
 
     if (qemu_mode) {
 
-      u8 *qemu_preload = getenv("QEMU_SET_ENV");
-      u8 *afl_preload = getenv("AFL_PRELOAD");
-      u8 *buf;
-
-      s32 i, afl_preload_size = strlen(afl_preload);
-      for (i = 0; i < afl_preload_size; ++i) {
-
-        if (afl_preload[i] == ',') {
-
-          PFATAL(
-              "Comma (',') is not allowed in AFL_PRELOAD when -Q is "
-              "specified!");
-
-        }
-
-      }
-
-      if (qemu_preload) {
-
-        buf = alloc_printf("%s,LD_PRELOAD=%s,DYLD_INSERT_LIBRARIES=%s",
-                           qemu_preload, afl_preload, afl_preload);
-
-      } else {
-
-        buf = alloc_printf("LD_PRELOAD=%s,DYLD_INSERT_LIBRARIES=%s",
-                           afl_preload, afl_preload);
-
-      }
-
-      setenv("QEMU_SET_ENV", buf, 1);
-
-      ck_free(buf);
+      /* afl-qemu-trace takes care of converting AFL_PRELOAD. */
 
     } else {
 
@@ -1078,31 +1048,6 @@ int main(int argc, char **argv_orig, char **envp) {
 
   if (optind == argc || !in_file) { usage(argv[0]); }
 
-  if (qemu_mode && getenv("AFL_USE_QASAN")) {
-
-    u8 *preload = getenv("AFL_PRELOAD");
-    u8 *libqasan = get_libqasan_path(argv_orig[0]);
-
-    if (!preload) {
-
-      setenv("AFL_PRELOAD", libqasan, 0);
-
-    } else {
-
-      u8 *result = ck_alloc(strlen(libqasan) + strlen(preload) + 2);
-      strcpy(result, libqasan);
-      strcat(result, " ");
-      strcat(result, preload);
-
-      setenv("AFL_PRELOAD", result, 1);
-      ck_free(result);
-
-    }
-
-    ck_free(libqasan);
-
-  }
-
   map_size = get_map_size();
 
   use_hex_offsets = !!get_afl_env("AFL_ANALYZE_HEX");
diff --git a/src/afl-as.c b/src/afl-as.c
index 7de267a3..aebd0ac8 100644
--- a/src/afl-as.c
+++ b/src/afl-as.c
@@ -280,7 +280,7 @@ static void add_instrumentation(void) {
 
   }
 
-  outfd = open(modified_file, O_WRONLY | O_EXCL | O_CREAT, 0600);
+  outfd = open(modified_file, O_WRONLY | O_EXCL | O_CREAT, DEFAULT_PERMISSION);
 
   if (outfd < 0) { PFATAL("Unable to write to '%s'", modified_file); }
 
diff --git a/src/afl-cc.c b/src/afl-cc.c
index cf10d9a7..80fc0742 100644
--- a/src/afl-cc.c
+++ b/src/afl-cc.c
@@ -22,7 +22,7 @@
 #include "types.h"
 #include "debug.h"
 #include "alloc-inl.h"
-#include "llvm-ngram-coverage.h"
+#include "llvm-alternative-coverage.h"
 
 #include <stdio.h>
 #include <unistd.h>
@@ -50,7 +50,7 @@ static u8 **cc_params;                 /* Parameters passed to the real CC  */
 static u32  cc_par_cnt = 1;            /* Param count, including argv0      */
 static u8   clang_mode;                /* Invoked as afl-clang*?            */
 static u8   llvm_fullpath[PATH_MAX];
-static u8   instrument_mode, instrument_opt_mode, ngram_size, lto_mode;
+static u8   instrument_mode, instrument_opt_mode, ngram_size, ctx_k, lto_mode;
 static u8   compiler_mode, plusplus_mode, have_instr_env = 0;
 static u8   have_gcc, have_llvm, have_gcc_plugin, have_lto, have_instr_list = 0;
 static u8 * lto_flag = AFL_CLANG_FLTO, *argvnull;
@@ -73,7 +73,9 @@ enum {
   INSTRUMENT_GCC = 6,
   INSTRUMENT_CLANG = 7,
   INSTRUMENT_OPT_CTX = 8,
-  INSTRUMENT_OPT_NGRAM = 16
+  INSTRUMENT_OPT_NGRAM = 16,
+  INSTRUMENT_OPT_CALLER = 32,
+  INSTRUMENT_OPT_CTX_K = 64,
 
 };
 
@@ -88,7 +90,7 @@ char instrument_mode_string[18][18] = {
     "GCC",
     "CLANG",
     "CTX",
-    "",
+    "CALLER",
     "",
     "",
     "",
@@ -315,16 +317,9 @@ static void edit_params(u32 argc, char **argv, char **envp) {
   u8 fortify_set = 0, asan_set = 0, x_set = 0, bit_mode = 0, shared_linking = 0,
      preprocessor_only = 0, have_unroll = 0, have_o = 0, have_pic = 0,
      have_c = 0;
-  u8 *name;
 
   cc_params = ck_alloc((argc + 128) * sizeof(u8 *));
 
-  name = strrchr(argv[0], '/');
-  if (!name)
-    name = argv[0];
-  else
-    ++name;
-
   if (lto_mode) {
 
     if (lto_flag[0] != '-')
@@ -561,6 +556,11 @@ static void edit_params(u32 argc, char **argv, char **envp) {
 
     }
 
+#if LLVM_MAJOR >= 13
+    // LLVM 13+: opt out of the new pass manager via the flag below
+    cc_params[cc_par_cnt++] = "-fno-experimental-new-pass-manager";
+#endif
+
     if (lto_mode && !have_c) {
 
       u8 *ld_path = strdup(AFL_REAL_LD);
@@ -590,6 +590,7 @@ static void edit_params(u32 argc, char **argv, char **envp) {
 #if LLVM_MAJOR > 10 || (LLVM_MAJOR == 10 && LLVM_MINOR > 0)
   #ifdef __ANDROID__
         cc_params[cc_par_cnt++] = "-fsanitize-coverage=trace-pc-guard";
+        instrument_mode = INSTRUMENT_LLVMNATIVE;
   #else
         if (have_instr_list) {
 
@@ -599,6 +600,7 @@ static void edit_params(u32 argc, char **argv, char **envp) {
                 "-fsanitize-coverage-allow/denylist, you can use "
                 "AFL_LLVM_ALLOWLIST/AFL_LLMV_DENYLIST instead.\n");
           cc_params[cc_par_cnt++] = "-fsanitize-coverage=trace-pc-guard";
+          instrument_mode = INSTRUMENT_LLVMNATIVE;
 
         } else {
 
@@ -618,6 +620,7 @@ static void edit_params(u32 argc, char **argv, char **envp) {
               "Using unoptimized trace-pc-guard, upgrade to llvm 10.0.1+ for "
               "enhanced version.\n");
         cc_params[cc_par_cnt++] = "-fsanitize-coverage=trace-pc-guard";
+        instrument_mode = INSTRUMENT_LLVMNATIVE;
   #else
         FATAL("pcguard instrumentation requires llvm 4.0.1+");
   #endif
@@ -682,19 +685,49 @@ static void edit_params(u32 argc, char **argv, char **envp) {
 
   /* Detect stray -v calls from ./configure scripts. */
 
+  u8 skip_next = 0;
   while (--argc) {
 
     u8 *cur = *(++argv);
 
+    if (skip_next) {
+
+      skip_next = 0;
+      continue;
+
+    }
+
     if (!strncmp(cur, "--afl", 5)) continue;
     if (lto_mode && !strncmp(cur, "-fuse-ld=", 9)) continue;
     if (lto_mode && !strncmp(cur, "--ld-path=", 10)) continue;
     if (!strncmp(cur, "-fno-unroll", 11)) continue;
     if (strstr(cur, "afl-compiler-rt") || strstr(cur, "afl-llvm-rt")) continue;
-    if (!strcmp(cur, "-Wl,-z,defs") || !strcmp(cur, "-Wl,--no-undefined"))
+    if (!strcmp(cur, "-Wl,-z,defs") || !strcmp(cur, "-Wl,--no-undefined") ||
+        !strcmp(cur, "--no-undefined")) {
+
       continue;
-    if (!strncmp(cur, "-fsanitize=fuzzer-", strlen("-fsanitize=fuzzer-")) ||
-        !strncmp(cur, "-fsanitize-coverage", strlen("-fsanitize-coverage"))) {
+
+    }
+
+    if (!strcmp(cur, "-z")) {
+
+      u8 *param = *(argv + 1);               /* NULL when -z is the last arg */
+      if (param && !strcmp(param, "defs")) {
+
+        skip_next = 1;
+        continue;
+
+      }
+
+    }
+
+    if ((!strncmp(cur, "-fsanitize=fuzzer-", strlen("-fsanitize=fuzzer-")) ||
+         !strncmp(cur, "-fsanitize-coverage", strlen("-fsanitize-coverage"))) &&
+        (strncmp(cur, "-fsanitize-coverage-allow",
+                 strlen("-fsanitize-coverage-allow")) &&
+         strncmp(cur, "-fsanitize-coverage-deny",
+                 strlen("-fsanitize-coverage-deny")) &&
+         instrument_mode != INSTRUMENT_LLVMNATIVE)) {
 
       if (!be_quiet) { WARNF("Found '%s' - stripping!", cur); }
       continue;
@@ -940,7 +973,10 @@ static void edit_params(u32 argc, char **argv, char **envp) {
 
   }
 
-  if (preprocessor_only) {
+  // prevent unnecessary build errors
+  cc_params[cc_par_cnt++] = "-Wno-unused-command-line-argument";
+
+  if (preprocessor_only || have_c) {
 
     /* In the preprocessor_only case (-E), we are not actually compiling at
        all but requesting the compiler to output preprocessed sources only.
@@ -959,18 +995,24 @@ static void edit_params(u32 argc, char **argv, char **envp) {
     switch (bit_mode) {
 
       case 0:
-        cc_params[cc_par_cnt++] =
-            alloc_printf("%s/afl-compiler-rt.o", obj_path);
+        if (!shared_linking)
+          cc_params[cc_par_cnt++] =
+              alloc_printf("%s/afl-compiler-rt.o", obj_path);
         if (lto_mode)
           cc_params[cc_par_cnt++] =
               alloc_printf("%s/afl-llvm-rt-lto.o", obj_path);
         break;
 
       case 32:
-        cc_params[cc_par_cnt++] =
-            alloc_printf("%s/afl-compiler-rt-32.o", obj_path);
-        if (access(cc_params[cc_par_cnt - 1], R_OK))
-          FATAL("-m32 is not supported by your compiler");
+        if (!shared_linking) {
+
+          cc_params[cc_par_cnt++] =
+              alloc_printf("%s/afl-compiler-rt-32.o", obj_path);
+          if (access(cc_params[cc_par_cnt - 1], R_OK))
+            FATAL("-m32 is not supported by your compiler");
+
+        }
+
         if (lto_mode) {
 
           cc_params[cc_par_cnt++] =
@@ -983,10 +1025,15 @@ static void edit_params(u32 argc, char **argv, char **envp) {
         break;
 
       case 64:
-        cc_params[cc_par_cnt++] =
-            alloc_printf("%s/afl-compiler-rt-64.o", obj_path);
-        if (access(cc_params[cc_par_cnt - 1], R_OK))
-          FATAL("-m64 is not supported by your compiler");
+        if (!shared_linking) {
+
+          cc_params[cc_par_cnt++] =
+              alloc_printf("%s/afl-compiler-rt-64.o", obj_path);
+          if (access(cc_params[cc_par_cnt - 1], R_OK))
+            FATAL("-m64 is not supported by your compiler");
+
+        }
+
         if (lto_mode) {
 
           cc_params[cc_par_cnt++] =
@@ -1001,20 +1048,17 @@ static void edit_params(u32 argc, char **argv, char **envp) {
     }
 
   #if !defined(__APPLE__) && !defined(__sun)
-    if (!shared_linking && !have_c)
+    if (!shared_linking)
       cc_params[cc_par_cnt++] =
           alloc_printf("-Wl,--dynamic-list=%s/dynamic_list.txt", obj_path);
   #endif
 
+  }
+
   #if defined(USEMMAP) && !defined(__HAIKU__)
-    if (!have_c) cc_params[cc_par_cnt++] = "-lrt";
+  cc_params[cc_par_cnt++] = "-lrt";
   #endif
 
-    // prevent unnecessary build errors
-    cc_params[cc_par_cnt++] = "-Wno-unused-command-line-argument";
-
-  }
-
 #endif
 
   cc_params[cc_par_cnt] = NULL;
@@ -1025,7 +1069,7 @@ static void edit_params(u32 argc, char **argv, char **envp) {
 
 int main(int argc, char **argv, char **envp) {
 
-  int   i;
+  int   i, passthrough = 0;
   char *callname = argv[0], *ptr = NULL;
 
   if (getenv("AFL_DEBUG")) {
@@ -1045,6 +1089,13 @@ int main(int argc, char **argv, char **envp) {
 
   }
 
+  if (getenv("AFL_PASSTHROUGH") || getenv("AFL_NOOPT")) {
+
+    passthrough = 1;
+    if (!debug) { be_quiet = 1; }
+
+  }
+
   if ((ptr = strrchr(callname, '/')) != NULL) callname = ptr + 1;
   argvnull = (u8 *)argv[0];
   check_environment_vars(envp);
@@ -1213,6 +1264,7 @@ int main(int argc, char **argv, char **envp) {
 
       } else if (strcasecmp(ptr, "LLVMNATIVE") == 0 ||
 
+                 strcasecmp(ptr, "NATIVE") == 0 ||
                  strcasecmp(ptr, "LLVM-NATIVE") == 0) {
 
         compiler_mode = LLVM;
@@ -1275,6 +1327,7 @@ int main(int argc, char **argv, char **envp) {
   }
 
   if (getenv("AFL_LLVM_CTX")) instrument_opt_mode |= INSTRUMENT_OPT_CTX;
+  if (getenv("AFL_LLVM_CALLER")) instrument_opt_mode |= INSTRUMENT_OPT_CALLER;
 
   if (getenv("AFL_LLVM_NGRAM_SIZE")) {
 
@@ -1288,6 +1341,26 @@ int main(int argc, char **argv, char **envp) {
 
   }
 
+  if (getenv("AFL_LLVM_CTX_K")) {
+
+    ctx_k = atoi(getenv("AFL_LLVM_CTX_K"));
+    if (ctx_k < 1 || ctx_k > CTX_MAX_K)
+      FATAL("K-CTX instrumentation mode must be between 1 and CTX_MAX_K (%u)",
+            CTX_MAX_K);
+    if (ctx_k == 1) {
+
+      setenv("AFL_LLVM_CALLER", "1", 1);
+      unsetenv("AFL_LLVM_CTX_K");
+      instrument_opt_mode |= INSTRUMENT_OPT_CALLER;
+
+    } else {
+
+      instrument_opt_mode |= INSTRUMENT_OPT_CTX_K;
+
+    }
+
+  }
+
   if (getenv("AFL_LLVM_INSTRUMENT")) {
 
     u8 *ptr2 = strtok(getenv("AFL_LLVM_INSTRUMENT"), ":,;");
@@ -1383,6 +1456,44 @@ int main(int argc, char **argv, char **envp) {
 
       }
 
+      if (strncasecmp(ptr2, "ctx-", strlen("ctx-")) == 0) {
+
+        u8 *ptr3 = ptr2 + strlen("ctx-");
+        while (*ptr3 && (*ptr3 < '0' || *ptr3 > '9'))
+          ptr3++;
+
+        if (!*ptr3) {
+
+          if ((ptr3 = getenv("AFL_LLVM_CTX_K")) == NULL)
+            FATAL(
+                "you must set the K-CTX K with (e.g. for value 2) "
+                "AFL_LLVM_INSTRUMENT=ctx-2");
+
+        }
+
+        ctx_k = atoi(ptr3);
+        if (ctx_k < 1 || ctx_k > CTX_MAX_K)
+          FATAL(
+              "K-CTX instrumentation option must be between 1 and CTX_MAX_K "
+              "(%u)",
+              CTX_MAX_K);
+
+        if (ctx_k == 1) {
+
+          instrument_opt_mode |= INSTRUMENT_OPT_CALLER;
+          setenv("AFL_LLVM_CALLER", "1", 1);
+          unsetenv("AFL_LLVM_CTX_K");
+
+        } else {
+
+          instrument_opt_mode |= (INSTRUMENT_OPT_CTX_K);
+          u8 *ptr4 = alloc_printf("%u", ctx_k);
+          setenv("AFL_LLVM_CTX_K", ptr4, 1);
+
+        }
+
+      }
+
       if (strncasecmp(ptr2, "ctx", strlen("ctx")) == 0) {
 
         instrument_opt_mode |= INSTRUMENT_OPT_CTX;
@@ -1390,6 +1501,13 @@ int main(int argc, char **argv, char **envp) {
 
       }
 
+      if (strncasecmp(ptr2, "caller", strlen("caller")) == 0) {
+
+        instrument_opt_mode |= INSTRUMENT_OPT_CALLER;
+        setenv("AFL_LLVM_CALLER", "1", 1);
+
+      }
+
       if (strncasecmp(ptr2, "ngram", strlen("ngram")) == 0) {
 
         u8 *ptr3 = ptr2 + strlen("ngram");
@@ -1423,6 +1541,35 @@ int main(int argc, char **argv, char **envp) {
 
   }
 
+  if ((instrument_opt_mode & INSTRUMENT_OPT_CTX) &&
+      (instrument_opt_mode & INSTRUMENT_OPT_CALLER)) {
+
+    FATAL("you cannot set CTX and CALLER together");
+
+  }
+
+  if ((instrument_opt_mode & INSTRUMENT_OPT_CTX) &&
+      (instrument_opt_mode & INSTRUMENT_OPT_CTX_K)) {
+
+    FATAL("you cannot set CTX and K-CTX together");
+
+  }
+
+  if ((instrument_opt_mode & INSTRUMENT_OPT_CALLER) &&
+      (instrument_opt_mode & INSTRUMENT_OPT_CTX_K)) {
+
+    FATAL("you cannot set CALLER and K-CTX together");
+
+  }
+
+  if (instrument_opt_mode && instrument_mode == INSTRUMENT_DEFAULT &&
+      (compiler_mode == LLVM || compiler_mode == UNSET)) {
+
+    instrument_mode = INSTRUMENT_CLASSIC;
+    compiler_mode = LLVM;
+
+  }
+
   if (!compiler_mode) {
 
     // lto is not a default because outside of afl-cc RANLIB and AR have to
@@ -1492,12 +1639,13 @@ int main(int argc, char **argv, char **envp) {
         "      CLASSIC              %s      no  yes     module yes yes    "
         "yes\n"
         "        - NORMAL\n"
+        "        - CALLER\n"
         "        - CTX\n"
         "        - NGRAM-{2-16}\n"
         "      INSTRIM                           no  yes     module yes yes "
         "   yes\n"
         "        - NORMAL\n"
-        "        - CTX\n"
+        "        - CALLER\n"
         "        - NGRAM-{2-16}\n"
         "  [GCC_PLUGIN] gcc plugin: %s%s\n"
         "      CLASSIC              DEFAULT      no  yes     no     no  no     "
@@ -1529,8 +1677,8 @@ int main(int argc, char **argv, char **envp) {
         "of afl-cc.\n\n");
 
 #if LLVM_MAJOR > 10 || (LLVM_MAJOR == 10 && LLVM_MINOR > 0)
-  #define NATIVE_MSG                                              \
-    "  NATIVE:  use llvm's native PCGUARD instrumentation (less " \
+  #define NATIVE_MSG                                                   \
+    "  LLVM-NATIVE:  use llvm's native PCGUARD instrumentation (less " \
     "performant)\n"
 #else
   #define NATIVE_MSG ""
@@ -1544,7 +1692,10 @@ int main(int argc, char **argv, char **envp) {
         NATIVE_MSG
 
         "  CLASSIC: decision target instrumentation (README.llvm.md)\n"
-        "  CTX:     CLASSIC + callee context (instrumentation/README.ctx.md)\n"
+        "  CALLER:  CLASSIC + single callee context "
+        "(instrumentation/README.ctx.md)\n"
+        "  CTX:     CLASSIC + full callee context "
+        "(instrumentation/README.ctx.md)\n"
         "  NGRAM-x: CLASSIC + previous path "
         "((instrumentation/README.ngram.md)\n"
         "  INSTRIM: Dominator tree (for LLVM <= 6.0) "
@@ -1587,8 +1738,11 @@ int main(int argc, char **argv, char **envp) {
           "  AFL_DONT_OPTIMIZE: disable optimization instead of -O3\n"
           "  AFL_NO_BUILTIN: no builtins for string compare functions (for "
           "libtokencap.so)\n"
+          "  AFL_NOOPT: behave like a normal compiler (to pass configure "
+          "tests)\n"
           "  AFL_PATH: path to instrumenting pass and runtime  "
           "(afl-compiler-rt.*o)\n"
+          "  AFL_IGNORE_UNKNOWN_ENVS: don't warn on unknown env vars\n"
           "  AFL_INST_RATIO: percentage of branches to instrument\n"
           "  AFL_QUIET: suppress verbose output\n"
           "  AFL_HARDEN: adds code hardening to catch memory bugs\n"
@@ -1637,15 +1791,17 @@ int main(int argc, char **argv, char **envp) {
             "  AFL_LLVM_CMPLOG: log operands of comparisons (RedQueen "
             "mutator)\n"
             "  AFL_LLVM_INSTRUMENT: set instrumentation mode:\n"
-            "    CLASSIC, INSTRIM, PCGUARD, LTO, GCC, CLANG, CTX, NGRAM-2 ... "
-            "NGRAM-16\n"
+            "    CLASSIC, INSTRIM, PCGUARD, LTO, GCC, CLANG, CALLER, CTX, "
+            "NGRAM-2 ..-16\n"
             " You can also use the old environment variables instead:\n"
             "  AFL_LLVM_USE_TRACE_PC: use LLVM trace-pc-guard instrumentation\n"
             "  AFL_LLVM_INSTRIM: use light weight instrumentation InsTrim\n"
             "  AFL_LLVM_INSTRIM_LOOPHEAD: optimize loop tracing for speed "
             "(option to INSTRIM)\n"
-            "  AFL_LLVM_CTX: use context sensitive coverage (for CLASSIC and "
-            "INSTRIM)\n"
+            "  AFL_LLVM_CALLER: use single context sensitive coverage (for "
+            "CLASSIC)\n"
+            "  AFL_LLVM_CTX: use full context sensitive coverage (for "
+            "CLASSIC)\n"
             "  AFL_LLVM_NGRAM_SIZE: use ngram prev_loc count coverage (for "
             "CLASSIC & INSTRIM)\n");
 
@@ -1700,7 +1856,10 @@ int main(int argc, char **argv, char **envp) {
         "Do not be overwhelmed :) afl-cc uses good defaults if no options are "
         "selected.\n"
         "Read the documentation for FEATURES though, all are good but few are "
-        "defaults.\n\n");
+        "defaults.\n"
+        "Recommended is afl-clang-lto with AFL_LLVM_CMPLOG or afl-clang-fast "
+        "with\n"
+        "AFL_LLVM_CMPLOG and AFL_LLVM_DICT2FILE.\n\n");
 
     exit(1);
 
@@ -1760,7 +1919,7 @@ int main(int argc, char **argv, char **envp) {
   }
 
   if (instrument_opt_mode && compiler_mode != LLVM)
-    FATAL("CTX and NGRAM can only be used in LLVM mode");
+    FATAL("CTX, CALLER and NGRAM can only be used in LLVM mode");
 
   if (!instrument_opt_mode) {
 
@@ -1770,15 +1929,18 @@ int main(int argc, char **argv, char **envp) {
 
   } else {
 
-    if (instrument_opt_mode == INSTRUMENT_OPT_CTX)
+    char *ptr2 = alloc_printf(" + NGRAM-%u", ngram_size);
+    char *ptr3 = alloc_printf(" + K-CTX-%u", ctx_k);
 
-      ptr = alloc_printf("%s + CTX", instrument_mode_string[instrument_mode]);
-    else if (instrument_opt_mode == INSTRUMENT_OPT_NGRAM)
-      ptr = alloc_printf("%s + NGRAM-%u",
-                         instrument_mode_string[instrument_mode], ngram_size);
-    else
-      ptr = alloc_printf("%s + CTX + NGRAM-%u",
-                         instrument_mode_string[instrument_mode], ngram_size);
+    ptr = alloc_printf(
+        "%s%s%s%s%s", instrument_mode_string[instrument_mode],
+        (instrument_opt_mode & INSTRUMENT_OPT_CTX) ? " + CTX" : "",
+        (instrument_opt_mode & INSTRUMENT_OPT_CALLER) ? " + CALLER" : "",
+        (instrument_opt_mode & INSTRUMENT_OPT_NGRAM) ? ptr2 : "",
+        (instrument_opt_mode & INSTRUMENT_OPT_CTX_K) ? ptr3 : "");
+
+    ck_free(ptr2);
+    ck_free(ptr3);
 
   }
 
@@ -1789,11 +1951,14 @@ int main(int argc, char **argv, char **envp) {
         "(requires LLVM 11 or higher)");
 #endif
 
-  if (instrument_opt_mode && instrument_mode != INSTRUMENT_CLASSIC &&
-      instrument_mode != INSTRUMENT_CFG)
+  if (instrument_opt_mode && instrument_mode == INSTRUMENT_CFG &&
+      instrument_opt_mode & INSTRUMENT_OPT_CTX)
+    FATAL("CFG instrumentation mode supports NGRAM and CALLER, but not CTX.");
+  else if (instrument_opt_mode && instrument_mode != INSTRUMENT_CLASSIC)
+    // we will drop CFG/INSTRIM in the future so do not advertise
     FATAL(
-        "CTX and NGRAM instrumentation options can only be used with CFG "
-        "(recommended) and CLASSIC instrumentation modes!");
+        "CALLER, CTX and NGRAM instrumentation options can only be used with "
+        "the LLVM CLASSIC instrumentation mode.");
 
   if (getenv("AFL_LLVM_SKIP_NEVERZERO") && getenv("AFL_LLVM_NOT_ZERO"))
     FATAL(
@@ -1840,6 +2005,8 @@ int main(int argc, char **argv, char **envp) {
     for (i = 0; i < argc; i++)
       SAYF(" '%s'", argv[i]);
     SAYF("\n");
+    fflush(stdout);
+    fflush(stderr);
 
   }
 
@@ -1880,10 +2047,21 @@ int main(int argc, char **argv, char **envp) {
     for (i = 0; i < (s32)cc_par_cnt; i++)
       SAYF(" '%s'", cc_params[i]);
     SAYF("\n");
+    fflush(stdout);
+    fflush(stderr);
 
   }
 
-  execvp(cc_params[0], (char **)cc_params);
+  if (passthrough) {
+
+    argv[0] = cc_params[0];
+    execvp(cc_params[0], (char **)argv);
+
+  } else {
+
+    execvp(cc_params[0], (char **)cc_params);
+
+  }
 
   FATAL("Oops, failed to execute '%s' - check your PATH", cc_params[0]);
 
diff --git a/src/afl-common.c b/src/afl-common.c
index 1cc7f462..1f9839a2 100644
--- a/src/afl-common.c
+++ b/src/afl-common.c
@@ -47,6 +47,10 @@ u8  be_quiet = 0;
 u8 *doc_path = "";
 u8  last_intr = 0;
 
+#ifndef AFL_PATH
+  #define AFL_PATH "/usr/local/lib/afl/"
+#endif
+
 void detect_file_args(char **argv, u8 *prog_in, bool *use_stdin) {
 
   u32 i = 0;
@@ -66,31 +70,26 @@ void detect_file_args(char **argv, u8 *prog_in, bool *use_stdin) {
 
       *use_stdin = false;
 
-      if (prog_in[0] != 0) {  // not afl-showmap special case
-
-        u8 *n_arg;
-
-        /* Be sure that we're always using fully-qualified paths. */
+      /* Be sure that we're always using fully-qualified paths. */
 
-        *aa_loc = 0;
+      *aa_loc = 0;
 
-        /* Construct a replacement argv value. */
+      /* Construct a replacement argv value. */
+      u8 *n_arg;
 
-        if (prog_in[0] == '/') {
+      if (prog_in[0] == '/') {
 
-          n_arg = alloc_printf("%s%s%s", argv[i], prog_in, aa_loc + 2);
+        n_arg = alloc_printf("%s%s%s", argv[i], prog_in, aa_loc + 2);
 
-        } else {
-
-          n_arg = alloc_printf("%s%s/%s%s", argv[i], cwd, prog_in, aa_loc + 2);
-
-        }
+      } else {
 
-        ck_free(argv[i]);
-        argv[i] = n_arg;
+        n_arg = alloc_printf("%s%s/%s%s", argv[i], cwd, prog_in, aa_loc + 2);
 
       }
 
+      ck_free(argv[i]);
+      argv[i] = n_arg;
+
     }
 
     i++;
@@ -145,9 +144,14 @@ void argv_cpy_free(char **argv) {
 
 char **get_qemu_argv(u8 *own_loc, u8 **target_path_p, int argc, char **argv) {
 
-  if (!unlikely(own_loc)) { FATAL("BUG: param own_loc is NULL"); }
+  if (unlikely(getenv("AFL_QEMU_CUSTOM_BIN"))) {
 
-  u8 *tmp, *cp = NULL, *rsl, *own_copy;
+    WARNF(
+        "AFL_QEMU_CUSTOM_BIN is enabled. "
+        "You must run your target under afl-qemu-trace on your own!");
+    return argv;
+
+  }
 
   char **new_argv = ck_alloc(sizeof(char *) * (argc + 4));
   if (unlikely(!new_argv)) { FATAL("Illegal amount of arguments specified"); }
@@ -160,70 +164,8 @@ char **get_qemu_argv(u8 *own_loc, u8 **target_path_p, int argc, char **argv) {
 
   /* Now we need to actually find the QEMU binary to put in argv[0]. */
 
-  tmp = getenv("AFL_PATH");
-
-  if (tmp) {
-
-    cp = alloc_printf("%s/afl-qemu-trace", tmp);
-
-    if (access(cp, X_OK)) { FATAL("Unable to find '%s'", tmp); }
-
-    *target_path_p = new_argv[0] = cp;
-    return new_argv;
-
-  }
-
-  own_copy = ck_strdup(own_loc);
-  rsl = strrchr(own_copy, '/');
-
-  if (rsl) {
-
-    *rsl = 0;
-
-    cp = alloc_printf("%s/afl-qemu-trace", own_copy);
-    ck_free(own_copy);
-
-    if (!access(cp, X_OK)) {
-
-      *target_path_p = new_argv[0] = cp;
-      return new_argv;
-
-    }
-
-  } else {
-
-    ck_free(own_copy);
-
-  }
-
-  if (!access(BIN_PATH "/afl-qemu-trace", X_OK)) {
-
-    if (cp) { ck_free(cp); }
-    *target_path_p = new_argv[0] = ck_strdup(BIN_PATH "/afl-qemu-trace");
-
-    return new_argv;
-
-  }
-
-  SAYF("\n" cLRD "[-] " cRST
-       "Oops, unable to find the 'afl-qemu-trace' binary. The binary must be "
-       "built\n"
-       "    separately by following the instructions in "
-       "qemu_mode/README.md. "
-       "If you\n"
-       "    already have the binary installed, you may need to specify "
-       "AFL_PATH in the\n"
-       "    environment.\n\n"
-
-       "    Of course, even without QEMU, afl-fuzz can still work with "
-       "binaries that are\n"
-       "    instrumented at compile time with afl-gcc. It is also possible to "
-       "use it as a\n"
-       "    traditional non-instrumented fuzzer by specifying '-n' in the "
-       "command "
-       "line.\n");
-
-  FATAL("Failed to locate 'afl-qemu-trace'.");
+  *target_path_p = new_argv[0] = find_afl_binary(own_loc, "afl-qemu-trace");
+  return new_argv;
 
 }
 
@@ -231,10 +173,6 @@ char **get_qemu_argv(u8 *own_loc, u8 **target_path_p, int argc, char **argv) {
 
 char **get_wine_argv(u8 *own_loc, u8 **target_path_p, int argc, char **argv) {
 
-  if (!unlikely(own_loc)) { FATAL("BUG: param own_loc is NULL"); }
-
-  u8 *tmp, *cp = NULL, *rsl, *own_copy;
-
   char **new_argv = ck_alloc(sizeof(char *) * (argc + 3));
   if (unlikely(!new_argv)) { FATAL("Illegal amount of arguments specified"); }
 
@@ -245,152 +183,10 @@ char **get_wine_argv(u8 *own_loc, u8 **target_path_p, int argc, char **argv) {
 
   /* Now we need to actually find the QEMU binary to put in argv[0]. */
 
-  tmp = getenv("AFL_PATH");
-
-  if (tmp) {
-
-    cp = alloc_printf("%s/afl-qemu-trace", tmp);
-
-    if (access(cp, X_OK)) { FATAL("Unable to find '%s'", tmp); }
-
-    ck_free(cp);
-
-    cp = alloc_printf("%s/afl-wine-trace", tmp);
-
-    if (access(cp, X_OK)) { FATAL("Unable to find '%s'", tmp); }
-
-    *target_path_p = new_argv[0] = cp;
-    return new_argv;
-
-  }
-
-  own_copy = ck_strdup(own_loc);
-  rsl = strrchr(own_copy, '/');
-
-  if (rsl) {
-
-    *rsl = 0;
-
-    cp = alloc_printf("%s/afl-qemu-trace", own_copy);
-
-    if (cp && !access(cp, X_OK)) {
-
-      ck_free(cp);
-
-      cp = alloc_printf("%s/afl-wine-trace", own_copy);
-
-      if (!access(cp, X_OK)) {
-
-        *target_path_p = new_argv[0] = cp;
-        return new_argv;
-
-      }
-
-    }
-
-    ck_free(own_copy);
-
-  } else {
-
-    ck_free(own_copy);
-
-  }
-
-  u8 *ncp = BIN_PATH "/afl-qemu-trace";
-
-  if (!access(ncp, X_OK)) {
-
-    ncp = BIN_PATH "/afl-wine-trace";
-
-    if (!access(ncp, X_OK)) {
-
-      *target_path_p = new_argv[0] = ck_strdup(ncp);
-      return new_argv;
-
-    }
-
-  }
-
-  SAYF("\n" cLRD "[-] " cRST
-       "Oops, unable to find the '%s' binary. The binary must be "
-       "built\n"
-       "    separately by following the instructions in "
-       "qemu_mode/README.md. "
-       "If you\n"
-       "    already have the binary installed, you may need to specify "
-       "AFL_PATH in the\n"
-       "    environment.\n\n"
-
-       "    Of course, even without QEMU, afl-fuzz can still work with "
-       "binaries that are\n"
-       "    instrumented at compile time with afl-gcc. It is also possible to "
-       "use it as a\n"
-       "    traditional non-instrumented fuzzer by specifying '-n' in the "
-       "command "
-       "line.\n",
-       ncp);
-
-  FATAL("Failed to locate '%s'.", ncp);
-
-}
-
-/* Get libqasan path. */
-
-u8 *get_libqasan_path(u8 *own_loc) {
-
-  if (!unlikely(own_loc)) { FATAL("BUG: param own_loc is NULL"); }
-
-  u8 *tmp, *cp = NULL, *rsl, *own_copy;
-
-  tmp = getenv("AFL_PATH");
-
-  if (tmp) {
-
-    cp = alloc_printf("%s/libqasan.so", tmp);
-
-    if (access(cp, X_OK)) { FATAL("Unable to find '%s'", tmp); }
-
-    return cp;
-
-  }
-
-  own_copy = ck_strdup(own_loc);
-  rsl = strrchr(own_copy, '/');
-
-  if (rsl) {
-
-    *rsl = 0;
-
-    cp = alloc_printf("%s/libqasan.so", own_copy);
-    ck_free(own_copy);
-
-    if (!access(cp, X_OK)) { return cp; }
-
-  } else {
-
-    ck_free(own_copy);
-
-  }
-
-  if (!access(BIN_PATH "/libqasan.so", X_OK)) {
-
-    if (cp) { ck_free(cp); }
-
-    return ck_strdup(BIN_PATH "/libqasan.so");
-
-  }
-
-  SAYF("\n" cLRD "[-] " cRST
-       "Oops, unable to find the 'libqasan.so' binary. The binary must be "
-       "built\n"
-       "    separately by following the instructions in "
-       "qemu_mode/libqasan/README.md. "
-       "If you\n"
-       "    already have the binary installed, you may need to specify "
-       "AFL_PATH in the\n"
-       "    environment.\n");
-
-  FATAL("Failed to locate 'libqasan.so'.");
+  u8 *tmp = find_afl_binary(own_loc, "afl-qemu-trace");
+  ck_free(tmp);
+  *target_path_p = new_argv[0] = find_afl_binary(own_loc, "afl-wine-trace");
+  return new_argv;
 
 }
 
@@ -484,6 +280,70 @@ u8 *find_binary(u8 *fname) {
 
 }
 
+u8 *find_afl_binary(u8 *own_loc, u8 *fname) {
+  /* Lookup order: $AFL_PATH, own_loc's directory, BIN_PATH, find_binary(). */
+  u8 *afl_path = NULL, *target_path, *own_copy;
+  /* A hit in one of the first three places is returned as alloc_printf() memory. */
+  if ((afl_path = getenv("AFL_PATH"))) {
+    /* 1. $AFL_PATH/fname, if it is executable */
+    target_path = alloc_printf("%s/%s", afl_path, fname);
+    if (!access(target_path, X_OK)) {
+
+      return target_path;
+
+    } else {
+
+      ck_free(target_path);
+
+    }
+
+  }
+
+  if (own_loc) {
+    /* 2. the directory part of own_loc (everything before its last '/') */
+    own_copy = ck_strdup(own_loc);
+    u8 *rsl = strrchr(own_copy, '/');
+
+    if (rsl) {
+
+      *rsl = 0;
+
+      target_path = alloc_printf("%s/%s", own_copy, fname);
+      ck_free(own_copy);
+
+      if (!access(target_path, X_OK)) {
+
+        return target_path;
+
+      } else {
+
+        ck_free(target_path);
+
+      }
+
+    } else {
+      /* own_loc contains no '/', so there is no directory to try */
+      ck_free(own_copy);
+
+    }
+
+  }
+  /* 3. the BIN_PATH directory */
+  target_path = alloc_printf("%s/%s", BIN_PATH, fname);
+  if (!access(target_path, X_OK)) {
+
+    return target_path;
+
+  } else {
+
+    ck_free(target_path);
+
+  }
+  /* 4. nothing matched: the result is whatever find_binary() yields for fname */
+  return find_binary(fname);
+
+}
+
 /* Parses the kill signal environment variable, FATALs on error.
   If the env is not set, sets the env to default_signal for the signal handlers
   and returns the default_signal. */
@@ -518,12 +378,147 @@ int parse_afl_kill_signal_env(u8 *afl_kill_signal_env, int default_signal) {
 
 }
 
+static inline unsigned int helper_min3(unsigned int a, unsigned int b,
+                                       unsigned int c) {
+  /* smallest of the three arguments */
+  return a < b ? (a < c ? a : c) : (b < c ? b : c);
+
+}
+
+// Levenshtein edit distance between s1 and s2 (one rolling column), from
+// https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance#C
+static int string_distance_levenshtein(char *s1, char *s2) {
+
+  unsigned int s1len, s2len, x, y, lastdiag, olddiag;
+  s1len = strlen(s1);
+  s2len = strlen(s2);
+  unsigned int column[s1len + 1];
+  column[s1len] = 0;  // result for two empty strings; overwritten otherwise
+  // column[y] starts as the distance between s1[0..y) and the empty string
+  for (y = 1; y <= s1len; y++)
+    column[y] = y;
+  for (x = 1; x <= s2len; x++) {
+    // distance between the empty prefix of s1 and s2[0..x) is x
+    column[0] = x;
+    for (y = 1, lastdiag = x - 1; y <= s1len; y++) {
+      // lastdiag is the previous row's column[y - 1] (substitution source)
+      olddiag = column[y];
+      column[y] = helper_min3(column[y] + 1, column[y - 1] + 1,
+                              lastdiag + (s1[y - 1] == s2[x - 1] ? 0 : 1));
+      lastdiag = olddiag;
+
+    }
+
+  }
+
+  return column[s1len];
+
+}
+
+#define ENV_SIMILARITY_TRESHOLD 3
+
+void print_suggested_envs(char *mispelled_env) {
+  /* Suggest known AFL variables close to a mistyped "NAME=value" entry. */
+  size_t env_name_len =
+      strcspn(mispelled_env, "=") - 4;  // minus 4-char prefix (assumed present)
+  char *env_name = ck_alloc(env_name_len + 1);
+  memcpy(env_name, mispelled_env + 4, env_name_len);
+  /* seen[j] is set once variable j was suggested, so it is printed only once */
+  char *seen = ck_alloc(sizeof(afl_environment_variables) / sizeof(char *));
+  int   found = 0;
+  /* Pass 1: compare the typed name with every known name as a whole. */
+  int j;
+  for (j = 0; afl_environment_variables[j] != NULL; ++j) {
+
+    char *afl_env = afl_environment_variables[j] + 4;
+    int   distance = string_distance_levenshtein(afl_env, env_name);
+    if (distance < ENV_SIMILARITY_TRESHOLD && seen[j] == 0) {
+
+      SAYF("Did you mean %s?\n", afl_environment_variables[j]);
+      seen[j] = 1;
+      found = 1;
+
+    }
+
+  }
+
+  if (found) goto cleanup;
+  /* Pass 2: retry with one '_'-separated word removed from each known name. */
+  for (j = 0; afl_environment_variables[j] != NULL; ++j) {
+
+    char * afl_env = afl_environment_variables[j] + 4;
+    size_t afl_env_len = strlen(afl_env);
+    char * reduced = ck_alloc(afl_env_len + 1);
+
+    size_t start = 0;
+    while (start < afl_env_len) {
+      /* reduced = afl_env minus the word at start and one adjacent '_' */
+      size_t end = start + strcspn(afl_env + start, "_") + 1;
+      memcpy(reduced, afl_env, start);
+      if (end < afl_env_len)
+        memcpy(reduced + start, afl_env + end, afl_env_len - end);
+      reduced[afl_env_len + start >= end ? afl_env_len - end + start : 0] = 0;
+      /* ^ a name without '_' reduces to ""; unclamped, the index would be -1 */
+      int distance = string_distance_levenshtein(reduced, env_name);
+      if (distance < ENV_SIMILARITY_TRESHOLD && seen[j] == 0) {
+
+        SAYF("Did you mean %s?\n", afl_environment_variables[j]);
+        seen[j] = 1;
+        found = 1;
+
+      }
+
+      start = end;
+
+    };
+
+    ck_free(reduced);
+
+  }
+
+  if (found) goto cleanup;
+  /* Pass 3: retry with one '_'-separated word removed from the typed name. */
+  char * reduced = ck_alloc(env_name_len + 1);
+  size_t start = 0;
+  while (start < env_name_len) {
+
+    size_t end = start + strcspn(env_name + start, "_") + 1;
+    memcpy(reduced, env_name, start);
+    if (end < env_name_len)
+      memcpy(reduced + start, env_name + end, env_name_len - end);
+    reduced[env_name_len + start >= end ? env_name_len - end + start : 0] = 0;
+    /* ^ same clamp as in pass 2: a typed name without '_' reduces to "" */
+    for (j = 0; afl_environment_variables[j] != NULL; ++j) {
+
+      int distance = string_distance_levenshtein(
+          afl_environment_variables[j] + 4, reduced);
+      if (distance < ENV_SIMILARITY_TRESHOLD && seen[j] == 0) {
+
+        SAYF("Did you mean %s?\n", afl_environment_variables[j]);
+        seen[j] = 1;
+
+      }
+
+    }
+
+    start = end;
+
+  };
+
+  ck_free(reduced);
+
+cleanup:
+  ck_free(env_name);
+  ck_free(seen);
+
+}
+
 void check_environment_vars(char **envp) {
 
   if (be_quiet) { return; }
 
   int   index = 0, issue_detected = 0;
-  char *env, *val;
+  char *env, *val, *ignore = getenv("AFL_IGNORE_UNKNOWN_ENVS");
   while ((env = envp[index++]) != NULL) {
 
     if (strncmp(env, "ALF_", 4) == 0 || strncmp(env, "_ALF", 4) == 0 ||
@@ -543,6 +538,7 @@ void check_environment_vars(char **envp) {
             env[strlen(afl_environment_variables[i])] == '=') {
 
           match = 1;
+
           if ((val = getenv(afl_environment_variables[i])) && !*val) {
 
             WARNF(
@@ -582,11 +578,13 @@ void check_environment_vars(char **envp) {
 
       }
 
-      if (match == 0) {
+      if (match == 0 && !ignore) {
 
         WARNF("Mistyped AFL environment variable: %s", env);
         issue_detected = 1;
 
+        print_suggested_envs(env);
+
       }
 
     }
@@ -615,6 +613,98 @@ char *get_afl_env(char *env) {
 
 }
 
+bool extract_and_set_env(u8 *env_str) {
+  /* setenv() each KEY=value pair of a space separated list; values may be quoted */
+  if (!env_str) { return false; }
+  /* true only if >= 1 pair was set and no syntax error; earlier pairs stay set */
+  bool ret = false;  // return false by default
+
+  u8 *p = ck_strdup(env_str);
+  u8 *end = p + strlen((char *)p);
+  u8 *rest = p;
+
+  u8 closing_sym = ' ';
+  u8 c;
+
+  size_t num_pairs = 0;
+
+  while (rest < end) {
+    // skip separators in front of the next pair
+    while (*rest == ' ') {
+
+      rest++;
+
+    }
+    // at most one character left: too short for another KEY=value
+    if (rest + 1 >= end) break;
+
+    u8 *key = rest;
+    // env variable names may not start with numbers or '='
+    if (*key == '=' || (*key >= '0' && *key <= '9')) { goto free_and_return; }
+
+    while (rest < end && *rest != '=' && *rest != ' ') {
+
+      c = *rest;
+      // lowercase is bad but we may still allow it
+      if ((c < 'A' || c > 'Z') && (c < 'a' || c > 'z') &&
+          (c < '0' || c > '9') && c != '_') {
+
+        goto free_and_return;
+
+      }
+
+      rest++;
+
+    }
+
+    if (*rest != '=') { goto free_and_return; }
+
+    *rest = '\0';  // done with variable name
+
+    rest += 1;
+    if (rest >= end || *rest == ' ') { goto free_and_return; }
+    // a value may be wrapped in '...' or "..." so that it can contain spaces
+    u8 *val = rest;
+    if (*val == '\'' || *val == '"') {
+
+      closing_sym = *val;
+      val += 1;
+      rest += 1;
+      if (rest >= end) { goto free_and_return; }
+
+    } else {
+
+      closing_sym = ' ';
+
+    }
+
+    while (rest < end && *rest != closing_sym) {
+
+      rest++;
+
+    }
+
+    if (closing_sym != ' ' && *rest != closing_sym) { goto free_and_return; }
+
+    *rest = '\0';  // done with variable value
+    rest += 1;
+    // A closing quote must be followed by a separator. An unquoted value was
+    // ended by the separator itself, so the next pair may start right here.
+    if (closing_sym != ' ' && rest < end && *rest != ' ') goto free_and_return;
+    num_pairs++;
+
+    setenv(key, val, 1);
+
+  }
+
+  if (num_pairs) { ret = true; }
+
+free_and_return:
+  ck_free(p);
+  return ret;
+
+}
+
 /* Read mask bitmap from file. This is for the -B option. */
 
 void read_bitmap(u8 *fname, u8 *map, size_t len) {
@@ -981,7 +1071,7 @@ u8 *u_stringify_time_diff(u8 *buf, u64 cur_ms, u64 event_ms) {
 /* Reads the map size from ENV */
 u32 get_map_size(void) {
 
-  uint32_t map_size = (MAP_SIZE << 2);  // needed for target ctors :(
+  uint32_t map_size = DEFAULT_SHMEM_SIZE;
   char *   ptr;
 
   if ((ptr = getenv("AFL_MAP_SIZE")) || (ptr = getenv("AFL_MAPSIZE"))) {
@@ -989,12 +1079,12 @@ u32 get_map_size(void) {
     map_size = atoi(ptr);
     if (!map_size || map_size > (1 << 29)) {
 
-      FATAL("illegal AFL_MAP_SIZE %u, must be between %u and %u", map_size, 32U,
+      FATAL("illegal AFL_MAP_SIZE %u, must be between %u and %u", map_size, 64U,
             1U << 29);
 
     }
 
-    if (map_size % 32) { map_size = (((map_size >> 5) + 1) << 5); }
+    if (map_size % 64) { map_size = (((map_size >> 6) + 1) << 6); }
 
   }
 
@@ -1009,7 +1099,7 @@ FILE *create_ffile(u8 *fn) {
   s32   fd;
   FILE *f;
 
-  fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0600);
+  fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, DEFAULT_PERMISSION);
 
   if (fd < 0) { PFATAL("Unable to create '%s'", fn); }
 
@@ -1027,7 +1117,7 @@ s32 create_file(u8 *fn) {
 
   s32 fd;
 
-  fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0600);
+  fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, DEFAULT_PERMISSION);
 
   if (fd < 0) { PFATAL("Unable to create '%s'", fn); }
 
diff --git a/src/afl-forkserver.c b/src/afl-forkserver.c
index 4e4f92d6..979d7e9e 100644
--- a/src/afl-forkserver.c
+++ b/src/afl-forkserver.c
@@ -499,27 +499,28 @@ void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv,
     /* This should improve performance a bit, since it stops the linker from
        doing extra work post-fork(). */
 
-    if (!getenv("LD_BIND_LAZY")) { setenv("LD_BIND_NOW", "1", 0); }
+    if (!getenv("LD_BIND_LAZY")) { setenv("LD_BIND_NOW", "1", 1); }
 
     /* Set sane defaults for ASAN if nothing else specified. */
 
-    if (fsrv->debug == true && !getenv("ASAN_OPTIONS"))
+    if (!getenv("ASAN_OPTIONS"))
       setenv("ASAN_OPTIONS",
              "abort_on_error=1:"
              "detect_leaks=0:"
              "malloc_context_size=0:"
              "symbolize=0:"
              "allocator_may_return_null=1:"
+             "detect_odr_violation=0:"
              "handle_segv=0:"
              "handle_sigbus=0:"
              "handle_abort=0:"
              "handle_sigfpe=0:"
              "handle_sigill=0",
-             0);
+             1);
 
     /* Set sane defaults for UBSAN if nothing else specified. */
 
-    if (fsrv->debug == true && !getenv("UBSAN_OPTIONS"))
+    if (!getenv("UBSAN_OPTIONS"))
       setenv("UBSAN_OPTIONS",
              "halt_on_error=1:"
              "abort_on_error=1:"
@@ -531,7 +532,7 @@ void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv,
              "handle_abort=0:"
              "handle_sigfpe=0:"
              "handle_sigill=0",
-             0);
+             1);
 
     /* Envs for QASan */
     setenv("QASAN_MAX_CALL_STACK", "0", 0);
@@ -540,7 +541,7 @@ void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv,
     /* MSAN is tricky, because it doesn't support abort_on_error=1 at this
        point. So, we do this in a very hacky way. */
 
-    if (fsrv->debug == true && !getenv("MSAN_OPTIONS"))
+    if (!getenv("MSAN_OPTIONS"))
       setenv("MSAN_OPTIONS",
            "exit_code=" STRINGIFY(MSAN_ERROR) ":"
            "symbolize=0:"
@@ -553,7 +554,7 @@ void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv,
            "handle_abort=0:"
            "handle_sigfpe=0:"
            "handle_sigill=0",
-           0);
+           1);
 
     fsrv->init_child_func(fsrv, argv);
 
@@ -674,11 +675,11 @@ void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv,
 
         if (!fsrv->map_size) { fsrv->map_size = MAP_SIZE; }
 
-        if (unlikely(tmp_map_size % 32)) {
+        if (unlikely(tmp_map_size % 64)) {
 
           // should not happen
           WARNF("Target reported non-aligned map size of %u", tmp_map_size);
-          tmp_map_size = (((tmp_map_size + 31) >> 5) << 5);
+          tmp_map_size = (((tmp_map_size + 63) >> 6) << 6);
 
         }
 
@@ -826,7 +827,7 @@ void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv,
            "have a\n"
            "    restrictive memory limit configured, this is expected; please "
            "read\n"
-           "    %s/notes_for_asan.md for help.\n",
+           "    %s/notes_for_asan.md for help and run with '-m 0'.\n",
            doc_path);
 
     } else if (!fsrv->mem_limit) {
@@ -834,18 +835,21 @@ void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv,
       SAYF("\n" cLRD "[-] " cRST
            "Whoops, the target binary crashed suddenly, "
            "before receiving any input\n"
-           "    from the fuzzer! There are several probable explanations:\n\n"
-
-           "    - The target binary requires a large map and crashes before "
-           "reporting.\n"
-           "      Set a high value (e.g. AFL_MAP_SIZE=1024000) or use "
-           "AFL_DEBUG=1 to see the\n"
-           "      message from the target binary\n\n"
-
-           "    - The binary is just buggy and explodes entirely on its own. "
-           "If so, you\n"
-           "      need to fix the underlying problem or find a better "
-           "replacement.\n\n"
+           "    from the fuzzer! You can try the following:\n\n"
+
+           "    - The target binary crashes because necessary runtime "
+           "conditions it needs\n"
+           "      are not met. Try to:\n"
+           "      1. Run again with AFL_DEBUG=1 set and check the output of "
+           "the target\n"
+           "         binary for clues.\n"
+           "      2. Run again with AFL_DEBUG=1 and 'ulimit -c unlimited' and "
+           "analyze the\n"
+           "         generated core dump.\n\n"
+
+           "    - Possibly the target requires a huge coverage map and has "
+           "CTORS.\n"
+           "      Retry with setting AFL_MAP_SIZE=10000000.\n\n"
 
            MSG_FORK_ON_APPLE
 
@@ -861,13 +865,17 @@ void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv,
       SAYF("\n" cLRD "[-] " cRST
            "Whoops, the target binary crashed suddenly, "
            "before receiving any input\n"
-           "    from the fuzzer! There are several probable explanations:\n\n"
-
-           "    - The target binary requires a large map and crashes before "
-           "reporting.\n"
-           "      Set a high value (e.g. AFL_MAP_SIZE=1024000) or use "
-           "AFL_DEBUG=1 to see the\n"
-           "      message from the target binary\n\n"
+           "    from the fuzzer! You can try the following:\n\n"
+
+           "    - The target binary crashes because necessary runtime "
+           "conditions it needs\n"
+           "      are not met. Try to:\n"
+           "      1. Run again with AFL_DEBUG=1 set and check the output of "
+           "the target\n"
+           "         binary for clues.\n"
+           "      2. Run again with AFL_DEBUG=1 and 'ulimit -c unlimited' and "
+           "analyze the\n"
+           "         generated core dump.\n\n"
 
            "    - The current memory limit (%s) is too restrictive, causing "
            "the\n"
@@ -885,13 +893,12 @@ void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv,
            "      estimate the required amount of virtual memory for the "
            "binary.\n\n"
 
-           "    - The binary is just buggy and explodes entirely on its own. "
-           "If so, you\n"
-           "      need to fix the underlying problem or find a better "
-           "replacement.\n\n"
-
            MSG_FORK_ON_APPLE
 
+           "    - Possibly the target requires a huge coverage map and has "
+           "CTORS.\n"
+           "      Retry with setting AFL_MAP_SIZE=10000000.\n\n"
+
            "    - Less likely, there is a horrible bug in the fuzzer. If other "
            "options\n"
            "      fail, poke <afl-users@googlegroups.com> for troubleshooting "
@@ -920,16 +927,30 @@ void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv,
          "with ASAN and\n"
          "    you have a restrictive memory limit configured, this is "
          "expected; please\n"
-         "    read %s/notes_for_asan.md for help.\n",
+         "    read %s/notes_for_asan.md for help and run with '-m 0'.\n",
          doc_path);
 
   } else if (!fsrv->mem_limit) {
 
     SAYF("\n" cLRD "[-] " cRST
-         "Hmm, looks like the target binary terminated before we could"
-         " complete a handshake with the injected code.\n"
-         "If the target was compiled with afl-clang-lto and AFL_LLVM_MAP_ADDR"
-         " then recompiling without this parameter.\n"
+         "Hmm, looks like the target binary terminated before we could complete"
+         " a\n"
+         "handshake with the injected code. You can try the following:\n\n"
+
+         "    - The target binary crashes because necessary runtime conditions "
+         "it needs\n"
+         "      are not met. Try to:\n"
+         "      1. Run again with AFL_DEBUG=1 set and check the output of the "
+         "target\n"
+         "         binary for clues.\n"
+         "      2. Run again with AFL_DEBUG=1 and 'ulimit -c unlimited' and "
+         "analyze the\n"
+         "         generated core dump.\n\n"
+
+         "    - Possibly the target requires a huge coverage map and has "
+         "CTORS.\n"
+         "      Retry with setting AFL_MAP_SIZE=10000000.\n\n"
+
          "Otherwise there is a horrible bug in the fuzzer.\n"
          "Poke <afl-users@googlegroups.com> for troubleshooting tips.\n");
 
@@ -941,10 +962,24 @@ void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv,
         "\n" cLRD "[-] " cRST
         "Hmm, looks like the target binary terminated "
         "before we could complete a\n"
-        "    handshake with the injected code. There are %s probable "
-        "explanations:\n\n"
+        "    handshake with the injected code. You can try the following:\n\n"
 
         "%s"
+
+        "    - The target binary crashes because necessary runtime conditions "
+        "it needs\n"
+        "      are not met. Try to:\n"
+        "      1. Run again with AFL_DEBUG=1 set and check the output of the "
+        "target\n"
+        "         binary for clues.\n"
+        "      2. Run again with AFL_DEBUG=1 and 'ulimit -c unlimited' and "
+        "analyze the\n"
+        "         generated core dump.\n\n"
+
+        "    - Possibly the target requires a huge coverage map and has "
+        "CTORS.\n"
+        "      Retry with setting AFL_MAP_SIZE=10000000.\n\n"
+
         "    - The current memory limit (%s) is too restrictive, causing an "
         "OOM\n"
         "      fault in the dynamic linker. This can be fixed with the -m "
@@ -968,7 +1003,6 @@ void afl_fsrv_start(afl_forkserver_t *fsrv, char **argv,
         "options\n"
         "      fail, poke <afl-users@googlegroups.com> for troubleshooting "
         "tips.\n",
-        getenv(DEFER_ENV_VAR) ? "three" : "two",
         getenv(DEFER_ENV_VAR)
             ? "    - You are using deferred forkserver, but __AFL_INIT() is "
               "never\n"
@@ -1073,12 +1107,14 @@ void afl_fsrv_write_to_testcase(afl_forkserver_t *fsrv, u8 *buf, size_t len) {
 
       if (unlikely(fsrv->no_unlink)) {
 
-        fd = open(fsrv->out_file, O_WRONLY | O_CREAT | O_TRUNC, 0600);
+        fd = open(fsrv->out_file, O_WRONLY | O_CREAT | O_TRUNC,
+                  DEFAULT_PERMISSION);
 
       } else {
 
         unlink(fsrv->out_file);                           /* Ignore errors. */
-        fd = open(fsrv->out_file, O_WRONLY | O_CREAT | O_EXCL, 0600);
+        fd = open(fsrv->out_file, O_WRONLY | O_CREAT | O_EXCL,
+                  DEFAULT_PERMISSION);
 
       }
 
diff --git a/src/afl-fuzz-bitmap.c b/src/afl-fuzz-bitmap.c
index 0c4a114e..3d0228db 100644
--- a/src/afl-fuzz-bitmap.c
+++ b/src/afl-fuzz-bitmap.c
@@ -42,7 +42,7 @@ void write_bitmap(afl_state_t *afl) {
   afl->bitmap_changed = 0;
 
   snprintf(fname, PATH_MAX, "%s/fuzz_bitmap", afl->out_dir);
-  fd = open(fname, O_WRONLY | O_CREAT | O_TRUNC, 0600);
+  fd = open(fname, O_WRONLY | O_CREAT | O_TRUNC, DEFAULT_PERMISSION);
 
   if (fd < 0) { PFATAL("Unable to open '%s'", fname); }
 
@@ -325,7 +325,8 @@ u8 *describe_op(afl_state_t *afl, u8 new_bits, size_t max_description_len) {
 
     }
 
-    sprintf(ret + strlen(ret), ",time:%llu", get_cur_time() - afl->start_time);
+    sprintf(ret + strlen(ret), ",time:%llu",
+            get_cur_time() + afl->prev_run_time - afl->start_time);
 
     if (afl->current_custom_fuzz &&
         afl->current_custom_fuzz->afl_custom_describe) {
@@ -406,7 +407,7 @@ static void write_crash_readme(afl_state_t *afl) {
 
   sprintf(fn, "%s/crashes/README.txt", afl->out_dir);
 
-  fd = open(fn, O_WRONLY | O_CREAT | O_EXCL, 0600);
+  fd = open(fn, O_WRONLY | O_CREAT | O_EXCL, DEFAULT_PERMISSION);
 
   /* Do not die on errors here - that would be impolite. */
 
@@ -508,7 +509,7 @@ save_if_interesting(afl_state_t *afl, void *mem, u32 len, u8 fault) {
         alloc_printf("%s/queue/id_%06u", afl->out_dir, afl->queued_paths);
 
 #endif                                                    /* ^!SIMPLE_FILES */
-    fd = open(queue_fn, O_WRONLY | O_CREAT | O_EXCL, 0600);
+    fd = open(queue_fn, O_WRONLY | O_CREAT | O_EXCL, DEFAULT_PERMISSION);
     if (unlikely(fd < 0)) { PFATAL("Unable to create '%s'", queue_fn); }
     ck_write(fd, mem, len, queue_fn);
     close(fd);
@@ -782,7 +783,7 @@ save_if_interesting(afl_state_t *afl, void *mem, u32 len, u8 fault) {
   /* If we're here, we apparently want to save the crash or hang
      test case, too. */
 
-  fd = open(fn, O_WRONLY | O_CREAT | O_EXCL, 0600);
+  fd = open(fn, O_WRONLY | O_CREAT | O_EXCL, DEFAULT_PERMISSION);
   if (unlikely(fd < 0)) { PFATAL("Unable to create '%s'", fn); }
   ck_write(fd, mem, len, fn);
   close(fd);
diff --git a/src/afl-fuzz-extras.c b/src/afl-fuzz-extras.c
index 7ecad233..6091db15 100644
--- a/src/afl-fuzz-extras.c
+++ b/src/afl-fuzz-extras.c
@@ -387,6 +387,130 @@ static inline u8 memcmp_nocase(u8 *m1, u8 *m2, u32 len) {
 
 }
 
+/* append a copy of mem[0..len) as an extra/dict/token - no checks, no sorting */
+
+static void add_extra_nocheck(afl_state_t *afl, u8 *mem, u32 len) {
+  /* grow the array by one slot; the new entry gets its own copy of the data */
+  afl->extras = afl_realloc((void **)&afl->extras,
+                            (afl->extras_cnt + 1) * sizeof(struct extra_data));
+
+  if (unlikely(!afl->extras)) { PFATAL("alloc"); }
+
+  afl->extras[afl->extras_cnt].data = ck_alloc(len);
+  afl->extras[afl->extras_cnt].len = len;
+  memcpy(afl->extras[afl->extras_cnt].data, mem, len);
+  afl->extras_cnt++;
+
+  /* Print this once: only when the count first exceeds max_det_extras */
+
+  if (afl->extras_cnt == afl->max_det_extras + 1) {
+
+    WARNF("More than %u tokens - will use them probabilistically.",
+          afl->max_det_extras);
+
+  }
+
+}
+
+/* Sometimes strings in the input are transformed to unicode internally, so for
+   fuzzing we should attempt to de-unicode if it looks like simple unicode */
+
+void deunicode_extras(afl_state_t *afl) {
+  /* Appends narrowed copies of wide-looking extras, then re-sorts afl->extras. */
+  if (!afl->extras_cnt) return;
+  /* only the entries present on entry are scanned, not the ones added here */
+  u32 i, j, orig_cnt = afl->extras_cnt;
+  u8  buf[64];
+
+  for (i = 0; i < orig_cnt; ++i) {
+    /* candidates: even length of 6..64 bytes (at most 32 bytes land in buf) */
+    if (afl->extras[i].len < 6 || afl->extras[i].len > 64 ||
+        afl->extras[i].len % 2) {
+
+      continue;
+
+    }
+
+    u32 k = 0, z1 = 0, z2 = 0, z3 = 0, z4 = 0, half = afl->extras[i].len >> 1;
+    u32 quarter = half >> 1;
+    /* zero-byte counts: z1 even offsets, z2 odd offsets, z3 j%4==2, z4 j%4==3 */
+    for (j = 0; j < afl->extras[i].len; ++j) {
+
+      switch (j % 4) {
+
+        case 2:
+          if (!afl->extras[i].data[j]) { ++z3; }
+          // fall through
+        case 0:
+          if (!afl->extras[i].data[j]) { ++z1; }
+          break;
+        case 3:
+          if (!afl->extras[i].data[j]) { ++z4; }
+          // fall through
+        case 1:
+          if (!afl->extras[i].data[j]) { ++z2; }
+          break;
+
+      }
+
+    }
+    /* skip unless the even or the odd lane is all zero - but not every byte */
+    if ((z1 < half && z2 < half) || z1 + z2 == afl->extras[i].len) { continue; }
+
+    // also maybe 32 bit unicode?
+    if (afl->extras[i].len % 4 == 0 && afl->extras[i].len >= 12 &&
+        (z3 == quarter || z4 == quarter) && z1 + z2 == quarter * 3) {
+      /* copy one byte per 4-byte group, from a lane that is not all zero */
+      for (j = 0; j < afl->extras[i].len; ++j) {
+
+        if (z4 < quarter) {
+
+          if (j % 4 == 3) { buf[k++] = afl->extras[i].data[j]; }
+
+        } else if (z3 < quarter) {
+
+          if (j % 4 == 2) { buf[k++] = afl->extras[i].data[j]; }
+
+        } else if (z2 < half) {
+
+          if (j % 4 == 1) { buf[k++] = afl->extras[i].data[j]; }
+
+        } else {
+
+          if (j % 4 == 0) { buf[k++] = afl->extras[i].data[j]; }
+
+        }
+
+      }
+
+      add_extra_nocheck(afl, buf, k);
+      k = 0;
+      /* k is reset because the 16 bit pass below runs for this entry as well */
+    }
+    /* 16 bit style: keep the bytes of the lane that is not all zero */
+    for (j = 0; j < afl->extras[i].len; ++j) {
+
+      if (z1 < half) {
+
+        if (j % 2 == 0) { buf[k++] = afl->extras[i].data[j]; }
+
+      } else {
+
+        if (j % 2 == 1) { buf[k++] = afl->extras[i].data[j]; }
+
+      }
+
+    }
+
+    add_extra_nocheck(afl, buf, k);
+
+  }
+  /* add_extra_nocheck() does not sort, so order by compare_extras_len here */
+  qsort(afl->extras, afl->extras_cnt, sizeof(struct extra_data),
+        compare_extras_len);
+
+}
+
 /* Removes duplicates from the loaded extras. This can happen if multiple files
    are loaded */
 
@@ -396,9 +520,9 @@ void dedup_extras(afl_state_t *afl) {
 
   u32 i, j, orig_cnt = afl->extras_cnt;
 
-  for (i = 0; i < afl->extras_cnt - 1; i++) {
+  for (i = 0; i < afl->extras_cnt - 1; ++i) {
 
-    for (j = i + 1; j < afl->extras_cnt; j++) {
+    for (j = i + 1; j < afl->extras_cnt; ++j) {
 
     restart_dedup:
 
@@ -462,30 +586,11 @@ void add_extra(afl_state_t *afl, u8 *mem, u32 len) {
 
   }
 
-  afl->extras = afl_realloc((void **)&afl->extras,
-                            (afl->extras_cnt + 1) * sizeof(struct extra_data));
-
-  if (unlikely(!afl->extras)) { PFATAL("alloc"); }
-
-  afl->extras[afl->extras_cnt].data = ck_alloc(len);
-  afl->extras[afl->extras_cnt].len = len;
-
-  memcpy(afl->extras[afl->extras_cnt].data, mem, len);
-
-  afl->extras_cnt++;
+  add_extra_nocheck(afl, mem, len);
 
   qsort(afl->extras, afl->extras_cnt, sizeof(struct extra_data),
         compare_extras_len);
 
-  /* We only want to print this once */
-
-  if (afl->extras_cnt == afl->max_det_extras + 1) {
-
-    WARNF("More than %u tokens - will use them probabilistically.",
-          afl->max_det_extras);
-
-  }
-
 }
 
 /* Maybe add automatic extra. */
@@ -626,7 +731,7 @@ void save_auto(afl_state_t *afl) {
         alloc_printf("%s/queue/.state/auto_extras/auto_%06u", afl->out_dir, i);
     s32 fd;
 
-    fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0600);
+    fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, DEFAULT_PERMISSION);
 
     if (fd < 0) { PFATAL("Unable to create '%s'", fn); }
 
diff --git a/src/afl-fuzz-init.c b/src/afl-fuzz-init.c
index 40ba20c7..70a49a6b 100644
--- a/src/afl-fuzz-init.c
+++ b/src/afl-fuzz-init.c
@@ -152,7 +152,8 @@ void bind_to_free_cpu(afl_state_t *afl) {
 
     do {
 
-      if ((lockfd = open(lockfile, O_RDWR | O_CREAT | O_EXCL, 0600)) < 0) {
+      if ((lockfd = open(lockfile, O_RDWR | O_CREAT | O_EXCL,
+                         DEFAULT_PERMISSION)) < 0) {
 
         if (first) {
 
@@ -828,7 +829,7 @@ void perform_dry_run(afl_state_t *afl) {
   for (idx = 0; idx < afl->queued_paths; idx++) {
 
     q = afl->queue_buf[idx];
-    if (unlikely(q->disabled)) { continue; }
+    if (unlikely(!q || q->disabled)) { continue; }
 
     u8  res;
     s32 fd;
@@ -882,32 +883,23 @@ void perform_dry_run(afl_state_t *afl) {
 
         if (afl->timeout_given) {
 
-          /* The -t nn+ syntax in the command line sets afl->timeout_given to
-             '2' and instructs afl-fuzz to tolerate but skip queue entries that
-             time out. */
+          /* if we have a timeout but a timeout value was given then always
+             skip. The '+' meaning has been changed! */
+          WARNF("Test case results in a timeout (skipping)");
+          ++cal_failures;
+          q->cal_failed = CAL_CHANCES;
+          q->disabled = 1;
+          q->perf_score = 0;
 
-          if (afl->timeout_given > 1) {
+          if (!q->was_fuzzed) {
 
-            WARNF("Test case results in a timeout (skipping)");
-            q->cal_failed = CAL_CHANCES;
-            ++cal_failures;
-            break;
+            q->was_fuzzed = 1;
+            --afl->pending_not_fuzzed;
+            --afl->active_paths;
 
           }
 
-          SAYF("\n" cLRD "[-] " cRST
-               "The program took more than %u ms to process one of the initial "
-               "test cases.\n"
-               "    Usually, the right thing to do is to relax the -t option - "
-               "or to delete it\n"
-               "    altogether and allow the fuzzer to auto-calibrate. That "
-               "said, if you know\n"
-               "    what you are doing and want to simply skip the unruly test "
-               "cases, append\n"
-               "    '+' at the end of the value passed to -t ('-t %u+').\n",
-               afl->fsrv.exec_tmout, afl->fsrv.exec_tmout);
-
-          FATAL("Test case '%s' results in a timeout", fn);
+          break;
 
         } else {
 
@@ -1060,16 +1052,25 @@ void perform_dry_run(afl_state_t *afl) {
         p->perf_score = 0;
 
         u32 i = 0;
-        while (unlikely(afl->queue_buf[i]->disabled)) {
+        while (unlikely(i < afl->queued_paths && afl->queue_buf[i] &&
+                        afl->queue_buf[i]->disabled)) {
 
           ++i;
 
         }
 
-        afl->queue = afl->queue_buf[i];
+        if (i < afl->queued_paths && afl->queue_buf[i]) {
+
+          afl->queue = afl->queue_buf[i];
+
+        } else {
+
+          afl->queue = afl->queue_buf[0];
+
+        }
 
         afl->max_depth = 0;
-        for (i = 0; i < afl->queued_paths; i++) {
+        for (i = 0; i < afl->queued_paths && likely(afl->queue_buf[i]); i++) {
 
           if (!afl->queue_buf[i]->disabled &&
               afl->queue_buf[i]->depth > afl->max_depth)
@@ -1136,10 +1137,11 @@ void perform_dry_run(afl_state_t *afl) {
   for (idx = 0; idx < afl->queued_paths; idx++) {
 
     q = afl->queue_buf[idx];
-    if (q->disabled || q->cal_failed || !q->exec_cksum) { continue; }
+    if (!q || q->disabled || q->cal_failed || !q->exec_cksum) { continue; }
 
     u32 done = 0;
-    for (i = idx + 1; i < afl->queued_paths && !done; i++) {
+    for (i = idx + 1;
+         i < afl->queued_paths && !done && likely(afl->queue_buf[i]); i++) {
 
       struct queue_entry *p = afl->queue_buf[i];
       if (p->disabled || p->cal_failed || !p->exec_cksum) { continue; }
@@ -1191,7 +1193,7 @@ void perform_dry_run(afl_state_t *afl) {
 
     for (idx = 0; idx < afl->queued_paths; idx++) {
 
-      if (!afl->queue_buf[idx]->disabled &&
+      if (afl->queue_buf[idx] && !afl->queue_buf[idx]->disabled &&
           afl->queue_buf[idx]->depth > afl->max_depth)
         afl->max_depth = afl->queue_buf[idx]->depth;
 
@@ -1218,7 +1220,7 @@ static void link_or_copy(u8 *old_path, u8 *new_path) {
   sfd = open(old_path, O_RDONLY);
   if (sfd < 0) { PFATAL("Unable to open '%s'", old_path); }
 
-  dfd = open(new_path, O_WRONLY | O_CREAT | O_EXCL, 0600);
+  dfd = open(new_path, O_WRONLY | O_CREAT | O_EXCL, DEFAULT_PERMISSION);
   if (dfd < 0) { PFATAL("Unable to create '%s'", new_path); }
 
   tmp = ck_alloc(64 * 1024);
@@ -1242,12 +1244,12 @@ static void link_or_copy(u8 *old_path, u8 *new_path) {
 
 void pivot_inputs(afl_state_t *afl) {
 
-  struct queue_entry *q = afl->queue;
+  struct queue_entry *q;
   u32                 id = 0, i;
 
   ACTF("Creating hard links for all input files...");
 
-  for (i = 0; i < afl->queued_paths; i++) {
+  for (i = 0; i < afl->queued_paths && likely(afl->queue_buf[i]); i++) {
 
     q = afl->queue_buf[i];
 
@@ -1811,9 +1813,13 @@ static void handle_existing_out_dir(afl_state_t *afl) {
 
   }
 
-  fn = alloc_printf("%s/plot_data", afl->out_dir);
-  if (unlink(fn) && errno != ENOENT) { goto dir_cleanup_failed; }
-  ck_free(fn);
+  if (!afl->in_place_resume) {
+
+    fn = alloc_printf("%s/plot_data", afl->out_dir);
+    if (unlink(fn) && errno != ENOENT) { goto dir_cleanup_failed; }
+    ck_free(fn);
+
+  }
 
   fn = alloc_printf("%s/cmdline", afl->out_dir);
   if (unlink(fn) && errno != ENOENT) { goto dir_cleanup_failed; }
@@ -2007,17 +2013,35 @@ void setup_dirs_fds(afl_state_t *afl) {
   /* Gnuplot output file. */
 
   tmp = alloc_printf("%s/plot_data", afl->out_dir);
-  int fd = open(tmp, O_WRONLY | O_CREAT | O_EXCL, 0600);
-  if (fd < 0) { PFATAL("Unable to create '%s'", tmp); }
-  ck_free(tmp);
 
-  afl->fsrv.plot_file = fdopen(fd, "w");
-  if (!afl->fsrv.plot_file) { PFATAL("fdopen() failed"); }
+  if (!afl->in_place_resume) {
+
+    int fd = open(tmp, O_WRONLY | O_CREAT | O_EXCL, DEFAULT_PERMISSION);
+    if (fd < 0) { PFATAL("Unable to create '%s'", tmp); }
+    ck_free(tmp);
+
+    afl->fsrv.plot_file = fdopen(fd, "w");
+    if (!afl->fsrv.plot_file) { PFATAL("fdopen() failed"); }
+
+    fprintf(
+        afl->fsrv.plot_file,
+        "# unix_time, cycles_done, cur_path, paths_total, "
+        "pending_total, pending_favs, map_size, unique_crashes, "
+        "unique_hangs, max_depth, execs_per_sec, total_execs, edges_found\n");
+
+  } else {
+
+    int fd = open(tmp, O_WRONLY | O_CREAT, DEFAULT_PERMISSION);
+    if (fd < 0) { PFATAL("Unable to create '%s'", tmp); }
+    ck_free(tmp);
+
+    afl->fsrv.plot_file = fdopen(fd, "w");
+    if (!afl->fsrv.plot_file) { PFATAL("fdopen() failed"); }
+
+    fseek(afl->fsrv.plot_file, 0, SEEK_END);
+
+  }
 
-  fprintf(afl->fsrv.plot_file,
-          "# unix_time, cycles_done, cur_path, paths_total, "
-          "pending_total, pending_favs, map_size, unique_crashes, "
-          "unique_hangs, max_depth, execs_per_sec\n");
   fflush(afl->fsrv.plot_file);
 
   /* ignore errors */
@@ -2034,7 +2058,7 @@ void setup_cmdline_file(afl_state_t *afl, char **argv) {
 
   /* Store the command line to reproduce our findings */
   tmp = alloc_printf("%s/cmdline", afl->out_dir);
-  fd = open(tmp, O_WRONLY | O_CREAT | O_EXCL, 0600);
+  fd = open(tmp, O_WRONLY | O_CREAT | O_EXCL, DEFAULT_PERMISSION);
   if (fd < 0) { PFATAL("Unable to create '%s'", tmp); }
   ck_free(tmp);
 
@@ -2069,7 +2093,8 @@ void setup_stdio_file(afl_state_t *afl) {
 
   unlink(afl->fsrv.out_file);                              /* Ignore errors */
 
-  afl->fsrv.out_fd = open(afl->fsrv.out_file, O_RDWR | O_CREAT | O_EXCL, 0600);
+  afl->fsrv.out_fd =
+      open(afl->fsrv.out_file, O_RDWR | O_CREAT | O_EXCL, DEFAULT_PERMISSION);
 
   if (afl->fsrv.out_fd < 0) {
 
@@ -2457,7 +2482,7 @@ void check_asan_opts(afl_state_t *afl) {
 
     }
 
-    if (!strstr(x, "symbolize=0")) {
+    if (!afl->debug && !strstr(x, "symbolize=0")) {
 
       FATAL("Custom MSAN_OPTIONS set without symbolize=0 - please fix!");
 
@@ -2591,6 +2616,7 @@ void check_binary(afl_state_t *afl, u8 *fname) {
   }
 
   if (afl->afl_env.afl_skip_bin_check || afl->use_wine || afl->unicorn_mode ||
+      (afl->fsrv.qemu_mode && getenv("AFL_QEMU_CUSTOM_BIN")) ||
       afl->non_instrumented_mode) {
 
     return;
diff --git a/src/afl-fuzz-mutators.c b/src/afl-fuzz-mutators.c
index 80df6d08..a47b4f5f 100644
--- a/src/afl-fuzz-mutators.c
+++ b/src/afl-fuzz-mutators.c
@@ -465,7 +465,7 @@ u8 trim_case_custom(afl_state_t *afl, struct queue_entry *q, u8 *in_buf,
 
     unlink(q->fname);                                      /* ignore errors */
 
-    fd = open(q->fname, O_WRONLY | O_CREAT | O_EXCL, 0600);
+    fd = open(q->fname, O_WRONLY | O_CREAT | O_EXCL, DEFAULT_PERMISSION);
 
     if (fd < 0) { PFATAL("Unable to create '%s'", q->fname); }
 
diff --git a/src/afl-fuzz-one.c b/src/afl-fuzz-one.c
index c73e394a..4e8154cd 100644
--- a/src/afl-fuzz-one.c
+++ b/src/afl-fuzz-one.c
@@ -5119,14 +5119,23 @@ pacemaker_fuzzing:
 
       /* Update afl->pending_not_fuzzed count if we made it through the
          calibration cycle and have not seen this entry before. */
+      /*
+        // TODO FIXME: I think we need this plus need an -L -1 check
+        if (!afl->stop_soon && !afl->queue_cur->cal_failed &&
+            (afl->queue_cur->was_fuzzed == 0 || afl->queue_cur->fuzz_level == 0)
+        && !afl->queue_cur->disabled) {
 
-      // if (!afl->stop_soon && !afl->queue_cur->cal_failed &&
-      // !afl->queue_cur->was_fuzzed) {
+          if (!afl->queue_cur->was_fuzzed) {
 
-      //   afl->queue_cur->was_fuzzed = 1;
-      //   --afl->pending_not_fuzzed;
-      //   if (afl->queue_cur->favored) --afl->pending_favored;
-      // }
+            --afl->pending_not_fuzzed;
+            afl->queue_cur->was_fuzzed = 1;
+            if (afl->queue_cur->favored) { --afl->pending_favored; }
+
+          }
+
+        }
+
+      */
 
       orig_in = NULL;
 
diff --git a/src/afl-fuzz-queue.c b/src/afl-fuzz-queue.c
index ad3e3b8e..e5f51a6c 100644
--- a/src/afl-fuzz-queue.c
+++ b/src/afl-fuzz-queue.c
@@ -198,34 +198,35 @@ void create_alias_table(afl_state_t *afl) {
   while (nS)
     afl->alias_probability[S[--nS]] = 1;
 
-#ifdef INTROSPECTION
-  u8 fn[PATH_MAX];
-  snprintf(fn, PATH_MAX, "%s/introspection_corpus.txt", afl->out_dir);
-  FILE *f = fopen(fn, "a");
-  if (f) {
+  /*
+  #ifdef INTROSPECTION
+    u8 fn[PATH_MAX];
+    snprintf(fn, PATH_MAX, "%s/introspection_corpus.txt", afl->out_dir);
+    FILE *f = fopen(fn, "a");
+    if (f) {
+
+      for (i = 0; i < n; i++) {
+
+        struct queue_entry *q = afl->queue_buf[i];
+        fprintf(
+            f,
+            "entry=%u name=%s favored=%s variable=%s disabled=%s len=%u "
+            "exec_us=%u "
+            "bitmap_size=%u bitsmap_size=%u tops=%u weight=%f perf_score=%f\n",
+            i, q->fname, q->favored ? "true" : "false",
+            q->var_behavior ? "true" : "false", q->disabled ? "true" : "false",
+            q->len, (u32)q->exec_us, q->bitmap_size, q->bitsmap_size, q->tc_ref,
+            q->weight, q->perf_score);
 
-    for (i = 0; i < n; i++) {
+      }
 
-      struct queue_entry *q = afl->queue_buf[i];
-      fprintf(
-          f,
-          "entry=%u name=%s favored=%s variable=%s disabled=%s len=%u "
-          "exec_us=%u "
-          "bitmap_size=%u bitsmap_size=%u tops=%u weight=%f perf_score=%f\n",
-          i, q->fname, q->favored ? "true" : "false",
-          q->var_behavior ? "true" : "false", q->disabled ? "true" : "false",
-          q->len, (u32)q->exec_us, q->bitmap_size, q->bitsmap_size, q->tc_ref,
-          q->weight, q->perf_score);
+      fprintf(f, "\n");
+      fclose(f);
 
     }
 
-    fprintf(f, "\n");
-    fclose(f);
-
-  }
-
-#endif
-
+  #endif
+  */
   /*
   fprintf(stderr, "  entry  alias  probability  perf_score   weight
   filename\n"); for (u32 i = 0; i < n; ++i) fprintf(stderr, "  %5u  %5u  %11u
@@ -248,7 +249,7 @@ void mark_as_det_done(afl_state_t *afl, struct queue_entry *q) {
   snprintf(fn, PATH_MAX, "%s/queue/.state/deterministic_done/%s", afl->out_dir,
            strrchr(q->fname, '/') + 1);
 
-  fd = open(fn, O_WRONLY | O_CREAT | O_EXCL, 0600);
+  fd = open(fn, O_WRONLY | O_CREAT | O_EXCL, DEFAULT_PERMISSION);
   if (fd < 0) { PFATAL("Unable to create '%s'", fn); }
   close(fd);
 
@@ -271,7 +272,7 @@ void mark_as_variable(afl_state_t *afl, struct queue_entry *q) {
 
   if (symlink(ldest, fn)) {
 
-    s32 fd = open(fn, O_WRONLY | O_CREAT | O_EXCL, 0600);
+    s32 fd = open(fn, O_WRONLY | O_CREAT | O_EXCL, DEFAULT_PERMISSION);
     if (fd < 0) { PFATAL("Unable to create '%s'", fn); }
     close(fd);
 
@@ -299,7 +300,7 @@ void mark_as_redundant(afl_state_t *afl, struct queue_entry *q, u8 state) {
 
     s32 fd;
 
-    fd = open(fn, O_WRONLY | O_CREAT | O_EXCL, 0600);
+    fd = open(fn, O_WRONLY | O_CREAT | O_EXCL, DEFAULT_PERMISSION);
     if (fd < 0) { PFATAL("Unable to create '%s'", fn); }
     close(fd);
 
@@ -324,7 +325,7 @@ static u8 check_if_text(afl_state_t *afl, struct queue_entry *q) {
 
   if (len >= MAX_FILE) len = MAX_FILE - 1;
   if ((fd = open(q->fname, O_RDONLY)) < 0) return 0;
-  buf = afl_realloc(AFL_BUF_PARAM(in_scratch), len);
+  buf = afl_realloc(AFL_BUF_PARAM(in_scratch), len + 1);
   comp = read(fd, buf, len);
   close(fd);
   if (comp != (ssize_t)len) return 0;
@@ -679,13 +680,17 @@ void cull_queue(afl_state_t *afl) {
 
       }
 
-      afl->top_rated[i]->favored = 1;
-      ++afl->queued_favored;
+      if (!afl->top_rated[i]->favored) {
 
-      if (afl->top_rated[i]->fuzz_level == 0 ||
-          !afl->top_rated[i]->was_fuzzed) {
+        afl->top_rated[i]->favored = 1;
+        ++afl->queued_favored;
 
-        ++afl->pending_favored;
+        if (afl->top_rated[i]->fuzz_level == 0 ||
+            !afl->top_rated[i]->was_fuzzed) {
+
+          ++afl->pending_favored;
+
+        }
 
       }
 
diff --git a/src/afl-fuzz-redqueen.c b/src/afl-fuzz-redqueen.c
index f619a6d3..9bfbf95b 100644
--- a/src/afl-fuzz-redqueen.c
+++ b/src/afl-fuzz-redqueen.c
@@ -30,8 +30,6 @@
 
 //#define _DEBUG
 //#define CMPLOG_INTROSPECTION
-#define COMBINE
-#define ARITHMETIC_LESSER_GREATER
 
 // CMP attribute enum
 enum {
@@ -206,14 +204,31 @@ static void type_replace(afl_state_t *afl, u8 *buf, u32 len) {
         case '\t':
           c = ' ';
           break;
-          /*
-                case '\r':
-                case '\n':
-                  // nothing ...
-                  break;
-          */
+        case '\r':
+          c = '\n';
+          break;
+        case '\n':
+          c = '\r';
+          break;
+        case 0:
+          c = 1;
+          break;
+        case 1:
+          c = 0;
+          break;
+        case 0xff:
+          c = 0;
+          break;
         default:
-          c = (buf[i] ^ 0xff);
+          if (buf[i] < 32) {
+
+            c = (buf[i] ^ 0x1f);
+
+          } else {
+
+            c = (buf[i] ^ 0x7f);  // we keep the highest bit
+
+          }
 
       }
 
@@ -383,6 +398,7 @@ static u8 colorization(afl_state_t *afl, u8 *buf, u32 len,
     rng = ranges;
     ranges = rng->next;
     ck_free(rng);
+    rng = NULL;
 
   }
 
@@ -421,8 +437,9 @@ static u8 colorization(afl_state_t *afl, u8 *buf, u32 len,
 
   if (taint) {
 
-    if (len / positions == 1 && positions > CMPLOG_POSITIONS_MAX &&
-        afl->active_paths / afl->colorize_success > CMPLOG_CORPUS_PERCENT) {
+    if (afl->colorize_success &&
+        (len / positions == 1 && positions > CMPLOG_POSITIONS_MAX &&
+         afl->active_paths / afl->colorize_success > CMPLOG_CORPUS_PERCENT)) {
 
 #ifdef _DEBUG
       fprintf(stderr, "Colorization unsatisfactory\n");
@@ -456,6 +473,15 @@ static u8 colorization(afl_state_t *afl, u8 *buf, u32 len,
   return 0;
 
 checksum_fail:
+  while (ranges) {
+
+    rng = ranges;
+    ranges = rng->next;
+    ck_free(rng);
+    rng = NULL;
+
+  }
+
   ck_free(backup);
   ck_free(changed);
 
@@ -496,7 +522,7 @@ static u8 its_fuzz(afl_state_t *afl, u8 *buf, u32 len, u8 *status) {
 
 }
 
-#ifdef CMPLOG_TRANSFORM
+//#ifdef CMPLOG_SOLVE_TRANSFORM
 static int strntoll(const char *str, size_t sz, char **end, int base,
                     long long *out) {
 
@@ -504,6 +530,8 @@ static int strntoll(const char *str, size_t sz, char **end, int base,
   long long   ret;
   const char *beg = str;
 
+  if (!str || !sz) { return 1; }
+
   for (; beg && sz && *beg == ' '; beg++, sz--) {};
 
   if (!sz) return 1;
@@ -527,6 +555,8 @@ static int strntoull(const char *str, size_t sz, char **end, int base,
   unsigned long long ret;
   const char *       beg = str;
 
+  if (!str || !sz) { return 1; }
+
   for (; beg && sz && *beg == ' '; beg++, sz--)
     ;
 
@@ -577,7 +607,7 @@ static int is_hex(const char *str) {
 
 }
 
-  #ifdef CMPLOG_TRANSFORM_BASE64
+#ifdef CMPLOG_SOLVE_TRANSFORM_BASE64
 // tests 4 bytes at location
 static int is_base64(const char *str) {
 
@@ -690,10 +720,10 @@ static void to_base64(u8 *src, u8 *dst, u32 dst_len) {
 
 }
 
-  #endif
-
 #endif
 
+//#endif
+
 static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
                               u64 pattern, u64 repl, u64 o_pattern,
                               u64 changed_val, u8 attr, u32 idx, u32 taint_len,
@@ -717,9 +747,9 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
   //         o_pattern, pattern, repl, changed_val, idx, taint_len,
   //         h->shape + 1, attr);
 
-#ifdef CMPLOG_TRANSFORM
+  //#ifdef CMPLOG_SOLVE_TRANSFORM
   // reverse atoi()/strnu?toll() is expensive, so we only to it in lvl 3
-  if (lvl & LVL3) {
+  if (afl->cmplog_enable_transform && (lvl & LVL3)) {
 
     u8 *               endptr;
     u8                 use_num = 0, use_unum = 0;
@@ -740,11 +770,11 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
 
     }
 
-  #ifdef _DEBUG
+#ifdef _DEBUG
     if (idx == 0)
       fprintf(stderr, "ASCII is=%u use_num=%u use_unum=%u idx=%u %llx==%llx\n",
               afl->queue_cur->is_ascii, use_num, use_unum, idx, num, pattern);
-  #endif
+#endif
 
     // num is likely not pattern as atoi("AAA") will be zero...
     if (use_num && ((u64)num == pattern || !num)) {
@@ -794,37 +824,82 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
     // Try to identify transform magic
     if (pattern != o_pattern && repl == changed_val && attr <= IS_EQUAL) {
 
-      u64 *ptr = (u64 *)&buf[idx];
-      u64 *o_ptr = (u64 *)&orig_buf[idx];
-      u64  b_val, o_b_val, mask;
+      u64 b_val, o_b_val, mask;
+      u8  bytes;
 
       switch (SHAPE_BYTES(h->shape)) {
 
         case 0:
         case 1:
-          b_val = (u64)(*ptr % 0x100);
+          bytes = 1;
+          break;
+        case 2:
+          bytes = 2;
+          break;
+        case 3:
+        case 4:
+          bytes = 4;
+          break;
+        default:
+          bytes = 8;
+
+      }
+
+      // necessary for preventing heap access overflow
+      bytes = MIN(bytes, len - idx);
+
+      switch (bytes) {
+
+        case 0:                        // cannot happen
+          b_val = o_b_val = mask = 0;  // keep the linters happy
+          break;
+        case 1: {
+
+          u8 *ptr = (u8 *)&buf[idx];
+          u8 *o_ptr = (u8 *)&orig_buf[idx];
+          b_val = (u64)(*ptr);
           o_b_val = (u64)(*o_ptr % 0x100);
           mask = 0xff;
           break;
+
+        }
+
         case 2:
-        case 3:
-          b_val = (u64)(*ptr % 0x10000);
-          o_b_val = (u64)(*o_ptr % 0x10000);
+        case 3: {
+
+          u16 *ptr = (u16 *)&buf[idx];
+          u16 *o_ptr = (u16 *)&orig_buf[idx];
+          b_val = (u64)(*ptr);
+          o_b_val = (u64)(*o_ptr);
           mask = 0xffff;
           break;
+
+        }
+
         case 4:
         case 5:
         case 6:
-        case 7:
-          b_val = (u64)(*ptr % 0x100000000);
-          o_b_val = (u64)(*o_ptr % 0x100000000);
+        case 7: {
+
+          u32 *ptr = (u32 *)&buf[idx];
+          u32 *o_ptr = (u32 *)&orig_buf[idx];
+          b_val = (u64)(*ptr);
+          o_b_val = (u64)(*o_ptr);
           mask = 0xffffffff;
           break;
-        default:
-          b_val = *ptr;
-          o_b_val = *o_ptr;
+
+        }
+
+        default: {
+
+          u64 *ptr = (u64 *)&buf[idx];
+          u64 *o_ptr = (u64 *)&orig_buf[idx];
+          b_val = (u64)(*ptr);
+          o_b_val = (u64)(*o_ptr);
           mask = 0xffffffffffffffff;
 
+        }
+
       }
 
       // test for arithmetic, eg. "if ((user_val - 0x1111) == 0x1234) ..."
@@ -984,7 +1059,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
 
   }
 
-#endif
+  //#endif
 
   // we only allow this for ascii2integer (above) so leave if this is the case
   if (unlikely(pattern == o_pattern)) { return 0; }
@@ -1009,7 +1084,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
         u64 tmp_64 = *buf_64;
         *buf_64 = repl;
         if (unlikely(its_fuzz(afl, buf, len, status))) { return 1; }
-#ifdef COMBINE
+#ifdef CMPLOG_COMBINE
         if (*status == 1) { memcpy(cbuf + idx, buf_64, 8); }
 #endif
         *buf_64 = tmp_64;
@@ -1050,7 +1125,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
         u32 tmp_32 = *buf_32;
         *buf_32 = (u32)repl;
         if (unlikely(its_fuzz(afl, buf, len, status))) { return 1; }
-#ifdef COMBINE
+#ifdef CMPLOG_COMBINE
         if (*status == 1) { memcpy(cbuf + idx, buf_32, 4); }
 #endif
         *buf_32 = tmp_32;
@@ -1084,7 +1159,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
         u16 tmp_16 = *buf_16;
         *buf_16 = (u16)repl;
         if (unlikely(its_fuzz(afl, buf, len, status))) { return 1; }
-#ifdef COMBINE
+#ifdef CMPLOG_COMBINE
         if (*status == 1) { memcpy(cbuf + idx, buf_16, 2); }
 #endif
         *buf_16 = tmp_16;
@@ -1122,7 +1197,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
         u8 tmp_8 = *buf_8;
         *buf_8 = (u8)repl;
         if (unlikely(its_fuzz(afl, buf, len, status))) { return 1; }
-#ifdef COMBINE
+#ifdef CMPLOG_COMBINE
         if (*status == 1) { cbuf[idx] = *buf_8; }
 #endif
         *buf_8 = tmp_8;
@@ -1139,8 +1214,12 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
   //       16 = modified float, 32 = modified integer (modified = wont match
   //                                                   in original buffer)
 
-#ifdef ARITHMETIC_LESSER_GREATER
-  if (lvl < LVL3 || attr == IS_TRANSFORM) { return 0; }
+  //#ifdef CMPLOG_SOLVE_ARITHMETIC
+  if (!afl->cmplog_enable_arith || lvl < LVL3 || attr == IS_TRANSFORM) {
+
+    return 0;
+
+  }
 
   if (!(attr & (IS_GREATER | IS_LESSER)) || SHAPE_BYTES(h->shape) < 4) {
 
@@ -1245,11 +1324,11 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
       double *f = (double *)&repl;
       float   g = (float)*f;
       repl_new = 0;
-  #if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
       memcpy((char *)&repl_new, (char *)&g, 4);
-  #else
+#else
       memcpy(((char *)&repl_new) + 4, (char *)&g, 4);
-  #endif
+#endif
       changed_val = repl_new;
       h->shape = 3;  // modify shape
 
@@ -1304,7 +1383,7 @@ static u8 cmp_extend_encoding(afl_state_t *afl, struct cmp_header *h,
 
   }
 
-#endif                                         /* ARITHMETIC_LESSER_GREATER */
+  //#endif                                           /* CMPLOG_SOLVE_ARITHMETIC
 
   return 0;
 
@@ -1366,7 +1445,7 @@ static u8 cmp_extend_encodingN(afl_state_t *afl, struct cmp_header *h,
 
       if (unlikely(its_fuzz(afl, buf, len, status))) { return 1; }
 
-  #ifdef COMBINE
+  #ifdef CMPLOG_COMBINE
       if (*status == 1) { memcpy(cbuf + idx, r, shape); }
   #endif
 
@@ -1415,7 +1494,7 @@ static void try_to_add_to_dict(afl_state_t *afl, u64 v, u8 shape) {
 
     } else if (b[k] == 0xff) {
 
-      ++cons_0;
+      ++cons_ff;
 
     } else {
 
@@ -1463,7 +1542,7 @@ static void try_to_add_to_dictN(afl_state_t *afl, u128 v, u8 size) {
   for (k = 0; k < size; ++k) {
 
   #else
-  u32 off = 16 - size;
+  u32    off = 16 - size;
   for (k = 16 - size; k < 16; ++k) {
 
   #endif
@@ -1473,7 +1552,7 @@ static void try_to_add_to_dictN(afl_state_t *afl, u128 v, u8 size) {
 
     } else if (b[k] == 0xff) {
 
-      ++cons_0;
+      ++cons_ff;
 
     } else {
 
@@ -1499,11 +1578,12 @@ static u8 cmp_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf,
   struct cmp_header *h = &afl->shm.cmp_map->headers[key];
   struct tainted *   t;
   u32                i, j, idx, taint_len, loggeds;
-  u32                have_taint = 1, is_n = 0;
+  u32                have_taint = 1;
   u8                 status = 0, found_one = 0;
 
   /* loop cmps are useless, detect and ignore them */
 #ifdef WORD_SIZE_64
+  u32  is_n = 0;
   u128 s128_v0 = 0, s128_v1 = 0, orig_s128_v0 = 0, orig_s128_v1 = 0;
 #endif
   u64 s_v0, s_v1;
@@ -1521,6 +1601,7 @@ static u8 cmp_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf,
 
   }
 
+#ifdef WORD_SIZE_64
   switch (SHAPE_BYTES(h->shape)) {
 
     case 1:
@@ -1533,6 +1614,8 @@ static u8 cmp_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf,
 
   }
 
+#endif
+
   for (i = 0; i < loggeds; ++i) {
 
     struct cmp_operands *o = &afl->shm.cmp_map->log[key][i];
@@ -1770,20 +1853,20 @@ static u8 cmp_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf,
 }
 
 static u8 rtn_extend_encoding(afl_state_t *afl, u8 *pattern, u8 *repl,
-                              u8 *o_pattern, u8 *changed_val, u32 idx,
+                              u8 *o_pattern, u8 *changed_val, u8 plen, u32 idx,
                               u32 taint_len, u8 *orig_buf, u8 *buf, u8 *cbuf,
                               u32 len, u8 lvl, u8 *status) {
 
-#ifndef COMBINE
+#ifndef CMPLOG_COMBINE
   (void)(cbuf);
 #endif
-#ifndef CMPLOG_TRANSFORM
-  (void)(changed_val);
-#endif
+  //#ifndef CMPLOG_SOLVE_TRANSFORM
+  //  (void)(changed_val);
+  //#endif
 
   u8  save[40];
   u32 saved_idx = idx, pre, from = 0, to = 0, i, j;
-  u32 its_len = MIN((u32)32, len - idx);
+  u32 its_len = MIN((u32)plen, len - idx);
   its_len = MIN(its_len, taint_len);
   u32 saved_its_len = its_len;
 
@@ -1847,7 +1930,7 @@ static u8 rtn_extend_encoding(afl_state_t *afl, u8 *pattern, u8 *repl,
 
         if (unlikely(its_fuzz(afl, buf, len, status))) { return 1; }
 
-#ifdef COMBINE
+#ifdef CMPLOG_COMBINE
         if (*status == 1) { memcpy(cbuf + idx, &buf[idx], i); }
 #endif
 
@@ -1859,16 +1942,16 @@ static u8 rtn_extend_encoding(afl_state_t *afl, u8 *pattern, u8 *repl,
 
   }
 
-#ifdef CMPLOG_TRANSFORM
+  //#ifdef CMPLOG_SOLVE_TRANSFORM
 
   if (*status == 1) return 0;
 
-  if (lvl & LVL3) {
+  if (afl->cmplog_enable_transform && (lvl & LVL3)) {
 
     u32 toupper = 0, tolower = 0, xor = 0, arith = 0, tohex = 0, fromhex = 0;
-  #ifdef CMPLOG_TRANSFORM_BASE64
+#ifdef CMPLOG_SOLVE_TRANSFORM_BASE64
     u32 tob64 = 0, fromb64 = 0;
-  #endif
+#endif
     u32 from_0 = 0, from_x = 0, from_X = 0, from_slash = 0, from_up = 0;
     u32 to_0 = 0, to_x = 0, to_slash = 0, to_up = 0;
     u8  xor_val[32], arith_val[32], tmp[48];
@@ -1964,7 +2047,7 @@ static u8 rtn_extend_encoding(afl_state_t *afl, u8 *pattern, u8 *repl,
 
       }
 
-  #ifdef CMPLOG_TRANSFORM_BASE64
+#ifdef CMPLOG_SOLVE_TRANSFORM_BASE64
       if (i % 3 == 2 && i < 24) {
 
         if (is_base64(repl + ((i / 3) << 2))) tob64 += 3;
@@ -1977,7 +2060,7 @@ static u8 rtn_extend_encoding(afl_state_t *afl, u8 *pattern, u8 *repl,
 
       }
 
-  #endif
+#endif
 
       if ((o_pattern[i] ^ orig_buf[idx + i]) == xor_val[i] && xor_val[i]) {
 
@@ -2005,20 +2088,20 @@ static u8 rtn_extend_encoding(afl_state_t *afl, u8 *pattern, u8 *repl,
 
       }
 
-  #ifdef _DEBUG
+#ifdef _DEBUG
       fprintf(stderr,
               "RTN idx=%u loop=%u xor=%u arith=%u tolower=%u toupper=%u "
               "tohex=%u fromhex=%u to_0=%u to_slash=%u to_x=%u "
               "from_0=%u from_slash=%u from_x=%u\n",
               idx, i, xor, arith, tolower, toupper, tohex, fromhex, to_0,
               to_slash, to_x, from_0, from_slash, from_x);
-    #ifdef CMPLOG_TRANSFORM_BASE64
+  #ifdef CMPLOG_SOLVE_TRANSFORM_BASE64
       fprintf(stderr, "RTN idx=%u loop=%u tob64=%u from64=%u\n", tob64,
               fromb64);
-    #endif
   #endif
+#endif
 
-  #ifdef CMPLOG_TRANSFORM_BASE64
+#ifdef CMPLOG_SOLVE_TRANSFORM_BASE64
       // input is base64 and converted to binary? convert repl to base64!
       if ((i % 4) == 3 && i < 24 && fromb64 > i) {
 
@@ -2041,7 +2124,7 @@ static u8 rtn_extend_encoding(afl_state_t *afl, u8 *pattern, u8 *repl,
 
       }
 
-  #endif
+#endif
 
       // input is converted to hex? convert repl to binary!
       if (i < 16 && tohex > i) {
@@ -2170,16 +2253,16 @@ static u8 rtn_extend_encoding(afl_state_t *afl, u8 *pattern, u8 *repl,
 
       }
 
-  #ifdef COMBINE
+#ifdef CMPLOG_COMBINE
       if (*status == 1) { memcpy(cbuf + idx, &buf[idx], i + 1); }
-  #endif
+#endif
 
       if ((i >= 7 &&
            (i >= xor&&i >= arith &&i >= tolower &&i >= toupper &&i > tohex &&i >
                 (fromhex + from_0 + from_x + from_slash + 1)
-  #ifdef CMPLOG_TRANSFORM_BASE64
+#ifdef CMPLOG_SOLVE_TRANSFORM_BASE64
             && i > tob64 + 3 && i > fromb64 + 4
-  #endif
+#endif
             )) ||
           repl[i] != changed_val[i] || *status == 1) {
 
@@ -2193,7 +2276,7 @@ static u8 rtn_extend_encoding(afl_state_t *afl, u8 *pattern, u8 *repl,
 
   }
 
-#endif
+  //#endif
 
   return 0;
 
@@ -2282,9 +2365,9 @@ static u8 rtn_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf,
 
       status = 0;
 
-      if (unlikely(rtn_extend_encoding(afl, o->v0, o->v1, orig_o->v0,
-                                       orig_o->v1, idx, taint_len, orig_buf,
-                                       buf, cbuf, len, lvl, &status))) {
+      if (unlikely(rtn_extend_encoding(
+              afl, o->v0, o->v1, orig_o->v0, orig_o->v1, SHAPE_BYTES(h->shape),
+              idx, taint_len, orig_buf, buf, cbuf, len, lvl, &status))) {
 
         return 1;
 
@@ -2299,9 +2382,9 @@ static u8 rtn_fuzz(afl_state_t *afl, u32 key, u8 *orig_buf, u8 *buf, u8 *cbuf,
 
       status = 0;
 
-      if (unlikely(rtn_extend_encoding(afl, o->v1, o->v0, orig_o->v1,
-                                       orig_o->v0, idx, taint_len, orig_buf,
-                                       buf, cbuf, len, lvl, &status))) {
+      if (unlikely(rtn_extend_encoding(
+              afl, o->v1, o->v0, orig_o->v1, orig_o->v0, SHAPE_BYTES(h->shape),
+              idx, taint_len, orig_buf, buf, cbuf, len, lvl, &status))) {
 
         return 1;
 
@@ -2410,7 +2493,21 @@ u8 input_to_state_stage(afl_state_t *afl, u8 *orig_buf, u8 *buf, u32 len) {
 
   // manually clear the full cmp_map
   memset(afl->shm.cmp_map, 0, sizeof(struct cmp_map));
-  if (unlikely(common_fuzz_cmplog_stuff(afl, orig_buf, len))) { return 1; }
+  if (unlikely(common_fuzz_cmplog_stuff(afl, orig_buf, len))) {
+
+    afl->queue_cur->colorized = CMPLOG_LVL_MAX;
+    while (taint) {
+
+      t = taint->next;
+      ck_free(taint);
+      taint = t;
+
+    }
+
+    return 1;
+
+  }
+
   if (unlikely(!afl->orig_cmp_map)) {
 
     afl->orig_cmp_map = ck_alloc_nozero(sizeof(struct cmp_map));
@@ -2419,7 +2516,20 @@ u8 input_to_state_stage(afl_state_t *afl, u8 *orig_buf, u8 *buf, u32 len) {
 
   memcpy(afl->orig_cmp_map, afl->shm.cmp_map, sizeof(struct cmp_map));
   memset(afl->shm.cmp_map->headers, 0, sizeof(struct cmp_header) * CMP_MAP_W);
-  if (unlikely(common_fuzz_cmplog_stuff(afl, buf, len))) { return 1; }
+  if (unlikely(common_fuzz_cmplog_stuff(afl, buf, len))) {
+
+    afl->queue_cur->colorized = CMPLOG_LVL_MAX;
+    while (taint) {
+
+      t = taint->next;
+      ck_free(taint);
+      taint = t;
+
+    }
+
+    return 1;
+
+  }
 
 #ifdef _DEBUG
   dump("ORIG", orig_buf, len);
@@ -2442,7 +2552,7 @@ u8 input_to_state_stage(afl_state_t *afl, u8 *orig_buf, u8 *buf, u32 len) {
   u32 lvl = (afl->queue_cur->colorized ? 0 : LVL1) +
             (afl->cmplog_lvl == CMPLOG_LVL_MAX ? LVL3 : 0);
 
-#ifdef COMBINE
+#ifdef CMPLOG_COMBINE
   u8 *cbuf = afl_realloc((void **)&afl->in_scratch_buf, len + 128);
   memcpy(cbuf, orig_buf, len);
   u8 *virgin_backup = afl_realloc((void **)&afl->ex_buf, afl->shm.map_size);
@@ -2499,9 +2609,9 @@ u8 input_to_state_stage(afl_state_t *afl, u8 *orig_buf, u8 *buf, u32 len) {
 
     } else if ((lvl & LVL1)
 
-#ifdef CMPLOG_TRANSFORM
-               || (lvl & LVL3)
-#endif
+               //#ifdef CMPLOG_SOLVE_TRANSFORM
+               || ((lvl & LVL3) && afl->cmplog_enable_transform)
+               //#endif
     ) {
 
       if (unlikely(rtn_fuzz(afl, k, orig_buf, buf, cbuf, len, lvl, taint))) {
@@ -2530,7 +2640,6 @@ exit_its:
     afl->queue_cur->colorized = CMPLOG_LVL_MAX;
 
     ck_free(afl->queue_cur->cmplog_colorinput);
-    t = taint;
     while (taint) {
 
       t = taint->next;
@@ -2557,7 +2666,7 @@ exit_its:
 
   }
 
-#ifdef COMBINE
+#ifdef CMPLOG_COMBINE
   if (afl->queued_paths + afl->unique_crashes > orig_hit_cnt + 1) {
 
     // copy the current virgin bits so we can recover the information
@@ -2581,9 +2690,9 @@ exit_its:
     }
 
   #else
-    u32 *v = (u64 *)afl->virgin_bits;
-    u32 *s = (u64 *)virgin_save;
-    u32 i;
+    u32 *v = (u32 *)afl->virgin_bits;
+    u32 *s = (u32 *)virgin_save;
+    u32  i;
     for (i = 0; i < (afl->shm.map_size >> 2); i++) {
 
       v[i] &= s[i];
@@ -2596,7 +2705,7 @@ exit_its:
     dump("COMB", cbuf, len);
     if (status == 1) {
 
-      fprintf(stderr, "NEW COMBINED\n");
+      fprintf(stderr, "NEW CMPLOG_COMBINED\n");
 
     } else {
 
@@ -2646,7 +2755,3 @@ exit_its:
 
 }
 
-#ifdef COMBINE
-  #undef COMBINE
-#endif
-
diff --git a/src/afl-fuzz-run.c b/src/afl-fuzz-run.c
index 97cb7415..83133dad 100644
--- a/src/afl-fuzz-run.c
+++ b/src/afl-fuzz-run.c
@@ -83,7 +83,8 @@ write_to_testcase(afl_state_t *afl, void *mem, u32 len) {
            afl->document_counter++,
            describe_op(afl, 0, NAME_MAX - strlen("000000000:")));
 
-  if ((doc_fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0600)) >= 0) {
+  if ((doc_fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, DEFAULT_PERMISSION)) >=
+      0) {
 
     if (write(doc_fd, mem, len) != len)
       PFATAL("write to mutation file failed: %s", fn);
@@ -247,12 +248,14 @@ static void write_with_gap(afl_state_t *afl, u8 *mem, u32 len, u32 skip_at,
 
     if (unlikely(afl->no_unlink)) {
 
-      fd = open(afl->fsrv.out_file, O_WRONLY | O_CREAT | O_TRUNC, 0600);
+      fd = open(afl->fsrv.out_file, O_WRONLY | O_CREAT | O_TRUNC,
+                DEFAULT_PERMISSION);
 
     } else {
 
       unlink(afl->fsrv.out_file);                         /* Ignore errors. */
-      fd = open(afl->fsrv.out_file, O_WRONLY | O_CREAT | O_EXCL, 0600);
+      fd = open(afl->fsrv.out_file, O_WRONLY | O_CREAT | O_EXCL,
+                DEFAULT_PERMISSION);
 
     }
 
@@ -564,7 +567,8 @@ void sync_fuzzers(afl_state_t *afl) {
     /* document the attempt to sync to this instance */
 
     sprintf(qd_synced_path, "%s/.synced/%s.last", afl->out_dir, sd_ent->d_name);
-    id_fd = open(qd_synced_path, O_RDWR | O_CREAT | O_TRUNC, 0600);
+    id_fd =
+        open(qd_synced_path, O_RDWR | O_CREAT | O_TRUNC, DEFAULT_PERMISSION);
     if (id_fd >= 0) close(id_fd);
 
     /* Skip anything that doesn't have a queue/ subdirectory. */
@@ -587,7 +591,7 @@ void sync_fuzzers(afl_state_t *afl) {
 
     sprintf(qd_synced_path, "%s/.synced/%s", afl->out_dir, sd_ent->d_name);
 
-    id_fd = open(qd_synced_path, O_RDWR | O_CREAT, 0600);
+    id_fd = open(qd_synced_path, O_RDWR | O_CREAT, DEFAULT_PERMISSION);
 
     if (id_fd < 0) { PFATAL("Unable to create '%s'", qd_synced_path); }
 
@@ -707,6 +711,8 @@ void sync_fuzzers(afl_state_t *afl) {
 
   if (afl->foreign_sync_cnt) read_foreign_testcases(afl, 0);
 
+  afl->last_sync_time = get_cur_time();
+
 }
 
 /* Trim all new test cases to save cycles when doing deterministic checks. The
@@ -849,7 +855,7 @@ u8 trim_case(afl_state_t *afl, struct queue_entry *q, u8 *in_buf) {
 
     if (unlikely(afl->no_unlink)) {
 
-      fd = open(q->fname, O_WRONLY | O_CREAT | O_TRUNC, 0600);
+      fd = open(q->fname, O_WRONLY | O_CREAT | O_TRUNC, DEFAULT_PERMISSION);
 
       if (fd < 0) { PFATAL("Unable to create '%s'", q->fname); }
 
@@ -864,7 +870,7 @@ u8 trim_case(afl_state_t *afl, struct queue_entry *q, u8 *in_buf) {
     } else {
 
       unlink(q->fname);                                    /* ignore errors */
-      fd = open(q->fname, O_WRONLY | O_CREAT | O_EXCL, 0600);
+      fd = open(q->fname, O_WRONLY | O_CREAT | O_EXCL, DEFAULT_PERMISSION);
 
       if (fd < 0) { PFATAL("Unable to create '%s'", q->fname); }
 
diff --git a/src/afl-fuzz-state.c b/src/afl-fuzz-state.c
index 514414f3..10a0b869 100644
--- a/src/afl-fuzz-state.c
+++ b/src/afl-fuzz-state.c
@@ -440,6 +440,13 @@ void read_afl_environment(afl_state_t *afl, char **envp) {
             afl->afl_env.afl_kill_signal =
                 (u8 *)get_afl_env(afl_environment_variables[i]);
 
+          } else if (!strncmp(env, "AFL_TARGET_ENV",
+
+                              afl_environment_variable_len)) {
+
+            afl->afl_env.afl_target_env =
+                (u8 *)get_afl_env(afl_environment_variables[i]);
+
           }
 
         } else {
@@ -493,6 +500,8 @@ void read_afl_environment(afl_state_t *afl, char **envp) {
         WARNF("Mistyped AFL environment variable: %s", env);
         issue_detected = 1;
 
+        print_suggested_envs(env);
+
       }
 
     }
diff --git a/src/afl-fuzz-stats.c b/src/afl-fuzz-stats.c
index 7e99bf8f..2c814d90 100644
--- a/src/afl-fuzz-stats.c
+++ b/src/afl-fuzz-stats.c
@@ -89,17 +89,110 @@ void write_setup_file(afl_state_t *afl, u32 argc, char **argv) {
 
 }
 
+/* Load selected counters from an existing fuzzer_stats file when resuming. */
+void load_stats_file(afl_state_t *afl) {
+
+  FILE *f;
+  u8    buf[MAX_LINE];
+  u8 *  lptr;
+  u8    fn[PATH_MAX];
+  u32   lineno = 0;
+  snprintf(fn, PATH_MAX, "%s/fuzzer_stats", afl->out_dir);
+  f = fopen(fn, "r");
+  if (!f) {
+
+    WARNF("Unable to load stats file '%s'", fn);
+    return;
+
+  }
+
+  while ((lptr = fgets(buf, MAX_LINE, f))) {
+
+    lineno++;
+    u8 *lstartptr = lptr;
+    u8 *rptr = lptr + strlen(lptr) - 1;
+    u8  keystring[MAX_LINE];
+    while (*lptr != ':' && lptr < rptr) {   /* stop at ':' or the last char */
+
+      lptr++;
+
+    }
+
+    if (*lptr == '\n' || !*lptr) {          /* reached the end without ':' */
+
+      WARNF("Unable to read line %d of stats file", lineno);
+      continue;
+
+    }
+
+    if (*lptr == ':') {
+
+      *lptr = 0;
+      strcpy(keystring, lstartptr);
+      lptr++;
+      char *nptr;
+      switch (lineno) {     /* a field must match both line number and key */
+
+        case 3:         /* write_stats_file() emits this divided by 1000 */
+          if (!strcmp(keystring, "run_time          "))
+            afl->prev_run_time = 1000 * strtoull(lptr, &nptr, 10);
+          break;
+        case 5:     /* write_stats_file() emits queue_cycle - 1; add it back */
+          if (!strcmp(keystring, "cycles_done       "))
+            afl->queue_cycle =
+                strtoull(lptr, &nptr, 10) ? strtoull(lptr, &nptr, 10) + 1 : 0;
+          break;
+        case 7:
+          if (!strcmp(keystring, "execs_done        "))
+            afl->fsrv.total_execs = strtoull(lptr, &nptr, 10);
+          break;
+        case 10:
+          if (!strcmp(keystring, "paths_total       "))
+            afl->queued_paths = strtoul(lptr, &nptr, 10);
+          break;
+        case 12:
+          if (!strcmp(keystring, "paths_found       "))
+            afl->queued_discovered = strtoul(lptr, &nptr, 10);
+          break;
+        case 13:
+          if (!strcmp(keystring, "paths_imported    "))
+            afl->queued_imported = strtoul(lptr, &nptr, 10);
+          break;
+        case 14:
+          if (!strcmp(keystring, "max_depth         "))
+            afl->max_depth = strtoul(lptr, &nptr, 10);
+          break;
+        case 21:
+          if (!strcmp(keystring, "unique_crashes    "))
+            afl->unique_crashes = strtoull(lptr, &nptr, 10);
+          break;
+        case 22:
+          if (!strcmp(keystring, "unique_hangs      "))
+            afl->unique_hangs = strtoull(lptr, &nptr, 10);
+          break;
+        default:
+          break;
+
+      }
+
+    }
+
+  }
+
+  fclose(f);                  /* was leaked: the stream was never closed */
+
+}
+
 /* Update stats file for unattended monitoring. */
 
-void write_stats_file(afl_state_t *afl, double bitmap_cvg, double stability,
-                      double eps) {
+void write_stats_file(afl_state_t *afl, u32 t_bytes, double bitmap_cvg,
+                      double stability, double eps) {
 
 #ifndef __HAIKU__
   struct rusage rus;
 #endif
 
   u64   cur_time = get_cur_time();
-  u32   t_bytes = count_non_255_bytes(afl, afl->virgin_bits);
   u8    fn[PATH_MAX];
   FILE *f;
 
@@ -179,12 +272,13 @@ void write_stats_file(afl_state_t *afl, double bitmap_cvg, double stability,
           "\n"
           "target_mode       : %s%s%s%s%s%s%s%s%s\n"
           "command_line      : %s\n",
-          afl->start_time / 1000, cur_time / 1000,
-          (cur_time - afl->start_time) / 1000, (u32)getpid(),
-          afl->queue_cycle ? (afl->queue_cycle - 1) : 0, afl->cycles_wo_finds,
-          afl->fsrv.total_execs,
+          (afl->start_time - afl->prev_run_time) / 1000, cur_time / 1000,
+          (afl->prev_run_time + cur_time - afl->start_time) / 1000,
+          (u32)getpid(), afl->queue_cycle ? (afl->queue_cycle - 1) : 0,
+          afl->cycles_wo_finds, afl->fsrv.total_execs,
           afl->fsrv.total_execs /
-              ((double)(get_cur_time() - afl->start_time) / 1000),
+              ((double)(afl->prev_run_time + get_cur_time() - afl->start_time) /
+               1000),
           afl->last_avg_execs_saved, afl->queued_paths, afl->queued_favored,
           afl->queued_discovered, afl->queued_imported, afl->max_depth,
           afl->current_entry, afl->pending_favored, afl->pending_not_fuzzed,
@@ -258,9 +352,11 @@ void write_stats_file(afl_state_t *afl, double bitmap_cvg, double stability,
 
 /* Update the plot file if there is a reason to. */
 
-void maybe_update_plot_file(afl_state_t *afl, double bitmap_cvg, double eps) {
+void maybe_update_plot_file(afl_state_t *afl, u32 t_bytes, double bitmap_cvg,
+                            double eps) {
 
-  if (unlikely(afl->plot_prev_qp == afl->queued_paths &&
+  if (unlikely(afl->stop_soon) ||
+      unlikely(afl->plot_prev_qp == afl->queued_paths &&
                afl->plot_prev_pf == afl->pending_favored &&
                afl->plot_prev_pnf == afl->pending_not_fuzzed &&
                afl->plot_prev_ce == afl->current_entry &&
@@ -289,16 +385,17 @@ void maybe_update_plot_file(afl_state_t *afl, double bitmap_cvg, double eps) {
   /* Fields in the file:
 
      unix_time, afl->cycles_done, cur_path, paths_total, paths_not_fuzzed,
-     favored_not_fuzzed, afl->unique_crashes, afl->unique_hangs, afl->max_depth,
-     execs_per_sec */
-
-  fprintf(
-      afl->fsrv.plot_file,
-      "%llu, %llu, %u, %u, %u, %u, %0.02f%%, %llu, %llu, %u, %0.02f, %llu\n",
-      get_cur_time() / 1000, afl->queue_cycle - 1, afl->current_entry,
-      afl->queued_paths, afl->pending_not_fuzzed, afl->pending_favored,
-      bitmap_cvg, afl->unique_crashes, afl->unique_hangs, afl->max_depth, eps,
-      afl->plot_prev_ed);                                  /* ignore errors */
+     favored_not_fuzzed, unique_crashes, unique_hangs, max_depth,
+     execs_per_sec, edges_found */
+
+  fprintf(afl->fsrv.plot_file,
+          "%llu, %llu, %u, %u, %u, %u, %0.02f%%, %llu, %llu, %u, %0.02f, %llu, "
+          "%u\n",
+          (afl->prev_run_time + get_cur_time() - afl->start_time),
+          afl->queue_cycle - 1, afl->current_entry, afl->queued_paths,
+          afl->pending_not_fuzzed, afl->pending_favored, bitmap_cvg,
+          afl->unique_crashes, afl->unique_hangs, afl->max_depth, eps,
+          afl->plot_prev_ed, t_bytes);                     /* ignore errors */
 
   fflush(afl->fsrv.plot_file);
 
@@ -379,8 +476,8 @@ void show_stats(afl_state_t *afl) {
 
     if (likely(cur_ms != afl->start_time)) {
 
-      afl->stats_avg_exec =
-          ((double)afl->fsrv.total_execs) * 1000 / (cur_ms - afl->start_time);
+      afl->stats_avg_exec = ((double)afl->fsrv.total_execs) * 1000 /
+                            (afl->prev_run_time + cur_ms - afl->start_time);
 
     }
 
@@ -437,7 +534,8 @@ void show_stats(afl_state_t *afl) {
   if (cur_ms - afl->stats_last_stats_ms > STATS_UPDATE_SEC * 1000) {
 
     afl->stats_last_stats_ms = cur_ms;
-    write_stats_file(afl, t_byte_ratio, stab_ratio, afl->stats_avg_exec);
+    write_stats_file(afl, t_bytes, t_byte_ratio, stab_ratio,
+                     afl->stats_avg_exec);
     save_auto(afl);
     write_bitmap(afl);
 
@@ -460,7 +558,7 @@ void show_stats(afl_state_t *afl) {
   if (cur_ms - afl->stats_last_plot_ms > PLOT_UPDATE_SEC * 1000) {
 
     afl->stats_last_plot_ms = cur_ms;
-    maybe_update_plot_file(afl, t_byte_ratio, afl->stats_avg_exec);
+    maybe_update_plot_file(afl, t_bytes, t_byte_ratio, afl->stats_avg_exec);
 
   }
 
@@ -548,6 +646,13 @@ void show_stats(afl_state_t *afl) {
 #define SP10 SP5 SP5
 #define SP20 SP10 SP10
 
+  /* Since `total_crashes` does not get reloaded from disk on restart,
+    it indicates if we found crashes this round already -> paint red.
+    If it's 0, but `unique_crashes` is set from a past run, paint in yellow. */
+  char *crash_color = afl->total_crashes    ? cLRD
+                      : afl->unique_crashes ? cYEL
+                                            : cRST;
+
   /* Lord, forgive me this. */
 
   SAYF(SET_G1 bSTG bLT bH bSTOP                         cCYA
@@ -592,7 +697,7 @@ void show_stats(afl_state_t *afl) {
 
   }
 
-  u_stringify_time_diff(time_tmp, cur_ms, afl->start_time);
+  u_stringify_time_diff(time_tmp, afl->prev_run_time + cur_ms, afl->start_time);
   SAYF(bV bSTOP "        run time : " cRST "%-33s " bSTG bV bSTOP
                 "  cycles done : %s%-5s " bSTG              bV "\n",
        time_tmp, tmp, u_stringify_int(IB(0), afl->queue_cycle - 1));
@@ -635,7 +740,7 @@ void show_stats(afl_state_t *afl) {
   u_stringify_time_diff(time_tmp, cur_ms, afl->last_crash_time);
   SAYF(bV bSTOP " last uniq crash : " cRST "%-33s " bSTG bV bSTOP
                 " uniq crashes : %s%-6s" bSTG               bV "\n",
-       time_tmp, afl->unique_crashes ? cLRD : cRST, tmp);
+       time_tmp, crash_color, tmp);
 
   sprintf(tmp, "%s%s", u_stringify_int(IB(0), afl->unique_hangs),
           (afl->unique_hangs >= KEEP_UNIQUE_HANG) ? "+" : "");
@@ -718,15 +823,13 @@ void show_stats(afl_state_t *afl) {
 
     SAYF(bV bSTOP " total execs : " cRST "%-20s " bSTG bV bSTOP
                   "   new crashes : %s%-22s" bSTG         bV "\n",
-         u_stringify_int(IB(0), afl->fsrv.total_execs),
-         afl->unique_crashes ? cLRD : cRST, tmp);
+         u_stringify_int(IB(0), afl->fsrv.total_execs), crash_color, tmp);
 
   } else {
 
     SAYF(bV bSTOP " total execs : " cRST "%-20s " bSTG bV bSTOP
                   " total crashes : %s%-22s" bSTG         bV "\n",
-         u_stringify_int(IB(0), afl->fsrv.total_execs),
-         afl->unique_crashes ? cLRD : cRST, tmp);
+         u_stringify_int(IB(0), afl->fsrv.total_execs), crash_color, tmp);
 
   }
 
@@ -1122,7 +1225,7 @@ void show_init_stats(afl_state_t *afl) {
       stringify_int(IB(0), min_us), stringify_int(IB(1), max_us),
       stringify_int(IB(2), avg_us));
 
-  if (!afl->timeout_given) {
+  if (afl->timeout_given != 1) {
 
     /* Figure out the appropriate timeout. The basic idea is: 5x average or
        1x max, rounded up to EXEC_TM_ROUND ms and capped at 1 second.
diff --git a/src/afl-fuzz-statsd.c b/src/afl-fuzz-statsd.c
index 69cafd90..461bbbf6 100644
--- a/src/afl-fuzz-statsd.c
+++ b/src/afl-fuzz-statsd.c
@@ -1,3 +1,8 @@
+/*
+ * This implements rpc.statsd support, see docs/rpc_statsd.md
+ *
+ */
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <sys/socket.h>
@@ -226,37 +231,39 @@ int statsd_format_metric(afl_state_t *afl, char *buff, size_t bufflen) {
   */
   if (afl->statsd_metric_format_type == STATSD_TAGS_TYPE_SUFFIX) {
 
-    snprintf(buff, bufflen, afl->statsd_metric_format,
-             afl->queue_cycle ? (afl->queue_cycle - 1) : 0, tags,
-             afl->cycles_wo_finds, tags, afl->fsrv.total_execs, tags,
-             afl->fsrv.total_execs /
-                 ((double)(get_cur_time() - afl->start_time) / 1000),
-             tags, afl->queued_paths, tags, afl->queued_favored, tags,
-             afl->queued_discovered, tags, afl->queued_imported, tags,
-             afl->max_depth, tags, afl->current_entry, tags,
-             afl->pending_favored, tags, afl->pending_not_fuzzed, tags,
-             afl->queued_variable, tags, afl->unique_crashes, tags,
-             afl->unique_hangs, tags, afl->total_crashes, tags,
-             afl->slowest_exec_ms, tags,
-             count_non_255_bytes(afl, afl->virgin_bits), tags,
-             afl->var_byte_count, tags, afl->expand_havoc, tags);
+    snprintf(
+        buff, bufflen, afl->statsd_metric_format,
+        afl->queue_cycle ? (afl->queue_cycle - 1) : 0, tags,
+        afl->cycles_wo_finds, tags, afl->fsrv.total_execs, tags,
+        afl->fsrv.total_execs /
+            ((double)(get_cur_time() + afl->prev_run_time - afl->start_time) /
+             1000),
+        tags, afl->queued_paths, tags, afl->queued_favored, tags,
+        afl->queued_discovered, tags, afl->queued_imported, tags,
+        afl->max_depth, tags, afl->current_entry, tags, afl->pending_favored,
+        tags, afl->pending_not_fuzzed, tags, afl->queued_variable, tags,
+        afl->unique_crashes, tags, afl->unique_hangs, tags, afl->total_crashes,
+        tags, afl->slowest_exec_ms, tags,
+        count_non_255_bytes(afl, afl->virgin_bits), tags, afl->var_byte_count,
+        tags, afl->expand_havoc, tags);
 
   } else if (afl->statsd_metric_format_type == STATSD_TAGS_TYPE_MID) {
 
-    snprintf(buff, bufflen, afl->statsd_metric_format, tags,
-             afl->queue_cycle ? (afl->queue_cycle - 1) : 0, tags,
-             afl->cycles_wo_finds, tags, afl->fsrv.total_execs, tags,
-             afl->fsrv.total_execs /
-                 ((double)(get_cur_time() - afl->start_time) / 1000),
-             tags, afl->queued_paths, tags, afl->queued_favored, tags,
-             afl->queued_discovered, tags, afl->queued_imported, tags,
-             afl->max_depth, tags, afl->current_entry, tags,
-             afl->pending_favored, tags, afl->pending_not_fuzzed, tags,
-             afl->queued_variable, tags, afl->unique_crashes, tags,
-             afl->unique_hangs, tags, afl->total_crashes, tags,
-             afl->slowest_exec_ms, tags,
-             count_non_255_bytes(afl, afl->virgin_bits), tags,
-             afl->var_byte_count, tags, afl->expand_havoc);
+    snprintf(
+        buff, bufflen, afl->statsd_metric_format, tags,
+        afl->queue_cycle ? (afl->queue_cycle - 1) : 0, tags,
+        afl->cycles_wo_finds, tags, afl->fsrv.total_execs, tags,
+        afl->fsrv.total_execs /
+            ((double)(get_cur_time() + afl->prev_run_time - afl->start_time) /
+             1000),
+        tags, afl->queued_paths, tags, afl->queued_favored, tags,
+        afl->queued_discovered, tags, afl->queued_imported, tags,
+        afl->max_depth, tags, afl->current_entry, tags, afl->pending_favored,
+        tags, afl->pending_not_fuzzed, tags, afl->queued_variable, tags,
+        afl->unique_crashes, tags, afl->unique_hangs, tags, afl->total_crashes,
+        tags, afl->slowest_exec_ms, tags,
+        count_non_255_bytes(afl, afl->virgin_bits), tags, afl->var_byte_count,
+        tags, afl->expand_havoc);
 
   }
 
diff --git a/src/afl-fuzz.c b/src/afl-fuzz.c
index d622db71..f89c1938 100644
--- a/src/afl-fuzz.c
+++ b/src/afl-fuzz.c
@@ -103,7 +103,10 @@ static void usage(u8 *argv0, int more_help) {
       "                  quad -- see docs/power_schedules.md\n"
       "  -f file       - location read by the fuzzed program (default: stdin "
       "or @@)\n"
-      "  -t msec       - timeout for each run (auto-scaled, 50-%u ms)\n"
+      "  -t msec       - timeout for each run (auto-scaled, default %u ms). "
+      "Add a '+'\n"
+      "                  to auto-calculate the timeout, the value being the "
+      "maximum.\n"
       "  -m megs       - memory limit for child process (%u MB, 0 = no limit "
       "[default])\n"
       "  -Q            - use binary-only instrumentation (QEMU mode)\n"
@@ -122,10 +125,10 @@ static void usage(u8 *argv0, int more_help) {
       "  -c program    - enable CmpLog by specifying a binary compiled for "
       "it.\n"
       "                  if using QEMU, just use -c 0.\n"
-      "  -l cmplog_level - set the complexity/intensivity of CmpLog.\n"
-      "                  Values: 1 (basic), 2 (larger files) and 3 "
-      "(transform)\n\n"
-
+      "  -l cmplog_opts - CmpLog configuration values (e.g. \"2AT\"):\n"
+      "                  1=small files (default), 2=larger files, 3=all "
+      "files,\n"
+      "                  A=arithmetic solving, T=transformational solving.\n\n"
       "Fuzzing behavior settings:\n"
       "  -Z            - sequential queue selection instead of weighted "
       "random\n"
@@ -137,8 +140,8 @@ static void usage(u8 *argv0, int more_help) {
 
       "Testing settings:\n"
       "  -s seed       - use a fixed seed for the RNG\n"
-      "  -V seconds    - fuzz for a specific time then terminate\n"
-      "  -E execs      - fuzz for a approx. no of total executions then "
+      "  -V seconds    - fuzz for a specified time then terminate\n"
+      "  -E execs      - fuzz for an approx. no. of total executions then "
       "terminate\n"
       "                  Note: not precise and can have several more "
       "executions.\n\n"
@@ -198,6 +201,7 @@ static void usage(u8 *argv0, int more_help) {
       "AFL_FORKSRV_INIT_TMOUT: time spent waiting for forkserver during startup (in milliseconds)\n"
       "AFL_HANG_TMOUT: override timeout value (in milliseconds)\n"
       "AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES: don't warn about core dump handlers\n"
+      "AFL_IGNORE_UNKNOWN_ENVS: don't warn on unknown env vars\n"
       "AFL_IMPORT_FIRST: sync and import test cases from other fuzzer instances first\n"
       "AFL_KILL_SIGNAL: Signal ID delivered to child processes on timeout, etc. (default: SIGKILL)\n"
       "AFL_MAP_SIZE: the shared memory size for that target. must be >= the size\n"
@@ -222,6 +226,7 @@ static void usage(u8 *argv0, int more_help) {
       "AFL_PERSISTENT_RECORD: record the last X inputs to every crash in out/crashes\n"
 #endif
       "AFL_PRELOAD: LD_PRELOAD / DYLD_INSERT_LIBRARIES settings for target\n"
+      "AFL_TARGET_ENV: pass extra environment variables to target\n"
       "AFL_SHUFFLE_QUEUE: reorder the input queue randomly on startup\n"
       "AFL_SKIP_BIN_CHECK: skip the check, if the target is an executable\n"
       "AFL_SKIP_CPUFREQ: do not warn about variable cpu clocking\n"
@@ -356,7 +361,7 @@ int main(int argc, char **argv_orig, char **envp) {
   exit_1 = !!afl->afl_env.afl_bench_just_one;
 
   SAYF(cCYA "afl-fuzz" VERSION cRST
-            " based on afl by Michal Zalewski and a big online community\n");
+            " based on afl by Michal Zalewski and a large online community\n");
 
   doc_path = access(DOC_PATH, F_OK) != 0 ? (u8 *)"docs" : (u8 *)DOC_PATH;
 
@@ -561,13 +566,22 @@ int main(int argc, char **argv_orig, char **envp) {
 
       case 'F':                                         /* foreign sync dir */
 
-        if (!afl->is_main_node)
+        if (!optarg) { FATAL("Missing path for -F"); }
+        if (!afl->is_main_node) {
+
           FATAL(
               "Option -F can only be specified after the -M option for the "
               "main fuzzer of a fuzzing campaign");
-        if (afl->foreign_sync_cnt >= FOREIGN_SYNCS_MAX)
+
+        }
+
+        if (afl->foreign_sync_cnt >= FOREIGN_SYNCS_MAX) {
+
           FATAL("Maximum %u entried of -F option can be specified",
                 FOREIGN_SYNCS_MAX);
+
+        }
+
         afl->foreign_syncs[afl->foreign_sync_cnt].dir = optarg;
         while (afl->foreign_syncs[afl->foreign_sync_cnt]
                    .dir[strlen(afl->foreign_syncs[afl->foreign_sync_cnt].dir) -
@@ -811,13 +825,36 @@ int main(int argc, char **argv_orig, char **envp) {
 
       case 'l': {
 
-        afl->cmplog_lvl = atoi(optarg);
-        if (afl->cmplog_lvl < 1 || afl->cmplog_lvl > CMPLOG_LVL_MAX) {
+        if (!optarg) { FATAL("missing parameter for 'l'"); }
+        char *c = optarg;
+        while (*c) {
 
-          FATAL(
-              "Bad complog level value, accepted values are 1 (default), 2 and "
-              "%u.",
-              CMPLOG_LVL_MAX);
+          switch (*c) {
+
+            case '0':
+            case '1':
+              afl->cmplog_lvl = 1;
+              break;
+            case '2':
+              afl->cmplog_lvl = 2;
+              break;
+            case '3':
+              afl->cmplog_lvl = 3;
+              break;
+            case 'a':
+            case 'A':
+              afl->cmplog_enable_arith = 1;
+              break;
+            case 't':
+            case 'T':
+              afl->cmplog_enable_transform = 1;
+              break;
+            default:
+              FATAL("Unknown option value '%c' in -l %s", *c, optarg);
+
+          }
+
+          ++c;
 
         }
 
@@ -1017,32 +1054,6 @@ int main(int argc, char **argv_orig, char **envp) {
 
   }
 
-  if (afl->fsrv.qemu_mode && getenv("AFL_USE_QASAN")) {
-
-    u8 *preload = getenv("AFL_PRELOAD");
-    u8 *libqasan = get_libqasan_path(argv_orig[0]);
-
-    if (!preload) {
-
-      setenv("AFL_PRELOAD", libqasan, 0);
-
-    } else {
-
-      u8 *result = ck_alloc(strlen(libqasan) + strlen(preload) + 2);
-      strcpy(result, libqasan);
-      strcat(result, " ");
-      strcat(result, preload);
-
-      setenv("AFL_PRELOAD", result, 1);
-      ck_free(result);
-
-    }
-
-    afl->afl_env.afl_preload = (u8 *)getenv("AFL_PRELOAD");
-    ck_free(libqasan);
-
-  }
-
   if (afl->fsrv.mem_limit && afl->shm.cmplog_mode) afl->fsrv.mem_limit += 260;
 
   OKF("afl++ is maintained by Marc \"van Hauser\" Heuse, Heiko \"hexcoder\" "
@@ -1307,38 +1318,7 @@ int main(int argc, char **argv_orig, char **envp) {
 
     if (afl->fsrv.qemu_mode) {
 
-      u8 *qemu_preload = getenv("QEMU_SET_ENV");
-      u8 *afl_preload = getenv("AFL_PRELOAD");
-      u8 *buf;
-
-      s32 j, afl_preload_size = strlen(afl_preload);
-      for (j = 0; j < afl_preload_size; ++j) {
-
-        if (afl_preload[j] == ',') {
-
-          PFATAL(
-              "Comma (',') is not allowed in AFL_PRELOAD when -Q is "
-              "specified!");
-
-        }
-
-      }
-
-      if (qemu_preload) {
-
-        buf = alloc_printf("%s,LD_PRELOAD=%s,DYLD_INSERT_LIBRARIES=%s",
-                           qemu_preload, afl_preload, afl_preload);
-
-      } else {
-
-        buf = alloc_printf("LD_PRELOAD=%s,DYLD_INSERT_LIBRARIES=%s",
-                           afl_preload, afl_preload);
-
-      }
-
-      setenv("QEMU_SET_ENV", buf, 1);
-
-      ck_free(buf);
+      /* afl-qemu-trace takes care of converting AFL_PRELOAD. */
 
     } else {
 
@@ -1355,6 +1335,13 @@ int main(int argc, char **argv_orig, char **envp) {
 
   }
 
+  if (afl->afl_env.afl_target_env &&
+      !extract_and_set_env(afl->afl_env.afl_target_env)) {
+
+    FATAL("Bad value of AFL_TARGET_ENV");
+
+  }
+
   save_cmdline(afl, argc, argv);
 
   fix_up_banner(afl, argv[optind]);
@@ -1398,6 +1385,15 @@ int main(int argc, char **argv_orig, char **envp) {
   set_scheduler_mode(SCHEDULER_MODE_LOW_LATENCY);
   #endif
 
+  #ifdef __APPLE__
+  if (pthread_set_qos_class_self_np(QOS_CLASS_USER_INTERACTIVE, 0) != 0) {
+
+    WARNF("general thread priority settings failed");
+
+  }
+
+  #endif
+
   init_count_class16();
 
   if (afl->is_main_node && check_main_node_exists(afl) == 1) {
@@ -1432,24 +1428,9 @@ int main(int argc, char **argv_orig, char **envp) {
   // read_foreign_testcases(afl, 1); for the moment dont do this
   OKF("Loaded a total of %u seeds.", afl->queued_paths);
 
-  load_auto(afl);
-
   pivot_inputs(afl);
 
-  if (extras_dir_cnt) {
-
-    for (i = 0; i < extras_dir_cnt; i++) {
-
-      load_extras(afl, extras_dir[i]);
-
-    }
-
-    dedup_extras(afl);
-    OKF("Loaded a total of %u extras.", afl->extras_cnt);
-
-  }
-
-  if (!afl->timeout_given) { find_timeout(afl); }
+  if (!afl->timeout_given) { find_timeout(afl); }  // only for resumes!
 
   if ((afl->tmp_dir = afl->afl_env.afl_tmpdir) != NULL &&
       !afl->in_place_resume) {
@@ -1577,6 +1558,21 @@ int main(int argc, char **argv_orig, char **envp) {
 
   }
 
+  if (afl->non_instrumented_mode || afl->fsrv.qemu_mode || afl->unicorn_mode) {
+
+    map_size = afl->fsrv.map_size = MAP_SIZE;
+    afl->virgin_bits = ck_realloc(afl->virgin_bits, map_size);
+    afl->virgin_tmout = ck_realloc(afl->virgin_tmout, map_size);
+    afl->virgin_crash = ck_realloc(afl->virgin_crash, map_size);
+    afl->var_bytes = ck_realloc(afl->var_bytes, map_size);
+    afl->top_rated = ck_realloc(afl->top_rated, map_size * sizeof(void *));
+    afl->clean_trace = ck_realloc(afl->clean_trace, map_size);
+    afl->clean_trace_custom = ck_realloc(afl->clean_trace_custom, map_size);
+    afl->first_trace = ck_realloc(afl->first_trace, map_size);
+    afl->map_tmp_buf = ck_realloc(afl->map_tmp_buf, map_size);
+
+  }
+
   afl->argv = use_argv;
   afl->fsrv.trace_bits =
       afl_shm_init(&afl->shm, afl->fsrv.map_size, afl->non_instrumented_mode);
@@ -1584,49 +1580,49 @@ int main(int argc, char **argv_orig, char **envp) {
   if (!afl->non_instrumented_mode && !afl->fsrv.qemu_mode &&
       !afl->unicorn_mode) {
 
-    afl->fsrv.map_size = 4194304;  // dummy temporary value
-    setenv("AFL_MAP_SIZE", "4194304", 1);
+    if (map_size <= DEFAULT_SHMEM_SIZE && !afl->non_instrumented_mode &&
+        !afl->fsrv.qemu_mode && !afl->unicorn_mode) {
+
+      afl->fsrv.map_size = DEFAULT_SHMEM_SIZE;  // dummy temporary value
+      char vbuf[16];
+      snprintf(vbuf, sizeof(vbuf), "%u", DEFAULT_SHMEM_SIZE);
+      setenv("AFL_MAP_SIZE", vbuf, 1);
+
+    }
 
     u32 new_map_size = afl_fsrv_get_mapsize(
         &afl->fsrv, afl->argv, &afl->stop_soon, afl->afl_env.afl_debug_child);
 
-    if (new_map_size && new_map_size != 4194304) {
-
-      // only reinitialize when it makes sense
-      if (map_size < new_map_size ||
-          (new_map_size > map_size && new_map_size - map_size > MAP_SIZE)) {
-
-        OKF("Re-initializing maps to %u bytes", new_map_size);
-
-        afl->virgin_bits = ck_realloc(afl->virgin_bits, new_map_size);
-        afl->virgin_tmout = ck_realloc(afl->virgin_tmout, new_map_size);
-        afl->virgin_crash = ck_realloc(afl->virgin_crash, new_map_size);
-        afl->var_bytes = ck_realloc(afl->var_bytes, new_map_size);
-        afl->top_rated =
-            ck_realloc(afl->top_rated, new_map_size * sizeof(void *));
-        afl->clean_trace = ck_realloc(afl->clean_trace, new_map_size);
-        afl->clean_trace_custom =
-            ck_realloc(afl->clean_trace_custom, new_map_size);
-        afl->first_trace = ck_realloc(afl->first_trace, new_map_size);
-        afl->map_tmp_buf = ck_realloc(afl->map_tmp_buf, new_map_size);
-
-        afl_fsrv_kill(&afl->fsrv);
-        afl_shm_deinit(&afl->shm);
-        afl->fsrv.map_size = new_map_size;
-        afl->fsrv.trace_bits =
-            afl_shm_init(&afl->shm, new_map_size, afl->non_instrumented_mode);
-        setenv("AFL_NO_AUTODICT", "1", 1);  // loaded already
-        afl_fsrv_start(&afl->fsrv, afl->argv, &afl->stop_soon,
-                       afl->afl_env.afl_debug_child);
-
-      }
+    // only reinitialize if the map needs to be larger than what we have.
+    if (map_size < new_map_size) {
+
+      OKF("Re-initializing maps to %u bytes", new_map_size);
+
+      afl->virgin_bits = ck_realloc(afl->virgin_bits, new_map_size);
+      afl->virgin_tmout = ck_realloc(afl->virgin_tmout, new_map_size);
+      afl->virgin_crash = ck_realloc(afl->virgin_crash, new_map_size);
+      afl->var_bytes = ck_realloc(afl->var_bytes, new_map_size);
+      afl->top_rated =
+          ck_realloc(afl->top_rated, new_map_size * sizeof(void *));
+      afl->clean_trace = ck_realloc(afl->clean_trace, new_map_size);
+      afl->clean_trace_custom =
+          ck_realloc(afl->clean_trace_custom, new_map_size);
+      afl->first_trace = ck_realloc(afl->first_trace, new_map_size);
+      afl->map_tmp_buf = ck_realloc(afl->map_tmp_buf, new_map_size);
+
+      afl_fsrv_kill(&afl->fsrv);
+      afl_shm_deinit(&afl->shm);
+      afl->fsrv.map_size = new_map_size;
+      afl->fsrv.trace_bits =
+          afl_shm_init(&afl->shm, new_map_size, afl->non_instrumented_mode);
+      setenv("AFL_NO_AUTODICT", "1", 1);  // loaded already
+      afl_fsrv_start(&afl->fsrv, afl->argv, &afl->stop_soon,
+                     afl->afl_env.afl_debug_child);
 
       map_size = new_map_size;
 
     }
 
-    afl->fsrv.map_size = map_size;
-
   }
 
   if (afl->cmplog_binary) {
@@ -1639,57 +1635,77 @@ int main(int argc, char **argv_orig, char **envp) {
     afl->cmplog_fsrv.cmplog_binary = afl->cmplog_binary;
     afl->cmplog_fsrv.init_child_func = cmplog_exec_child;
 
-    afl->cmplog_fsrv.map_size = 4194304;
+    if ((map_size <= DEFAULT_SHMEM_SIZE ||
+         afl->cmplog_fsrv.map_size < map_size) &&
+        !afl->non_instrumented_mode && !afl->fsrv.qemu_mode &&
+        !afl->unicorn_mode) {
+
+      afl->cmplog_fsrv.map_size = MAX(map_size, (u32)DEFAULT_SHMEM_SIZE);
+      char vbuf[16];
+      snprintf(vbuf, sizeof(vbuf), "%u", afl->cmplog_fsrv.map_size);
+      setenv("AFL_MAP_SIZE", vbuf, 1);
+
+    }
 
     u32 new_map_size =
         afl_fsrv_get_mapsize(&afl->cmplog_fsrv, afl->argv, &afl->stop_soon,
                              afl->afl_env.afl_debug_child);
 
-    if (new_map_size && new_map_size != 4194304) {
+    // only reinitialize when it needs to be larger
+    if (map_size < new_map_size) {
 
-      // only reinitialize when it needs to be larger
-      if (map_size < new_map_size) {
+      OKF("Re-initializing maps to %u bytes due cmplog", new_map_size);
 
-        OKF("Re-initializing maps to %u bytes due cmplog", new_map_size);
+      afl->virgin_bits = ck_realloc(afl->virgin_bits, new_map_size);
+      afl->virgin_tmout = ck_realloc(afl->virgin_tmout, new_map_size);
+      afl->virgin_crash = ck_realloc(afl->virgin_crash, new_map_size);
+      afl->var_bytes = ck_realloc(afl->var_bytes, new_map_size);
+      afl->top_rated =
+          ck_realloc(afl->top_rated, new_map_size * sizeof(void *));
+      afl->clean_trace = ck_realloc(afl->clean_trace, new_map_size);
+      afl->clean_trace_custom =
+          ck_realloc(afl->clean_trace_custom, new_map_size);
+      afl->first_trace = ck_realloc(afl->first_trace, new_map_size);
+      afl->map_tmp_buf = ck_realloc(afl->map_tmp_buf, new_map_size);
 
-        afl->virgin_bits = ck_realloc(afl->virgin_bits, new_map_size);
-        afl->virgin_tmout = ck_realloc(afl->virgin_tmout, new_map_size);
-        afl->virgin_crash = ck_realloc(afl->virgin_crash, new_map_size);
-        afl->var_bytes = ck_realloc(afl->var_bytes, new_map_size);
-        afl->top_rated =
-            ck_realloc(afl->top_rated, new_map_size * sizeof(void *));
-        afl->clean_trace = ck_realloc(afl->clean_trace, new_map_size);
-        afl->clean_trace_custom =
-            ck_realloc(afl->clean_trace_custom, new_map_size);
-        afl->first_trace = ck_realloc(afl->first_trace, new_map_size);
-        afl->map_tmp_buf = ck_realloc(afl->map_tmp_buf, new_map_size);
+      afl_fsrv_kill(&afl->fsrv);
+      afl_fsrv_kill(&afl->cmplog_fsrv);
+      afl_shm_deinit(&afl->shm);
 
-        afl_fsrv_kill(&afl->fsrv);
-        afl_fsrv_kill(&afl->cmplog_fsrv);
-        afl_shm_deinit(&afl->shm);
-        afl->cmplog_fsrv.map_size = new_map_size;  // non-cmplog stays the same
+      afl->cmplog_fsrv.map_size = new_map_size;  // non-cmplog stays the same
+      map_size = new_map_size;
 
-        afl->fsrv.trace_bits =
-            afl_shm_init(&afl->shm, new_map_size, afl->non_instrumented_mode);
-        setenv("AFL_NO_AUTODICT", "1", 1);  // loaded already
-        afl_fsrv_start(&afl->fsrv, afl->argv, &afl->stop_soon,
-                       afl->afl_env.afl_debug_child);
+      setenv("AFL_NO_AUTODICT", "1", 1);  // loaded already
+      afl->fsrv.trace_bits =
+          afl_shm_init(&afl->shm, new_map_size, afl->non_instrumented_mode);
+      afl->cmplog_fsrv.trace_bits = afl->fsrv.trace_bits;
+      afl_fsrv_start(&afl->fsrv, afl->argv, &afl->stop_soon,
+                     afl->afl_env.afl_debug_child);
+      afl_fsrv_start(&afl->cmplog_fsrv, afl->argv, &afl->stop_soon,
+                     afl->afl_env.afl_debug_child);
 
-        afl_fsrv_start(&afl->cmplog_fsrv, afl->argv, &afl->stop_soon,
-                       afl->afl_env.afl_debug_child);
+    }
 
-        map_size = new_map_size;
+    OKF("Cmplog forkserver successfully started");
 
-      }
+  }
 
-    }
+  load_auto(afl);
 
-    afl->cmplog_fsrv.map_size = map_size;
+  if (extras_dir_cnt) {
 
-    OKF("Cmplog forkserver successfully started");
+    for (i = 0; i < extras_dir_cnt; i++) {
+
+      load_extras(afl, extras_dir[i]);
+
+    }
 
   }
 
+  deunicode_extras(afl);
+  dedup_extras(afl);
+  if (afl->extras_cnt) { OKF("Loaded a total of %u extras.", afl->extras_cnt); }
+
   // after we have the correct bitmap size we can read the bitmap -B option
   // and set the virgin maps
   if (afl->in_bitmap) {
@@ -1717,18 +1733,49 @@ int main(int argc, char **argv_orig, char **envp) {
 
   cull_queue(afl);
 
-  if (!afl->pending_not_fuzzed) {
+  // ensure we have at least one seed that is not disabled.
+  u32 entry, valid_seeds = 0;
+  for (entry = 0; entry < afl->queued_paths; ++entry)
+    if (!afl->queue_buf[entry]->disabled) { ++valid_seeds; }
+
+  if (!afl->pending_not_fuzzed || !valid_seeds) {
 
     FATAL("We need at least one valid input seed that does not crash!");
 
   }
 
+  if (afl->timeout_given == 2) {  // -t ...+ option
+
+    if (valid_seeds == 1) {
+
+      WARNF(
+          "Only one valid seed is present, auto-calculating the timeout is "
+          "disabled!");
+      afl->timeout_given = 1;
+
+    } else {
+
+      u64 max_ms = 0;
+
+      for (entry = 0; entry < afl->queued_paths; ++entry)
+        if (!afl->queue_buf[entry]->disabled)
+          if (afl->queue_buf[entry]->exec_us > max_ms)
+            max_ms = afl->queue_buf[entry]->exec_us;
+
+      afl->fsrv.exec_tmout = max_ms;
+
+    }
+
+  }
+
   show_init_stats(afl);
 
   if (unlikely(afl->old_seed_selection)) seek_to = find_start_position(afl);
 
-  write_stats_file(afl, 0, 0, 0);
-  maybe_update_plot_file(afl, 0, 0);
+  afl->start_time = get_cur_time();
+  if (afl->in_place_resume || afl->afl_env.afl_autoresume) load_stats_file(afl);
+  write_stats_file(afl, 0, 0, 0, 0);
+  maybe_update_plot_file(afl, 0, 0, 0);
   save_auto(afl);
 
   if (afl->stop_soon) { goto stop_fuzzing; }
@@ -1778,12 +1825,15 @@ int main(int argc, char **argv_orig, char **envp) {
       if (unlikely(afl->old_seed_selection)) {
 
         afl->current_entry = 0;
-        while (unlikely(afl->queue_buf[afl->current_entry]->disabled)) {
+        while (unlikely(afl->current_entry < afl->queued_paths &&
+                        afl->queue_buf[afl->current_entry]->disabled)) {
 
           ++afl->current_entry;
 
         }
 
+        if (afl->current_entry >= afl->queued_paths) { afl->current_entry = 0; }
+
         afl->queue_cur = afl->queue_buf[afl->current_entry];
 
         if (unlikely(seek_to)) {
@@ -1986,15 +2036,24 @@ int main(int argc, char **argv_orig, char **envp) {
 
         if (unlikely(afl->is_main_node)) {
 
-          if (!(sync_interval_cnt++ % (SYNC_INTERVAL / 3))) {
+          if (unlikely(get_cur_time() >
+                       (SYNC_TIME >> 1) + afl->last_sync_time)) {
+
+            if (!(sync_interval_cnt++ % (SYNC_INTERVAL / 3))) {
 
-            sync_fuzzers(afl);
+              sync_fuzzers(afl);
+
+            }
 
           }
 
         } else {
 
-          if (!(sync_interval_cnt++ % SYNC_INTERVAL)) { sync_fuzzers(afl); }
+          if (unlikely(get_cur_time() > SYNC_TIME + afl->last_sync_time)) {
+
+            if (!(sync_interval_cnt++ % SYNC_INTERVAL)) { sync_fuzzers(afl); }
+
+          }
 
         }
 
@@ -2009,12 +2068,12 @@ int main(int argc, char **argv_orig, char **envp) {
   }
 
   write_bitmap(afl);
-  maybe_update_plot_file(afl, 0, 0);
+  maybe_update_plot_file(afl, 0, 0, 0);
   save_auto(afl);
 
 stop_fuzzing:
 
-  write_stats_file(afl, 0, 0, 0);
+  write_stats_file(afl, 0, 0, 0, 0);
   afl->force_ui_update = 1;  // ensure the screen is reprinted
   show_stats(afl);           // print the screen one last time
 
diff --git a/src/afl-sharedmem.c b/src/afl-sharedmem.c
index 3241a130..fbb8e65d 100644
--- a/src/afl-sharedmem.c
+++ b/src/afl-sharedmem.c
@@ -162,8 +162,8 @@ u8 *afl_shm_init(sharedmem_t *shm, size_t map_size,
   snprintf(shm->g_shm_file_path, L_tmpnam, "/afl_%d_%ld", getpid(), random());
 
   /* create the shared memory segment as if it was a file */
-  shm->g_shm_fd =
-      shm_open(shm->g_shm_file_path, O_CREAT | O_RDWR | O_EXCL, 0600);
+  shm->g_shm_fd = shm_open(shm->g_shm_file_path, O_CREAT | O_RDWR | O_EXCL,
+                           DEFAULT_PERMISSION);
   if (shm->g_shm_fd == -1) { PFATAL("shm_open() failed"); }
 
   /* configure the size of the shared memory segment */
@@ -202,7 +202,8 @@ u8 *afl_shm_init(sharedmem_t *shm, size_t map_size,
 
     /* create the shared memory segment as if it was a file */
     shm->cmplog_g_shm_fd =
-        shm_open(shm->cmplog_g_shm_file_path, O_CREAT | O_RDWR | O_EXCL, 0600);
+        shm_open(shm->cmplog_g_shm_file_path, O_CREAT | O_RDWR | O_EXCL,
+                 DEFAULT_PERMISSION);
     if (shm->cmplog_g_shm_fd == -1) { PFATAL("shm_open() failed"); }
 
     /* configure the size of the shared memory segment */
@@ -241,13 +242,14 @@ u8 *afl_shm_init(sharedmem_t *shm, size_t map_size,
 #else
   u8 *shm_str;
 
-  shm->shm_id = shmget(IPC_PRIVATE, map_size, IPC_CREAT | IPC_EXCL | 0600);
+  shm->shm_id =
+      shmget(IPC_PRIVATE, map_size, IPC_CREAT | IPC_EXCL | DEFAULT_PERMISSION);
   if (shm->shm_id < 0) { PFATAL("shmget() failed"); }
 
   if (shm->cmplog_mode) {
 
     shm->cmplog_shm_id = shmget(IPC_PRIVATE, sizeof(struct cmp_map),
-                                IPC_CREAT | IPC_EXCL | 0600);
+                                IPC_CREAT | IPC_EXCL | DEFAULT_PERMISSION);
 
     if (shm->cmplog_shm_id < 0) {
 
diff --git a/src/afl-showmap.c b/src/afl-showmap.c
index c424cdf3..558665a2 100644
--- a/src/afl-showmap.c
+++ b/src/afl-showmap.c
@@ -72,8 +72,7 @@ static u8 *in_data,                    /* Input data                        */
 static u64 total;                      /* tuple content information         */
 static u32 tcnt, highest;              /* tuple content information         */
 
-static u32 in_len,                     /* Input data length                 */
-    arg_offset;                        /* Total number of execs             */
+static u32 in_len;                     /* Input data length                 */
 
 static u32 map_size = MAP_SIZE;
 
@@ -252,7 +251,7 @@ static u32 write_results_to_file(afl_forkserver_t *fsrv, u8 *outfile) {
   } else {
 
     unlink(outfile);                                       /* Ignore errors */
-    fd = open(outfile, O_WRONLY | O_CREAT | O_EXCL, 0600);
+    fd = open(outfile, O_WRONLY | O_CREAT | O_EXCL, DEFAULT_PERMISSION);
     if (fd < 0) { PFATAL("Unable to create '%s'", outfile); }
 
   }
@@ -563,6 +562,7 @@ static void set_up_environment(afl_forkserver_t *fsrv) {
          "detect_leaks=0:"
          "allocator_may_return_null=1:"
          "symbolize=0:"
+         "detect_odr_violation=0:"
          "handle_segv=0:"
          "handle_sigbus=0:"
          "handle_abort=0:"
@@ -598,38 +598,7 @@ static void set_up_environment(afl_forkserver_t *fsrv) {
 
     if (fsrv->qemu_mode) {
 
-      u8 *qemu_preload = getenv("QEMU_SET_ENV");
-      u8 *afl_preload = getenv("AFL_PRELOAD");
-      u8 *buf;
-
-      s32 i, afl_preload_size = strlen(afl_preload);
-      for (i = 0; i < afl_preload_size; ++i) {
-
-        if (afl_preload[i] == ',') {
-
-          PFATAL(
-              "Comma (',') is not allowed in AFL_PRELOAD when -Q is "
-              "specified!");
-
-        }
-
-      }
-
-      if (qemu_preload) {
-
-        buf = alloc_printf("%s,LD_PRELOAD=%s,DYLD_INSERT_LIBRARIES=%s",
-                           qemu_preload, afl_preload, afl_preload);
-
-      } else {
-
-        buf = alloc_printf("LD_PRELOAD=%s,DYLD_INSERT_LIBRARIES=%s",
-                           afl_preload, afl_preload);
-
-      }
-
-      setenv("QEMU_SET_ENV", buf, 1);
-
-      ck_free(buf);
+      /* afl-qemu-trace takes care of converting AFL_PRELOAD. */
 
     } else {
 
@@ -945,31 +914,6 @@ int main(int argc, char **argv_orig, char **envp) {
 
   if (optind == argc || !out_file) { usage(argv[0]); }
 
-  if (fsrv->qemu_mode && getenv("AFL_USE_QASAN")) {
-
-    u8 *preload = getenv("AFL_PRELOAD");
-    u8 *libqasan = get_libqasan_path(argv_orig[0]);
-
-    if (!preload) {
-
-      setenv("AFL_PRELOAD", libqasan, 0);
-
-    } else {
-
-      u8 *result = ck_alloc(strlen(libqasan) + strlen(preload) + 2);
-      strcpy(result, libqasan);
-      strcat(result, " ");
-      strcat(result, preload);
-
-      setenv("AFL_PRELOAD", result, 1);
-      ck_free(result);
-
-    }
-
-    ck_free(libqasan);
-
-  }
-
   if (in_dir) {
 
     if (!out_file && !collect_coverage)
@@ -1011,13 +955,30 @@ int main(int argc, char **argv_orig, char **envp) {
 
   }
 
+
   if (in_dir) {
 
-    if (at_file) { PFATAL("Options -A and -i are mutually exclusive"); }
-    detect_file_args(argv + optind, "", &fsrv->use_stdin);
+    /* If we don't have a file name chosen yet, use a safe default. */
+    u8 *use_dir = ".";
+
+    if (access(use_dir, R_OK | W_OK | X_OK)) {
+
+      use_dir = get_afl_env("TMPDIR");
+      if (!use_dir) { use_dir = "/tmp"; }
+
+    }
+
+    stdin_file = at_file ? strdup(at_file)
+                         : (char *)alloc_printf("%s/.afl-showmap-temp-%u",
+                                                use_dir, (u32)getpid());
+    unlink(stdin_file);
+
+    // If @@ are in the target args, replace them and also set use_stdin=false.
+    detect_file_args(argv + optind, stdin_file, &fsrv->use_stdin);
 
   } else {
 
+    // If @@ are in the target args, replace them and also set use_stdin=false.
     detect_file_args(argv + optind, at_file, &fsrv->use_stdin);
 
   }
@@ -1042,14 +1003,6 @@ int main(int argc, char **argv_orig, char **envp) {
 
   }
 
-  i = 0;
-  while (use_argv[i] != NULL && !arg_offset) {
-
-    if (strcmp(use_argv[i], "@@") == 0) { arg_offset = i; }
-    i++;
-
-  }
-
   shm_fuzz = ck_alloc(sizeof(sharedmem_t));
 
   /* initialize cmplog_mode */
@@ -1160,29 +1113,12 @@ int main(int argc, char **argv_orig, char **envp) {
 
     }
 
-    u8 *use_dir = ".";
-
-    if (access(use_dir, R_OK | W_OK | X_OK)) {
-
-      use_dir = get_afl_env("TMPDIR");
-      if (!use_dir) { use_dir = "/tmp"; }
-
-    }
-
-    stdin_file =
-        alloc_printf("%s/.afl-showmap-temp-%u", use_dir, (u32)getpid());
-    unlink(stdin_file);
     atexit(at_exit_handler);
     fsrv->out_file = stdin_file;
-    fsrv->out_fd = open(stdin_file, O_RDWR | O_CREAT | O_EXCL, 0600);
+    fsrv->out_fd =
+        open(stdin_file, O_RDWR | O_CREAT | O_EXCL, DEFAULT_PERMISSION);
     if (fsrv->out_fd < 0) { PFATAL("Unable to create '%s'", out_file); }
 
-    if (arg_offset && use_argv[arg_offset] != stdin_file) {
-
-      use_argv[arg_offset] = strdup(stdin_file);
-
-    }
-
     if (get_afl_env("AFL_DEBUG")) {
 
       int j = optind;
diff --git a/src/afl-tmin.c b/src/afl-tmin.c
index 15336959..fc974262 100644
--- a/src/afl-tmin.c
+++ b/src/afl-tmin.c
@@ -244,7 +244,7 @@ static s32 write_to_file(u8 *path, u8 *mem, u32 len) {
 
   unlink(path);                                            /* Ignore errors */
 
-  ret = open(path, O_RDWR | O_CREAT | O_EXCL, 0600);
+  ret = open(path, O_RDWR | O_CREAT | O_EXCL, DEFAULT_PERMISSION);
 
   if (ret < 0) { PFATAL("Unable to create '%s'", path); }
 
@@ -666,7 +666,7 @@ static void set_up_environment(afl_forkserver_t *fsrv) {
   unlink(out_file);
 
   fsrv->out_file = out_file;
-  fsrv->out_fd = open(out_file, O_RDWR | O_CREAT | O_EXCL, 0600);
+  fsrv->out_fd = open(out_file, O_RDWR | O_CREAT | O_EXCL, DEFAULT_PERMISSION);
 
   if (fsrv->out_fd < 0) { PFATAL("Unable to create '%s'", out_file); }
 
@@ -717,6 +717,7 @@ static void set_up_environment(afl_forkserver_t *fsrv) {
          "detect_leaks=0:"
          "allocator_may_return_null=1:"
          "symbolize=0:"
+         "detect_odr_violation=0:"
          "handle_segv=0:"
          "handle_sigbus=0:"
          "handle_abort=0:"
@@ -752,38 +753,7 @@ static void set_up_environment(afl_forkserver_t *fsrv) {
 
     if (fsrv->qemu_mode) {
 
-      u8 *qemu_preload = getenv("QEMU_SET_ENV");
-      u8 *afl_preload = getenv("AFL_PRELOAD");
-      u8 *buf;
-
-      s32 i, afl_preload_size = strlen(afl_preload);
-      for (i = 0; i < afl_preload_size; ++i) {
-
-        if (afl_preload[i] == ',') {
-
-          PFATAL(
-              "Comma (',') is not allowed in AFL_PRELOAD when -Q is "
-              "specified!");
-
-        }
-
-      }
-
-      if (qemu_preload) {
-
-        buf = alloc_printf("%s,LD_PRELOAD=%s,DYLD_INSERT_LIBRARIES=%s",
-                           qemu_preload, afl_preload, afl_preload);
-
-      } else {
-
-        buf = alloc_printf("LD_PRELOAD=%s,DYLD_INSERT_LIBRARIES=%s",
-                           afl_preload, afl_preload);
-
-      }
-
-      setenv("QEMU_SET_ENV", buf, 1);
-
-      ck_free(buf);
+      /* afl-qemu-trace takes care of converting AFL_PRELOAD. */
 
     } else {
 
@@ -1078,31 +1048,6 @@ int main(int argc, char **argv_orig, char **envp) {
   check_environment_vars(envp);
   setenv("AFL_NO_AUTODICT", "1", 1);
 
-  if (fsrv->qemu_mode && getenv("AFL_USE_QASAN")) {
-
-    u8 *preload = getenv("AFL_PRELOAD");
-    u8 *libqasan = get_libqasan_path(argv_orig[0]);
-
-    if (!preload) {
-
-      setenv("AFL_PRELOAD", libqasan, 0);
-
-    } else {
-
-      u8 *result = ck_alloc(strlen(libqasan) + strlen(preload) + 2);
-      strcpy(result, libqasan);
-      strcat(result, " ");
-      strcat(result, preload);
-
-      setenv("AFL_PRELOAD", result, 1);
-      ck_free(result);
-
-    }
-
-    ck_free(libqasan);
-
-  }
-
   /* initialize cmplog_mode */
   shm.cmplog_mode = 0;
 
diff --git a/test-instr.c b/test-instr.c
index 00799103..13d4eb93 100644
--- a/test-instr.c
+++ b/test-instr.c
@@ -18,6 +18,10 @@
 #include <sys/stat.h>
 #include <fcntl.h>
 
+#ifdef TEST_SHARED_OBJECT
+  #define main main_exported
+#endif
+
 int main(int argc, char **argv) {
 
   int   fd = 0;
diff --git a/test/test-basic.sh b/test/test-basic.sh
index 132610c0..b4bb9df2 100755
--- a/test/test-basic.sh
+++ b/test/test-basic.sh
@@ -7,7 +7,7 @@ AFL_GCC=afl-gcc
 $ECHO "$BLUE[*] Testing: ${AFL_GCC}, afl-showmap, afl-fuzz, afl-cmin and afl-tmin"
 test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" -o "$SYS" = "i86pc" -o "$SYS" = "i386" && {
  test -e ../${AFL_GCC} -a -e ../afl-showmap -a -e ../afl-fuzz && {
-  ../${AFL_GCC} -o test-instr.plain ../test-instr.c > /dev/null 2>&1
+  ../${AFL_GCC} -o test-instr.plain -O0 ../test-instr.c > /dev/null 2>&1
   AFL_HARDEN=1 ../${AFL_GCC} -o test-compcov.harden test-compcov.c > /dev/null 2>&1
   test -e test-instr.plain && {
     $ECHO "$GREEN[+] ${AFL_GCC} compilation succeeded"
@@ -39,7 +39,7 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" -o "$SYS" = "i86pc
     $ECHO "$RED[!] ${AFL_GCC} failed"
     echo CUT------------------------------------------------------------------CUT
     uname -a
-    ../${AFL_GCC} -o test-instr.plain ../test-instr.c
+    ../${AFL_GCC} -o test-instr.plain -O0 ../test-instr.c
     echo CUT------------------------------------------------------------------CUT
     CODE=1
   }
@@ -128,7 +128,7 @@ test "$SYS" = "i686" -o "$SYS" = "x86_64" -o "$SYS" = "amd64" -o "$SYS" = "i86pc
  $ECHO "$BLUE[*] Testing: ${AFL_GCC}, afl-showmap, afl-fuzz, afl-cmin and afl-tmin"
  SKIP=
  test -e ../${AFL_GCC} -a -e ../afl-showmap -a -e ../afl-fuzz && {
-  ../${AFL_GCC} -o test-instr.plain ../test-instr.c > /dev/null 2>&1
+  ../${AFL_GCC} -o test-instr.plain -O0 ../test-instr.c > /dev/null 2>&1
   AFL_HARDEN=1 ../${AFL_GCC} -o test-compcov.harden test-compcov.c > /dev/null 2>&1
   test -e test-instr.plain && {
     $ECHO "$GREEN[+] ${AFL_GCC} compilation succeeded"
diff --git a/test/test-custom-mutators.sh b/test/test-custom-mutators.sh
index 24c95ac7..bae4220f 100755
--- a/test/test-custom-mutators.sh
+++ b/test/test-custom-mutators.sh
@@ -37,9 +37,9 @@ test "1" = "`../afl-fuzz | grep -i 'without python' >/dev/null; echo $?`" && {
       echo "00000" > in/in
 
       # Run afl-fuzz w/ the C mutator
-      $ECHO "$GREY[*] running afl-fuzz for the C mutator, this will take approx 5 seconds"
+      $ECHO "$GREY[*] running afl-fuzz for the C mutator, this will take approx 10 seconds"
       {
-        AFL_CUSTOM_MUTATOR_LIBRARY=./libexamplemutator.so AFL_CUSTOM_MUTATOR_ONLY=1 ../afl-fuzz -V1 -m ${MEM_LIMIT} -i in -o out -- ./test-custom-mutator >>errors 2>&1
+        AFL_CUSTOM_MUTATOR_LIBRARY=./libexamplemutator.so AFL_CUSTOM_MUTATOR_ONLY=1 ../afl-fuzz -V10 -m ${MEM_LIMIT} -i in -o out -- ./test-custom-mutator >>errors 2>&1
       } >>errors 2>&1
 
       # Check results
@@ -57,9 +57,9 @@ test "1" = "`../afl-fuzz | grep -i 'without python' >/dev/null; echo $?`" && {
       rm -rf out errors core.*
 
       # Run afl-fuzz w/ multiple C mutators
-      $ECHO "$GREY[*] running afl-fuzz with multiple custom C mutators, this will take approx 5 seconds"
+      $ECHO "$GREY[*] running afl-fuzz with multiple custom C mutators, this will take approx 10 seconds"
       {
-        AFL_CUSTOM_MUTATOR_LIBRARY="./libexamplemutator.so;./libexamplemutator2.so" AFL_CUSTOM_MUTATOR_ONLY=1 ../afl-fuzz -V1 -m ${MEM_LIMIT} -i in -o out -- ./test-multiple-mutators >>errors 2>&1
+        AFL_CUSTOM_MUTATOR_LIBRARY="./libexamplemutator.so;./libexamplemutator2.so" AFL_CUSTOM_MUTATOR_ONLY=1 ../afl-fuzz -V10 -m ${MEM_LIMIT} -i in -o out -- ./test-multiple-mutators >>errors 2>&1
       } >>errors 2>&1
 
       test -n "$( ls out/default/crashes/id:000000* 2>/dev/null )" && {  # TODO: update here
@@ -76,11 +76,11 @@ test "1" = "`../afl-fuzz | grep -i 'without python' >/dev/null; echo $?`" && {
       rm -rf out errors core.*
 
       # Run afl-fuzz w/ the Python mutator
-      $ECHO "$GREY[*] running afl-fuzz for the Python mutator, this will take approx 5 seconds"
+      $ECHO "$GREY[*] running afl-fuzz for the Python mutator, this will take approx 10 seconds"
       {
         export PYTHONPATH=${CUSTOM_MUTATOR_PATH}
         export AFL_PYTHON_MODULE=example
-        AFL_CUSTOM_MUTATOR_ONLY=1 ../afl-fuzz -V5 -m ${MEM_LIMIT} -i in -o out -- ./test-custom-mutator >>errors 2>&1
+        AFL_CUSTOM_MUTATOR_ONLY=1 ../afl-fuzz -V10 -m ${MEM_LIMIT} -i in -o out -- ./test-custom-mutator >>errors 2>&1
         unset PYTHONPATH
         unset AFL_PYTHON_MODULE
       } >>errors 2>&1
diff --git a/test/test-dlopen.c b/test/test-dlopen.c
new file mode 100644
index 00000000..d08d9092
--- /dev/null
+++ b/test/test-dlopen.c
@@ -0,0 +1,35 @@
+#include <stdio.h>
+#include <errno.h>
+#include <dlfcn.h>
+#include <stdlib.h>
+
+/* Test driver: dlopen()s the shared object named by the TEST_DLOPEN_TARGET
+   environment variable and calls its "main_exported" symbol with this
+   program's argc/argv.
+
+   Exit status: 1 if TEST_DLOPEN_TARGET is unset, 2 if dlopen() fails, 3 if
+   "main_exported" cannot be resolved, otherwise the callee's return value. */
+
+int main(int argc, char **argv) {
+
+  const char *target = getenv("TEST_DLOPEN_TARGET");
+  if (!target) return 1;
+
+  void *lib = dlopen(target, RTLD_LAZY);
+  if (!lib) {
+
+    /* dlerror() is already the complete message; perror() would tack an
+       unrelated strerror(errno) onto it. */
+    const char *err = dlerror();
+    fprintf(stderr, "%s\n", err ? err : "dlopen() failed");
+    return 2;
+
+  }
+
+  int (*func)(int, char **) = dlsym(lib, "main_exported");
+  if (!func) return 3;
+
+  return func(argc, argv);
+
+}
+
diff --git a/test/test-libextensions.sh b/test/test-libextensions.sh
index 905a4cbc..40a898c8 100755
--- a/test/test-libextensions.sh
+++ b/test/test-libextensions.sh
@@ -38,14 +38,4 @@ test -e ../libdislocator.so && {
 }
 rm -f test-compcov
 
-test -z "$AFL_CC" && {
-  if type gcc >/dev/null; then
-    export AFL_CC=gcc
-  else
-    if type clang >/dev/null; then
-      export AFL_CC=clang
-    fi
-  fi
-}
-
 . ./test-post.sh
diff --git a/test/test-llvm.sh b/test/test-llvm.sh
index 156b8920..3ef36b37 100755
--- a/test/test-llvm.sh
+++ b/test/test-llvm.sh
@@ -43,6 +43,48 @@ test -e ../afl-clang-fast -a -e ../split-switches-pass.so && {
     $ECHO "$RED[!] llvm_mode failed"
     CODE=1
   }
+  ../afl-clang-fast -DTEST_SHARED_OBJECT=1 -z defs -fPIC -shared -o test-instr.so ../test-instr.c > /dev/null 2>&1
+  test -e test-instr.so && {
+    $ECHO "$GREEN[+] llvm_mode shared object with -z defs compilation succeeded"
+    ../afl-clang-fast -o test-dlopen.plain test-dlopen.c -ldl > /dev/null 2>&1
+    test -e test-dlopen.plain && {
+      $ECHO "$GREEN[+] llvm_mode test-dlopen compilation succeeded"
+      echo 0 | TEST_DLOPEN_TARGET=./test-instr.so AFL_QUIET=1 ./test-dlopen.plain > /dev/null 2>&1
+      if [ $? -ne 0 ]; then
+        $ECHO "$RED[!] llvm_mode test-dlopen exits with an error"
+        CODE=1
+      fi
+      echo 0 | TEST_DLOPEN_TARGET=./test-instr.so AFL_QUIET=1 ../afl-showmap -m ${MEM_LIMIT} -o test-dlopen.plain.0 -r -- ./test-dlopen.plain > /dev/null 2>&1
+      TEST_DLOPEN_TARGET=./test-instr.so AFL_QUIET=1 ../afl-showmap -m ${MEM_LIMIT} -o test-dlopen.plain.1 -r -- ./test-dlopen.plain < /dev/null > /dev/null 2>&1
+      test -e test-dlopen.plain.0 -a -e test-dlopen.plain.1 && {
+        diff test-dlopen.plain.0 test-dlopen.plain.1 > /dev/null 2>&1 && {
+          $ECHO "$RED[!] llvm_mode test-dlopen instrumentation should be different on different input but is not"
+          CODE=1
+        } || {
+          $ECHO "$GREEN[+] llvm_mode test-dlopen instrumentation present and working correctly"
+          TUPLES=`echo 0|TEST_DLOPEN_TARGET=./test-instr.so AFL_QUIET=1 ../afl-showmap -m ${MEM_LIMIT} -o /dev/null -- ./test-dlopen.plain 2>&1 | grep Captur | awk '{print$3}'`
+          test "$TUPLES" -gt 3 -a "$TUPLES" -lt 12 && {
+            $ECHO "$GREEN[+] llvm_mode test-dlopen run reported $TUPLES instrumented locations which is fine"
+          } || {
+            $ECHO "$RED[!] llvm_mode test-dlopen instrumentation produces weird numbers: $TUPLES"
+            CODE=1
+          }
+          test "$TUPLES" -lt 3 && SKIP=1
+          true
+        }
+      } || {
+        $ECHO "$RED[!] llvm_mode test-dlopen instrumentation failed"
+        CODE=1
+      }
+    } || {
+      $ECHO "$RED[!] llvm_mode test-dlopen compilation failed"
+      CODE=1
+    }
+    rm -f test-dlopen.plain test-dlopen.plain.0 test-dlopen.plain.1 test-instr.so
+  } || {
+    $ECHO "$RED[!] llvm_mode shared object with -z defs compilation failed"
+    CODE=1
+  }
   test -e test-compcov.harden && test_compcov_binary_functionality ./test-compcov.harden && {
     grep -Eq$GREPAOPTION 'stack_chk_fail|fstack-protector-all|fortified' test-compcov.harden > /dev/null 2>&1 && {
       $ECHO "$GREEN[+] llvm_mode hardened mode succeeded and is working"
@@ -162,9 +204,9 @@ test -e ../afl-clang-fast -a -e ../split-switches-pass.so && {
   test -e test-floatingpoint && {
     mkdir -p in
     echo ZZZZ > in/in
-    $ECHO "$GREY[*] running afl-fuzz with floating point splitting, this will take max. 30 seconds"
+    $ECHO "$GREY[*] running afl-fuzz with floating point splitting, this will take max. 45 seconds"
     {
-      AFL_BENCH_UNTIL_CRASH=1 AFL_NO_UI=1 ../afl-fuzz -s 1 -V30 -m ${MEM_LIMIT} -i in -o out -D -- ./test-floatingpoint >>errors 2>&1
+      AFL_BENCH_UNTIL_CRASH=1 AFL_NO_UI=1 ../afl-fuzz -Z -s 123 -V50 -m ${MEM_LIMIT} -i in -o out -D -- ./test-floatingpoint >>errors 2>&1
     } >>errors 2>&1
     test -n "$( ls out/default/crashes/id:* 2>/dev/null )" && {
       $ECHO "$GREEN[+] llvm_mode laf-intel floatingpoint splitting feature works correctly"
diff --git a/test/test-qemu-mode.sh b/test/test-qemu-mode.sh
index 73b39a43..85578d55 100755
--- a/test/test-qemu-mode.sh
+++ b/test/test-qemu-mode.sh
@@ -3,6 +3,16 @@
 . ./test-pre.sh
 
 $ECHO "$BLUE[*] Testing: qemu_mode"
+test -z "$AFL_CC" && {
+  if type gcc >/dev/null; then
+    export AFL_CC=gcc
+  else
+    if type clang >/dev/null; then
+      export AFL_CC=clang
+    fi
+  fi
+}
+
 test -e ../afl-qemu-trace && {
   cc -pie -fPIE -o test-instr ../test-instr.c
   cc -o test-compcov test-compcov.c
@@ -29,14 +39,7 @@ test -e ../afl-qemu-trace && {
       $ECHO "$GREY[*] running afl-fuzz for qemu_mode AFL_ENTRYPOINT, this will take approx 6 seconds"
       {
         {
-          if file test-instr | grep -q "32-bit"; then
-            # for 32-bit reduce 8 nibbles to the lower 7 nibbles
-	    ADDR_LOWER_PART=`nm test-instr | grep "T main" | awk '{print $1}' | sed 's/^.//'`
-          else
-            # for 64-bit reduce 16 nibbles to the lower 9 nibbles
-	    ADDR_LOWER_PART=`nm test-instr | grep "T main" | awk '{print $1}' | sed 's/^.......//'`
-          fi
-          export AFL_ENTRYPOINT=`expr 0x4${ADDR_LOWER_PART}`
+          export AFL_ENTRYPOINT=`printf 1 | AFL_DEBUG=1 ../afl-qemu-trace ./test-instr 2>&1 >/dev/null | awk '/forkserver/{print $4; exit}'`
           $ECHO AFL_ENTRYPOINT=$AFL_ENTRYPOINT - $(nm test-instr | grep "T main") - $(file ./test-instr)
           ../afl-fuzz -m ${MEM_LIMIT} -V2 -Q -i in -o out -- ./test-instr
           unset AFL_ENTRYPOINT
diff --git a/test/test-unicorn-mode.sh b/test/test-unicorn-mode.sh
index b4c6eb3e..e197e226 100755
--- a/test/test-unicorn-mode.sh
+++ b/test/test-unicorn-mode.sh
@@ -14,7 +14,7 @@ test -d ../unicorn_mode/unicornafl -a -e ../unicorn_mode/unicornafl/samples/shel
       EASY_INSTALL_FOUND=0
       for PYTHON in $PYTHONS ; do
 
-        if $PYTHON -c "help('easy_install');" </dev/null | grep -q module ; then
+        if $PYTHON -c "import setuptools" ; then
 
             EASY_INSTALL_FOUND=1
             PY=$PYTHON
diff --git a/test/travis/bionic/Dockerfile b/test/travis/bionic/Dockerfile
deleted file mode 100644
index 00ab96f9..00000000
--- a/test/travis/bionic/Dockerfile
+++ /dev/null
@@ -1,45 +0,0 @@
-# This is the Dockerfile for testing problems in Travis build
-# configuration #1.
-# This needs not to be rebuild everytime, most of the time it needs just to
-# be build once and then started when debugging issues and execute:
-#   cd /AFLplusplus/
-#   git pull
-#   make distrib
-#
-FROM ubuntu:bionic
-LABEL "about"="travis image 1"
-RUN apt-get update && apt-get -y install \
-    automake \
-    bison \
-    build-essential \
-    clang \
-    flex \
-    git \
-    python3.7 python3.7-dev \
-    python3-setuptools \
-    libtool libtool-bin \
-    libglib2.0-dev \
-    python-setuptools \
-    wget \
-    ca-certificates \
-    libpixman-1-dev \
-    gcc-7 gcc-7-plugin-dev libc++-7-dev \
-    findutils \
-    libcmocka-dev \
-    joe nano vim locate \
-    && rm -rf /var/lib/apt/lists/*
-
-ENV AFL_NO_UI=1
-ENV AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1
-ENV LLVM_CONFIG=llvm-config-6.0
-
-RUN cd / && \
-    git clone https://github.com/AFLplusplus/AFLplusplus && \
-    cd AFLplusplus && \
-    git checkout dev && \
-    cd qemu_mode && wget http://download.qemu-project.org/qemu-3.1.1.tar.xz && \
-    cd ../unicorn_mode && git submodule init && git submodule update || true && \
-    cd /AFLplusplus && ASAN_BUILD=1 make source-only || true
-
-WORKDIR /AFLplusplus
-CMD ["/bin/bash"]
diff --git a/test/travis/focal/Dockerfile b/test/travis/focal/Dockerfile
deleted file mode 100644
index 27d994f2..00000000
--- a/test/travis/focal/Dockerfile
+++ /dev/null
@@ -1,45 +0,0 @@
-# This is the Dockerfile for testing problems in Travis build
-# configuration #1.
-# This needs not to be rebuild everytime, most of the time it needs just to
-# be build once and then started when debugging issues and execute:
-#   cd /AFLplusplus/
-#   git pull
-#   make distrib
-#
-FROM ubuntu:focal
-LABEL "about"="travis image 4"
-ARG DEBIAN_FRONTEND=noninteractive
-RUN apt-get update && apt-get -y install \
-    automake \
-    bison \
-    build-essential \
-    clang \
-    flex \
-    git \
-    python3 python3-dev \
-    python3-setuptools \
-    libtool libtool-bin \
-    libglib2.0-dev \
-    python-setuptools \
-    wget \
-    ca-certificates \
-    libpixman-1-dev \
-    gcc-9 gcc-9-plugin-dev libc++-9-dev \
-    findutils \
-    libcmocka-dev \
-    joe nano vim locate \
-    && rm -rf /var/lib/apt/lists/*
-
-ENV AFL_NO_UI=1
-ENV AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1
-
-RUN cd / && \
-    git clone https://github.com/AFLplusplus/AFLplusplus && \
-    cd AFLplusplus && \
-    git checkout dev && \
-    cd qemu_mode && wget http://download.qemu-project.org/qemu-3.1.1.tar.xz && \
-    cd ../unicorn_mode && git submodule init && git submodule update || true && \
-    cd /AFLplusplus && ASAN_BUILD=1 make source-only || true
-
-WORKDIR /AFLplusplus
-CMD ["/bin/bash"]
diff --git a/test/travis/trusty/Dockerfile b/test/travis/trusty/Dockerfile
deleted file mode 100644
index 0a6f1804..00000000
--- a/test/travis/trusty/Dockerfile
+++ /dev/null
@@ -1,49 +0,0 @@
-# This is the Dockerfile for testing problems in Travis builds
-# configuration #3.
-# This needs not to be rebuild everytime, most of the time it needs just to
-# be build once and then started when debugging issues and execute:
-#   cd /AFLplusplus/
-#   git pull
-#   make distrib
-#
-FROM ubuntu:trusty
-LABEL "about"="travis image 3"
-RUN apt-get update && apt-get -y install \
-    automake \
-    bison \
-    build-essential \
-    clang \
-    flex \
-    git \
-    python2.7 python2.7-dev \
-    python3-setuptools \
-    libtool \
-    libglib2.0-dev \
-    python-setuptools \
-    wget \
-    ca-certificates \
-    libpixman-1-dev \
-    gcc-4.8 gcc-4.8-plugin-dev \
-    libc++-dev \
-    findutils \
-    libcmocka-dev \
-    joe nano vim locate \
-    && rm -rf /var/lib/apt/lists/*
-
-ENV TERM linux
-ENV DEBIAN_FRONTEND noninteractive
-ENV LLVM_CONFIG=llvm-config-3.4
-ENV AFL_NO_UI=1
-ENV AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1
-
-RUN cd / && \
-    git clone https://github.com/AFLplusplus/AFLplusplus && \
-    cd AFLplusplus && \
-    git checkout dev && \
-    cd qemu_mode && wget http://download.qemu-project.org/qemu-3.1.1.tar.xz && \
-    cd ../unicorn_mode && git submodule init && git submodule update || true && \
-    cd /AFLplusplus && ASAN_BUILD=1 make source-only || true
-
-WORKDIR /AFLplusplus
-CMD ["/bin/bash"]
-
diff --git a/test/travis/xenial/Dockerfile b/test/travis/xenial/Dockerfile
deleted file mode 100644
index 6aa4b1d1..00000000
--- a/test/travis/xenial/Dockerfile
+++ /dev/null
@@ -1,46 +0,0 @@
-# This is the Dockerfile for testing problems in Travis builds
-# configuration #2.
-# This needs not to be rebuild everytime, most of the time it needs just to
-# be build once and then started when debugging issues and execute:
-#   cd /AFLplusplus/
-#   git pull
-#   make distrib
-#
-FROM ubuntu:xenial
-LABEL "about"="travis image 2"
-RUN apt-get update && apt-get -y install \
-    automake \
-    bison \
-    build-essential \
-    clang-6.0 \
-    flex \
-    git \
-    python3 python3-dev \
-    python3-setuptools \
-    libtool libtool-bin \
-    libglib2.0-dev \
-    python-setuptools \
-    wget \
-    ca-certificates \
-    libpixman-1-dev \
-    gcc-5 gcc-5-plugin-dev \
-    libc++-dev \
-    findutils \
-    libcmocka-dev \
-    joe nano vim locate \
-    && rm -rf /var/lib/apt/lists/*
-
-ENV LLVM_CONFIG=llvm-config-6.0
-ENV AFL_NO_UI=1
-ENV AFL_I_DONT_CARE_ABOUT_MISSING_CRASHES=1
-
-RUN cd / && \
-    git clone https://github.com/AFLplusplus/AFLplusplus && \
-    cd AFLplusplus && \
-    git checkout dev && \
-    cd qemu_mode && wget http://download.qemu-project.org/qemu-3.1.1.tar.xz && \
-    cd ../unicorn_mode && git submodule init && git submodule update || true && \
-    cd /AFLplusplus && ASAN_BUILD=1 make source-only || true
-
-WORKDIR /AFLplusplus
-CMD ["/bin/bash"]
diff --git a/unicorn_mode/README.md b/unicorn_mode/README.md
index f6bd4d12..b3df44fa 100644
--- a/unicorn_mode/README.md
+++ b/unicorn_mode/README.md
@@ -8,19 +8,19 @@ The CompareCoverage and NeverZero counters features are by Andrea Fioraldi <andr
 
 ## 1) Introduction
 
-The code in ./unicorn_mode allows you to build a standalone feature that
-leverages the Unicorn Engine and allows callers to obtain instrumentation 
+The code in ./unicorn_mode allows you to build the [Unicorn Engine](https://github.com/unicorn-engine/unicorn) with afl support.
+This means you can run anything that can be emulated in unicorn and obtain instrumentation
 output for black-box, closed-source binary code snippets. This mechanism 
 can be then used by afl-fuzz to stress-test targets that couldn't be built 
-with afl-gcc or used in QEMU mode, or with other extensions such as 
-TriforceAFL.
+with afl-cc or used in QEMU mode.
 
 There is a significant performance penalty compared to native AFL,
 but at least we're able to use AFL++ on these binaries, right?
 
 ## 2) How to use
 
-Requirements: you need an installed python environment.
+First, you will need a working harness for your target in unicorn, using Python, C, or Rust.
+For pointers on more advanced emulation, take a look at [BaseSAFE](https://github.com/fgsect/BaseSAFE) and [Qiling](https://github.com/qilingframework/qiling).
 
 ### Building AFL++'s Unicorn Mode
 
@@ -34,23 +34,23 @@ cd unicorn_mode
 ```
 
 NOTE: This script checks out a Unicorn Engine fork as submodule that has been tested 
-and is stable-ish, based on the unicorn engine master. 
+and is stable-ish, based on the unicorn engine `next` branch. 
 
 Building Unicorn will take a little bit (~5-10 minutes). Once it completes 
 it automatically compiles a sample application and verifies that it works.
 
 ### Fuzzing with Unicorn Mode
 
-To really use unicorn-mode effectively you need to prepare the following:
+To use unicorn-mode effectively you need to prepare the following:
 
 	* Relevant binary code to be fuzzed
 	* Knowledge of the memory map and good starting state
 	* Folder containing sample inputs to start fuzzing with
 		+ Same ideas as any other AFL inputs
-		+ Quality/speed of results will depend greatly on quality of starting 
+		+ Quality/speed of results will depend greatly on the quality of starting 
 		  samples
 		+ See AFL's guidance on how to create a sample corpus
-	* Unicornafl-based test harness which:
+	* Unicornafl-based test harness in Rust, C, or Python, which:
 		+ Adds memory map regions
 		+ Loads binary code into memory		
 		+ Calls uc.afl_fuzz() / uc.afl_start_forkserver
@@ -59,13 +59,13 @@ To really use unicorn-mode effectively you need to prepare the following:
 			  the test harness
 			+ Presumably the data to be fuzzed is at a fixed buffer address
 			+ If input constraints (size, invalid bytes, etc.) are known they 
-			  should be checked after the file is loaded. If a constraint 
-			  fails, just exit the test harness. AFL will treat the input as 
+			  should be checked in the place_input handler. If a constraint 
+			  fails, just return false from the handler. AFL will treat the input as 
 			  'uninteresting' and move on.
 		+ Sets up registers and memory state for beginning of test
-		+ Emulates the interested code from beginning to end
+		+ Emulates the interesting code from beginning to end
 		+ If a crash is detected, the test harness must 'crash' by 
-		  throwing a signal (SIGSEGV, SIGKILL, SIGABORT, etc.)
+		  throwing a signal (SIGSEGV, SIGKILL, SIGABRT, etc.), or indicate a crash in the crash validation callback.
 
 Once you have all those things ready to go you just need to run afl-fuzz in
 'unicorn-mode' by passing in the '-U' flag:
@@ -79,11 +79,12 @@ AFL's main documentation for more info about how to use afl-fuzz effectively.
 
 For a much clearer vision of what all of this looks like, please refer to the
 sample provided in the 'unicorn_mode/samples' directory. There is also a blog
-post that goes over the basics at:
+post that uses slightly older concepts, but describes the general ideas, at:
 
 [https://medium.com/@njvoss299/afl-unicorn-fuzzing-arbitrary-binary-code-563ca28936bf](https://medium.com/@njvoss299/afl-unicorn-fuzzing-arbitrary-binary-code-563ca28936bf)
 
-The 'helper_scripts' directory also contains several helper scripts that allow you 
+
+The ['helper_scripts'](./helper_scripts) directory also contains several helper scripts that allow you 
 to dump context from a running process, load it, and hook heap allocations. For details
 on how to use this check out the follow-up blog post to the one linked above.
 
@@ -92,10 +93,10 @@ A example use of AFL-Unicorn mode is discussed in the paper Unicorefuzz:
 
 ## 3) Options
 
-As for the QEMU-based instrumentation, the afl-unicorn twist of afl++
-comes with a sub-instruction based instrumentation similar in purpose to laf-intel.
+As for the QEMU-based instrumentation, unicornafl comes with a sub-instruction based instrumentation similar in purpose to laf-intel.
 
 The options that enable Unicorn CompareCoverage are the same used for QEMU.
+This will split up each multi-byte compare to give feedback for each correct byte.
 AFL_COMPCOV_LEVEL=1 is to instrument comparisons with only immediate values.
 
 AFL_COMPCOV_LEVEL=2 instruments all comparison instructions.
@@ -119,6 +120,20 @@ unicornafl.monkeypatch()
 
 This will replace all unicorn imports with unicornafl inputs.
 
-Refer to the [samples/arm_example/arm_tester.c](samples/arm_example/arm_tester.c) for an example
-of how to do this properly! If you don't get this right, AFL will not 
-load any mutated inputs and your fuzzing will be useless!
+## 5) Examples
+
+Apart from reading the documentation in `afl.c` and the python bindings of unicornafl, the best documentation is the set of examples in [samples/](./samples).
+The following examples exist at the time of writing:
+
+- c: A simple example of how to use the C bindings
+- compcov_x64: A python example that uses compcov to traverse hard-to-reach blocks
+- persistent: A c example using persistent mode for maximum speed, and resetting the target state between each iteration
+- simple: A simple python example
+- speedtest/c: The c harness for an example target, used to compare c, python, and rust bindings and fix speed issues
+- speedtest/python: Fuzzing the same target in python
+- speedtest/rust: Fuzzing the same target using a rust harness
+
+Usually, the place to look at is the `harness` in each folder. The source code in each harness is pretty well documented.
+Most harnesses also have the `afl-fuzz` commandline, or even offer a `make fuzz` Makefile target.
+Targets in these folders, if x86, can usually be made using `make target` in each folder or get shipped pre-built (plus their source).
+Especially take a look at the [speedtest documentation](./samples/speedtest/README.md) to see how the languages compare.
\ No newline at end of file
diff --git a/unicorn_mode/UNICORNAFL_VERSION b/unicorn_mode/UNICORNAFL_VERSION
index 4d8a03b2..d9ae5590 100644
--- a/unicorn_mode/UNICORNAFL_VERSION
+++ b/unicorn_mode/UNICORNAFL_VERSION
@@ -1 +1 @@
-80d31ef3
+fb2fc9f2
diff --git a/unicorn_mode/build_unicorn_support.sh b/unicorn_mode/build_unicorn_support.sh
index f1d028f8..6c376f8d 100755
--- a/unicorn_mode/build_unicorn_support.sh
+++ b/unicorn_mode/build_unicorn_support.sh
@@ -117,19 +117,19 @@ done
 
 # some python version should be available now
 PYTHONS="`command -v python3` `command -v python` `command -v python2`"
-EASY_INSTALL_FOUND=0
+SETUPTOOLS_FOUND=0
 for PYTHON in $PYTHONS ; do
 
   if $PYTHON -c "import setuptools" ; then
 
-    EASY_INSTALL_FOUND=1
+    SETUPTOOLS_FOUND=1
     PYTHONBIN=$PYTHON
     break
 
   fi
 
 done
-if [ "0" = $EASY_INSTALL_FOUND ]; then
+if [ "0" = $SETUPTOOLS_FOUND ]; then
 
   echo "[-] Error: Python setup-tools not found. Run 'sudo apt-get install python-setuptools', or install python3-setuptools, or run '$PYTHONBIN -m ensurepip', or create a virtualenv, or ..."
   PREREQ_NOTFOUND=1
diff --git a/unicorn_mode/helper_scripts/ida_context_loader.py b/unicorn_mode/helper_scripts/ida_context_loader.py
new file mode 100644
index 00000000..31d47a90
--- /dev/null
+++ b/unicorn_mode/helper_scripts/ida_context_loader.py
@@ -0,0 +1,197 @@
+# Copyright (c) 2021 Brandon Miller (zznop)
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+
+"""IDA script for loading state that was dumped from a running process using unicorn AFL's GDB
+plugin (unicorn_dumper_gdb.py). The dumper script can be found in the AFL++ repository at:
+https://github.com/AFLplusplus/AFLplusplus/blob/stable/unicorn_mode/helper_scripts/unicorn_dumper_gdb.py
+"""
+
+import json
+from pathlib import Path, PurePath
+import zlib
+import idaapi
+import ida_bytes
+import ida_kernwin
+import ida_nalt
+import ida_segment
+
+
+class ContextLoaderError(Exception):
+    """Base "catch all" exception for this script
+    """
+
+
+class ArchNotSupportedError(ContextLoaderError):
+    """Exception raised if the input file CPU architecture isn't supported fully
+    """
+
+
+def parse_mapping_index(filepath: str):
+    """Open and unmarshal the _index.json file
+
+    :param filepath: Path to the JSON file
+    :return: Dict representing index file contents
+    """
+
+    if filepath is None:
+        raise ContextLoaderError('_index.json file was not selected')
+
+    try:
+        with open(filepath, 'rb') as _file:
+            return json.load(_file)
+    except Exception as ex:
+        raise ContextLoaderError('Failed to parse json file {}'.format(filepath)) from ex
+
+def get_input_name():
+    """Get the name of the input file
+
+    :return: Name of the input file
+    """
+
+    input_filepath = ida_nalt.get_input_file_path()
+    return Path(input_filepath).name
+
+def write_segment_bytes(start: int, filepath: str):
+    """Read data from context file and write it to the IDA segment
+
+    :param start: Start address
+    :param filepath: Path to context file
+    """
+
+    with open(filepath, 'rb') as _file:
+        data = _file.read()
+
+    decompressed_data = zlib.decompress(data)
+    ida_bytes.put_bytes(start, decompressed_data)
+
+def create_segment(context_dir: str, segment: dict, is_be: bool):
+    """Create segment in IDA and map in the data from the file
+
+    :param context_dir: Parent directory of the context files
+    :param segment: Segment information from _index.json
+    :param is_be: True if processor is big endian, otherwise False
+    """
+
+    input_name = get_input_name()
+    if Path(segment['name']).name != input_name:
+        ida_seg = idaapi.segment_t()
+        ida_seg.start_ea = segment['start']
+        ida_seg.end_ea = segment['end']
+        ida_seg.bitness = 1 if is_be else 0
+        if segment['permissions']['r']:
+            ida_seg.perm |= ida_segment.SEGPERM_READ
+        if segment['permissions']['w']:
+            ida_seg.perm |= ida_segment.SEGPERM_WRITE
+        if segment['permissions']['x']:
+            ida_seg.perm |= ida_segment.SEGPERM_EXEC
+            idaapi.add_segm_ex(ida_seg, Path(segment['name']).name, 'CODE', idaapi.ADDSEG_OR_DIE)
+        else:
+            idaapi.add_segm_ex(ida_seg, Path(segment['name']).name, 'DATA', idaapi.ADDSEG_OR_DIE)
+
+    if segment['content_file']:
+        write_segment_bytes(segment['start'], PurePath(context_dir, segment['content_file']))
+
+def create_segments(index: dict, context_dir: str):
+    """Iterate segments in index JSON, create the segment in IDA, and map in the data from the file
+
+    :param index: _index.json JSON data
+    :param context_dir: Parent directory of the context files
+    """
+
+    info = idaapi.get_inf_structure()
+    is_be = info.is_be()
+    for segment in index['segments']:
+        create_segment(context_dir, segment, is_be)
+
+def rebase_program(index: dict):
+    """Rebase the program to the offset specified in the context _index.json
+
+    :param index: _index.json JSON data
+    """
+
+    input_name = get_input_name()
+    new_base = None
+    for segment in index['segments']:
+        if not segment['name']:
+            continue
+
+        segment_name = Path(segment['name']).name
+        if input_name == segment_name:
+            new_base = segment['start']
+            break
+
+    if not new_base:
+        raise ContextLoaderError('Input file is not in _index.json')
+
+    current_base = idaapi.get_imagebase()
+    ida_segment.rebase_program(new_base-current_base, 8)
+
+def get_pc_by_arch(index: dict) -> int:
+    """Queries the input file CPU architecture and attempts to lookup the address of the program
+    counter in the _index.json by register name
+
+    :param index: _index.json JSON data
+    :return: Program counter value or None
+    """
+
+    progctr = None
+    info = idaapi.get_inf_structure()
+    if info.procname == 'metapc':
+        if info.is_64bit():
+            progctr = index['regs']['rax']
+        elif info.is_32bit():
+            progctr = index['regs']['eax']
+    return progctr
+
+def write_reg_info(index: dict):
+    """Write register info as line comment at instruction pointed to by the program counter and
+    change focus to that location
+
+    :param index: _index.json JSON data
+    """
+
+    cmt = ''
+    for reg, val in index['regs'].items():
+        cmt += f"{reg.ljust(6)} : {hex(val)}\n"
+
+    progctr = get_pc_by_arch(index)
+    if progctr is None:
+        raise ArchNotSupportedError(
+            'Architecture not fully supported, skipping register status comment')
+    ida_bytes.set_cmt(progctr, cmt, 0)
+    ida_kernwin.jumpto(progctr)
+
+def main(filepath):
+    """Main - parse _index.json input and map context files into the database
+
+    :param filepath: Path to the _index.json file
+    """
+
+    try:
+        index = parse_mapping_index(filepath)
+        context_dir = Path(filepath).parent
+        rebase_program(index)
+        create_segments(index, context_dir)
+        write_reg_info(index)
+    except ContextLoaderError as ex:
+        print(ex)
+
+if __name__ == '__main__':
+    main(ida_kernwin.ask_file(1, '*.json', 'Import file name'))
diff --git a/unicorn_mode/helper_scripts/unicorn_dumper_gdb.py b/unicorn_mode/helper_scripts/unicorn_dumper_gdb.py
index 8c8f9641..1ac4c9f3 100644
--- a/unicorn_mode/helper_scripts/unicorn_dumper_gdb.py
+++ b/unicorn_mode/helper_scripts/unicorn_dumper_gdb.py
@@ -45,30 +45,31 @@ MAX_SEG_SIZE = 128 * 1024 * 1024
 INDEX_FILE_NAME = "_index.json"
 
 
-#----------------------
-#---- Helper Functions
+# ----------------------
+# ---- Helper Functions
+
 
 def map_arch():
-    arch = get_arch() # from GEF
-    if 'x86_64' in arch or 'x86-64' in arch:
+    arch = get_arch()  # from GEF
+    if "x86_64" in arch or "x86-64" in arch:
         return "x64"
-    elif 'x86' in arch or 'i386' in arch:
+    elif "x86" in arch or "i386" in arch:
         return "x86"
-    elif 'aarch64' in arch or 'arm64' in arch:
+    elif "aarch64" in arch or "arm64" in arch:
         return "arm64le"
-    elif 'aarch64_be' in arch:
+    elif "aarch64_be" in arch:
         return "arm64be"
-    elif 'armeb' in arch:
+    elif "armeb" in arch:
         # check for THUMB mode
-        cpsr = get_register('$cpsr')
-        if (cpsr & (1 << 5)):
+        cpsr = get_register("$cpsr")
+        if cpsr & (1 << 5):
             return "armbethumb"
         else:
             return "armbe"
-    elif 'arm' in arch:
+    elif "arm" in arch:
         # check for THUMB mode
-        cpsr = get_register('$cpsr')
-        if (cpsr & (1 << 5)):
+        cpsr = get_register("$cpsr")
+        if cpsr & (1 << 5):
             return "armlethumb"
         else:
             return "armle"
@@ -76,8 +77,9 @@ def map_arch():
         return ""
 
 
-#-----------------------
-#---- Dumping functions
+# -----------------------
+# ---- Dumping functions
+
 
 def dump_arch_info():
     arch_info = {}
@@ -89,7 +91,7 @@ def dump_regs():
     reg_state = {}
     for reg in current_arch.all_registers:
         reg_val = get_register(reg)
-        reg_state[reg.strip().strip('$')] = reg_val
+        reg_state[reg.strip().strip("$")] = reg_val
 
     return reg_state
 
@@ -108,47 +110,76 @@ def dump_process_memory(output_dir):
         if entry.page_start == entry.page_end:
             continue
 
-        seg_info = {'start': entry.page_start, 'end': entry.page_end, 'name': entry.path, 'permissions': {
-            "r": entry.is_readable() > 0,
-            "w": entry.is_writable() > 0,
-            "x": entry.is_executable() > 0
-        }, 'content_file': ''}
+        seg_info = {
+            "start": entry.page_start,
+            "end": entry.page_end,
+            "name": entry.path,
+            "permissions": {
+                "r": entry.is_readable() > 0,
+                "w": entry.is_writable() > 0,
+                "x": entry.is_executable() > 0,
+            },
+            "content_file": "",
+        }
 
         # "(deleted)" may or may not be valid, but don't push it.
-        if entry.is_readable() and not '(deleted)' in entry.path:
+        if entry.is_readable() and not "(deleted)" in entry.path:
             try:
                 # Compress and dump the content to a file
                 seg_content = read_memory(entry.page_start, entry.size)
-                if(seg_content == None):
-                    print("Segment empty: @0x{0:016x} (size:UNKNOWN) {1}".format(entry.page_start, entry.path))
+                if seg_content == None:
+                    print(
+                        "Segment empty: @0x{0:016x} (size:UNKNOWN) {1}".format(
+                            entry.page_start, entry.path
+                        )
+                    )
                 else:
-                    print("Dumping segment @0x{0:016x} (size:0x{1:x}): {2} [{3}]".format(entry.page_start, len(seg_content), entry.path, repr(seg_info['permissions'])))
+                    print(
+                        "Dumping segment @0x{0:016x} (size:0x{1:x}): {2} [{3}]".format(
+                            entry.page_start,
+                            len(seg_content),
+                            entry.path,
+                            repr(seg_info["permissions"]),
+                        )
+                    )
                     compressed_seg_content = zlib.compress(seg_content)
                     md5_sum = hashlib.md5(compressed_seg_content).hexdigest() + ".bin"
                     seg_info["content_file"] = md5_sum
 
                     # Write the compressed contents to disk
-                    out_file = open(os.path.join(output_dir, md5_sum), 'wb')
+                    out_file = open(os.path.join(output_dir, md5_sum), "wb")
                     out_file.write(compressed_seg_content)
                     out_file.close()
 
             except:
-                print("Exception reading segment ({}): {}".format(entry.path, sys.exc_info()[0]))
+                print(
+                    "Exception reading segment ({}): {}".format(
+                        entry.path, sys.exc_info()[0]
+                    )
+                )
         else:
-            print("Skipping segment {0}@0x{1:016x}".format(entry.path, entry.page_start))
+            print(
+                "Skipping segment {0}@0x{1:016x}".format(entry.path, entry.page_start)
+            )
 
         # Add the segment to the list
         final_segment_list.append(seg_info)
 
-
     return final_segment_list
 
-#---------------------------------------------
-#---- ARM Extention (dump floating point regs)
+
+# ---------------------------------------------
+# ---- ARM Extension (dump floating point regs)
+
 
 def dump_float(rge=32):
     reg_convert = ""
-    if map_arch() == "armbe" or map_arch() == "armle" or map_arch() == "armbethumb" or map_arch() == "armbethumb":
+    if (
+        map_arch() == "armbe"
+        or map_arch() == "armle"
+        or map_arch() == "armbethumb"
+        or map_arch() == "armbethumb"
+    ):
         reg_state = {}
         for reg_num in range(32):
             value = gdb.selected_frame().read_register("d" + str(reg_num))
@@ -158,8 +189,10 @@ def dump_float(rge=32):
 
         return reg_state
 
-#----------
-#---- Main
+
+# ----------
+# ---- Main
+
 
 def main():
     print("----- Unicorn Context Dumper -----")
@@ -175,7 +208,9 @@ def main():
     try:
 
         # Create the output directory
-        timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d_%H%M%S')
+        timestamp = datetime.datetime.fromtimestamp(time.time()).strftime(
+            "%Y%m%d_%H%M%S"
+        )
         output_path = "UnicornContext_" + timestamp
         if not os.path.exists(output_path):
             os.makedirs(output_path)
@@ -190,7 +225,7 @@ def main():
         }
 
         # Write the index file
-        index_file = open(os.path.join(output_path, INDEX_FILE_NAME), 'w')
+        index_file = open(os.path.join(output_path, INDEX_FILE_NAME), "w")
         index_file.write(json.dumps(context, indent=4))
         index_file.close()
         print("Done.")
@@ -198,5 +233,6 @@ def main():
     except Exception as e:
         print("!!! ERROR:\n\t{}".format(repr(e)))
 
+
 if __name__ == "__main__":
     main()
diff --git a/unicorn_mode/helper_scripts/unicorn_dumper_ida.py b/unicorn_mode/helper_scripts/unicorn_dumper_ida.py
index 3f955a5c..fa29fb90 100644
--- a/unicorn_mode/helper_scripts/unicorn_dumper_ida.py
+++ b/unicorn_mode/helper_scripts/unicorn_dumper_ida.py
@@ -31,8 +31,9 @@ MAX_SEG_SIZE = 128 * 1024 * 1024
 # Name of the index file
 INDEX_FILE_NAME = "_index.json"
 
-#----------------------
-#---- Helper Functions
+# ----------------------
+# ---- Helper Functions
+
 
 def get_arch():
     if ph.id == PLFM_386 and ph.flag & PR_USE64:
@@ -52,6 +53,7 @@ def get_arch():
     else:
         return ""
 
+
 def get_register_list(arch):
     if arch == "arm64le" or arch == "arm64be":
         arch = "arm64"
@@ -59,84 +61,174 @@ def get_register_list(arch):
         arch = "arm"
 
     registers = {
-        "x64" : [
-            "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "rbp", "rsp",
-            "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
-            "rip", "rsp", "efl",
-            "cs", "ds", "es", "fs", "gs", "ss",
+        "x64": [
+            "rax",
+            "rbx",
+            "rcx",
+            "rdx",
+            "rsi",
+            "rdi",
+            "rbp",
+            "rsp",
+            "r8",
+            "r9",
+            "r10",
+            "r11",
+            "r12",
+            "r13",
+            "r14",
+            "r15",
+            "rip",
+            "rsp",
+            "efl",
+            "cs",
+            "ds",
+            "es",
+            "fs",
+            "gs",
+            "ss",
+        ],
+        "x86": [
+            "eax",
+            "ebx",
+            "ecx",
+            "edx",
+            "esi",
+            "edi",
+            "ebp",
+            "esp",
+            "eip",
+            "esp",
+            "efl",
+            "cs",
+            "ds",
+            "es",
+            "fs",
+            "gs",
+            "ss",
         ],
-        "x86" : [
-            "eax", "ebx", "ecx", "edx", "esi", "edi", "ebp", "esp",
-            "eip", "esp", "efl", 
-            "cs", "ds", "es", "fs", "gs", "ss",
-        ],        
-        "arm" : [
-            "R0", "R1", "R2",  "R3",  "R4",  "R5", "R6", "R7",  
-            "R8", "R9", "R10", "R11", "R12", "PC", "SP", "LR",  
+        "arm": [
+            "R0",
+            "R1",
+            "R2",
+            "R3",
+            "R4",
+            "R5",
+            "R6",
+            "R7",
+            "R8",
+            "R9",
+            "R10",
+            "R11",
+            "R12",
+            "PC",
+            "SP",
+            "LR",
             "PSR",
         ],
-        "arm64" : [
-            "X0", "X1", "X2", "X3", "X4", "X5", "X6", "X7",  
-            "X8", "X9", "X10", "X11", "X12", "X13", "X14", 
-            "X15", "X16", "X17", "X18", "X19", "X20", "X21", 
-            "X22", "X23", "X24", "X25", "X26", "X27", "X28", 
-            "PC", "SP", "FP", "LR", "CPSR"
+        "arm64": [
+            "X0",
+            "X1",
+            "X2",
+            "X3",
+            "X4",
+            "X5",
+            "X6",
+            "X7",
+            "X8",
+            "X9",
+            "X10",
+            "X11",
+            "X12",
+            "X13",
+            "X14",
+            "X15",
+            "X16",
+            "X17",
+            "X18",
+            "X19",
+            "X20",
+            "X21",
+            "X22",
+            "X23",
+            "X24",
+            "X25",
+            "X26",
+            "X27",
+            "X28",
+            "PC",
+            "SP",
+            "FP",
+            "LR",
+            "CPSR"
             #    "NZCV",
-        ]
+        ],
     }
-    return registers[arch]  
+    return registers[arch]
+
+
+# -----------------------
+# ---- Dumping functions
 
-#-----------------------
-#---- Dumping functions
 
 def dump_arch_info():
     arch_info = {}
     arch_info["arch"] = get_arch()
     return arch_info
 
+
 def dump_regs():
     reg_state = {}
     for reg in get_register_list(get_arch()):
         reg_state[reg] = GetRegValue(reg)
     return reg_state
 
+
 def dump_process_memory(output_dir):
     # Segment information dictionary
     segment_list = []
-    
+
     # Loop over the segments, fill in the info dictionary
     for seg_ea in Segments():
         seg_start = SegStart(seg_ea)
         seg_end = SegEnd(seg_ea)
         seg_size = seg_end - seg_start
-		
+
         seg_info = {}
-        seg_info["name"]  = SegName(seg_ea)
+        seg_info["name"] = SegName(seg_ea)
         seg_info["start"] = seg_start
-        seg_info["end"]   = seg_end
-        
+        seg_info["end"] = seg_end
+
         perms = getseg(seg_ea).perm
         seg_info["permissions"] = {
-            "r": False if (perms & SEGPERM_READ)  == 0 else True,
+            "r": False if (perms & SEGPERM_READ) == 0 else True,
             "w": False if (perms & SEGPERM_WRITE) == 0 else True,
-            "x": False if (perms & SEGPERM_EXEC)  == 0 else True,
+            "x": False if (perms & SEGPERM_EXEC) == 0 else True,
         }
 
         if (perms & SEGPERM_READ) and seg_size <= MAX_SEG_SIZE and isLoaded(seg_start):
             try:
                 # Compress and dump the content to a file
                 seg_content = get_many_bytes(seg_start, seg_end - seg_start)
-                if(seg_content == None):
-                    print("Segment empty: {0}@0x{1:016x} (size:UNKNOWN)".format(SegName(seg_ea), seg_ea))
+                if seg_content == None:
+                    print(
+                        "Segment empty: {0}@0x{1:016x} (size:UNKNOWN)".format(
+                            SegName(seg_ea), seg_ea
+                        )
+                    )
                     seg_info["content_file"] = ""
                 else:
-                    print("Dumping segment {0}@0x{1:016x} (size:{2})".format(SegName(seg_ea), seg_ea, len(seg_content)))
+                    print(
+                        "Dumping segment {0}@0x{1:016x} (size:{2})".format(
+                            SegName(seg_ea), seg_ea, len(seg_content)
+                        )
+                    )
                     compressed_seg_content = zlib.compress(seg_content)
                     md5_sum = hashlib.md5(compressed_seg_content).hexdigest() + ".bin"
                     seg_info["content_file"] = md5_sum
-                    
+
                     # Write the compressed contents to disk
-                    out_file = open(os.path.join(output_dir, md5_sum), 'wb')
+                    out_file = open(os.path.join(output_dir, md5_sum), "wb")
                     out_file.write(compressed_seg_content)
                     out_file.close()
             except:
@@ -145,12 +237,13 @@ def dump_process_memory(output_dir):
         else:
             print("Skipping segment {0}@0x{1:016x}".format(SegName(seg_ea), seg_ea))
             seg_info["content_file"] = ""
-            
+
         # Add the segment to the list
-        segment_list.append(seg_info)     
-   
+        segment_list.append(seg_info)
+
     return segment_list
 
+
 """
     TODO: FINISH IMPORT DUMPING
 def import_callback(ea, name, ord):
@@ -169,41 +262,47 @@ def dump_imports():
     
     return import_dict
 """
- 
-#----------
-#---- Main    
-    
+
+# ----------
+# ---- Main
+
+
 def main():
 
     try:
         print("----- Unicorn Context Dumper -----")
         print("You must be actively debugging before running this!")
-        print("If it fails, double check that you are actively debugging before running.")
+        print(
+            "If it fails, double check that you are actively debugging before running."
+        )
 
         # Create the output directory
-        timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d_%H%M%S')
+        timestamp = datetime.datetime.fromtimestamp(time.time()).strftime(
+            "%Y%m%d_%H%M%S"
+        )
         output_path = os.path.dirname(os.path.abspath(GetIdbPath()))
         output_path = os.path.join(output_path, "UnicornContext_" + timestamp)
         if not os.path.exists(output_path):
             os.makedirs(output_path)
         print("Process context will be output to {}".format(output_path))
-            
+
         # Get the context
         context = {
             "arch": dump_arch_info(),
-            "regs": dump_regs(), 
+            "regs": dump_regs(),
             "segments": dump_process_memory(output_path),
-            #"imports": dump_imports(),
+            # "imports": dump_imports(),
         }
 
         # Write the index file
-        index_file = open(os.path.join(output_path, INDEX_FILE_NAME), 'w')
+        index_file = open(os.path.join(output_path, INDEX_FILE_NAME), "w")
         index_file.write(json.dumps(context, indent=4))
-        index_file.close()    
+        index_file.close()
         print("Done.")
-        
+
     except Exception, e:
         print("!!! ERROR:\n\t{}".format(str(e)))
-        
+
+
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/unicorn_mode/helper_scripts/unicorn_dumper_lldb.py b/unicorn_mode/helper_scripts/unicorn_dumper_lldb.py
index 3c019d77..179d062a 100644
--- a/unicorn_mode/helper_scripts/unicorn_dumper_lldb.py
+++ b/unicorn_mode/helper_scripts/unicorn_dumper_lldb.py
@@ -50,10 +50,11 @@ UNICORN_PAGE_SIZE = 0x1000
 
 # Alignment functions to align all memory segments to Unicorn page boundaries (4KB pages only)
 ALIGN_PAGE_DOWN = lambda x: x & ~(UNICORN_PAGE_SIZE - 1)
-ALIGN_PAGE_UP   = lambda x: (x + UNICORN_PAGE_SIZE - 1) & ~(UNICORN_PAGE_SIZE-1)
+ALIGN_PAGE_UP = lambda x: (x + UNICORN_PAGE_SIZE - 1) & ~(UNICORN_PAGE_SIZE - 1)
+
+# ----------------------
+# ---- Helper Functions
 
-#----------------------
-#---- Helper Functions
 
 def overlap_alignments(segments, memory):
     final_list = []
@@ -61,33 +62,40 @@ def overlap_alignments(segments, memory):
     curr_end_addr = 0
     curr_node = None
     current_segment = None
-    sorted_segments = sorted(segments, key=lambda k: (k['start'], k['end']))
+    sorted_segments = sorted(segments, key=lambda k: (k["start"], k["end"]))
     if curr_seg_idx < len(sorted_segments):
         current_segment = sorted_segments[curr_seg_idx]
-    for mem in sorted(memory, key=lambda k: (k['start'], -k['end'])):
+    for mem in sorted(memory, key=lambda k: (k["start"], -k["end"])):
         if curr_node is None:
-            if current_segment is not None and current_segment['start'] == mem['start']:
+            if current_segment is not None and current_segment["start"] == mem["start"]:
                 curr_node = deepcopy(current_segment)
-                curr_node['permissions'] = mem['permissions']
+                curr_node["permissions"] = mem["permissions"]
             else:
                 curr_node = deepcopy(mem)
 
-            curr_end_addr = curr_node['end']
-
-        while curr_end_addr <= mem['end']:
-            if curr_node['end'] == mem['end']:
-                if current_segment is not None and current_segment['start'] > curr_node['start'] and current_segment['start'] < curr_node['end']:
-                    curr_node['end'] = current_segment['start']
-                    if(curr_node['end'] > curr_node['start']):
+            curr_end_addr = curr_node["end"]
+
+        while curr_end_addr <= mem["end"]:
+            if curr_node["end"] == mem["end"]:
+                if (
+                    current_segment is not None
+                    and current_segment["start"] > curr_node["start"]
+                    and current_segment["start"] < curr_node["end"]
+                ):
+                    curr_node["end"] = current_segment["start"]
+                    if curr_node["end"] > curr_node["start"]:
                         final_list.append(curr_node)
                     curr_node = deepcopy(current_segment)
-                    curr_node['permissions'] = mem['permissions']
-                    curr_end_addr = curr_node['end']
+                    curr_node["permissions"] = mem["permissions"]
+                    curr_end_addr = curr_node["end"]
                 else:
-                    if(curr_node['end'] > curr_node['start']):
+                    if curr_node["end"] > curr_node["start"]:
                         final_list.append(curr_node)
                     # if curr_node is a segment
-                    if current_segment is not None and current_segment['end'] == mem['end']:
+                    if (
+                        current_segment is not None
+                        and current_segment["end"] == mem["end"]
+                    ):
                         curr_seg_idx += 1
                         if curr_seg_idx < len(sorted_segments):
                             current_segment = sorted_segments[curr_seg_idx]
@@ -98,50 +106,56 @@ def overlap_alignments(segments, memory):
                     break
             # could only be a segment
             else:
-                if curr_node['end'] < mem['end']:
+                if curr_node["end"] < mem["end"]:
                     # check for remaining segments and valid segments
-                    if(curr_node['end'] > curr_node['start']):
+                    if curr_node["end"] > curr_node["start"]:
                         final_list.append(curr_node)
-          
+
                     curr_seg_idx += 1
                     if curr_seg_idx < len(sorted_segments):
                         current_segment = sorted_segments[curr_seg_idx]
                     else:
                         current_segment = None
-                        
-                    if current_segment is not None and current_segment['start'] <= curr_end_addr and current_segment['start'] < mem['end']:
+
+                    if (
+                        current_segment is not None
+                        and current_segment["start"] <= curr_end_addr
+                        and current_segment["start"] < mem["end"]
+                    ):
                         curr_node = deepcopy(current_segment)
-                        curr_node['permissions'] = mem['permissions']
+                        curr_node["permissions"] = mem["permissions"]
                     else:
                         # no more segments
                         curr_node = deepcopy(mem)
-                        
-                    curr_node['start'] = curr_end_addr
-                    curr_end_addr = curr_node['end']
 
-    return final_list    
+                    curr_node["start"] = curr_end_addr
+                    curr_end_addr = curr_node["end"]
+
+    return final_list
+
 
 # https://github.com/llvm-mirror/llvm/blob/master/include/llvm/ADT/Triple.h
 def get_arch():
-    arch, arch_vendor, arch_os = lldb.target.GetTriple().split('-')
-    if arch == 'x86_64':
+    arch, arch_vendor, arch_os = lldb.target.GetTriple().split("-")
+    if arch == "x86_64":
         return "x64"
-    elif arch == 'x86' or arch == 'i386':
+    elif arch == "x86" or arch == "i386":
         return "x86"
-    elif arch == 'aarch64' or arch == 'arm64':
+    elif arch == "aarch64" or arch == "arm64":
         return "arm64le"
-    elif arch == 'aarch64_be':
+    elif arch == "aarch64_be":
         return "arm64be"
-    elif arch == 'armeb':
+    elif arch == "armeb":
         return "armbe"
-    elif arch == 'arm':
+    elif arch == "arm":
         return "armle"
     else:
         return ""
 
 
-#-----------------------
-#---- Dumping functions
+# -----------------------
+# ---- Dumping functions
+
 
 def dump_arch_info():
     arch_info = {}
@@ -152,56 +166,64 @@ def dump_arch_info():
 def dump_regs():
     reg_state = {}
     for reg_list in lldb.frame.GetRegisters():
-        if 'general purpose registers' in reg_list.GetName().lower():
+        if "general purpose registers" in reg_list.GetName().lower():
             for reg in reg_list:
                 reg_state[reg.GetName()] = int(reg.GetValue(), 16)
     return reg_state
 
+
 def get_section_info(sec):
-    name = sec.name if sec.name is not None else ''
+    name = sec.name if sec.name is not None else ""
     if sec.GetParent().name is not None:
-        name = sec.GetParent().name + '.' + sec.name
+        name = sec.GetParent().name + "." + sec.name
 
     module_name = sec.addr.module.file.GetFilename()
-    module_name = module_name if module_name is not None else ''
-    long_name = module_name + '.' + name
-    
+    module_name = module_name if module_name is not None else ""
+    long_name = module_name + "." + name
+
     return sec.addr.load_addr, (sec.addr.load_addr + sec.size), sec.size, long_name
- 
+
 
 def dump_process_memory(output_dir):
     # Segment information dictionary
     raw_segment_list = []
     raw_memory_list = []
-    
+
     # 1st pass:
     # Loop over the segments, fill in the segment info dictionary
     for module in lldb.target.module_iter():
         for seg_ea in module.section_iter():
-            seg_info = {'module': module.file.GetFilename() }
-            seg_info['start'], seg_info['end'], seg_size, seg_info['name'] = get_section_info(seg_ea)
+            seg_info = {"module": module.file.GetFilename()}
+            (
+                seg_info["start"],
+                seg_info["end"],
+                seg_size,
+                seg_info["name"],
+            ) = get_section_info(seg_ea)
             # TODO: Ugly hack for -1 LONG address on 32-bit
-            if seg_info['start'] >= sys.maxint or seg_size <= 0:
-                print "Throwing away page: {}".format(seg_info['name'])     
+            if seg_info["start"] >= sys.maxint or seg_size <= 0:
+                print "Throwing away page: {}".format(seg_info["name"])
                 continue
 
             # Page-align segment
-            seg_info['start'] = ALIGN_PAGE_DOWN(seg_info['start'])
-            seg_info['end'] = ALIGN_PAGE_UP(seg_info['end'])
-            print("Appending: {}".format(seg_info['name']))
+            seg_info["start"] = ALIGN_PAGE_DOWN(seg_info["start"])
+            seg_info["end"] = ALIGN_PAGE_UP(seg_info["end"])
+            print ("Appending: {}".format(seg_info["name"]))
             raw_segment_list.append(seg_info)
 
     # Add the stack memory region (just hardcode 0x1000 around the current SP)
     sp = lldb.frame.GetSP()
     start_sp = ALIGN_PAGE_DOWN(sp)
-    raw_segment_list.append({'start': start_sp, 'end': start_sp + 0x1000, 'name': 'STACK'})
+    raw_segment_list.append(
+        {"start": start_sp, "end": start_sp + 0x1000, "name": "STACK"}
+    )
 
     # Write the original memory to file for debugging
-    index_file = open(os.path.join(output_dir, DEBUG_MEM_FILE_NAME), 'w')
+    index_file = open(os.path.join(output_dir, DEBUG_MEM_FILE_NAME), "w")
     index_file.write(json.dumps(raw_segment_list, indent=4))
-    index_file.close()    
+    index_file.close()
 
-    # Loop over raw memory regions 
+    # Loop over raw memory regions
     mem_info = lldb.SBMemoryRegionInfo()
     start_addr = -1
     next_region_addr = 0
@@ -218,15 +240,20 @@ def dump_process_memory(output_dir):
         end_addr = mem_info.GetRegionEnd()
 
         # Unknown region name
-        region_name = 'UNKNOWN'
+        region_name = "UNKNOWN"
 
         # Ignore regions that aren't even mapped
         if mem_info.IsMapped() and mem_info.IsReadable():
-            mem_info_obj = {'start': start_addr, 'end': end_addr, 'name': region_name, 'permissions': {
-                "r": mem_info.IsReadable(),
-                "w": mem_info.IsWritable(),
-                "x": mem_info.IsExecutable()
-            }}
+            mem_info_obj = {
+                "start": start_addr,
+                "end": end_addr,
+                "name": region_name,
+                "permissions": {
+                    "r": mem_info.IsReadable(),
+                    "w": mem_info.IsWritable(),
+                    "x": mem_info.IsExecutable(),
+                },
+            }
 
             raw_memory_list.append(mem_info_obj)
 
@@ -234,65 +261,89 @@ def dump_process_memory(output_dir):
 
     for seg_info in final_segment_list:
         try:
-            seg_info['content_file'] = ''
-            start_addr = seg_info['start']
-            end_addr = seg_info['end']
-            region_name = seg_info['name']
+            seg_info["content_file"] = ""
+            start_addr = seg_info["start"]
+            end_addr = seg_info["end"]
+            region_name = seg_info["name"]
             # Compress and dump the content to a file
             err = lldb.SBError()
-            seg_content = lldb.process.ReadMemory(start_addr, end_addr - start_addr, err)
-            if(seg_content == None):
-                print("Segment empty: @0x{0:016x} (size:UNKNOWN) {1}".format(start_addr, region_name))
-                seg_info['content_file'] = ''
+            seg_content = lldb.process.ReadMemory(
+                start_addr, end_addr - start_addr, err
+            )
+            if seg_content == None:
+                print (
+                    "Segment empty: @0x{0:016x} (size:UNKNOWN) {1}".format(
+                        start_addr, region_name
+                    )
+                )
+                seg_info["content_file"] = ""
             else:
-                print("Dumping segment @0x{0:016x} (size:0x{1:x}): {2} [{3}]".format(start_addr, len(seg_content), region_name, repr(seg_info['permissions'])))
+                print (
+                    "Dumping segment @0x{0:016x} (size:0x{1:x}): {2} [{3}]".format(
+                        start_addr,
+                        len(seg_content),
+                        region_name,
+                        repr(seg_info["permissions"]),
+                    )
+                )
                 compressed_seg_content = zlib.compress(seg_content)
                 md5_sum = hashlib.md5(compressed_seg_content).hexdigest() + ".bin"
-                seg_info['content_file'] = md5_sum
-                
+                seg_info["content_file"] = md5_sum
+
                 # Write the compressed contents to disk
-                out_file = open(os.path.join(output_dir, md5_sum), 'wb')
+                out_file = open(os.path.join(output_dir, md5_sum), "wb")
                 out_file.write(compressed_seg_content)
                 out_file.close()
-    
+
         except:
-            print("Exception reading segment ({}): {}".format(region_name, sys.exc_info()[0]))
-            
+            print (
+                "Exception reading segment ({}): {}".format(
+                    region_name, sys.exc_info()[0]
+                )
+            )
+
     return final_segment_list
 
-#----------
-#---- Main    
-    
+
+# ----------
+# ---- Main
+
+
 def main():
 
     try:
-        print("----- Unicorn Context Dumper -----")
-        print("You must be actively debugging before running this!")
-        print("If it fails, double check that you are actively debugging before running.")
-        
+        print ("----- Unicorn Context Dumper -----")
+        print ("You must be actively debugging before running this!")
+        print (
+            "If it fails, double check that you are actively debugging before running."
+        )
+
         # Create the output directory
-        timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d_%H%M%S')
+        timestamp = datetime.datetime.fromtimestamp(time.time()).strftime(
+            "%Y%m%d_%H%M%S"
+        )
         output_path = "UnicornContext_" + timestamp
         if not os.path.exists(output_path):
             os.makedirs(output_path)
-        print("Process context will be output to {}".format(output_path))
-            
+        print ("Process context will be output to {}".format(output_path))
+
         # Get the context
         context = {
             "arch": dump_arch_info(),
-            "regs": dump_regs(), 
+            "regs": dump_regs(),
             "segments": dump_process_memory(output_path),
         }
-    
+
         # Write the index file
-        index_file = open(os.path.join(output_path, INDEX_FILE_NAME), 'w')
+        index_file = open(os.path.join(output_path, INDEX_FILE_NAME), "w")
         index_file.write(json.dumps(context, indent=4))
-        index_file.close()    
-        print("Done.")
-        
+        index_file.close()
+        print ("Done.")
+
     except Exception, e:
-        print("!!! ERROR:\n\t{}".format(repr(e)))
-        
+        print ("!!! ERROR:\n\t{}".format(repr(e)))
+
+
 if __name__ == "__main__":
     main()
 elif lldb.debugger:
diff --git a/unicorn_mode/helper_scripts/unicorn_dumper_pwndbg.py b/unicorn_mode/helper_scripts/unicorn_dumper_pwndbg.py
index dc56b2aa..eccbc8bf 100644
--- a/unicorn_mode/helper_scripts/unicorn_dumper_pwndbg.py
+++ b/unicorn_mode/helper_scripts/unicorn_dumper_pwndbg.py
@@ -59,45 +59,47 @@ MAX_SEG_SIZE = 128 * 1024 * 1024
 # Name of the index file
 INDEX_FILE_NAME = "_index.json"
 
-#----------------------
-#---- Helper Functions
+# ----------------------
+# ---- Helper Functions
+
 
 def map_arch():
-    arch = pwndbg.arch.current # from PWNDBG
-    if 'x86_64' in arch or 'x86-64' in arch:
+    arch = pwndbg.arch.current  # from PWNDBG
+    if "x86_64" in arch or "x86-64" in arch:
         return "x64"
-    elif 'x86' in arch or 'i386' in arch:
+    elif "x86" in arch or "i386" in arch:
         return "x86"
-    elif 'aarch64' in arch or 'arm64' in arch:
+    elif "aarch64" in arch or "arm64" in arch:
         return "arm64le"
-    elif 'aarch64_be' in arch:
+    elif "aarch64_be" in arch:
         return "arm64be"
-    elif 'arm' in arch:
-        cpsr = pwndbg.regs['cpsr']
-        # check endianess 
-        if pwndbg.arch.endian == 'big':
+    elif "arm" in arch:
+        cpsr = pwndbg.regs["cpsr"]
+        # check endianess
+        if pwndbg.arch.endian == "big":
             # check for THUMB mode
-            if (cpsr & (1 << 5)):
+            if cpsr & (1 << 5):
                 return "armbethumb"
             else:
                 return "armbe"
         else:
             # check for THUMB mode
-            if (cpsr & (1 << 5)):
+            if cpsr & (1 << 5):
                 return "armlethumb"
             else:
                 return "armle"
-    elif 'mips' in arch:
-        if pwndbg.arch.endian == 'little':
-            return 'mipsel'
+    elif "mips" in arch:
+        if pwndbg.arch.endian == "little":
+            return "mipsel"
         else:
-            return 'mips'
+            return "mips"
     else:
         return ""
 
 
-#-----------------------
-#---- Dumping functions
+# -----------------------
+# ---- Dumping functions
+
 
 def dump_arch_info():
     arch_info = {}
@@ -110,26 +112,26 @@ def dump_regs():
     for reg in pwndbg.regs.all:
         reg_val = pwndbg.regs[reg]
         # current dumper script looks for register values to be hex strings
-#         reg_str = "0x{:08x}".format(reg_val)
-#         if "64" in get_arch():
-#             reg_str = "0x{:016x}".format(reg_val)
-#         reg_state[reg.strip().strip('$')] = reg_str
-        reg_state[reg.strip().strip('$')] = reg_val
+        #         reg_str = "0x{:08x}".format(reg_val)
+        #         if "64" in get_arch():
+        #             reg_str = "0x{:016x}".format(reg_val)
+        #         reg_state[reg.strip().strip('$')] = reg_str
+        reg_state[reg.strip().strip("$")] = reg_val
     return reg_state
 
 
 def dump_process_memory(output_dir):
     # Segment information dictionary
     final_segment_list = []
-    
+
     # PWNDBG:
     vmmap = pwndbg.vmmap.get()
-    
+
     # Pointer to end of last dumped memory segment
-    segment_last_addr = 0x0;
+    segment_last_addr = 0x0
 
     start = None
-    end   = None
+    end = None
 
     if not vmmap:
         print("No address mapping information found")
@@ -141,86 +143,107 @@ def dump_process_memory(output_dir):
             continue
 
         start = entry.start
-        end   = entry.end
+        end = entry.end
 
-        if (segment_last_addr > entry.start): # indicates overlap
-            if (segment_last_addr > entry.end): # indicates complete overlap, so we skip the segment entirely
+        if segment_last_addr > entry.start:  # indicates overlap
+            if (
+                segment_last_addr > entry.end
+            ):  # indicates complete overlap, so we skip the segment entirely
                 continue
-            else:            
+            else:
                 start = segment_last_addr
-            
-        
-        seg_info = {'start': start, 'end': end, 'name': entry.objfile, 'permissions': {
-            "r": entry.read,
-            "w": entry.write,
-            "x": entry.execute
-        }, 'content_file': ''}
+
+        seg_info = {
+            "start": start,
+            "end": end,
+            "name": entry.objfile,
+            "permissions": {"r": entry.read, "w": entry.write, "x": entry.execute},
+            "content_file": "",
+        }
 
         # "(deleted)" may or may not be valid, but don't push it.
-        if entry.read and not '(deleted)' in entry.objfile:
+        if entry.read and not "(deleted)" in entry.objfile:
             try:
                 # Compress and dump the content to a file
                 seg_content = pwndbg.memory.read(start, end - start)
-                if(seg_content == None):
-                    print("Segment empty: @0x{0:016x} (size:UNKNOWN) {1}".format(entry.start, entry.objfile))
+                if seg_content == None:
+                    print(
+                        "Segment empty: @0x{0:016x} (size:UNKNOWN) {1}".format(
+                            entry.start, entry.objfile
+                        )
+                    )
                 else:
-                    print("Dumping segment @0x{0:016x} (size:0x{1:x}): {2} [{3}]".format(entry.start, len(seg_content), entry.objfile, repr(seg_info['permissions'])))
+                    print(
+                        "Dumping segment @0x{0:016x} (size:0x{1:x}): {2} [{3}]".format(
+                            entry.start,
+                            len(seg_content),
+                            entry.objfile,
+                            repr(seg_info["permissions"]),
+                        )
+                    )
                     compressed_seg_content = zlib.compress(str(seg_content))
                     md5_sum = hashlib.md5(compressed_seg_content).hexdigest() + ".bin"
                     seg_info["content_file"] = md5_sum
-                    
+
                     # Write the compressed contents to disk
-                    out_file = open(os.path.join(output_dir, md5_sum), 'wb')
+                    out_file = open(os.path.join(output_dir, md5_sum), "wb")
                     out_file.write(compressed_seg_content)
                     out_file.close()
 
             except Exception as e:
                 traceback.print_exc()
-                print("Exception reading segment ({}): {}".format(entry.objfile, sys.exc_info()[0]))
+                print(
+                    "Exception reading segment ({}): {}".format(
+                        entry.objfile, sys.exc_info()[0]
+                    )
+                )
         else:
             print("Skipping segment {0}@0x{1:016x}".format(entry.objfile, entry.start))
-        
+
         segment_last_addr = end
 
         # Add the segment to the list
         final_segment_list.append(seg_info)
 
-            
     return final_segment_list
 
-#----------
-#---- Main    
-    
+
+# ----------
+# ---- Main
+
+
 def main():
     print("----- Unicorn Context Dumper -----")
     print("You must be actively debugging before running this!")
     print("If it fails, double check that you are actively debugging before running.")
-    
+
     try:
 
         # Create the output directory
-        timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d_%H%M%S')
+        timestamp = datetime.datetime.fromtimestamp(time.time()).strftime(
+            "%Y%m%d_%H%M%S"
+        )
         output_path = "UnicornContext_" + timestamp
         if not os.path.exists(output_path):
             os.makedirs(output_path)
         print("Process context will be output to {}".format(output_path))
-            
+
         # Get the context
         context = {
             "arch": dump_arch_info(),
-            "regs": dump_regs(), 
+            "regs": dump_regs(),
             "segments": dump_process_memory(output_path),
         }
 
         # Write the index file
-        index_file = open(os.path.join(output_path, INDEX_FILE_NAME), 'w')
+        index_file = open(os.path.join(output_path, INDEX_FILE_NAME), "w")
         index_file.write(json.dumps(context, indent=4))
-        index_file.close()    
+        index_file.close()
         print("Done.")
-        
+
     except Exception as e:
         print("!!! ERROR:\n\t{}".format(repr(e)))
-        
+
+
 if __name__ == "__main__" and pwndbg_loaded:
     main()
-    
diff --git a/unicorn_mode/samples/c/COMPILE.md b/unicorn_mode/samples/c/COMPILE.md
index 7857e5bf..7da140f7 100644
--- a/unicorn_mode/samples/c/COMPILE.md
+++ b/unicorn_mode/samples/c/COMPILE.md
@@ -17,6 +17,6 @@ You shouldn't need to compile simple_target.c since a X86_64 binary version is
 pre-built and shipped in this sample folder. This file documents how the binary
 was built in case you want to rebuild it or recompile it for any reason.
 
-The pre-built binary (simple_target_x86_64.bin) was built using -g -O0 in gcc.
+The pre-built binary (persistent_target_x86_64) was built using -g -O0 in gcc.
 
 We then load the binary and execute the main function directly.
diff --git a/unicorn_mode/samples/compcov_x64/compcov_test_harness.py b/unicorn_mode/samples/compcov_x64/compcov_test_harness.py
index b9ebb61d..f0749d1b 100644
--- a/unicorn_mode/samples/compcov_x64/compcov_test_harness.py
+++ b/unicorn_mode/samples/compcov_x64/compcov_test_harness.py
@@ -22,48 +22,81 @@ from unicornafl import *
 from unicornafl.x86_const import *
 
 # Path to the file containing the binary to emulate
-BINARY_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'compcov_target.bin')
+BINARY_FILE = os.path.join(
+    os.path.dirname(os.path.abspath(__file__)), "compcov_target.bin"
+)
 
 # Memory map for the code to be tested
-CODE_ADDRESS  = 0x00100000  # Arbitrary address where code to test will be loaded
+CODE_ADDRESS = 0x00100000  # Arbitrary address where code to test will be loaded
 CODE_SIZE_MAX = 0x00010000  # Max size for the code (64kb)
 STACK_ADDRESS = 0x00200000  # Address of the stack (arbitrarily chosen)
-STACK_SIZE	  = 0x00010000  # Size of the stack (arbitrarily chosen)
-DATA_ADDRESS  = 0x00300000  # Address where mutated data will be placed
+STACK_SIZE = 0x00010000  # Size of the stack (arbitrarily chosen)
+DATA_ADDRESS = 0x00300000  # Address where mutated data will be placed
 DATA_SIZE_MAX = 0x00010000  # Maximum allowable size of mutated data
 
 try:
     # If Capstone is installed then we'll dump disassembly, otherwise just dump the binary.
     from capstone import *
+
     cs = Cs(CS_ARCH_X86, CS_MODE_64)
+
     def unicorn_debug_instruction(uc, address, size, user_data):
         mem = uc.mem_read(address, size)
-        for (cs_address, cs_size, cs_mnemonic, cs_opstr) in cs.disasm_lite(bytes(mem), size):
+        for (cs_address, cs_size, cs_mnemonic, cs_opstr) in cs.disasm_lite(
+            bytes(mem), size
+        ):
             print("    Instr: {:#016x}:\t{}\t{}".format(address, cs_mnemonic, cs_opstr))
+
+
 except ImportError:
+
     def unicorn_debug_instruction(uc, address, size, user_data):
         print("    Instr: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))
 
+
 def unicorn_debug_block(uc, address, size, user_data):
     print("Basic Block: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))
 
+
 def unicorn_debug_mem_access(uc, access, address, size, value, user_data):
     if access == UC_MEM_WRITE:
-        print("        >>> Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(address, size, value))
+        print(
+            "        >>> Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(
+                address, size, value
+            )
+        )
     else:
         print("        >>> Read: addr=0x{0:016x} size={1}".format(address, size))
 
+
 def unicorn_debug_mem_invalid_access(uc, access, address, size, value, user_data):
     if access == UC_MEM_WRITE_UNMAPPED:
-        print("        >>> INVALID Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(address, size, value))
+        print(
+            "        >>> INVALID Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(
+                address, size, value
+            )
+        )
     else:
-        print("        >>> INVALID Read: addr=0x{0:016x} size={1}".format(address, size))
+        print(
+            "        >>> INVALID Read: addr=0x{0:016x} size={1}".format(address, size)
+        )
+
 
 def main():
 
     parser = argparse.ArgumentParser(description="Test harness for compcov_target.bin")
-    parser.add_argument('input_file', type=str, help="Path to the file containing the mutated input to load")
-    parser.add_argument('-t', '--trace', default=False, action="store_true", help="Enables debug tracing")
+    parser.add_argument(
+        "input_file",
+        type=str,
+        help="Path to the file containing the mutated input to load",
+    )
+    parser.add_argument(
+        "-t",
+        "--trace",
+        default=False,
+        action="store_true",
+        help="Enables debug tracing",
+    )
     args = parser.parse_args()
 
     # Instantiate a MIPS32 big endian Unicorn Engine instance
@@ -73,13 +106,16 @@ def main():
         uc.hook_add(UC_HOOK_BLOCK, unicorn_debug_block)
         uc.hook_add(UC_HOOK_CODE, unicorn_debug_instruction)
         uc.hook_add(UC_HOOK_MEM_WRITE | UC_HOOK_MEM_READ, unicorn_debug_mem_access)
-        uc.hook_add(UC_HOOK_MEM_WRITE_UNMAPPED | UC_HOOK_MEM_READ_INVALID, unicorn_debug_mem_invalid_access)
+        uc.hook_add(
+            UC_HOOK_MEM_WRITE_UNMAPPED | UC_HOOK_MEM_READ_INVALID,
+            unicorn_debug_mem_invalid_access,
+        )
 
-    #---------------------------------------------------
+    # ---------------------------------------------------
     # Load the binary to emulate and map it into memory
 
     print("Loading data input from {}".format(args.input_file))
-    binary_file = open(BINARY_FILE, 'rb')
+    binary_file = open(BINARY_FILE, "rb")
     binary_code = binary_file.read()
     binary_file.close()
 
@@ -93,11 +129,11 @@ def main():
     uc.mem_write(CODE_ADDRESS, binary_code)
 
     # Set the program counter to the start of the code
-    start_address = CODE_ADDRESS          # Address of entry point of main()
-    end_address   = CODE_ADDRESS + 0x55   # Address of last instruction in main()
+    start_address = CODE_ADDRESS  # Address of entry point of main()
+    end_address = CODE_ADDRESS + 0x55  # Address of last instruction in main()
     uc.reg_write(UC_X86_REG_RIP, start_address)
 
-    #-----------------
+    # -----------------
     # Setup the stack
 
     uc.mem_map(STACK_ADDRESS, STACK_SIZE)
@@ -106,8 +142,7 @@ def main():
     # Mapping a location to write our buffer to
     uc.mem_map(DATA_ADDRESS, DATA_SIZE_MAX)
 
-
-    #-----------------------------------------------
+    # -----------------------------------------------
     # Load the mutated input and map it into memory
 
     def place_input_callback(uc, input, _, data):
@@ -121,7 +156,7 @@ def main():
         # Write the mutated command into the data buffer
         uc.mem_write(DATA_ADDRESS, input)
 
-    #------------------------------------------------------------
+    # ------------------------------------------------------------
     # Emulate the code, allowing it to process the mutated input
 
     print("Starting the AFL fuzz")
@@ -129,8 +164,9 @@ def main():
         input_file=args.input_file,
         place_input_callback=place_input_callback,
         exits=[end_address],
-        persistent_iters=1
+        persistent_iters=1,
     )
 
+
 if __name__ == "__main__":
     main()
diff --git a/unicorn_mode/samples/simple/simple_test_harness.py b/unicorn_mode/samples/simple/simple_test_harness.py
index 4a673daf..cd04ad3a 100644
--- a/unicorn_mode/samples/simple/simple_test_harness.py
+++ b/unicorn_mode/samples/simple/simple_test_harness.py
@@ -22,48 +22,81 @@ from unicornafl import *
 from unicornafl.mips_const import *
 
 # Path to the file containing the binary to emulate
-BINARY_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'simple_target.bin')
+BINARY_FILE = os.path.join(
+    os.path.dirname(os.path.abspath(__file__)), "simple_target.bin"
+)
 
 # Memory map for the code to be tested
-CODE_ADDRESS  = 0x00100000  # Arbitrary address where code to test will be loaded
+CODE_ADDRESS = 0x00100000  # Arbitrary address where code to test will be loaded
 CODE_SIZE_MAX = 0x00010000  # Max size for the code (64kb)
 STACK_ADDRESS = 0x00200000  # Address of the stack (arbitrarily chosen)
-STACK_SIZE	  = 0x00010000  # Size of the stack (arbitrarily chosen)
-DATA_ADDRESS  = 0x00300000  # Address where mutated data will be placed
+STACK_SIZE = 0x00010000  # Size of the stack (arbitrarily chosen)
+DATA_ADDRESS = 0x00300000  # Address where mutated data will be placed
 DATA_SIZE_MAX = 0x00010000  # Maximum allowable size of mutated data
 
 try:
     # If Capstone is installed then we'll dump disassembly, otherwise just dump the binary.
     from capstone import *
+
     cs = Cs(CS_ARCH_MIPS, CS_MODE_MIPS32 + CS_MODE_BIG_ENDIAN)
+
     def unicorn_debug_instruction(uc, address, size, user_data):
         mem = uc.mem_read(address, size)
-        for (cs_address, cs_size, cs_mnemonic, cs_opstr) in cs.disasm_lite(bytes(mem), size):
+        for (cs_address, cs_size, cs_mnemonic, cs_opstr) in cs.disasm_lite(
+            bytes(mem), size
+        ):
             print("    Instr: {:#016x}:\t{}\t{}".format(address, cs_mnemonic, cs_opstr))
+
+
 except ImportError:
+
     def unicorn_debug_instruction(uc, address, size, user_data):
-        print("    Instr: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))    
+        print("    Instr: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))
+
 
 def unicorn_debug_block(uc, address, size, user_data):
     print("Basic Block: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))
-    
+
+
 def unicorn_debug_mem_access(uc, access, address, size, value, user_data):
     if access == UC_MEM_WRITE:
-        print("        >>> Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(address, size, value))
+        print(
+            "        >>> Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(
+                address, size, value
+            )
+        )
     else:
-        print("        >>> Read: addr=0x{0:016x} size={1}".format(address, size))    
+        print("        >>> Read: addr=0x{0:016x} size={1}".format(address, size))
+
 
 def unicorn_debug_mem_invalid_access(uc, access, address, size, value, user_data):
     if access == UC_MEM_WRITE_UNMAPPED:
-        print("        >>> INVALID Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(address, size, value))
+        print(
+            "        >>> INVALID Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(
+                address, size, value
+            )
+        )
     else:
-        print("        >>> INVALID Read: addr=0x{0:016x} size={1}".format(address, size))   
+        print(
+            "        >>> INVALID Read: addr=0x{0:016x} size={1}".format(address, size)
+        )
+
 
 def main():
 
     parser = argparse.ArgumentParser(description="Test harness for simple_target.bin")
-    parser.add_argument('input_file', type=str, help="Path to the file containing the mutated input to load")
-    parser.add_argument('-t', '--trace', default=False, action="store_true", help="Enables debug tracing")
+    parser.add_argument(
+        "input_file",
+        type=str,
+        help="Path to the file containing the mutated input to load",
+    )
+    parser.add_argument(
+        "-t",
+        "--trace",
+        default=False,
+        action="store_true",
+        help="Enables debug tracing",
+    )
     args = parser.parse_args()
 
     # Instantiate a MIPS32 big endian Unicorn Engine instance
@@ -73,13 +106,16 @@ def main():
         uc.hook_add(UC_HOOK_BLOCK, unicorn_debug_block)
         uc.hook_add(UC_HOOK_CODE, unicorn_debug_instruction)
         uc.hook_add(UC_HOOK_MEM_WRITE | UC_HOOK_MEM_READ, unicorn_debug_mem_access)
-        uc.hook_add(UC_HOOK_MEM_WRITE_UNMAPPED | UC_HOOK_MEM_READ_INVALID, unicorn_debug_mem_invalid_access)
+        uc.hook_add(
+            UC_HOOK_MEM_WRITE_UNMAPPED | UC_HOOK_MEM_READ_INVALID,
+            unicorn_debug_mem_invalid_access,
+        )
 
-    #---------------------------------------------------
+    # ---------------------------------------------------
     # Load the binary to emulate and map it into memory
 
     print("Loading data input from {}".format(args.input_file))
-    binary_file = open(BINARY_FILE, 'rb')
+    binary_file = open(BINARY_FILE, "rb")
     binary_code = binary_file.read()
     binary_file.close()
 
@@ -93,11 +129,11 @@ def main():
     uc.mem_write(CODE_ADDRESS, binary_code)
 
     # Set the program counter to the start of the code
-    start_address = CODE_ADDRESS          # Address of entry point of main()
-    end_address   = CODE_ADDRESS + 0xf4   # Address of last instruction in main()
+    start_address = CODE_ADDRESS  # Address of entry point of main()
+    end_address = CODE_ADDRESS + 0xF4  # Address of last instruction in main()
     uc.reg_write(UC_MIPS_REG_PC, start_address)
 
-    #-----------------
+    # -----------------
     # Setup the stack
 
     uc.mem_map(STACK_ADDRESS, STACK_SIZE)
@@ -106,14 +142,14 @@ def main():
     # reserve some space for data
     uc.mem_map(DATA_ADDRESS, DATA_SIZE_MAX)
 
-    #-----------------------------------------------------
+    # -----------------------------------------------------
     # Set up a callback to place input data (do little work here, it's called for every single iteration)
     # We did not pass in any data and don't use persistent mode, so we can ignore these params.
     # Be sure to check out the docstrings for the uc.afl_* functions.
     def place_input_callback(uc, input, persistent_round, data):
         # Apply constraints to the mutated input
         if len(input) > DATA_SIZE_MAX:
-            #print("Test input is too long (> {} bytes)")
+            # print("Test input is too long (> {} bytes)")
             return False
 
         # Write the mutated command into the data buffer
@@ -122,5 +158,6 @@ def main():
     # Start the fuzzer.
     uc.afl_fuzz(args.input_file, place_input_callback, [end_address])
 
+
 if __name__ == "__main__":
     main()
diff --git a/unicorn_mode/samples/simple/simple_test_harness_alt.py b/unicorn_mode/samples/simple/simple_test_harness_alt.py
index 9c3dbc93..3249b13d 100644
--- a/unicorn_mode/samples/simple/simple_test_harness_alt.py
+++ b/unicorn_mode/samples/simple/simple_test_harness_alt.py
@@ -25,50 +25,79 @@ from unicornafl import *
 from unicornafl.mips_const import *
 
 # Path to the file containing the binary to emulate
-BINARY_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'simple_target.bin')
+BINARY_FILE = os.path.join(
+    os.path.dirname(os.path.abspath(__file__)), "simple_target.bin"
+)
 
 # Memory map for the code to be tested
-CODE_ADDRESS  = 0x00100000  # Arbitrary address where code to test will be loaded
+CODE_ADDRESS = 0x00100000  # Arbitrary address where code to test will be loaded
 CODE_SIZE_MAX = 0x00010000  # Max size for the code (64kb)
 STACK_ADDRESS = 0x00200000  # Address of the stack (arbitrarily chosen)
-STACK_SIZE	  = 0x00010000  # Size of the stack (arbitrarily chosen)
-DATA_ADDRESS  = 0x00300000  # Address where mutated data will be placed
+STACK_SIZE = 0x00010000  # Size of the stack (arbitrarily chosen)
+DATA_ADDRESS = 0x00300000  # Address where mutated data will be placed
 DATA_SIZE_MAX = 0x00010000  # Maximum allowable size of mutated data
 
 try:
     # If Capstone is installed then we'll dump disassembly, otherwise just dump the binary.
     from capstone import *
+
     cs = Cs(CS_ARCH_MIPS, CS_MODE_MIPS32 + CS_MODE_BIG_ENDIAN)
+
     def unicorn_debug_instruction(uc, address, size, user_data):
         mem = uc.mem_read(address, size)
-        for (cs_address, cs_size, cs_mnemonic, cs_opstr) in cs.disasm_lite(bytes(mem), size):
+        for (cs_address, cs_size, cs_mnemonic, cs_opstr) in cs.disasm_lite(
+            bytes(mem), size
+        ):
             print("    Instr: {:#016x}:\t{}\t{}".format(address, cs_mnemonic, cs_opstr))
+
+
 except ImportError:
+
     def unicorn_debug_instruction(uc, address, size, user_data):
-        print("    Instr: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))    
+        print("    Instr: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))
+
 
 def unicorn_debug_block(uc, address, size, user_data):
     print("Basic Block: addr=0x{0:016x}, size=0x{1:016x}".format(address, size))
-    
+
+
 def unicorn_debug_mem_access(uc, access, address, size, value, user_data):
     if access == UC_MEM_WRITE:
-        print("        >>> Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(address, size, value))
+        print(
+            "        >>> Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(
+                address, size, value
+            )
+        )
     else:
-        print("        >>> Read: addr=0x{0:016x} size={1}".format(address, size))    
+        print("        >>> Read: addr=0x{0:016x} size={1}".format(address, size))
+
 
 def unicorn_debug_mem_invalid_access(uc, access, address, size, value, user_data):
     if access == UC_MEM_WRITE_UNMAPPED:
-        print("        >>> INVALID Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(address, size, value))
+        print(
+            "        >>> INVALID Write: addr=0x{0:016x} size={1} data=0x{2:016x}".format(
+                address, size, value
+            )
+        )
     else:
-        print("        >>> INVALID Read: addr=0x{0:016x} size={1}".format(address, size))   
+        print(
+            "        >>> INVALID Read: addr=0x{0:016x} size={1}".format(address, size)
+        )
+
 
 def force_crash(uc_error):
     # This function should be called to indicate to AFL that a crash occurred during emulation.
     # Pass in the exception received from Uc.emu_start()
     mem_errors = [
-        UC_ERR_READ_UNMAPPED, UC_ERR_READ_PROT, UC_ERR_READ_UNALIGNED,
-        UC_ERR_WRITE_UNMAPPED, UC_ERR_WRITE_PROT, UC_ERR_WRITE_UNALIGNED,
-        UC_ERR_FETCH_UNMAPPED, UC_ERR_FETCH_PROT, UC_ERR_FETCH_UNALIGNED,
+        UC_ERR_READ_UNMAPPED,
+        UC_ERR_READ_PROT,
+        UC_ERR_READ_UNALIGNED,
+        UC_ERR_WRITE_UNMAPPED,
+        UC_ERR_WRITE_PROT,
+        UC_ERR_WRITE_UNALIGNED,
+        UC_ERR_FETCH_UNMAPPED,
+        UC_ERR_FETCH_PROT,
+        UC_ERR_FETCH_UNALIGNED,
     ]
     if uc_error.errno in mem_errors:
         # Memory error - throw SIGSEGV
@@ -80,11 +109,22 @@ def force_crash(uc_error):
         # Not sure what happened - throw SIGABRT
         os.kill(os.getpid(), signal.SIGABRT)
 
+
 def main():
 
     parser = argparse.ArgumentParser(description="Test harness for simple_target.bin")
-    parser.add_argument('input_file', type=str, help="Path to the file containing the mutated input to load")
-    parser.add_argument('-d', '--debug', default=False, action="store_true", help="Enables debug tracing")
+    parser.add_argument(
+        "input_file",
+        type=str,
+        help="Path to the file containing the mutated input to load",
+    )
+    parser.add_argument(
+        "-d",
+        "--debug",
+        default=False,
+        action="store_true",
+        help="Enables debug tracing",
+    )
     args = parser.parse_args()
 
     # Instantiate a MIPS32 big endian Unicorn Engine instance
@@ -94,13 +134,16 @@ def main():
         uc.hook_add(UC_HOOK_BLOCK, unicorn_debug_block)
         uc.hook_add(UC_HOOK_CODE, unicorn_debug_instruction)
         uc.hook_add(UC_HOOK_MEM_WRITE | UC_HOOK_MEM_READ, unicorn_debug_mem_access)
-        uc.hook_add(UC_HOOK_MEM_WRITE_UNMAPPED | UC_HOOK_MEM_READ_INVALID, unicorn_debug_mem_invalid_access)
+        uc.hook_add(
+            UC_HOOK_MEM_WRITE_UNMAPPED | UC_HOOK_MEM_READ_INVALID,
+            unicorn_debug_mem_invalid_access,
+        )
 
-    #---------------------------------------------------
+    # ---------------------------------------------------
     # Load the binary to emulate and map it into memory
 
     print("Loading data input from {}".format(args.input_file))
-    binary_file = open(BINARY_FILE, 'rb')
+    binary_file = open(BINARY_FILE, "rb")
     binary_code = binary_file.read()
     binary_file.close()
 
@@ -114,11 +157,11 @@ def main():
     uc.mem_write(CODE_ADDRESS, binary_code)
 
     # Set the program counter to the start of the code
-    start_address = CODE_ADDRESS          # Address of entry point of main()
-    end_address   = CODE_ADDRESS + 0xf4   # Address of last instruction in main()
+    start_address = CODE_ADDRESS  # Address of entry point of main()
+    end_address = CODE_ADDRESS + 0xF4  # Address of last instruction in main()
     uc.reg_write(UC_MIPS_REG_PC, start_address)
 
-    #-----------------
+    # -----------------
     # Setup the stack
 
     uc.mem_map(STACK_ADDRESS, STACK_SIZE)
@@ -127,10 +170,10 @@ def main():
     # reserve some space for data
     uc.mem_map(DATA_ADDRESS, DATA_SIZE_MAX)
 
-    #-----------------------------------------------------
+    # -----------------------------------------------------
     #   Kick off AFL's fork server
-    #   THIS MUST BE DONE BEFORE LOADING USER DATA! 
-    #   If this isn't done every single run, the AFL fork server 
+    #   THIS MUST BE DONE BEFORE LOADING USER DATA!
+    #   If this isn't done every single run, the AFL fork server
     #   will not be started appropriately and you'll get erratic results!
 
     print("Starting the AFL forkserver")
@@ -142,12 +185,12 @@ def main():
     else:
         out = lambda x, y: print(x.format(y))
 
-    #-----------------------------------------------
+    # -----------------------------------------------
     # Load the mutated input and map it into memory
 
     # Load the mutated input from disk
     out("Loading data input from {}", args.input_file)
-    input_file = open(args.input_file, 'rb')
+    input_file = open(args.input_file, "rb")
     input = input_file.read()
     input_file.close()
 
@@ -159,7 +202,7 @@ def main():
     # Write the mutated command into the data buffer
     uc.mem_write(DATA_ADDRESS, input)
 
-    #------------------------------------------------------------
+    # ------------------------------------------------------------
     # Emulate the code, allowing it to process the mutated input
 
     out("Executing until a crash or execution reaches 0x{0:016x}", end_address)
@@ -175,5 +218,6 @@ def main():
     # UC_AFL_RET_FINISHED = 3
     out("Done. AFL Mode is {}", afl_mode)
 
+
 if __name__ == "__main__":
     main()
diff --git a/unicorn_mode/samples/speedtest/get_offsets.py b/unicorn_mode/samples/speedtest/get_offsets.py
index c9dc76df..c9dc76df 100644..100755
--- a/unicorn_mode/samples/speedtest/get_offsets.py
+++ b/unicorn_mode/samples/speedtest/get_offsets.py
diff --git a/unicorn_mode/samples/speedtest/python/harness.py b/unicorn_mode/samples/speedtest/python/harness.py
index f72eb32b..801ef4d1 100644
--- a/unicorn_mode/samples/speedtest/python/harness.py
+++ b/unicorn_mode/samples/speedtest/python/harness.py
@@ -256,17 +256,17 @@ def main():
         input_len = len(input)
         # global input_len
         if input_len > INPUT_MAX:
-            #print("Test input is too long (> {} bytes)")
+            # print("Test input is too long (> {} bytes)")
             return False
 
         # print(f"Placing input: {input} in round {persistent_round}")
 
         # Make sure the string is always 0-terminated (as it would be "in the wild")
-        input[-1] = b'\0'
+        input[-1] = b"\0"
 
         # Write the mutated command into the data buffer
         uc.mem_write(INPUT_ADDRESS, input)
-        #uc.reg_write(UC_X86_REG_RIP, main_offset)
+        # uc.reg_write(UC_X86_REG_RIP, main_offset)
 
     print(f"Starting to fuzz. Running from addr {main_offset} to one of {main_ends}")
     # Start the fuzzer.
diff --git a/unicorn_mode/unicornafl b/unicorn_mode/unicornafl
-Subproject 80d31ef367f7a1a75fc48e08e129d10f2ffa049
+Subproject fb2fc9f25df32f17f6b6b859e4dbd70f9a857e0
diff --git a/utils/afl_network_proxy/GNUmakefile b/utils/afl_network_proxy/GNUmakefile
index 25a3df82..0b55dc2c 100644
--- a/utils/afl_network_proxy/GNUmakefile
+++ b/utils/afl_network_proxy/GNUmakefile
@@ -1,5 +1,6 @@
 PREFIX   ?= /usr/local
 BIN_PATH  = $(PREFIX)/bin
+HELPER_PATH = $(PREFIX)/lib/afl
 DOC_PATH  = $(PREFIX)/share/doc/afl
 
 PROGRAMS = afl-network-client afl-network-server
@@ -31,7 +32,7 @@ afl-network-client:	afl-network-client.c
 	$(CC) $(CFLAGS) -I../../include -o afl-network-client afl-network-client.c $(LDFLAGS)
 
 afl-network-server:	afl-network-server.c
-	$(CC) $(CFLAGS) -I../../include -o afl-network-server afl-network-server.c ../../src/afl-forkserver.c ../../src/afl-sharedmem.c ../../src/afl-common.c -DBIN_PATH=\"$(BIN_PATH)\" $(LDFLAGS)
+	$(CC) $(CFLAGS) -I../../include -o afl-network-server afl-network-server.c ../../src/afl-forkserver.c ../../src/afl-sharedmem.c ../../src/afl-common.c -DAFL_PATH=\"$(HELPER_PATH)\" -DBIN_PATH=\"$(BIN_PATH)\" $(LDFLAGS)
 
 clean:
 	rm -f $(PROGRAMS) *~ core
diff --git a/utils/afl_network_proxy/afl-network-server.c b/utils/afl_network_proxy/afl-network-server.c
index fe225416..0dfae658 100644
--- a/utils/afl_network_proxy/afl-network-server.c
+++ b/utils/afl_network_proxy/afl-network-server.c
@@ -237,38 +237,7 @@ static void set_up_environment(afl_forkserver_t *fsrv) {
 
     if (fsrv->qemu_mode) {
 
-      u8 *qemu_preload = getenv("QEMU_SET_ENV");
-      u8 *afl_preload = getenv("AFL_PRELOAD");
-      u8 *buf;
-
-      s32 i, afl_preload_size = strlen(afl_preload);
-      for (i = 0; i < afl_preload_size; ++i) {
-
-        if (afl_preload[i] == ',') {
-
-          PFATAL(
-              "Comma (',') is not allowed in AFL_PRELOAD when -Q is "
-              "specified!");
-
-        }
-
-      }
-
-      if (qemu_preload) {
-
-        buf = alloc_printf("%s,LD_PRELOAD=%s,DYLD_INSERT_LIBRARIES=%s",
-                           qemu_preload, afl_preload, afl_preload);
-
-      } else {
-
-        buf = alloc_printf("LD_PRELOAD=%s,DYLD_INSERT_LIBRARIES=%s",
-                           afl_preload, afl_preload);
-
-      }
-
-      setenv("QEMU_SET_ENV", buf, 1);
-
-      afl_free(buf);
+      /* afl-qemu-trace takes care of converting AFL_PRELOAD. */
 
     } else {
 
diff --git a/utils/afl_untracer/afl-untracer.c b/utils/afl_untracer/afl-untracer.c
index 1f1a10ea..2baeb58d 100644
--- a/utils/afl_untracer/afl-untracer.c
+++ b/utils/afl_untracer/afl-untracer.c
@@ -480,9 +480,9 @@ void setup_trap_instrumentation(void) {
     // Index into the coverage bitmap for the current trap instruction.
 #ifdef __aarch64__
   uint64_t bitmap_index = 0;
-#ifdef __APPLE__
+  #ifdef __APPLE__
   pthread_jit_write_protect_np(0);
-#endif
+  #endif
 #else
   uint32_t bitmap_index = 0;
 #endif
@@ -627,13 +627,13 @@ static void sigtrap_handler(int signum, siginfo_t *si, void *context) {
   // Must re-execute the instruction, so decrement PC by one instruction.
   ucontext_t *ctx = (ucontext_t *)context;
 #if defined(__APPLE__) && defined(__LP64__)
-#if defined(__x86_64__)
+  #if defined(__x86_64__)
   ctx->uc_mcontext->__ss.__rip -= 1;
   addr = ctx->uc_mcontext->__ss.__rip;
-#else
+  #else
   ctx->uc_mcontext->__ss.__pc -= 4;
   addr = ctx->uc_mcontext->__ss.__pc;
-#endif
+  #endif
 #elif defined(__linux__)
   #if defined(__x86_64__) || defined(__i386__)
   ctx->uc_mcontext.gregs[REG_RIP] -= 1;
diff --git a/utils/afl_untracer/ida_get_patchpoints.py b/utils/afl_untracer/ida_get_patchpoints.py
index 43cf6d89..807685b3 100644
--- a/utils/afl_untracer/ida_get_patchpoints.py
+++ b/utils/afl_untracer/ida_get_patchpoints.py
@@ -11,6 +11,7 @@ import idc
 # See https://www.hex-rays.com/products/ida/support/ida74_idapython_no_bc695_porting_guide.shtml
 
 from os.path import expanduser
+
 home = expanduser("~")
 
 patchpoints = set()
@@ -18,7 +19,7 @@ patchpoints = set()
 max_offset = 0
 for seg_ea in idautils.Segments():
     name = idc.get_segm_name(seg_ea)
-    #print("Segment: " + name)
+    # print("Segment: " + name)
     if name != "__text" and name != ".text":
         continue
 
@@ -26,7 +27,7 @@ for seg_ea in idautils.Segments():
     end = idc.get_segm_end(seg_ea)
     first = 0
     subtract_addr = 0
-    #print("Start: " + hex(start) + " End: " + hex(end))
+    # print("Start: " + hex(start) + " End: " + hex(end))
     for func_ea in idautils.Functions(start, end):
         f = idaapi.get_func(func_ea)
         if not f:
@@ -37,10 +38,10 @@ for seg_ea in idautils.Segments():
                     if block.start_ea >= 0x1000:
                         subtract_addr = 0x1000
                         first = 1
-                        
+
                 max_offset = max(max_offset, block.start_ea)
                 patchpoints.add(block.start_ea - subtract_addr)
-            #else:
+            # else:
             #    print("Warning: broken CFG?")
 
 # Round up max_offset to page size
@@ -52,11 +53,11 @@ if rem != 0:
 print("Writing to " + home + "/Desktop/patches.txt")
 
 with open(home + "/Desktop/patches.txt", "w") as f:
-    f.write(ida_nalt.get_root_filename() + ':' + hex(size) + '\n')
-    f.write('\n'.join(map(hex, sorted(patchpoints))))
-    f.write('\n')
+    f.write(ida_nalt.get_root_filename() + ":" + hex(size) + "\n")
+    f.write("\n".join(map(hex, sorted(patchpoints))))
+    f.write("\n")
 
 print("Done, found {} patchpoints".format(len(patchpoints)))
 
 # For headless script running remove the comment from the next line
-#ida_pro.qexit()
+# ida_pro.qexit()
diff --git a/utils/aflpp_driver/aflpp_driver.c b/utils/aflpp_driver/aflpp_driver.c
index 7bb929b2..ad781e64 100644
--- a/utils/aflpp_driver/aflpp_driver.c
+++ b/utils/aflpp_driver/aflpp_driver.c
@@ -173,7 +173,7 @@ size_t LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize) {
 // Execute any files provided as parameters.
 static int ExecuteFilesOnyByOne(int argc, char **argv) {
 
-  unsigned char *buf = malloc(MAX_FILE);
+  unsigned char *buf = (unsigned char *)malloc(MAX_FILE);
   for (int i = 1; i < argc; i++) {
 
     int fd = open(argv[i], O_RDONLY);
@@ -187,6 +187,8 @@ static int ExecuteFilesOnyByOne(int argc, char **argv) {
 
     }
 
+    close(fd);
+
   }
 
   free(buf);
@@ -204,10 +206,20 @@ int main(int argc, char **argv) {
       "To fuzz with afl-fuzz execute this:\n"
       "  afl-fuzz [afl-flags] -- %s [-N]\n"
       "afl-fuzz will run N iterations before re-spawning the process (default: "
-      "1000)\n"
+      "INT_MAX)\n"
       "======================================================\n",
       argv[0], argv[0]);
 
+  if (getenv("AFL_GDB")) {
+
+    char cmd[64];
+    snprintf(cmd, sizeof(cmd), "cat /proc/%d/maps", getpid());
+    system(cmd);
+    fprintf(stderr, "DEBUG: aflpp_driver pid is %d\n", getpid());
+    sleep(1);
+
+  }
+
   output_file = stderr;
   maybe_duplicate_stderr();
   maybe_close_fd_mask();
diff --git a/utils/crash_triage/triage_crashes.sh b/utils/crash_triage/triage_crashes.sh
index bf763cba..c9ca1f79 100755
--- a/utils/crash_triage/triage_crashes.sh
+++ b/utils/crash_triage/triage_crashes.sh
@@ -60,12 +60,12 @@ if
 fi
 
 if [ ! -f "$BIN" -o ! -x "$BIN" ]; then
-  echo "[-] Error: binary '$2' not found or is not executable." 1>&2
+  echo "[-] Error: binary '$BIN' not found or is not executable." 1>&2
   exit 1
 fi
 
 if [ ! -d "$DIR/queue" ]; then
-  echo "[-] Error: directory '$1' not found or not created by afl-fuzz." 1>&2
+  echo "[-] Error: directory '$DIR' not found or not created by afl-fuzz." 1>&2
   exit 1
 fi
 
@@ -90,8 +90,9 @@ for crash in $DIR/crashes/id:*; do
 
   for a in $@; do
 
-    if [ "$a" = "@@" ] ; then
-      use_args="$use_args $crash"
+    if echo "$a" | grep -qF '@@'; then
+      escaped_fname=`echo $crash | sed 's:/:\\\\/:g'`
+      use_args="$use_args `echo $a | sed "s/@@/$escaped_fname/g"`"
       unset use_stdio
     else
       use_args="$use_args $a"
diff --git a/utils/custom_mutators/XmlMutatorMin.py b/utils/custom_mutators/XmlMutatorMin.py
index 4c80a2ba..3e6cd0ff 100644
--- a/utils/custom_mutators/XmlMutatorMin.py
+++ b/utils/custom_mutators/XmlMutatorMin.py
@@ -12,12 +12,13 @@ import random, re, io
 # The XmlMutatorMin class #
 ###########################
 
+
 class XmlMutatorMin:
 
     """
-        Optionals parameters:
-            seed        Seed used by the PRNG (default: "RANDOM")
-            verbose     Verbosity (default: False)
+    Optionals parameters:
+        seed        Seed used by the PRNG (default: "RANDOM")
+        verbose     Verbosity (default: False)
     """
 
     def __init__(self, seed="RANDOM", verbose=False):
@@ -41,7 +42,12 @@ class XmlMutatorMin:
         self.tree = None
 
         # High-level mutators (no database needed)
-        hl_mutators_delete = ["del_node_and_children", "del_node_but_children", "del_attribute", "del_content"]  # Delete items
+        hl_mutators_delete = [
+            "del_node_and_children",
+            "del_node_but_children",
+            "del_attribute",
+            "del_content",
+        ]  # Delete items
         hl_mutators_fuzz = ["fuzz_attribute"]  # Randomly change attribute values
 
         # Exposed mutators
@@ -74,7 +80,9 @@ class XmlMutatorMin:
 
         """ Serialize a XML document. Basic wrapper around lxml.tostring() """
 
-        return ET.tostring(tree, with_tail=False, xml_declaration=True, encoding=tree.docinfo.encoding)
+        return ET.tostring(
+            tree, with_tail=False, xml_declaration=True, encoding=tree.docinfo.encoding
+        )
 
     def __ver(self, version):
 
@@ -161,7 +169,7 @@ class XmlMutatorMin:
             # Randomly pick one the function calls
             (func, args) = random.choice(l)
             # Split by "," and randomly pick one of the arguments
-            value = random.choice(args.split(','))
+            value = random.choice(args.split(","))
             # Remove superfluous characters
             unclean_value = value
             value = value.strip(" ").strip("'")
@@ -170,49 +178,49 @@ class XmlMutatorMin:
             value = attrib_value
 
         # For each type, define some possible replacement values
-        choices_number =    ( \
-                                "0", \
-                                "11111", \
-                                "-128", \
-                                "2", \
-                                "-1", \
-                                "1/3", \
-                                "42/0", \
-                                "1094861636 idiv 1.0", \
-                                "-1123329771506872 idiv 3.8", \
-                                "17=$numericRTF", \
-                                str(3 + random.randrange(0, 100)), \
-                            )
-
-        choices_letter =    ( \
-                                "P" * (25 * random.randrange(1, 100)), \
-                                "%s%s%s%s%s%s", \
-                                "foobar", \
-                            )
-
-        choices_alnum =     ( \
-                                "Abc123", \
-                                "020F0302020204030204", \
-                                "020F0302020204030204" * (random.randrange(5, 20)), \
-                            )
+        choices_number = (
+            "0",
+            "11111",
+            "-128",
+            "2",
+            "-1",
+            "1/3",
+            "42/0",
+            "1094861636 idiv 1.0",
+            "-1123329771506872 idiv 3.8",
+            "17=$numericRTF",
+            str(3 + random.randrange(0, 100)),
+        )
+
+        choices_letter = (
+            "P" * (25 * random.randrange(1, 100)),
+            "%s%s%s%s%s%s",
+            "foobar",
+        )
+
+        choices_alnum = (
+            "Abc123",
+            "020F0302020204030204",
+            "020F0302020204030204" * (random.randrange(5, 20)),
+        )
 
         # Fuzz the value
-        if random.choice((True,False)) and value == "":
+        if random.choice((True, False)) and value == "":
 
             # Empty
             new_value = value
 
-        elif random.choice((True,False)) and value.isdigit():
+        elif random.choice((True, False)) and value.isdigit():
 
             # Numbers
             new_value = random.choice(choices_number)
 
-        elif random.choice((True,False)) and value.isalpha():
+        elif random.choice((True, False)) and value.isalpha():
 
             # Letters
             new_value = random.choice(choices_letter)
 
-        elif random.choice((True,False)) and value.isalnum():
+        elif random.choice((True, False)) and value.isalnum():
 
             # Alphanumeric
             new_value = random.choice(choices_alnum)
@@ -232,22 +240,25 @@ class XmlMutatorMin:
 
         # Log something
         if self.verbose:
-            print("Fuzzing attribute #%i '%s' of tag #%i '%s'" % (rand_attrib_id, rand_attrib, rand_elem_id, rand_elem.tag))
+            print(
+                "Fuzzing attribute #%i '%s' of tag #%i '%s'"
+                % (rand_attrib_id, rand_attrib, rand_elem_id, rand_elem.tag)
+            )
 
         # Modify the attribute
         rand_elem.set(rand_attrib, new_value.decode("utf-8"))
 
     def __del_node_and_children(self):
 
-        """ High-level minimizing mutator
-            Delete a random node and its children (i.e. delete a random tree) """
+        """High-level minimizing mutator
+        Delete a random node and its children (i.e. delete a random tree)"""
 
         self.__del_node(True)
 
     def __del_node_but_children(self):
 
-        """ High-level minimizing mutator
-            Delete a random node but its children (i.e. link them to the parent of the deleted node) """
+        """High-level minimizing mutator
+        Delete a random node but its children (i.e. link them to the parent of the deleted node)"""
 
         self.__del_node(False)
 
@@ -270,7 +281,10 @@ class XmlMutatorMin:
         # Log something
         if self.verbose:
             but_or_and = "and" if delete_children else "but"
-            print("Deleting tag #%i '%s' %s its children" % (rand_elem_id, rand_elem.tag, but_or_and))
+            print(
+                "Deleting tag #%i '%s' %s its children"
+                % (rand_elem_id, rand_elem.tag, but_or_and)
+            )
 
         if delete_children is False:
             # Link children of the random (soon to be deleted) node to its parent
@@ -282,8 +296,8 @@ class XmlMutatorMin:
 
     def __del_content(self):
 
-        """ High-level minimizing mutator
-            Delete the attributes and children of a random node """
+        """High-level minimizing mutator
+        Delete the attributes and children of a random node"""
 
         # Select a node to modify
         (rand_elem_id, rand_elem) = self.__pick_element()
@@ -297,8 +311,8 @@ class XmlMutatorMin:
 
     def __del_attribute(self):
 
-        """ High-level minimizing mutator
-            Delete a random attribute from a random node """
+        """High-level minimizing mutator
+        Delete a random attribute from a random node"""
 
         # Select a node to modify
         (rand_elem_id, rand_elem) = self.__pick_element()
@@ -318,7 +332,10 @@ class XmlMutatorMin:
 
         # Log something
         if self.verbose:
-            print("Deleting attribute #%i '%s' of tag #%i '%s'" % (rand_attrib_id, rand_attrib, rand_elem_id, rand_elem.tag))
+            print(
+                "Deleting attribute #%i '%s' of tag #%i '%s'"
+                % (rand_attrib_id, rand_attrib, rand_elem_id, rand_elem.tag)
+            )
 
         # Delete the attribute
         rand_elem.attrib.pop(rand_attrib)
@@ -329,4 +346,3 @@ class XmlMutatorMin:
 
         # High-level mutation
         self.__exec_among(self, self.hl_mutators_all, min, max)
-
diff --git a/utils/custom_mutators/common.py b/utils/custom_mutators/common.py
index 9a1ef0a3..44a5056a 100644
--- a/utils/custom_mutators/common.py
+++ b/utils/custom_mutators/common.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 # encoding: utf-8
-'''
+"""
 Module containing functions shared between multiple AFL modules
 
 @author:     Christian Holler (:decoder)
@@ -12,7 +12,7 @@ License, v. 2.0. If a copy of the MPL was not distributed with this
 file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 @contact:    choller@mozilla.com
-'''
+"""
 
 from __future__ import print_function
 import random
@@ -23,18 +23,18 @@ import re
 def randel(l):
     if not l:
         return None
-    return l[random.randint(0, len(l)-1)]
+    return l[random.randint(0, len(l) - 1)]
 
 
 def randel_pop(l):
     if not l:
         return None
-    return l.pop(random.randint(0, len(l)-1))
+    return l.pop(random.randint(0, len(l) - 1))
 
 
 def write_exc_example(data, exc):
-    exc_name = re.sub(r'[^a-zA-Z0-9]', '_', repr(exc))
+    exc_name = re.sub(r"[^a-zA-Z0-9]", "_", repr(exc))
 
     if not os.path.exists(exc_name):
-        with open(exc_name, 'w') as f:
+        with open(exc_name, "w") as f:
             f.write(data)
diff --git a/utils/custom_mutators/example.py b/utils/custom_mutators/example.py
index cf659e5a..3a6d22e4 100644
--- a/utils/custom_mutators/example.py
+++ b/utils/custom_mutators/example.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 # encoding: utf-8
-'''
+"""
 Example Python Module for AFLFuzz
 
 @author:     Christian Holler (:decoder)
@@ -12,7 +12,7 @@ License, v. 2.0. If a copy of the MPL was not distributed with this
 file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 @contact:    choller@mozilla.com
-'''
+"""
 
 import random
 
@@ -26,12 +26,12 @@ COMMANDS = [
 
 
 def init(seed):
-    '''
+    """
     Called once when AFLFuzz starts up. Used to seed our RNG.
 
     @type seed: int
     @param seed: A 32-bit random value
-    '''
+    """
     random.seed(seed)
 
 
@@ -40,7 +40,7 @@ def deinit():
 
 
 def fuzz(buf, add_buf, max_size):
-    '''
+    """
     Called per fuzzing iteration.
 
     @type buf: bytearray
@@ -55,13 +55,14 @@ def fuzz(buf, add_buf, max_size):
 
     @rtype: bytearray
     @return: A new bytearray containing the mutated data
-    '''
+    """
     ret = bytearray(100)
 
     ret[:3] = random.choice(COMMANDS)
 
     return ret
 
+
 # Uncomment and implement the following methods if you want to use a custom
 # trimming algorithm. See also the documentation for a better API description.
 
diff --git a/utils/custom_mutators/simple-chunk-replace.py b/utils/custom_mutators/simple-chunk-replace.py
index df2f4ca7..c57218dd 100644
--- a/utils/custom_mutators/simple-chunk-replace.py
+++ b/utils/custom_mutators/simple-chunk-replace.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 # encoding: utf-8
-'''
+"""
 Simple Chunk Cross-Over Replacement Module for AFLFuzz
 
 @author:     Christian Holler (:decoder)
@@ -12,24 +12,24 @@ License, v. 2.0. If a copy of the MPL was not distributed with this
 file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 @contact:    choller@mozilla.com
-'''
+"""
 
 import random
 
 
 def init(seed):
-    '''
+    """
     Called once when AFLFuzz starts up. Used to seed our RNG.
 
     @type seed: int
     @param seed: A 32-bit random value
-    '''
+    """
     # Seed our RNG
     random.seed(seed)
 
 
 def fuzz(buf, add_buf, max_size):
-    '''
+    """
     Called per fuzzing iteration.
 
     @type buf: bytearray
@@ -44,7 +44,7 @@ def fuzz(buf, add_buf, max_size):
 
     @rtype: bytearray
     @return: A new bytearray containing the mutated data
-    '''
+    """
     # Make a copy of our input buffer for returning
     ret = bytearray(buf)
 
@@ -58,7 +58,9 @@ def fuzz(buf, add_buf, max_size):
     rand_dst_idx = random.randint(0, len(buf))
 
     # Make the chunk replacement
-    ret[rand_dst_idx:rand_dst_idx + fragment_len] = add_buf[rand_src_idx:rand_src_idx + fragment_len]
+    ret[rand_dst_idx : rand_dst_idx + fragment_len] = add_buf[
+        rand_src_idx : rand_src_idx + fragment_len
+    ]
 
     # Return data
     return ret
diff --git a/utils/custom_mutators/wrapper_afl_min.py b/utils/custom_mutators/wrapper_afl_min.py
index ecb03b55..5cd60031 100644
--- a/utils/custom_mutators/wrapper_afl_min.py
+++ b/utils/custom_mutators/wrapper_afl_min.py
@@ -27,7 +27,7 @@ def log(text):
 
 def init(seed):
     """
-          Called once when AFL starts up. Seed is used to identify the AFL instance in log files
+    Called once when AFL starts up. Seed is used to identify the AFL instance in log files
     """
 
     global __mutator__
@@ -72,7 +72,10 @@ def fuzz(buf, add_buf, max_size):
     if via_buffer:
         try:
             __mutator__.init_from_string(buf_str)
-            log("fuzz(): Mutator successfully initialized with AFL buffer (%d bytes)" % len(buf_str))
+            log(
+                "fuzz(): Mutator successfully initialized with AFL buffer (%d bytes)"
+                % len(buf_str)
+            )
         except Exception:
             via_buffer = False
             log("fuzz(): Can't initialize mutator with AFL buffer")
@@ -104,7 +107,7 @@ def fuzz(buf, add_buf, max_size):
 
 
 # Main (for debug)
-if __name__ == '__main__':
+if __name__ == "__main__":
 
     __log__ = True
     __log_file__ = "/dev/stdout"
@@ -112,7 +115,9 @@ if __name__ == '__main__':
 
     init(__seed__)
 
-    in_1 = bytearray("<foo ddd='eeee'>ffff<a b='c' d='456' eee='ffffff'>zzzzzzzzzzzz</a><b yyy='YYY' zzz='ZZZ'></b></foo>")
+    in_1 = bytearray(
+        "<foo ddd='eeee'>ffff<a b='c' d='456' eee='ffffff'>zzzzzzzzzzzz</a><b yyy='YYY' zzz='ZZZ'></b></foo>"
+    )
     in_2 = bytearray("<abc abc123='456' abcCBA='ppppppppppppppppppppppppppppp'/>")
     out = fuzz(in_1, in_2)
     print(out)
diff --git a/utils/libdislocator/libdislocator.so.c b/utils/libdislocator/libdislocator.so.c
index c041fec6..1b247c86 100644
--- a/utils/libdislocator/libdislocator.so.c
+++ b/utils/libdislocator/libdislocator.so.c
@@ -168,7 +168,7 @@ static void *__dislocator_alloc(size_t len) {
 
   u8 *   ret, *base;
   size_t tlen;
-  int    flags, fd, sp;
+  int    flags, protflags, fd, sp;
 
   if (total_mem + len > max_mem || total_mem + len < total_mem) {
 
@@ -191,8 +191,14 @@ static void *__dislocator_alloc(size_t len) {
 
   base = NULL;
   tlen = (1 + PG_COUNT(rlen + 8)) * PAGE_SIZE;
+  protflags = PROT_READ | PROT_WRITE;
   flags = MAP_PRIVATE | MAP_ANONYMOUS;
   fd = -1;
+#if defined(PROT_MAX)
+  // apply when sysctl vm.imply_prot_max is set to 1
+  // no-op otherwise
+  protflags |= PROT_MAX(PROT_READ | PROT_WRITE);
+#endif
 #if defined(USEHUGEPAGE)
   sp = (rlen >= SUPER_PAGE_SIZE && !(rlen % SUPER_PAGE_SIZE));
 
@@ -215,7 +221,7 @@ static void *__dislocator_alloc(size_t len) {
   (void)sp;
 #endif
 
-  ret = (u8 *)mmap(base, tlen, PROT_READ | PROT_WRITE, flags, fd, 0);
+  ret = (u8 *)mmap(base, tlen, protflags, flags, fd, 0);
 #if defined(USEHUGEPAGE)
   /* We try one more time with regular call */
   if (ret == MAP_FAILED) {
@@ -229,7 +235,7 @@ static void *__dislocator_alloc(size_t len) {
   #elif defined(__sun)
     flags &= -MAP_ALIGN;
   #endif
-    ret = (u8 *)mmap(NULL, tlen, PROT_READ | PROT_WRITE, flags, fd, 0);
+    ret = (u8 *)mmap(NULL, tlen, protflags, flags, fd, 0);
 
   }
 
diff --git a/utils/persistent_mode/persistent_demo_new.c b/utils/persistent_mode/persistent_demo_new.c
index 7e694696..285f50aa 100644
--- a/utils/persistent_mode/persistent_demo_new.c
+++ b/utils/persistent_mode/persistent_demo_new.c
@@ -70,7 +70,7 @@ int main(int argc, char **argv) {
 
     len = __AFL_FUZZ_TESTCASE_LEN;  // do not use the macro directly in a call!
 
-    fprintf(stderr, "input: %zd \"%s\"\n", len, buf);
+    // fprintf(stderr, "input: %zd \"%s\"\n", len, buf);
 
     /* do we have enough data? */
     if (len < 8) continue;